From c5232bccc142df2fc8a4b4a5aef144ce1284b1e3 Mon Sep 17 00:00:00 2001
From: hjq <770690987@qq.com>
Date: Fri, 12 Jun 2026 11:09:15 +0800
Subject: [PATCH] =?UTF-8?q?=E6=8A=93=E5=8F=96=E7=94=9F=E4=BA=A7=E5=B7=A5?=
 =?UTF-8?q?=E5=8D=95=EF=BC=8C=E6=8A=93=E5=8F=96=E5=8F=91=E6=96=99=E5=BC=82?=
 =?UTF-8?q?=E5=B8=B8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 browser_login/fetch_receipt_details_full.py   | 107 +++++---
 .../fetch_receipt_details_incremental.py      | 251 ++++++++++--------
 web_ui/app.py                                 |   4 +
 3 files changed, 208 insertions(+), 154 deletions(-)

diff --git a/browser_login/fetch_receipt_details_full.py b/browser_login/fetch_receipt_details_full.py
index a87aa61..fdb20b0 100644
--- a/browser_login/fetch_receipt_details_full.py
+++ b/browser_login/fetch_receipt_details_full.py
@@ -13,7 +13,7 @@ from login import get_page, log
 from config import OUTPUT_DIR
 
 HOME_URL = "https://yunmes.tftykj.cn/"
-API_TARGET = "ReceiptDetailsCheck_SearchList_Proxy"
+API_TARGET = "ReceiptDetailsCheckFinace_SearchList"
 SAVE_PATH = OUTPUT_DIR / "receipt_details_full_clean.json"
 
 def fetch_receipt_details_full():
@@ -32,53 +32,76 @@ def fetch_receipt_details_full():
             all_clean_items = []
     
     try:
-        log("INFO", f"正在回到主页起点: {HOME_URL}")
-        page.get(HOME_URL)
+        TARGET_URL = "https://yunmes.tftykj.cn/ReceiptDetailsCheckFinace"
+        log("INFO", f"正在直接访问目标页面: {TARGET_URL}")
+        page.get(TARGET_URL)
         page.wait.load_start()
         time.sleep(2)
         
-        menus = [
-            ("第一层: 业务统计报表", 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'),
-            ("第二层: 财务业务报表", 'text:财务业务报表'),
-            ("第三层: 财务收货明细报表", 'text:财务收货明细报表')
-        ]
-        
-        log("INFO", "开始模拟人工点击左侧导航菜单...")
-        for name, xpath in menus:
-            ele = page.ele(xpath, timeout=5)
-            if ele:
-                try: ele.click()
-                except: page.run_js("arguments[0].click();", ele)
-                time.sleep(1.5)
-            else:
-                log("ERR", f"找不到菜单元素: {name}")
-                return
-                
-        log("OK", "✅ 成功点开收货明细报表界面！")
-        
-        # 点击空白处隐藏菜单
-        blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div'
-        blank_ele = page.ele(blank_xpath, timeout=3)
-        if blank_ele:
-            try: blank_ele.click()
-            except: page.run_js("arguments[0].click();", blank_ele)
-            time.sleep(0.5)
+        # 等待数据表格区域出现
+        table = page.ele("xpath://table | .el-table__body", timeout=15)
+        if table:
+            log("OK", "✅ 成功打开财务收货明细报表界面！")
+        else:
+            log("WARN", "表格元素未找到，继续执行")
         
         log("INFO", f"开启底层数据拦截网: {API_TARGET}")
         page.listen.start(API_TARGET)
         
-        packet = page.listen.wait(timeout=10)
+        # Try to set the query date range to the first and last day of the current month; no other filter fields are modified here.
+        import datetime, calendar
+        now = datetime.datetime.now()
+        first_day = datetime.date(now.year, now.month, 1).strftime('%Y-%m-%d')
+        last_day = datetime.date(now.year, now.month, calendar.monthrange(now.year, now.month)[1]).strftime('%Y-%m-%d')
         
-        if not packet:
-            log("INFO", "尝试寻找并点击页面上的【查询】按钮...")
-            query_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span'
-            query_btn = page.ele(query_btn_xpath, timeout=3)
+        log("INFO", f"正在自动设置查询时间范围: {first_day} 至 {last_day}")
+        
+        page.run_js(f"""
+            try {{
+                var dates = document.querySelectorAll('.datebox-f, .datetimebox-f, .el-date-editor input');
+                if (dates.length >= 2) {{
+                    dates[0].value = '{first_day}';
+                    dates[1].value = '{last_day}';
+                    dates[0].dispatchEvent(new Event('input', {{ bubbles: true }}));
+                    dates[0].dispatchEvent(new Event('change', {{ bubbles: true }}));
+                    dates[1].dispatchEvent(new Event('input', {{ bubbles: true }}));
+                    dates[1].dispatchEvent(new Event('change', {{ bubbles: true }}));
+                }}
+            }} catch(e) {{ console.log(e); }}
+        """)
+        time.sleep(1)
+        
+        # 寻找并点击页面上的【查询】按钮
+        log("INFO", "尝试寻找并点击页面上的【查询】按钮...")
+        
+        # 使用 DrissionPage 内置选择器尝试寻找
+        query_btn = page.ele('text=查询', timeout=3)
+        if not query_btn:
+            query_btn = page.ele('xpath://button[contains(., "查询")]', timeout=3)
             
-            if query_btn:
-                try: query_btn.click()
-                except: page.run_js("arguments[0].click();", query_btn)
-                packet = page.listen.wait(timeout=15)
+        if query_btn:
+            try: query_btn.click()
+            except: page.run_js("arguments[0].click();", query_btn)
+        else:
+            log("WARN", "常规选择器找不到查询按钮，尝试使用全局 JS 强行寻找...")
+            # 暴力兜底：通过 JS 遍历所有按钮和链接点击
+            clicked = page.run_js("""
+                var btns = document.querySelectorAll('button, a, .l-btn, .el-button');
+                for(var i=0; i<btns.length; i++) {
+                    if(btns[i].innerText && btns[i].innerText.indexOf('查询') !== -1) {
+                        btns[i].click();
+                        return true;
+                    }
+                }
+                return false;
+            """)
+            if not clicked:
+                log("ERR", "找不到查询按钮！")
+                page.listen.stop()
+                return
                 
+        packet = page.listen.wait(timeout=15)
+            
         if not packet:
             log("ERR", "未能拦截到第一页数据，可能网络超时或查询未触发。")
             page.listen.stop()
@@ -91,8 +114,12 @@ def fetch_receipt_details_full():
         body = packet.response.body
         data = body if isinstance(body, (dict, list)) else json.loads(body)
         
-        # 设定开始抓取的页码，1表示从头开始抓全量数据
-        target_resume_page = 690
+        # 设定开始抓取的页码，根据已有数据量动态计算（假设每页50条）
+        target_resume_page = 1
+        if len(all_clean_items) > 0:
+            target_resume_page = max(1, len(all_clean_items) // 50)
+            # 截断已有数据，防止与即将重新抓取的页数重叠导致重复
+            all_clean_items = all_clean_items[:(target_resume_page - 1) * 50]
         
         total_count = 0
         if isinstance(data, dict) and "result" in data:
diff --git a/browser_login/fetch_receipt_details_incremental.py b/browser_login/fetch_receipt_details_incremental.py
index f3fa0c9..9e525be 100644
--- a/browser_login/fetch_receipt_details_incremental.py
+++ b/browser_login/fetch_receipt_details_incremental.py
@@ -13,6 +13,8 @@ import subprocess
 import math
 import random
 import sqlite3
+import datetime
+import calendar
 from pathlib import Path
 
 sys.path.insert(0, str(Path(__file__).parent))
@@ -20,7 +22,7 @@ from login import get_page, log
 from config import DB_PATH
 
 HOME_URL = "https://yunmes.tftykj.cn/"
-API_TARGET = "ReceiptDetailsCheck_SearchList_Proxy"
+API_TARGET = "ReceiptDetailsCheckFinace_SearchList"
 
 def get_local_count(conn):
     """获取本地数据库已有的总记录数"""
@@ -54,50 +56,76 @@ def fetch_receipt_details_incremental():
     page = get_page(port=9222)
     
     try:
-        log("INFO", f"正在回到主页起点: {HOME_URL}")
-        page.get(HOME_URL)
+        TARGET_URL = "https://yunmes.tftykj.cn/ReceiptDetailsCheckFinace"
+        log("INFO", f"正在直接访问目标页面: {TARGET_URL}")
+        page.get(TARGET_URL)
         page.wait.load_start()
         time.sleep(2)
         
-        menus = [
-            ("第一层: 业务统计报表", 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'),
-            ("第二层: 采购业务报表", 'xpath:/html/body/div[7]/div/div[1]/div/div[4]/div/p'),
-            ("第三层: 收货明细报表", 'xpath:/html/body/div[8]/div/div[1]/div/div[4]/div/p')
-        ]
-        
-        log("INFO", "模拟点击左侧导航菜单...")
-        for name, xpath in menus:
-            ele = page.ele(xpath, timeout=5)
-            if ele:
-                try: ele.click()
-                except: page.run_js("arguments[0].click();", ele)
-                time.sleep(1.5)
-            else:
-                log("ERR", f"找不到菜单元素: {name}")
-                return
-                
-        log("OK", "✅ 成功点开收货明细报表界面！")
-        
-        # 隐藏菜单
-        blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div'
-        blank_ele = page.ele(blank_xpath, timeout=3)
-        if blank_ele:
-            try: blank_ele.click()
-            except: page.run_js("arguments[0].click();", blank_ele)
-            time.sleep(0.5)
+        # 等待数据表格区域出现
+        table = page.ele("xpath://table | .el-table__body", timeout=15)
+        if table:
+            log("OK", "✅ 成功打开财务收货明细报表界面！")
+        else:
+            log("WARN", "表格元素未找到，继续执行")
         
         log("INFO", f"开启底层数据拦截网: {API_TARGET}")
         page.listen.start(API_TARGET)
         
-        packet = page.listen.wait(timeout=10)
-        if not packet:
-            query_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span'
-            query_btn = page.ele(query_btn_xpath, timeout=3)
-            if query_btn:
-                try: query_btn.click()
-                except: page.run_js("arguments[0].click();", query_btn)
-                packet = page.listen.wait(timeout=15)
+        # Try to set the query date range to the first and last day of the current month; no other filter fields are modified here.
+        now = datetime.datetime.now()
+        first_day = datetime.date(now.year, now.month, 1).strftime('%Y-%m-%d')
+        last_day = datetime.date(now.year, now.month, calendar.monthrange(now.year, now.month)[1]).strftime('%Y-%m-%d')
+        
+        log("INFO", f"正在自动设置查询时间范围: {first_day} 至 {last_day}")
+        
+        page.run_js(f"""
+            try {{
+                var dates = document.querySelectorAll('.datebox-f, .datetimebox-f, .el-date-editor input');
+                if (dates.length >= 2) {{
+                    // 这里适配 ElementUI 或 EasyUI 的日期输入框
+                    dates[0].value = '{first_day}';
+                    dates[1].value = '{last_day}';
+                    // 触发 input 和 change 事件让 Vue/React 感知到值的改变
+                    dates[0].dispatchEvent(new Event('input', {{ bubbles: true }}));
+                    dates[0].dispatchEvent(new Event('change', {{ bubbles: true }}));
+                    dates[1].dispatchEvent(new Event('input', {{ bubbles: true }}));
+                    dates[1].dispatchEvent(new Event('change', {{ bubbles: true }}));
+                }}
+            }} catch(e) {{ console.log(e); }}
+        """)
+        time.sleep(1)
+        
+        # 寻找并点击页面上的【查询】按钮，不再盲目等待刷新
+        log("INFO", "尝试寻找并点击页面上的【查询】按钮...")
+        
+        # 使用 DrissionPage 内置选择器尝试寻找
+        query_btn = page.ele('text=查询', timeout=3)
+        if not query_btn:
+            query_btn = page.ele('xpath://button[contains(., "查询")]', timeout=3)
+            
+        if query_btn:
+            try: query_btn.click()
+            except: page.run_js("arguments[0].click();", query_btn)
+        else:
+            log("WARN", "常规选择器找不到查询按钮，尝试使用全局 JS 强行寻找...")
+            # 暴力兜底：通过 JS 遍历所有按钮和链接点击
+            clicked = page.run_js("""
+                var btns = document.querySelectorAll('button, a, .l-btn, .el-button');
+                for(var i=0; i<btns.length; i++) {
+                    if(btns[i].innerText && btns[i].innerText.indexOf('查询') !== -1) {
+                        btns[i].click();
+                        return true;
+                    }
+                }
+                return false;
+            """)
+            if not clicked:
+                log("ERR", "找不到查询按钮！")
+                return
                 
+        packet = page.listen.wait(timeout=15)
+            
         if not packet:
             log("ERR", "未能拦截到第一页数据，无法获取线上总条数。")
             return
@@ -109,59 +137,82 @@ def fetch_receipt_details_incremental():
         if isinstance(data, dict) and "result" in data:
             remote_count = data["result"].get("totalCount", 0)
             
-        log("INFO", f"🌐 线上 ERP 系统当前总条数: {remote_count} 条")
+        log("INFO", f"🌐 本次查询条件下，线上 ERP 系统共有数据: {remote_count} 条")
         
-        if remote_count <= local_count:
-            log("OK", "🎉 本地数据已是最新状态，无需抓取！")
+        if remote_count == 0:
+            log("OK", "🎉 本次查询条件下无数据，无需抓取！")
             return
             
-        new_items_count = remote_count - local_count
-        log("INFO", f"🔥 发现新增数据: {new_items_count} 条！准备进行增量跳页抓取...")
-        
-        # 每页 50 条，计算应该从哪一页开始抓
-        # 例如: 本地有 37584 条，37584 // 50 = 751 页是满的，所以从第 752 页开始抓
-        start_page = math.floor(local_count / 50) + 1
         end_page = math.ceil(remote_count / 50)
-        
-        log("INFO", f"🎯 智能跳页计算完毕：直接跳转至第 {start_page} 页 (目标到 {end_page} 页)")
-        
-        # 执行跳转
-        if start_page > 1:
-            jumper_input_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/span[3]/div/div//input'
-            input_ele = page.ele(jumper_input_xpath, timeout=5)
-            
-            if not input_ele:
-                jumper_input_xpath = 'xpath://input[@type="number" and @aria-label="页"]'
-                input_ele = page.ele(jumper_input_xpath, timeout=5)
-                
-            if input_ele:
-                input_ele.clear()
-                input_ele.input(str(start_page))
-                time.sleep(0.5)
-                input_ele.input('\n')
-                
-                # 等待跳转后的数据响应
-                packet = page.listen.wait(timeout=15)
-                if not packet:
-                    log("ERR", "跳转失败，未拦截到目标页的数据请求。")
-                    return
-                log("OK", f"✅ 成功跳转至第 {start_page} 页并截获数据！")
-            else:
-                log("ERR", "找不到页码输入框，增量跳转失败！")
-                return
+        log("INFO", f"🎯 准备逐页抓取并比对入库，共需处理 {end_page} 页...")
         
         # =========================================================
-        # 开始处理新增页面的数据并入库
+        # 开始处理数据并比对入库
         # =========================================================
-        current_page = start_page
+        current_page = 1
         cursor = conn.cursor()
         total_inserted = 0
+        total_updated = 0
         
         while current_page <= end_page:
-            body = packet.response.body
-            data = body if isinstance(body, (dict, list)) else json.loads(body)
+            # 如果是第一页，直接处理已有的 packet，不需要点击下一页
+            if current_page > 1:
+                delay = random.uniform(1.5, 3.5)
+                log("INFO", f"⏳ 停顿 {delay:.2f} 秒后准备获取第 {current_page} 页...")
+                time.sleep(delay)
+                
+                next_btn = None
+                for _ in range(3):
+                    # 优先使用 pagination-next，如果不行再尝试其他类名
+                    next_btn = page.ele('xpath://*[contains(@class, "pagination-next")]', timeout=3)
+                    if not next_btn:
+                        next_btn = page.ele('xpath://button[contains(@class, "btn-next")]', timeout=3)
+                    if next_btn:
+                        break
+                    time.sleep(1)
+                
+                if not next_btn:
+                    next_btn = page.ele('xpath://i[contains(@class, "el-icon-arrow-right")]/parent::button', timeout=3)
+                
+                if not next_btn:
+                    log("ERR", "找不到下一页按钮，可能页面异常或已到底部，停止抓取。")
+                    break
+                    
+                # 检查按钮是否被禁用
+                class_str = str(next_btn.attr("class"))
+                aria_disabled = next_btn.attr("aria-disabled")
+                is_disabled_attr = next_btn.attr("disabled") is not None
+                
+                # 如果这个按钮外部包着一个 <li> 或者是其他容器，也要检查它的父元素是不是 disabled
+                parent_class_str = ""
+                try:
+                    parent_ele = next_btn.parent()
+                    parent_class_str = str(parent_ele.attr("class"))
+                except:
+                    pass
+                
+                if "disabled" in class_str or "disabled" in parent_class_str or is_disabled_attr or aria_disabled == "true":
+                    log("OK", "🏁 下一页按钮已被禁用，说明已经到达最后一页！")
+                    break
+                
+                try: 
+                    # 尝试 JS 点击（翻页按钮有时会被其他浮层遮挡，JS 点击最稳妥）
+                    page.run_js("arguments[0].click();", next_btn)
+                except Exception as e: 
+                    log("ERR", f"JS 点击下一页失败: {e}，尝试普通点击...")
+                    next_btn.click()
+                    
+                packet = page.listen.wait(timeout=15)
+                if not packet:
+                    log("ERR", f"第 {current_page} 页请求超时或未触发，中止抓取。")
+                    break
+                    
+                body = packet.response.body
+                data = body if isinstance(body, (dict, list)) else json.loads(body)
             
             inserted_this_page = 0
+            updated_this_page = 0
+            
             if isinstance(data, dict) and "result" in data:
                 items = data["result"].get("items", [])
                 
@@ -170,11 +221,10 @@ def fetch_receipt_details_incremental():
                         row_no = item.get("rowsNum")
                         mat_code = item.get("materialCode")
                         
-                        # 检查是否存在，如果存在则更新数量和金额，不存在则插入
-                        cursor.execute('SELECT id FROM receipt_details WHERE purchase_order_code = ? AND row_no = ? AND material_code = ?', (po_code, row_no, mat_code))
+                        # 检查是否存在，根据采购订单号和物料代码进行双条件比对
+                        cursor.execute('SELECT id FROM receipt_details WHERE purchase_order_code = ? AND material_code = ?', (po_code, mat_code))
                         existing_record = cursor.fetchone()
                         
-                        # 进货数量（件数）永远只取原始的 plannedPurchaseQuantity，不取转换后的
                         p_qty = item.get("plannedPurchaseQuantity")
                         r_qty = item.get("convertGoodsQuantity") if item.get("convertGoodsQuantity") is not None else item.get("goodsQuantity")
                         
@@ -184,8 +234,8 @@ def fetch_receipt_details_incremental():
                                 SET purchase_qty = ?, receive_qty = ?, receive_price = ?, total_amount = ?
                                 WHERE id = ?
                             ''', (p_qty, r_qty, item.get("receivePrice"), item.get("receiveAmount"), existing_record[0]))
-                            # 算作更新，为了记录日志
-                            inserted_this_page += 1
+                            updated_this_page += 1
+                            total_updated += 1
                         else:
                             cursor.execute('''
                             INSERT INTO receipt_details (
@@ -217,41 +267,14 @@ def fetch_receipt_details_incremental():
                             total_inserted += 1
                         
                 conn.commit()
-                log("OK", f"第 {current_page} 页处理完毕，成功截获 {inserted_this_page} 条数据并存入数据库。")
-            
-            # 还有下一页则继续点击
-            if current_page < end_page:
-                delay = random.uniform(1.5, 3.5)
-                log("INFO", f"⏳ 停顿 {delay:.2f} 秒后点击下一页...")
-                time.sleep(delay)
+                log("OK", f"第 {current_page} 页处理完毕，新增 {inserted_this_page} 条，更新 {updated_this_page} 条。")
+            else:
+                log("ERR", f"第 {current_page} 页数据结构异常。")
+                break
                 
-                # 同步全量脚本的优化：重试机制与兼容的类名匹配
-                next_btn = None
-                for _ in range(3):
-                    next_btn = page.ele('xpath://button[contains(@class, "btn-next")]', timeout=3)
-                    if next_btn:
-                        break
-                    time.sleep(1)
-                
-                # 备用定位方式：直接找右箭头图标所在的按钮
-                if not next_btn:
-                    next_btn = page.ele('xpath://i[contains(@class, "el-icon-arrow-right")]/parent::button', timeout=3)
-                
-                if next_btn:
-                    try: next_btn.click()
-                    except: page.run_js("arguments[0].click();", next_btn)
-                    
-                    packet = page.listen.wait(timeout=15)
-                    if not packet:
-                        log("ERR", f"第 {current_page + 1} 页请求超时！")
-                        break
-                else:
-                    log("ERR", "重试 3 次后仍然找不到下一页按钮！")
-                    break
-                    
             current_page += 1
             
-        log("OK", f"🎉 增量同步大功告成！总计向数据库执行了 {total_inserted} 次插入/更新操作！")
+        log("OK", f"🎉 增量抓取全部结束！总计新增 {total_inserted} 条，更新 {total_updated} 条。")
             
     except Exception as e:
         log("ERR", f"发生全局异常: {e}")
diff --git a/web_ui/app.py b/web_ui/app.py
index b4de997..f44b2da 100644
--- a/web_ui/app.py
+++ b/web_ui/app.py
@@ -171,6 +171,7 @@ def get_receipts():
     supplier_name = request.args.get('supplier_name', '').strip()
     material_name = request.args.get('material_name', '').strip()
     po_code = request.args.get('po_code', '').strip()
+    material_code = request.args.get('material_code', '').strip()
 
     conn = get_db_connection()
     
@@ -187,6 +188,9 @@ def get_receipts():
     if po_code:
         query_conditions.append("purchase_order_code LIKE ?")
         params.append(f"%{po_code}%")
+    if material_code:
+        query_conditions.append("material_code LIKE ?")
+        params.append(f"%{material_code}%")
         
     where_clause = ""
     if query_conditions: