新内容

2026-04-27 15:23:00 +08:00
parent fc8f14b301
commit 29954a7af0
8 changed files with 1072 additions and 2 deletions
--- a/browser_login/auto_launcher.py
+++ b/browser_login/auto_launcher.py
@@ -0,0 +1,80 @@
+"""
+ERP 数据同步器 - 自动化浏览器拉起与登录守护模块
+目标:
+1. 自动寻找本地安装的 Chrome 浏览器。
+2. 以 9222 端口和独立的用户数据目录启动（互不干扰，持久化登录状态）。
+3. 弹出 ERP 登录页面，等待用户手动登录（解决滑块验证码等问题）。
+4. 登录成功后，将浏览器挂在后台作为保活引擎。
+"""
+import sys
+import time
+from pathlib import Path
+from DrissionPage import ChromiumOptions, ChromiumPage
+from config import DATA_DIR
+
+# Dedicated Chrome user-data directory, kept under the persistent data directory so the login state is not lost on restart
+USER_DATA_DIR = DATA_DIR / "browser_login" / "chrome_user_data"
+HOME_URL = "https://yunmes.tftykj.cn/"  # page opened for the manual login
+
+def start_and_wait_login():
+    """Start Chrome on debugging port 9222, open the ERP home page and wait for a manual login.
+
+    The function polls the page roughly every two seconds for a menu element
+    (located by a fixed XPath) whose presence is treated as proof of a
+    successful login. There is no timeout: polling continues until the
+    element is found.
+
+    Returns:
+        The ``ChromiumPage`` object once login is detected, or ``None`` when
+        the browser could not be started.
+    """
+    print("🚀 [1/3] 正在配置内置 Chrome 浏览器引擎...")
+
+    co = ChromiumOptions()
+    co.set_local_port(9222)
+    # Use a dedicated user-data folder for this browser profile.
+    co.set_user_data_path(str(USER_DATA_DIR))
+    co.ignore_certificate_errors()
+
+    print("🌍 [2/3] 正在拉起浏览器并前往 ERP 登录页...")
+    try:
+        page = ChromiumPage(co)
+    except Exception as e:
+        print(f"❌ 启动浏览器失败，请确保电脑安装了 Chrome 浏览器！报错信息: {e}")
+        return None
+
+    page.get(HOME_URL)
+
+    print("\n" + "="*50)
+    print("👀 [等待人工介入] 请在弹出的浏览器窗口中完成登录操作！")
+    print("💡 提示: 输入账号密码、通过滑块验证码，直到进入 ERP 系统主界面。")
+    print("="*50 + "\n")
+
+    # Element used to verify that the ERP main interface has been reached.
+    # NOTE(review): the XPath is layout-dependent and may need adjusting to
+    # the real post-login page.
+    menu_xpath = 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'
+
+    is_logged_in = False
+    while not is_logged_in:
+        time.sleep(2)
+        try:
+            if page.ele(menu_xpath, timeout=1):
+                is_logged_in = True
+        except Exception:
+            # A failed lookup just means "not logged in yet". Catching
+            # Exception (not a bare except) keeps Ctrl+C able to abort the wait.
+            pass
+
+        if not is_logged_in:
+            print("⏳ 等待登录中...")
+
+    print("\n✅ [3/3] 检测到登录成功！")
+    print("🔒 登录状态已保存，你可以随时关闭或者最小化这个浏览器窗口。")
+    print("🤖 爬虫引擎已挂载至后台，可以开始点击前端界面的【同步数据】按钮了！\n")
+
+    return page
+
+if __name__ == "__main__":
+    # Running this file on its own launches the browser
+    page = start_and_wait_login()
+    if page:
+        # Loop forever so the script does not exit (in the real desktop app this is where the Flask backend and the Webview window would be started)
+        try:
+            while True:
+                time.sleep(10)
+        except KeyboardInterrupt:
+            print("👋 守护进程已退出。")
--- a/browser_login/bom_query.py
+++ b/browser_login/bom_query.py
@@ -14,11 +14,10 @@ from pathlib import Path

 sys.path.insert(0, str(Path(__file__).parent))
 from login import get_page, login, login_manual, log, dump_page_state
+from config import OUTPUT_DIR

 BOM_PAGE_URL = "https://yunmes.tftykj.cn/MaterialBom"
 BOM_API_PATH = "MaterialBom_SearchList_Proxy"
-OUTPUT_DIR   = Path(__file__).parent / "output"
-OUTPUT_DIR.mkdir(exist_ok=True)


 # ── 导航到 BOM 页面 ───────────────────────────────────────────────────────────
--- a/browser_login/config.py
+++ b/browser_login/config.py
@@ -0,0 +1,17 @@
+import sys
+import os
+from pathlib import Path
+
+def get_data_dir():
+    """Return the directory that holds persistent data (database, output files).
+
+    When ``sys.frozen`` is truthy the directory containing ``sys.executable``
+    is returned; otherwise the parent of the directory containing this file.
+    """
+    running_frozen = getattr(sys, 'frozen', False)
+    if not running_frozen:
+        return Path(__file__).parent.parent
+    executable_dir = os.path.dirname(sys.executable)
+    return Path(executable_dir)
+
+DATA_DIR = get_data_dir()
+
+# Shared output directory for the JSON files and the SQLite database
+OUTPUT_DIR = DATA_DIR / "browser_login" / "output"
+OUTPUT_DIR.mkdir(parents=True, exist_ok=True)  # created at import time if missing
+
+DB_PATH = OUTPUT_DIR / "erp_data.db"
--- a/browser_login/fetch_bom_cost_full_tree.py
+++ b/browser_login/fetch_bom_cost_full_tree.py
@@ -0,0 +1,227 @@
+"""
+BOM 成本 - 终极树状结构抓取脚本 (全站 1400+ 父件及 5 层嵌套子件)
+目标: 
+1. 抓取所有父件（成本核算表主页）
+2. 暗网请求所有父件下对应的 BOM 成本数据（扁平的 5 层数据）
+3. 实时清洗并重组为完美嵌套的 JSON 树
+"""
+import sys
+import json
+import time
+import random
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from login import get_page, log
+from config import OUTPUT_DIR
+
+PAGE_URL = "https://yunmes.tftykj.cn/PartBomCostAccounting"
+API_PARENT = "PartBomCostAccounting_SearchList_Proxy"
+
+# File the final result is saved to
+TREE_FILE_PATH = OUTPUT_DIR / "bom_cost_full_tree_final.json"
+
+def build_nested_tree(flat_items):
+    """Convert a flat list of BOM rows into a nested tree.
+
+    Each row is a dict read with ``.get``: ``sonNO`` identifies the node and
+    ``_parentId`` names the ``sonNO`` of its parent. Rows with a falsy
+    ``sonNO`` are skipped; if a ``sonNO`` occurs more than once the last row
+    wins.
+
+    A node becomes a root when its ``_parentId`` matches no collected node
+    (this covers ``None``, an id absent from the list, and a node that names
+    itself as parent), so rows with an unknown parent are kept instead of
+    being silently dropped.
+
+    Args:
+        flat_items: Iterable of row dicts, or a falsy value.
+
+    Returns:
+        A list of root nodes. Every node holds ``bomLevel``,
+        ``childMaterialCode``, ``childMaterialName`` and ``usageQty``
+        (``childrenMaterialConsumption``, default 1); nodes with children
+        additionally hold them under ``sub_items``.
+    """
+    if not flat_items:
+        return []
+
+    nodes_by_id = {}
+    parent_of = {}
+    for item in flat_items:
+        son_no = item.get("sonNO")
+        if not son_no:
+            continue
+        # Linking information is kept outside the node so the output needs
+        # no cleanup of temporary keys afterwards.
+        parent_of[son_no] = item.get("_parentId")
+        nodes_by_id[son_no] = {
+            "bomLevel": item.get("bomLevel"),
+            "childMaterialCode": item.get("childMaterialCode"),
+            "childMaterialName": item.get("childMaterialName"),
+            "usageQty": item.get("childrenMaterialConsumption", 1),
+            "sub_items": [],
+        }
+
+    roots = []
+    for son_no, node in nodes_by_id.items():
+        parent_node = nodes_by_id.get(parent_of[son_no])
+        if parent_node is None or parent_node is node:
+            roots.append(node)
+        else:
+            parent_node["sub_items"].append(node)
+
+    # Leaf nodes carry no "sub_items" key in the output.
+    for node in nodes_by_id.values():
+        if not node["sub_items"]:
+            del node["sub_items"]
+
+    return roots
+
+
+# JavaScript run inside the ERP page to request one parent's BOM cost rows.
+# The three *_PLACEHOLDER tokens are replaced with JSON literals, so ids are
+# valid JavaScript whether they are numbers or strings.
+_COST_TREE_JS = """
+        return new Promise((resolve, reject) => {
+            if (typeof $ !== 'undefined' && $.ajax) {
+                $.ajax({
+                    url: '/api/services/TfTechApi/PartBom/PartBom_SearchByTreeCost',
+                    type: 'POST',
+                    data: {
+                        materialId: MATERIAL_ID_PLACEHOLDER,
+                        partBomCostAccountingId: ACCOUNTING_ID_PLACEHOLDER,
+                        childMaterialCode: '',
+                        childMaterialName: '',
+                        childMaterialSpecification: '',
+                        childMaterialModel: ''
+                    },
+                    headers: {
+                        'referer': REFERER_PLACEHOLDER
+                    },
+                    success: function(response) {
+                        resolve({status: 'success', data: response});
+                    },
+                    error: function(xhr, status, error) {
+                        resolve({status: 'error', data: xhr.responseText || error});
+                    }
+                });
+            } else {
+                resolve({status: 'error', data: 'No jQuery'});
+            }
+        });
+        """
+
+
+def _collect_cost_parents(page):
+    """Page through the parent list and return one dict per parent captured from the API.
+
+    The listener is always stopped, including when an exception is raised.
+    """
+    parents = []
+
+    log("INFO", f"正在访问安全的父件页面: {PAGE_URL}")
+    page.get(PAGE_URL)
+    page.wait.load_start()
+
+    log("INFO", f"开启父件 API 网络监听: {API_PARENT}")
+    page.listen.start(API_PARENT)
+    try:
+        page.refresh()
+
+        next_btn_xpath = "xpath:/html/body/div[1]/div/div[3]/table/tbody/tr/td[10]/a/span/span[2]"
+        current_page = 1
+
+        while True:
+            log("INFO", f"等待第 {current_page} 页父件 API 响应...")
+            packet = page.listen.wait(timeout=20)
+
+            if not packet:
+                log("ERR", f"超时未收到第 {current_page} 页数据，父件扫荡结束。")
+                break
+
+            body = packet.response.body
+            data = body if isinstance(body, (dict, list)) else json.loads(body)
+
+            if not (isinstance(data, dict) and "result" in data):
+                break
+
+            items = data["result"].get("items", [])
+            total_records = data["result"].get("totalCount", 0)
+
+            for item in items:
+                parents.append({
+                    "_id": item.get("id"),  # sent as partBomCostAccountingId
+                    "_materialId": item.get("parentMaterialId"),  # sent as materialId
+                    "parentMaterialCode": item.get("parentMaterialCode"),
+                    "parentMaterialName": item.get("parentMaterialName"),
+                    "bom_cost_tree": [],  # filled in later with the nested tree
+                })
+
+            log("OK", f"提取了 {len(items)} 个父件。总进度: {len(parents)}/{total_records}")
+
+            if len(parents) >= total_records or len(items) == 0:
+                break
+
+            # Move on to the next page.
+            next_btn = page.ele(next_btn_xpath, timeout=5)
+            if not next_btn:
+                log("WARN", "未找到下一页按钮，停止翻页。")
+                break
+
+            parent_a = next_btn.parent(2)
+            # The class attribute may be absent; treat that as "not disabled".
+            if parent_a and "disabled" in (parent_a.attr("class") or ""):
+                log("INFO", "已到达最后一页。")
+                break
+
+            page.run_js("arguments[0].click();", next_btn)
+            time.sleep(1.5)
+            current_page += 1
+    finally:
+        page.listen.stop()
+
+    return parents
+
+
+def _attach_cost_tree(page, parent):
+    """Request the flat BOM cost rows for ``parent`` and store the nested tree on it."""
+    accounting_id = parent.get("_id")
+    referer = f"https://yunmes.tftykj.cn/PartBomCostAccounting/Detail?id={accounting_id}"
+    js_code = (
+        _COST_TREE_JS
+        .replace("MATERIAL_ID_PLACEHOLDER", json.dumps(parent.get("_materialId")))
+        .replace("ACCOUNTING_ID_PLACEHOLDER", json.dumps(accounting_id))
+        .replace("REFERER_PLACEHOLDER", json.dumps(referer))
+    )
+    result = page.run_js(js_code)
+
+    if not result or result.get('status') != 'success':
+        log("ERR", f"  └── 请求失败: {result.get('data') if result else '未知错误'}")
+        return
+
+    data = result.get('data')
+    if isinstance(data, str):
+        try:
+            data = json.loads(data)
+        except ValueError:
+            # Not JSON: left as a string and reported by the check below.
+            pass
+
+    flat_items = data.get("result") if isinstance(data, dict) else None
+    if isinstance(flat_items, list):
+        parent["bom_cost_tree"] = build_nested_tree(flat_items)
+        log("OK", f"  └── 成功重组了一棵包含 {len(flat_items)} 个节点的多层树。")
+    else:
+        log("WARN", "  └── 响应结构异常，未能解析出 BOM 列表。")
+
+
+def _save_cost_tree_progress(parents, done):
+    """Write the first ``done`` parents to TREE_FILE_PATH without the request-only id fields."""
+    clean_save_list = [
+        {key: value for key, value in p.items() if key not in ("_id", "_materialId")}
+        for p in parents[:done]
+    ]
+    with open(TREE_FILE_PATH, "w", encoding="utf-8") as f:
+        json.dump(clean_save_list, f, ensure_ascii=False, indent=2)
+    log("INFO", f"💾 进度已实时保存至 JSON ({done}/{len(parents)})")
+
+
+def fetch_bom_cost_tree():
+    """Fetch every cost-accounting parent and its BOM cost tree, saving them as nested JSON.
+
+    Phase 1 captures the parent list from the paginated API while clicking
+    through the pages. Phase 2 requests each parent's flat BOM rows from
+    inside the page and rebuilds them with ``build_nested_tree``. Progress is
+    written to ``TREE_FILE_PATH`` every 10 parents and after the last one,
+    including when that parent was skipped for lacking ids.
+
+    Any exception is logged rather than raised. Returns ``None``.
+    """
+    log("INFO", "=== 🌳 启动 BOM 成本终极抓取 (多层嵌套自动重组) ===")
+    page = get_page(port=9222)
+
+    try:
+        clean_parents_list = _collect_cost_parents(page)
+        total = len(clean_parents_list)
+
+        log("INFO", f"=== 🚀 开始为 {total} 个父件抓取 BOM 成本树 ===")
+
+        for position, parent in enumerate(clean_parents_list, start=1):
+            if parent.get("_id") and parent.get("_materialId"):
+                parent_code = parent.get("parentMaterialCode", "未知")
+                log("INFO", f"[{position}/{total}] 正在请求 BOM 成本树 (Code: {parent_code})...")
+                _attach_cost_tree(page, parent)
+                time.sleep(random.uniform(0.3, 0.7))
+
+            # Checked for every parent so a skipped one cannot suppress a save.
+            if position % 10 == 0 or position == total:
+                _save_cost_tree_progress(clean_parents_list, position)
+
+        log("OK", f"=== 🏆 终极 BOM 成本多层树状抓取完成！文件路径: {TREE_FILE_PATH} ===")
+
+    except Exception as e:
+        log("ERR", f"发生异常: {e}")
+
+if __name__ == "__main__":  # run the scrape when executed as a script
+    fetch_bom_cost_tree()
--- a/browser_login/fetch_receipt_details_full.py
+++ b/browser_login/fetch_receipt_details_full.py
@@ -0,0 +1,217 @@
+"""
+收货明细报表 - 全量分页抓取 (精简字段模式)
+目标: 模拟点击菜单，过滤 11 个核心字段，并循环点击下一页，直到所有数据抓取完毕。
+"""
+import sys
+import json
+import time
+import random
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from login import get_page, log
+from config import OUTPUT_DIR
+
+HOME_URL = "https://yunmes.tftykj.cn/"  # starting page for the menu navigation
+API_TARGET = "ReceiptDetailsCheck_SearchList_Proxy"  # target passed to page.listen.start
+SAVE_PATH = OUTPUT_DIR / "receipt_details_full_clean.json"  # where the cleaned rows are written
+
+def _click_with_js_fallback(page, ele):
+    """Click ``ele``; if the normal click raises, click it through JavaScript instead."""
+    try:
+        ele.click()
+    except Exception:
+        page.run_js("arguments[0].click();", ele)
+
+
+def _first_not_none(primary, fallback):
+    """Return ``primary`` unless it is ``None``, in which case return ``fallback``."""
+    return primary if primary is not None else fallback
+
+
+def _clean_receipt_item(item):
+    """Map one raw API row to the reduced record that is saved to JSON."""
+    return {
+        "采购订单号": item.get("purchaseOrderCode"),
+        "行号": item.get("rowsNum"),
+        "物料代码": item.get("materialCode"),
+        "物料名称": item.get("materialName"),
+        "物料规格": item.get("materialSpecification"),
+        "仓库代码": item.get("warehouseCode"),
+        "仓库名称": item.get("warehouseName"),
+        "供应商代码": item.get("supplierCode"),
+        "供应商名称": item.get("supplierName"),
+        "单位名称": item.get("unitName"),
+        "转换单位": item.get("convertUnitName"),
+        "收货单价": item.get("receivePrice"),
+        "收货时间": item.get("receiptTime"),
+        # The converted quantities take precedence when present.
+        "进货数量": _first_not_none(item.get("convertPlannedPurchaseQuantity"), item.get("plannedPurchaseQuantity")),
+        "收货数量": _first_not_none(item.get("convertGoodsQuantity"), item.get("goodsQuantity")),
+        "收货总金额": item.get("receiveAmount"),
+    }
+
+
+def _save_receipt_items(path, items):
+    """Write ``items`` to ``path`` as indented UTF-8 JSON."""
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(items, f, ensure_ascii=False, indent=2)
+
+
+def _collect_receipt_pages(page, all_clean_items):
+    """Capture every result page and append its cleaned rows to ``all_clean_items``.
+
+    Must be called after ``page.listen.start``. Returns ``False`` when the
+    first page could not be captured, otherwise ``True``.
+    """
+    packet = page.listen.wait(timeout=10)
+
+    if not packet:
+        log("INFO", "尝试寻找并点击页面上的【查询】按钮...")
+        query_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span'
+        query_btn = page.ele(query_btn_xpath, timeout=3)
+
+        if query_btn:
+            _click_with_js_fallback(page, query_btn)
+            packet = page.listen.wait(timeout=15)
+
+    if not packet:
+        log("ERR", "未能拦截到第一页数据，可能网络超时或查询未触发。")
+        return False
+
+    # First page
+    log("OK", f"🎉 成功拦截到第一页数据！HTTP: {packet.response.status}")
+    body = packet.response.body
+    data = body if isinstance(body, (dict, list)) else json.loads(body)
+
+    if isinstance(data, dict) and "result" in data:
+        total_count = data["result"].get("totalCount", 0)
+        items = data["result"].get("items", [])
+        all_clean_items.extend(_clean_receipt_item(item) for item in items)
+        log("OK", f"第一页清洗完成，提取了 {len(items)} 条数据。后端报告总条数: {total_count}")
+
+    # Remaining pages
+    page_num = 1
+    next_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/button[2]'
+
+    while True:
+        # Random human-like pause between 2.5 and 5.5 seconds.
+        delay = random.uniform(2.5, 5.5)
+        log("INFO", f"⏳ 模拟真人停顿 {delay:.2f} 秒后，准备点击下一页...")
+        time.sleep(delay)
+
+        # Extra 10-20 second rest whenever the page count is a multiple of 50.
+        if page_num > 1 and page_num % 50 == 0:
+            long_delay = random.uniform(10.0, 20.0)
+            log("INFO", f"☕️ 已经连续高强度翻了 {page_num} 页，触发风控规避机制，假装喝水休息 {long_delay:.2f} 秒...")
+            time.sleep(long_delay)
+
+        next_btn = page.ele(next_btn_xpath, timeout=5)
+        if not next_btn:
+            log("ERR", "找不到下一页按钮，翻页中止。")
+            break
+
+        # A disabled button is taken to mean the last page was reached.
+        class_str = str(next_btn.attr("class"))
+        aria_disabled = next_btn.attr("aria-disabled")
+        is_disabled_attr = next_btn.attr("disabled") is not None
+
+        if "disabled" in class_str or is_disabled_attr or aria_disabled == "true":
+            log("OK", "🏁 下一页按钮已被禁用，说明已经到达最后一页！")
+            break
+
+        page_num += 1
+        log("INFO", f"正在点击【下一页】抓取第 {page_num} 页...")
+
+        try:
+            next_btn.click()
+        except Exception as e:
+            log("ERR", f"普通点击失败: {e}，尝试 JS 点击...")
+            page.run_js("arguments[0].click();", next_btn)
+
+        # Wait for the API response of the new page.
+        packet = page.listen.wait(timeout=15)
+        if not packet:
+            log("ERR", f"第 {page_num} 页请求超时或未触发，中止抓取。")
+            break
+
+        body = packet.response.body
+        data = body if isinstance(body, (dict, list)) else json.loads(body)
+
+        if not (isinstance(data, dict) and "result" in data):
+            log("ERR", f"第 {page_num} 页数据结构异常，中止。")
+            break
+
+        items = data["result"].get("items", [])
+        if not items:
+            log("WARN", f"第 {page_num} 页返回了空列表，可能已无数据。")
+            break
+
+        all_clean_items.extend(_clean_receipt_item(item) for item in items)
+        log("OK", f"第 {page_num} 页清洗完成，累计提取 {len(all_clean_items)} 条数据。")
+
+        # Save every 10 pages so a crash does not lose everything.
+        if page_num % 10 == 0:
+            _save_receipt_items(SAVE_PATH, all_clean_items)
+            log("INFO", f"💾 自动存档: 已保存 {len(all_clean_items)} 条记录至本地。")
+
+    return True
+
+
+def fetch_receipt_details_full():
+    """Scrape every page of the receipt-details report and save the cleaned rows as JSON.
+
+    Navigates to the report through the side menu, captures the list API
+    responses while clicking "next page" until the button is disabled, and
+    writes the result to ``SAVE_PATH`` (also every 10 pages). The listener is
+    stopped on every exit path once it has been started.
+
+    Any exception is logged rather than raised; rows collected so far are
+    then written to ``receipt_details_RESCUE.json`` in ``OUTPUT_DIR``.
+    Returns ``None``.
+    """
+    log("INFO", "=== 🚚 启动收货明细报表全量抓取 (精简字段模式) ===")
+    page = get_page(port=9222)
+    all_clean_items = []
+
+    try:
+        log("INFO", f"正在回到主页起点: {HOME_URL}")
+        page.get(HOME_URL)
+        page.wait.load_start()
+        time.sleep(2)
+
+        menus = [
+            ("第一层: 业务统计报表", 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'),
+            ("第二层: 采购业务报表", 'xpath:/html/body/div[7]/div/div[1]/div/div[4]/div/p'),
+            ("第三层: 收货明细报表", 'xpath:/html/body/div[8]/div/div[1]/div/div[4]/div/p')
+        ]
+
+        log("INFO", "开始模拟人工点击左侧导航菜单...")
+        for name, xpath in menus:
+            ele = page.ele(xpath, timeout=5)
+            if not ele:
+                log("ERR", f"找不到菜单元素: {name}")
+                return
+            _click_with_js_fallback(page, ele)
+            time.sleep(1.5)
+
+        log("OK", "✅ 成功点开收货明细报表界面！")
+
+        # Click a blank area to hide the menu.
+        blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div'
+        blank_ele = page.ele(blank_xpath, timeout=3)
+        if blank_ele:
+            _click_with_js_fallback(page, blank_ele)
+            time.sleep(0.5)
+
+        log("INFO", f"开启底层数据拦截网: {API_TARGET}")
+        page.listen.start(API_TARGET)
+        try:
+            if not _collect_receipt_pages(page, all_clean_items):
+                return
+        finally:
+            page.listen.stop()
+
+        # Final save
+        if all_clean_items:
+            _save_receipt_items(SAVE_PATH, all_clean_items)
+            log("OK", f"🎉 全部抓取完成！总计成功提取 {len(all_clean_items)} 条数据。")
+            log("OK", f"数据已保存至: {SAVE_PATH}")
+
+    except Exception as e:
+        log("ERR", f"发生全局异常: {e}")
+        # Try to rescue whatever was collected before the failure.
+        if all_clean_items:
+            rescue_path = OUTPUT_DIR / "receipt_details_RESCUE.json"
+            _save_receipt_items(rescue_path, all_clean_items)
+            log("INFO", f"🆘 触发异常保存，抢救了 {len(all_clean_items)} 条数据。")
+
+if __name__ == "__main__":  # run the full scrape when executed as a script
+    fetch_receipt_details_full()
--- a/browser_login/fetch_receipt_details_incremental.py
+++ b/browser_login/fetch_receipt_details_incremental.py
@@ -0,0 +1,252 @@
+"""
+收货明细报表 - 智能增量同步脚本
+目标: 
+1. 自动连接本地 SQLite 数据库查询当前存量。
+2. 进入 ERP 系统截获第一页 API，提取系统总条数。
+3. 精准计算需要跳转的起始页码，并在前端页面自动完成跳转。
+4. 仅提取新增页面的数据，内存去重后插入 SQLite，绝不重复抓取历史数据。
+"""
+import sys
+import json
+import time
+import math
+import random
+import sqlite3
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent))
+from login import get_page, log
+from config import DB_PATH
+
+HOME_URL = "https://yunmes.tftykj.cn/"  # starting page for the menu navigation
+API_TARGET = "ReceiptDetailsCheck_SearchList_Proxy"  # target passed to page.listen.start
+
+def get_local_count(conn):
+    """Return the number of rows currently stored in the ``receipt_details`` table."""
+    count_row = conn.cursor().execute("SELECT COUNT(*) FROM receipt_details").fetchone()
+    return count_row[0]
+
+def item_exists(cursor, item):
+    """Tell whether a row with the item's purchase order code, row number and material code exists.
+
+    The three values are read from ``item`` with ``.get`` (keys
+    ``purchaseOrderCode``, ``rowsNum``, ``materialCode``) and matched against
+    the ``receipt_details`` table through ``cursor``.
+    """
+    lookup_key = (
+        item.get("purchaseOrderCode"),
+        item.get("rowsNum"),
+        item.get("materialCode"),
+    )
+    cursor.execute('''
+        SELECT 1 FROM receipt_details 
+        WHERE purchase_order_code = ? AND row_no = ? AND material_code = ?
+    ''', lookup_key)
+    found_row = cursor.fetchone()
+    return found_row is not None
+
+def _click_with_js_fallback(page, ele):
+    """Click ``ele``; if the normal click raises, click it through JavaScript instead."""
+    try:
+        ele.click()
+    except Exception:
+        page.run_js("arguments[0].click();", ele)
+
+
+def _upsert_receipt_item(cursor, item):
+    """Update the row matching the item's key, or insert a new row.
+
+    A row matches when purchase order code, row number and material code are
+    all equal. For a match only quantities, price and amount are updated.
+
+    Returns:
+        ``True`` when a new row was inserted, ``False`` when an existing row
+        was updated.
+    """
+    po_code = item.get("purchaseOrderCode")
+    row_no = item.get("rowsNum")
+    mat_code = item.get("materialCode")
+
+    cursor.execute(
+        'SELECT id FROM receipt_details WHERE purchase_order_code = ? AND row_no = ? AND material_code = ?',
+        (po_code, row_no, mat_code),
+    )
+    existing_record = cursor.fetchone()
+
+    # The converted quantities take precedence when present.
+    p_qty = item.get("convertPlannedPurchaseQuantity")
+    if p_qty is None:
+        p_qty = item.get("plannedPurchaseQuantity")
+    r_qty = item.get("convertGoodsQuantity")
+    if r_qty is None:
+        r_qty = item.get("goodsQuantity")
+
+    if existing_record:
+        cursor.execute('''
+            UPDATE receipt_details
+            SET purchase_qty = ?, receive_qty = ?, receive_price = ?, total_amount = ?
+            WHERE id = ?
+        ''', (p_qty, r_qty, item.get("receivePrice"), item.get("receiveAmount"), existing_record[0]))
+        return False
+
+    cursor.execute('''
+        INSERT INTO receipt_details (
+            purchase_order_code, row_no, material_code, material_name,
+            material_specification, warehouse_code, warehouse_name,
+            supplier_code, supplier_name, unit_name, conversion_unit,
+            receive_price, receipt_time,
+            purchase_qty, receive_qty, total_amount
+        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+    ''', (
+        po_code,
+        row_no,
+        mat_code,
+        item.get("materialName"),
+        item.get("materialSpecification"),
+        item.get("warehouseCode"),
+        item.get("warehouseName"),
+        item.get("supplierCode"),
+        item.get("supplierName"),
+        item.get("unitName"),
+        item.get("convertUnitName"),
+        item.get("receivePrice"),
+        item.get("receiptTime"),
+        p_qty,
+        r_qty,
+        item.get("receiveAmount"),
+    ))
+    return True
+
+
+def fetch_receipt_details_incremental(page_size=50):
+    """Sync only the receipt-detail pages that may contain rows missing from the local database.
+
+    Compares the local row count with the ``totalCount`` reported by the
+    first captured API response, jumps to the first page that is not fully
+    stored locally, and upserts each page up to the last one into
+    ``receipt_details``, committing after every page.
+
+    NOTE(review): the page arithmetic presumes the report lists rows in a
+    stable order with new rows at the end, and that the page shows
+    ``page_size`` rows per page. Confirm against the ERP.
+
+    Args:
+        page_size: Rows per result page used for the page arithmetic.
+
+    Exceptions raised after the browser page is obtained are logged rather
+    than raised. The database connection is always closed. Returns ``None``.
+    """
+    log("INFO", "=== 🚀 启动收货明细报表 - 智能增量同步 ===")
+
+    if not DB_PATH.exists():
+        log("ERR", f"找不到数据库文件: {DB_PATH}，请先执行全量导入！")
+        return
+
+    conn = sqlite3.connect(DB_PATH)
+    try:
+        local_count = get_local_count(conn)
+        log("INFO", f"📦 本地数据库当前总计: {local_count} 条数据")
+
+        page = get_page(port=9222)
+
+        try:
+            log("INFO", f"正在回到主页起点: {HOME_URL}")
+            page.get(HOME_URL)
+            page.wait.load_start()
+            time.sleep(2)
+
+            menus = [
+                ("第一层: 业务统计报表", 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'),
+                ("第二层: 采购业务报表", 'xpath:/html/body/div[7]/div/div[1]/div/div[4]/div/p'),
+                ("第三层: 收货明细报表", 'xpath:/html/body/div[8]/div/div[1]/div/div[4]/div/p')
+            ]
+
+            log("INFO", "模拟点击左侧导航菜单...")
+            for name, xpath in menus:
+                ele = page.ele(xpath, timeout=5)
+                if not ele:
+                    log("ERR", f"找不到菜单元素: {name}")
+                    return
+                _click_with_js_fallback(page, ele)
+                time.sleep(1.5)
+
+            log("OK", "✅ 成功点开收货明细报表界面！")
+
+            # Click a blank area to hide the menu.
+            blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div'
+            blank_ele = page.ele(blank_xpath, timeout=3)
+            if blank_ele:
+                _click_with_js_fallback(page, blank_ele)
+                time.sleep(0.5)
+
+            log("INFO", f"开启底层数据拦截网: {API_TARGET}")
+            page.listen.start(API_TARGET)
+
+            packet = page.listen.wait(timeout=10)
+            if not packet:
+                query_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span'
+                query_btn = page.ele(query_btn_xpath, timeout=3)
+                if query_btn:
+                    _click_with_js_fallback(page, query_btn)
+                    packet = page.listen.wait(timeout=15)
+
+            if not packet:
+                log("ERR", "未能拦截到第一页数据，无法获取线上总条数。")
+                return
+
+            body = packet.response.body
+            data = body if isinstance(body, (dict, list)) else json.loads(body)
+
+            remote_count = 0
+            if isinstance(data, dict) and "result" in data:
+                remote_count = data["result"].get("totalCount", 0)
+
+            log("INFO", f"🌐 线上 ERP 系统当前总条数: {remote_count} 条")
+
+            if remote_count <= local_count:
+                log("OK", "🎉 本地数据已是最新状态，无需抓取！")
+                return
+
+            new_items_count = remote_count - local_count
+            log("INFO", f"🔥 发现新增数据: {new_items_count} 条！准备进行增量跳页抓取...")
+
+            # Pages that are completely stored locally are skipped, e.g. with
+            # 37584 local rows and 50 per page, 751 pages are full, so start at 752.
+            start_page = local_count // page_size + 1
+            end_page = math.ceil(remote_count / page_size)
+
+            log("INFO", f"🎯 智能跳页计算完毕：直接跳转至第 {start_page} 页 (目标到 {end_page} 页)")
+
+            # Jump to the start page; for page 1 the packet captured above is reused.
+            if start_page > 1:
+                jumper_input_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/span[3]/div/div//input'
+                input_ele = page.ele(jumper_input_xpath, timeout=5)
+
+                if not input_ele:
+                    jumper_input_xpath = 'xpath://input[@type="number" and @aria-label="页"]'
+                    input_ele = page.ele(jumper_input_xpath, timeout=5)
+
+                if not input_ele:
+                    log("ERR", "找不到页码输入框，增量跳转失败！")
+                    return
+
+                input_ele.clear()
+                input_ele.input(str(start_page))
+                time.sleep(0.5)
+                input_ele.input('\n')
+
+                # Wait for the data response that follows the jump.
+                packet = page.listen.wait(timeout=15)
+                if not packet:
+                    log("ERR", "跳转失败，未拦截到目标页的数据请求。")
+                    return
+                log("OK", f"✅ 成功跳转至第 {start_page} 页并截获数据！")
+
+            # Process the pages and store their rows.
+            next_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/button[2]'
+            current_page = start_page
+            cursor = conn.cursor()
+            total_inserted = 0
+
+            while current_page <= end_page:
+                body = packet.response.body
+                data = body if isinstance(body, (dict, list)) else json.loads(body)
+
+                if isinstance(data, dict) and "result" in data:
+                    items = data["result"].get("items", [])
+
+                    # The per-page figure counts updated rows as well as
+                    # inserted ones; the grand total counts inserts only.
+                    inserted_this_page = 0
+                    for item in items:
+                        if _upsert_receipt_item(cursor, item):
+                            total_inserted += 1
+                        inserted_this_page += 1
+
+                    conn.commit()
+                    log("OK", f"第 {current_page} 页处理完毕，成功入库 {inserted_this_page} 条新数据。")
+
+                # Click through to the next page while one remains.
+                if current_page < end_page:
+                    delay = random.uniform(1.5, 3.5)
+                    log("INFO", f"⏳ 停顿 {delay:.2f} 秒后点击下一页...")
+                    time.sleep(delay)
+
+                    next_btn = page.ele(next_btn_xpath, timeout=5)
+                    if not next_btn:
+                        log("ERR", "找不到下一页按钮！")
+                        break
+
+                    _click_with_js_fallback(page, next_btn)
+
+                    packet = page.listen.wait(timeout=15)
+                    if not packet:
+                        log("ERR", f"第 {current_page + 1} 页请求超时！")
+                        break
+
+                current_page += 1
+
+            log("OK", f"🎉 增量同步大功告成！总计入库 {total_inserted} 条全新数据！")
+
+        except Exception as e:
+            log("ERR", f"发生全局异常: {e}")
+        finally:
+            page.listen.stop()
+    finally:
+        # Runs even if counting rows or obtaining the page raised.
+        conn.close()
+
+if __name__ == "__main__":  # run the incremental sync when executed as a script
+    fetch_receipt_details_incremental()
--- a/browser_login/import_to_sqlite.py
+++ b/browser_login/import_to_sqlite.py
@@ -0,0 +1,227 @@
+import sqlite3
+import json
+from pathlib import Path
+import os
+from config import OUTPUT_DIR, DB_PATH
+
+# Input JSON files, both located under OUTPUT_DIR (imported from config).
+# Receipt-detail records read by import_receipt_details().
+RECEIPT_JSON = OUTPUT_DIR / "receipt_details_full_clean.json"
+# BOM cost trees read by import_bom_data().
+BOM_JSON = OUTPUT_DIR / "bom_cost_full_tree_final.json"
+
+def _create_schema(cursor):
+    """Issue the DDL for the receipt and BOM tables on *cursor*.
+
+    ``receipt_details`` and its indexes are created only when missing, while
+    ``bom_child`` and ``bom_parent`` are dropped and recreated, which discards
+    any BOM rows stored by an earlier run.
+    """
+    # Receipt-detail table.
+    cursor.execute('''
+    CREATE TABLE IF NOT EXISTS receipt_details (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        purchase_order_code TEXT,
+        row_no INTEGER,
+        material_code TEXT,
+        material_name TEXT,
+        material_specification TEXT,
+        warehouse_code TEXT,
+        warehouse_name TEXT,
+        supplier_code TEXT,
+        supplier_name TEXT,
+        unit_name TEXT,
+        conversion_unit TEXT,
+        receive_price REAL,
+        receipt_time TEXT,
+        purchase_qty REAL,
+        receive_qty REAL,
+        total_amount REAL
+    )
+    ''')
+
+    # Indexes to speed up queries on the receipt table.
+    cursor.execute('CREATE INDEX IF NOT EXISTS idx_receipt_material_code ON receipt_details(material_code)')
+    cursor.execute('CREATE INDEX IF NOT EXISTS idx_receipt_supplier_name ON receipt_details(supplier_name)')
+    cursor.execute('CREATE INDEX IF NOT EXISTS idx_receipt_time ON receipt_details(receipt_time)')
+
+    # BOM tables are rebuilt from scratch; the child table is dropped first
+    # because it references the parent table.
+    cursor.execute('DROP TABLE IF EXISTS bom_child')
+    cursor.execute('DROP TABLE IF EXISTS bom_parent')
+
+    # BOM parent table: one row per top-level material.
+    cursor.execute('''
+    CREATE TABLE bom_parent (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        parent_material_code TEXT UNIQUE,
+        parent_material_name TEXT
+    )
+    ''')
+
+    # BOM child table. The tree is stored as an adjacency list: every row
+    # records the id of the row one level above it in parent_node_id.
+    cursor.execute('''
+    CREATE TABLE bom_child (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        parent_material_code TEXT, -- 归属的最顶层父件
+        node_material_code TEXT,
+        node_material_name TEXT,
+        bom_level INTEGER,
+        parent_node_id INTEGER,    -- 指向上一级子件的 id，如果是一级子件则为空
+        usage_qty REAL DEFAULT 1.0,
+        FOREIGN KEY(parent_material_code) REFERENCES bom_parent(parent_material_code),
+        FOREIGN KEY(parent_node_id) REFERENCES bom_child(id)
+    )
+    ''')
+
+    cursor.execute('CREATE INDEX IF NOT EXISTS idx_bom_child_parent_code ON bom_child(parent_material_code)')
+    cursor.execute('CREATE INDEX IF NOT EXISTS idx_bom_child_node_code ON bom_child(node_material_code)')
+
+
+def init_db():
+    """Open the SQLite database at DB_PATH and (re)create its schema.
+
+    Returns:
+        sqlite3.Connection: The open connection. The caller is responsible
+        for closing it.
+
+    Raises:
+        Exception: Whatever the schema statements raise (typically
+            ``sqlite3.Error``). The connection is closed before the error
+            propagates, so a failed initialisation does not leak a handle.
+    """
+    conn = sqlite3.connect(DB_PATH)
+    try:
+        _create_schema(conn.cursor())
+        conn.commit()
+    except Exception:
+        # No caller will ever receive this connection, so release it here.
+        conn.close()
+        raise
+    return conn
+
+def import_receipt_details(conn):
+    """Rebuild the ``receipt_details`` table from the RECEIPT_JSON file.
+
+    When the file does not exist a notice is printed and nothing else
+    happens. Otherwise the table is dropped, recreated together with its
+    three indexes, filled with one row per JSON record, and committed.
+
+    Args:
+        conn: Open sqlite3 connection to write to.
+    """
+    if not RECEIPT_JSON.exists():
+        print(f"找不到收货明细文件: {RECEIPT_JSON}")
+        return
+
+    print("开始导入收货明细数据...")
+    with open(RECEIPT_JSON, 'r', encoding='utf-8') as source:
+        records = json.load(source)
+
+    cursor = conn.cursor()
+    # Drop and recreate so repeated runs start from an empty table that has
+    # the current column layout.
+    cursor.execute('DROP TABLE IF EXISTS receipt_details')
+    cursor.execute('''
+    CREATE TABLE receipt_details (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        purchase_order_code TEXT,
+        row_no INTEGER,
+        material_code TEXT,
+        material_name TEXT,
+        material_specification TEXT,
+        warehouse_code TEXT,
+        warehouse_name TEXT,
+        supplier_code TEXT,
+        supplier_name TEXT,
+        unit_name TEXT,
+        conversion_unit TEXT,
+        receive_price REAL,
+        receipt_time TEXT,
+        purchase_qty REAL,
+        receive_qty REAL,
+        total_amount REAL
+    )
+    ''')
+    for index_ddl in (
+        'CREATE INDEX IF NOT EXISTS idx_receipt_material_code ON receipt_details(material_code)',
+        'CREATE INDEX IF NOT EXISTS idx_receipt_supplier_name ON receipt_details(supplier_name)',
+        'CREATE INDEX IF NOT EXISTS idx_receipt_time ON receipt_details(receipt_time)',
+    ):
+        cursor.execute(index_ddl)
+
+    # JSON keys, listed in the same order as the INSERT column list below.
+    field_keys = (
+        "采购订单号",
+        "行号",
+        "物料代码",
+        "物料名称",
+        "物料规格",
+        "仓库代码",
+        "仓库名称",
+        "供应商代码",
+        "供应商名称",
+        "单位名称",
+        "转换单位",
+        "收货单价",
+        "收货时间",
+        "进货数量",
+        "收货数量",
+        "收货总金额",
+    )
+    # Missing keys become None, which sqlite3 stores as NULL.
+    rows = (tuple(record.get(key) for key in field_keys) for record in records)
+    cursor.executemany('''
+        INSERT INTO receipt_details (
+            purchase_order_code, row_no, material_code, material_name, 
+            material_specification, warehouse_code, warehouse_name, 
+            supplier_code, supplier_name, unit_name, conversion_unit,
+            receive_price, receipt_time,
+            purchase_qty, receive_qty, total_amount
+        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        ''', rows)
+
+    conn.commit()
+    print(f"成功导入 {len(records)} 条收货明细数据！")
+
+def _insert_bom_tree(cursor, parent_material_code, tree_nodes, parent_node_id=None):
+    """Insert a BOM subtree into ``bom_child`` in depth-first, pre-order sequence.
+
+    Args:
+        cursor: sqlite3 cursor used for the INSERT statements.
+        parent_material_code: Code of the top-level material every inserted
+            row is attributed to.
+        tree_nodes: Iterable of node dicts; children of a node are read
+            from its ``"sub_items"`` entry.
+        parent_node_id: Row id stored as ``parent_node_id`` for the nodes
+            in ``tree_nodes`` (None for first-level nodes).
+    """
+    # Explicit stack instead of recursion. Each frame pairs an iterator over
+    # sibling nodes with the row id of the node that owns them.
+    frames = [(iter(tree_nodes), parent_node_id)]
+    while frames:
+        siblings, owner_id = frames[-1]
+        try:
+            entry = next(siblings)
+        except StopIteration:
+            # This sibling list is exhausted; resume the level above.
+            frames.pop()
+            continue
+
+        row = (
+            parent_material_code,
+            entry.get("childMaterialCode"),
+            entry.get("childMaterialName"),
+            entry.get("bomLevel"),
+            owner_id,
+            # Any falsy usageQty (missing, None, "", 0) is stored as 1.0.
+            float(entry.get("usageQty") or 1.0),
+        )
+        cursor.execute('''
+        INSERT INTO bom_child (
+            parent_material_code, node_material_code, node_material_name, bom_level, parent_node_id, usage_qty
+        ) VALUES (?, ?, ?, ?, ?, ?)
+        ''', row)
+
+        children = entry.get("sub_items", [])
+        if children:
+            # The row just written becomes the parent of its sub-items, which
+            # are processed before the remaining siblings.
+            frames.append((iter(children), cursor.lastrowid))
+
+def import_bom_data(conn):
+    """Reload ``bom_parent`` and ``bom_child`` from the BOM_JSON file.
+
+    When the file does not exist a notice is printed and nothing else
+    happens. Otherwise both tables are emptied, every record with a
+    non-empty ``parentMaterialCode`` is inserted as a parent together with
+    its ``bom_cost_tree`` nodes, and the result is committed. A record that
+    raises ``sqlite3.IntegrityError`` is reported as a duplicate parent and
+    skipped.
+
+    Args:
+        conn: Open sqlite3 connection to write to.
+    """
+    if not BOM_JSON.exists():
+        print(f"找不到 BOM 成本文件: {BOM_JSON}")
+        return
+
+    print("开始导入 BOM 成本数据...")
+    with open(BOM_JSON, 'r', encoding='utf-8') as source:
+        bom_records = json.load(source)
+
+    cursor = conn.cursor()
+    # Remove rows left over from a previous import.
+    cursor.execute('DELETE FROM bom_child')
+    cursor.execute('DELETE FROM bom_parent')
+
+    parent_insert_sql = '''
+            INSERT INTO bom_parent (parent_material_code, parent_material_name)
+            VALUES (?, ?)
+            '''
+
+    imported_parents = 0
+    for record in bom_records:
+        code = record.get("parentMaterialCode")
+        name = record.get("parentMaterialName")
+
+        # Records without a parent code are ignored.
+        if not code:
+            continue
+
+        try:
+            cursor.execute(parent_insert_sql, (code, name))
+            imported_parents += 1
+
+            # Walk this parent's tree; first-level nodes have no parent row.
+            subtree = record.get("bom_cost_tree", [])
+            if subtree:
+                _insert_bom_tree(cursor, code, subtree, parent_node_id=None)
+        except sqlite3.IntegrityError:
+            print(f"警告: 父件重复 {code}，跳过")
+
+    conn.commit()
+
+    # Report how many child rows ended up in the table.
+    child_total = cursor.execute('SELECT COUNT(*) FROM bom_child').fetchone()[0]
+    print(f"成功导入 {imported_parents} 个 BOM 父件，包含 {child_total} 个子件节点！")
+
+# Script entry point: build the schema, then load both JSON exports.
+if __name__ == "__main__":
+    print(f"数据库文件将保存在: {DB_PATH}")
+    conn = init_db()
+    # Close the connection even when an import step raises, so the database
+    # handle is not left open on failure.
+    try:
+        import_receipt_details(conn)
+        import_bom_data(conn)
+    finally:
+        conn.close()
+    print("全部导入完成！你可以使用 SQLite 客户端连接 erp_data.db 查看数据。")
--- a/browser_login/keep_alive.py
+++ b/browser_login/keep_alive.py
@@ -0,0 +1,51 @@
+"""
+ERP 浏览器保活服务 (手工填写账号密码专用)
+
+运行此脚本后，在弹出的 Chrome 浏览器中手工登录。
+登录成功后，不要关闭终端和浏览器。
+其他抓取脚本（指定同一端口）就可以直接复用这个已经登录的浏览器实例了！
+"""
+import sys
+import time
+from pathlib import Path
+
+# 引入现有的登录模块
+sys.path.insert(0, str(Path(__file__).parent))
+from login import get_page, login_manual, log
+
+def main():
+    """Open the shared browser on port 9222, wait for a manual login, then idle.
+
+    If ``login_manual`` returns a falsy value the browser is quit and the
+    function returns. Otherwise it sleeps in 10-second steps until an
+    exception arrives (normally Ctrl+C), logs it, and quits the browser.
+    """
+    log("INFO", "=== 🚀 启动浏览器保活服务 (端口: 9222) ===")
+    # Fixed port 9222; the scraping scripts are expected to attach to the
+    # same port (per the original note, not verifiable from this file).
+    page = get_page(port=9222)
+
+    try:
+        log("INFO", "等待您在弹出的浏览器中完成手工登录...")
+
+        # login_manual comes from login.py (not shown here); the original
+        # note says it polls until a login is detected -- confirm there.
+        if not login_manual(page):
+            log("ERR", "❌ 登录超时或失败，保活服务即将退出。")
+            page.quit()
+            return
+
+        log("OK", "✅ 登录成功！浏览器已进入保活状态。")
+        divider = "=================================================="
+        banner = (
+            divider,
+            "⚠️ 请勿关闭此终端窗口和弹出的 Chrome 浏览器！",
+            "👉 现在您可以新开一个终端，去运行其他的抓取脚本了。",
+            "🛑 如果要结束保活关闭浏览器，请在此终端按 Ctrl+C。",
+            divider,
+        )
+        for text in banner:
+            log("INFO", text)
+
+        # Idle forever; the only way out is an exception such as Ctrl+C.
+        while True:
+            time.sleep(10)
+
+    except KeyboardInterrupt:
+        log("INFO", "接收到退出信号 (Ctrl+C)，正在关闭浏览器...")
+        page.quit()
+    except Exception as e:
+        log("ERR", f"发生异常: {e}")
+        page.quit()
+
+if __name__ == "__main__":
+    main()