抓取生产工单，抓取发料异常

2026-06-11 19:38:16 +08:00
parent a160d5d48f
commit 94c81cdc4f
10 changed files with 160 additions and 28 deletions
--- a/browser_login/auto_fetch_abnormal_report.py
+++ b/browser_login/auto_fetch_abnormal_report.py
@@ -73,6 +73,22 @@ def fetch_report_data(page):
    first_day = datetime.date(now.year, now.month, 1).strftime('%Y-%m-%d')
    last_day = datetime.date(now.year, now.month, calendar.monthrange(now.year, now.month)[1]).strftime('%Y-%m-%d')
    
+    # ==== 断点续传逻辑 ====
+    state_file = OUTPUT_DIR / 'abnormal_sync_state.json'
+    start_page = 1
+    if state_file.exists():
+        try:
+            with open(state_file, 'r', encoding='utf-8') as f:
+                state = json.load(f)
+                if state.get('month') == f"{now.year}-{now.month}":
+                    saved_page = state.get('current_page', 1)
+                    if saved_page > 1:
+                        start_page = saved_page
+                        print(f"发现上次中断记录，准备从第 {start_page} 页恢复抓取...")
+        except Exception as e:
+            print(f"读取状态文件失败: {e}")
+    # ====================
+    
    print(f"设置下单日期为当月: {first_day} 至 {last_day}，并清理发料情况过滤条件...")
    
    # 使用注入到全部 iframe 的 JS 强制执行 EasyUI 方法
@@ -121,7 +137,6 @@ def fetch_report_data(page):
                }}
                
                // 4. [提速黑科技]：强行把每页请求的数量从 50 条改为 500 条
-                // 找到底部的分页组件并修改它的 pageSize，这样点击查询时就会一次请求 500 条
                var paginations = doc.querySelectorAll('.pagination');
                for(var i=0; i<paginations.length; i++) {{
                    try {{ win.$(paginations[i]).pagination({{pageSize: 500}}); }} catch(e) {{}}
@@ -168,20 +183,25 @@ def fetch_report_data(page):
    
    current_page = 1
    total_inserted = 0
+    total_pages = 1
    
    print("开始监听网络请求，寻找 API 数据包...")
    while True:
-        print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 正在收集并解析网络数据包...")
-        packets = target_tab.listen.steps(timeout=5)
-        
+        print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 正在收集并解析网络数据包...")
+        try:
+            packets = target_tab.listen.steps(timeout=5)
+        except Exception as e:
+            print(f"❌ 监听数据包时页面发生异常 (可能是会话超时跳转): {e}")
+            print("♻️ 准备触发断点续传机制，重新进入菜单...")
+            return False
+            
        found_data = False
-        total_pages = 1
        
        for p in packets:
                if 'SearchCustomReportBySQL_Proxy' in p.url or 'CustomTableViewData' in p.url or 'SeachList' in p.url:
-                    print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 命中目标 URL: {p.url[:100]}...")
+                    print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 命中目标 URL: {p.url[:100]}...")
                    if p.method == 'POST' and p.response and p.response.body:
-                        print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 这是一个 POST 请求，且包含 response body")
+                        print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 这是一个 POST 请求，且包含 response body")
                        try:
                            body = p.response.body
                            data = body if isinstance(body, (dict, list)) else json.loads(body)
@@ -211,14 +231,34 @@ def fetch_report_data(page):
                                    print(f"❌ 保存异常报表数据到数据库失败: {db_err}")
                                
                                found_data = True
+                                
+                                # 只有当我们不是处于准备跳页的初始阶段时，才将进度记录到文件
+                                if not (current_page == 1 and start_page > 1):
+                                    try:
+                                        with open(state_file, 'w', encoding='utf-8') as f:
+                                            json.dump({
+                                                'month': f"{now.year}-{now.month}",
+                                                'current_page': current_page,
+                                                'total_pages': total_pages
+                                            }, f)
+                                    except Exception as e:
+                                        print(f"保存进度失败: {e}")
+                                        pass
+
                            else:
-                                print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 数据结构不匹配。")
+                                print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 数据结构不匹配。")
                        except Exception as e:
-                            print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 解析数据包出错: {e}")
+                            print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 解析数据包出错: {e}")
                            pass
        
        if not found_data:
-            print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 第 {current_page} 页等待了超时，没有拦截到匹配的报表数据...")
+            # 检查是否由于会话超时被系统强制跳转回首页
+            if "Home/Index" in target_tab.url or target_tab.url == "https://yunmes.tftykj.cn/":
+                print("❌ 警告：页面已跳转回首页，可能是会话超时或被强制登出。")
+                print(f"进度已保存 (停留在第 {current_page} 页)，下次启动抓取任务将自动从中断处继续！")
+                return False
+                
+            print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 第 {current_page} 页等待了超时，没有拦截到匹配的报表数据...")
            
            # 再给一次机会等3秒
            print("再等待3秒重试...")
@@ -226,8 +266,36 @@ def fetch_report_data(page):
            # 重新让上面解析
            continue
            
+        # ====== 触发断点续传跳页 ======
+        if current_page == 1 and start_page > 1:
+            print(f"===================================")
+            print(f"⏭️ 触发断点续传，跳过第 1 页，直接跳转到第 {start_page} 页...")
+            print(f"===================================")
+            current_page = start_page
+            target_tab.run_js(f"""
+                var iframes = document.querySelectorAll('iframe');
+                for(var j=0; j<iframes.length; j++) {{
+                    try {{
+                        var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
+                        var win = iframes[j].contentWindow;
+                        var paginations = doc.querySelectorAll('.pagination');
+                        for(var i=0; i<paginations.length; i++) {{
+                            try {{ win.$(paginations[i]).pagination('select', {start_page}); }} catch(e) {{}}
+                        }}
+                    }} catch(e) {{}}
+                }}
+            """)
+            time.sleep(2)
+            continue
+        # ==============================
+            
        if current_page >= total_pages:
            print(f"已到达最后一页 (共 {total_pages} 页)，抓取完成！")
+            try:
+                if state_file.exists():
+                    state_file.unlink() # 抓取完毕后清除记录
+            except:
+                pass
            break
            
        print(f"准备抓取下一页 (第 {current_page + 1} 页)...")