抓取生产工单，抓取发料异常

2026-06-11 15:58:56 +08:00
parent 66eecd0daa
commit 5b19790037
40 changed files with 4942 additions and 54 deletions
--- a/browser_login/auto_fetch_abnormal_report.py
+++ b/browser_login/auto_fetch_abnormal_report.py
@@ -0,0 +1,249 @@
+import sys
+import time
+from pathlib import Path
+import datetime
+import calendar
+import json
+sys.path.insert(0, str(Path(__file__).parent))
+from login import get_page
+from config import OUTPUT_DIR
+
+def navigate_to_report(page):
+    """Open the MES home page and drill down to the issue-anomaly report.
+
+    Walks the menu entries "自定义报表管理" -> "自定义报表", selects the table
+    row containing "生产工单发料异常检查报表" and clicks "进入自定义报表".
+
+    Args:
+        page: browser page object as returned by ``login.get_page``
+            (presumably a DrissionPage page - confirm against ``login``).
+
+    Returns:
+        True when the "进入自定义报表" button was found and clicked; False when
+        the report row or the button is missing, or when any step raised.
+    """
+    print("正在打开主页...")
+    page.get("https://yunmes.tftykj.cn/")
+    page.wait.load_start()
+    time.sleep(2)
+
+    print("正在打开 生产工单发料异常检查报表...")
+    try:
+        top_menu = page.ele('text=自定义报表管理')
+        if top_menu:
+            print("点击第一级...")
+            top_menu.click()
+            time.sleep(1)
+
+        # Second level: click every entry whose text contains the menu name.
+        # Clicks are best-effort (some matches may be hidden), but failures are
+        # now reported instead of being swallowed by a bare ``except``.
+        for menu in page.eles('text:自定义报表管理'):
+            try:
+                menu.click()
+            except Exception as click_err:
+                print(f"点击二级菜单失败(已忽略): {click_err}")
+        time.sleep(1)
+
+        # Third level: only entries whose text is exactly "自定义报表".
+        for menu in page.eles('text:自定义报表'):
+            if menu.text != '自定义报表':
+                continue
+            try:
+                menu.click()
+                print("点击第三级...")
+            except Exception as click_err:
+                print(f"点击三级菜单失败(已忽略): {click_err}")
+        time.sleep(2)
+
+        report_cell = page.ele('text:生产工单发料异常检查报表', timeout=5)
+        if not report_cell:
+            print("未能找到 '生产工单发料异常检查报表'")
+            return False
+
+        print("找到报表行，选中...")
+        report_row = report_cell.parent('tag:tr')
+        if not report_row:
+            # Without the enclosing row there is nothing to select.
+            print("未能定位报表所在的表格行。")
+            return False
+        report_row.click()
+        time.sleep(0.5)
+
+        enter_btn = page.ele('text=进入自定义报表')
+        if not enter_btn:
+            print("未找到进入按钮。")
+            return False
+
+        print("点击进入自定义报表...")
+        enter_btn.click()
+        time.sleep(3)
+        print("成功进入报表！")
+        return True
+    except Exception as e:
+        print(f"执行导航过程中发生异常: {e}")
+        return False
+
+# Substrings that identify the report-data API among captured request URLs.
+_REPORT_API_MARKERS = ('SearchCustomReportBySQL_Proxy', 'CustomTableViewData', 'SeachList')
+# How many times one page's response is looked for before giving up.
+_MAX_COLLECT_ATTEMPTS = 3
+# Seconds to sleep between two collection attempts.
+_RETRY_WAIT_SECONDS = 3
+# Idle time (seconds) after which draining the listener queue stops.
+# NOTE(review): a falsy timeout is believed to mean "wait forever" in
+# DrissionPage's listen.steps() - keep this strictly positive.
+_DRAIN_IDLE_SECONDS = 0.5
+
+
+def _now_hms():
+    """Return the current local time as HH:MM:SS for log prefixes."""
+    return datetime.datetime.now().strftime('%H:%M:%S')
+
+
+def _extract_report_result(packet):
+    """Return the ``result`` dict of a report-data response, or None."""
+    if not any(marker in packet.url for marker in _REPORT_API_MARKERS):
+        return None
+    print(f"[{_now_hms()}] 命中目标 URL: {packet.url[:100]}...")
+    if packet.method != 'POST' or not packet.response or not packet.response.body:
+        return None
+    print(f"[{_now_hms()}] 这是一个 POST 请求，且包含 response body")
+    try:
+        body = packet.response.body
+        # The body may already be decoded; only parse it when it is raw text.
+        data = body if isinstance(body, (dict, list)) else json.loads(body)
+    except (TypeError, ValueError) as e:
+        print(f"[{_now_hms()}] 解析数据包出错: {e}")
+        return None
+    if isinstance(data, dict) and isinstance(data.get('result'), dict) and 'items' in data['result']:
+        return data['result']
+    print(f"[{_now_hms()}] 数据结构不匹配。")
+    return None
+
+
+def _collect_report_results(tab, current_page):
+    """Drain the listener and return every report ``result`` found, with bounded retries."""
+    for attempt in range(_MAX_COLLECT_ATTEMPTS):
+        if attempt:
+            print(f"再等待{_RETRY_WAIT_SECONDS}秒重试...")
+            time.sleep(_RETRY_WAIT_SECONDS)
+        # steps() is consumed into a list so it can be counted and re-read.
+        packets = list(tab.listen.steps(timeout=_DRAIN_IDLE_SECONDS))
+        print(f"[{_now_hms()}] 收集到 {len(packets)} 个网络数据包，正在解析...")
+        results = [res for res in map(_extract_report_result, packets) if res is not None]
+        if results:
+            return results
+        print(f"[{_now_hms()}] 第 {current_page} 页等待了3秒，没有拦截到匹配的报表数据...")
+        if attempt and not packets:
+            # A retry that captured nothing at all: no request is in flight.
+            break
+    return []
+
+
+def _store_items(items):
+    """Save one page of rows through ``import_to_sqlite``; return the inserted count (0 on failure)."""
+    if not items:
+        return 0
+    try:
+        # Imported lazily so a missing or broken module is reported, not fatal.
+        import import_to_sqlite
+        # presumably returns the number of rows written - confirm in import_to_sqlite
+        inserted = import_to_sqlite.import_abnormal_report_data(items) or 0
+    except Exception as db_err:
+        print(f"❌ 保存异常报表数据到数据库失败: {db_err}")
+        return 0
+    print(f"✅ 成功将本页 {inserted} 条异常报表数据存入数据库")
+    return inserted
+
+
+def _page_count(total_count, page_size):
+    """Return ceil(total_count / page_size), never less than 1."""
+    if total_count <= 0 or page_size <= 0:
+        return 1
+    return (total_count + page_size - 1) // page_size
+
+
+def fetch_report_data(page):
+    """Query the report for the current month and store every page of rows.
+
+    Steps: take the browser's latest tab, set the order-date range to the
+    current month and clear the combobox filters via JS run against every
+    iframe, click the search button, then read the captured report responses
+    page by page, handing each page's ``items`` to
+    ``import_to_sqlite.import_abnormal_report_data``.
+
+    The number of pages is derived from ``totalCount`` and the row count of
+    the first page actually received, so paging stays correct even when the
+    request for 500 rows per page did not take effect.
+
+    Args:
+        page: browser page object as returned by ``login.get_page``.
+
+    Returns:
+        None. Progress and the total inserted count are printed.
+    """
+    # Wait for the new tab to be ready
+    time.sleep(3)
+    target_tab = page.get_tab(page.latest_tab)
+
+    # Gives the report form up to 10 s to show its label; the result is unused.
+    target_tab.ele('text:下单日期(开始)', timeout=10)
+
+    now = datetime.datetime.now()
+    days_in_month = calendar.monthrange(now.year, now.month)[1]
+    first_day = datetime.date(now.year, now.month, 1).strftime('%Y-%m-%d')
+    last_day = datetime.date(now.year, now.month, days_in_month).strftime('%Y-%m-%d')
+
+    print(f"设置下单日期为当月: {first_day} 至 {last_day}，并清理发料情况过滤条件...")
+
+    # Run the EasyUI calls inside every iframe; failures inside the JS are
+    # swallowed there, which is why the page size is re-derived further down.
+    target_tab.run_js(f"""
+        var iframes = document.querySelectorAll('iframe');
+        for(var j=0; j<iframes.length; j++) {{
+            try {{
+                var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
+                var win = iframes[j].contentWindow;
+                
+                // 1. 设置开始日期
+                var startInputs = doc.querySelectorAll('.input_StartValue.datebox-f');
+                if (startInputs.length > 0) {{
+                    win.$(startInputs[0]).datebox('setValue', '{first_day}');
+                }}
+                
+                // 2. 设置结束日期
+                var endInputs = doc.querySelectorAll('.input_EndValue.datebox-f');
+                if (endInputs.length > 0) {{
+                    win.$(endInputs[0]).datebox('setValue', '{last_day}');
+                }}
+                
+                // 3. 清理所有下拉框（包括发料情况）
+                var combos = doc.querySelectorAll('.combobox-f, .textbox-f');
+                for(var i=0; i<combos.length; i++) {{
+                    try {{ win.$(combos[i]).combobox('clear'); }} catch(e) {{}}
+                }}
+                
+                // 4. [提速黑科技]：强行把每页请求的数量从 50 条改为 500 条
+                // 找到底部的分页组件并修改它的 pageSize，这样点击查询时就会一次请求 500 条
+                var paginations = doc.querySelectorAll('.pagination');
+                for(var i=0; i<paginations.length; i++) {{
+                    try {{ win.$(paginations[i]).pagination({{pageSize: 500}}); }} catch(e) {{}}
+                }}
+            }} catch(e) {{}}
+        }}
+    """)
+
+    print("日期和条件设置完成，准备查询...")
+
+    print("正在查找并点击查询按钮...")
+
+    current_page = 1
+    total_inserted = 0
+    total_pages = 1
+    page_size = None  # rows per page, learned from the first page received
+
+    # The search button lives inside an iframe, so it is clicked through JS.
+    # Listening starts before the click so the response is not missed.
+    target_tab.listen.start()
+    try:
+        clicked = target_tab.run_js("""
+        var iframes = document.querySelectorAll('iframe');
+        var clicked = false;
+        for(var j=0; j<iframes.length; j++) {
+            try {
+                var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
+                var btn = doc.querySelector('#onSearch');
+                if(!btn) {
+                    var spans = doc.querySelectorAll('.l-btn-text');
+                    for(var i=0; i<spans.length; i++) {
+                        if(spans[i].innerText === '查询') {
+                            btn = spans[i].parentNode.parentNode;
+                            break;
+                        }
+                    }
+                }
+                if(btn) {
+                    btn.click();
+                    console.log('Clicked search button inside iframe');
+                    clicked = true;
+                }
+            } catch(e) {}
+        }
+        return clicked;
+    """)
+        if not clicked:
+            # Only a warning: the capture loop below still decides when to stop.
+            print("⚠️ 未能确认查询按钮已被点击，可能抓取不到数据。")
+
+        print("点击指令已发送，等待报表数据加载 (3秒)...")
+        time.sleep(3)
+
+        print("开始监听网络请求，寻找 API 数据包...")
+        while True:
+            results = _collect_report_results(target_tab, current_page)
+            if not results:
+                print("彻底没有数据，停止抓取。")
+                break
+
+            for res in results:
+                items = res.get('items') or []
+                total_count = res.get('totalCount', 0) or 0
+
+                print("===================================")
+                print(f"✅ 成功拦截到报表数据API (第 {current_page} 页)")
+                print(f"✅ 数据总条数: {total_count}, 当前页条数: {len(items)}")
+                print("===================================")
+
+                if page_size is None and items:
+                    # A full first page reveals the page size really in use.
+                    page_size = len(items)
+                if page_size:
+                    total_pages = _page_count(total_count, page_size)
+
+                total_inserted += _store_items(items)
+
+            if current_page >= total_pages:
+                print(f"已到达最后一页 (共 {total_pages} 页)，抓取完成！")
+                break
+
+            print(f"准备抓取下一页 (第 {current_page + 1} 页)...")
+            time.sleep(1)
+
+            # Click "next page" inside whichever iframe holds the pagination bar.
+            target_tab.run_js("""
+            var iframes = document.querySelectorAll('iframe');
+            for(var j=0; j<iframes.length; j++) {
+                try {
+                    var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
+                    var nextBtn = doc.querySelector('.pagination-next');
+                    if(nextBtn && nextBtn.tagName === 'SPAN') {
+                        nextBtn = nextBtn.parentNode;
+                    }
+                    if(nextBtn) {
+                        nextBtn.click();
+                    }
+                } catch(e) {}
+            }
+        """)
+
+            # Give the next page's request time to complete.
+            time.sleep(2)
+            current_page += 1
+    finally:
+        # Always release the network listener, even when scraping fails.
+        target_tab.listen.stop()
+
+    print(f"🎉 异常报表全量抓取大功告成！总计入库: {total_inserted} 条。")
+
+if __name__ == '__main__':
+    # Script entry point: get the browser page from login.get_page (port 9222),
+    # and scrape the report only when navigation reported success.
+    page = get_page(port=9222)
+    if navigate_to_report(page):
+        fetch_report_data(page)