"""
Production work-order report - smart incremental sync script.

Drives an already-running browser page to the work-order query screen,
intercepts the paginated JSON responses of the search API, and mirrors the
rows into the local ``work_orders`` SQLite table (insert new rows, update
rows whose status or issued quantity changed).
"""
import sys
import json
import time
import math
import random
import sqlite3
import traceback
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))

from login import get_page, log
from config import DB_PATH
from import_to_sqlite import init_db

HOME_URL = "https://yunmes.tftykj.cn/"
PAGE_URL = "https://yunmes.tftykj.cn/WorkOrdersQuery"
API_TARGET = "WorkOrdersDetailed_SearchListAll_Proxy"

# Number of rows the remote listing is assumed to return per page; used only
# to derive the upper bound on how many pages may be visited.
_PAGE_SIZE = 50
# Stop paging once this many consecutive pages contained nothing new/changed.
_MAX_UNCHANGED_PAGES = 2

# Fallback locator for the query button on the ElementUI variant of the page.
_QUERY_BTN_XPATH = (
    'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span'
)
# "Next page" locators, tried in order: EasyUI first, then ElementUI.
_NEXT_BTN_LOCATORS = (
    'xpath://*[contains(@class, "pagination-next")]/ancestor::a',
    'xpath://button[contains(@class, "btn-next")]',
)

# ``line_number IS ?`` (instead of ``=``) so that a NULL line number still
# matches its existing row; with ``=`` such rows were re-inserted on every run.
_SELECT_SQL = """
    SELECT status, total_issue_number
    FROM work_orders
    WHERE work_orders_number = ? AND line_number IS ? AND material_code = ?
"""

_INSERT_SQL = """
    INSERT INTO work_orders (
        work_orders_number, line_number, material_code, material_name,
        material_specification, status, unit_name, cost_price,
        issue_number, total_issue_number, issue_amount, issue_amount_total,
        executor_user_name, execution_time, production_order_no,
        warehouse_name, materials_user_name, work_orders_remark
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

_UPDATE_SQL = """
    UPDATE work_orders SET
        status = ?, cost_price = ?, issue_number = ?, total_issue_number = ?,
        issue_amount = ?, issue_amount_total = ?, executor_user_name = ?,
        execution_time = ?, warehouse_name = ?, materials_user_name = ?
    WHERE work_orders_number = ? AND line_number IS ? AND material_code = ?
"""


def get_local_count(conn):
    """Return the total number of rows currently in the local ``work_orders`` table."""
    cursor = conn.cursor()
    cursor.execute("SELECT COUNT(*) FROM work_orders")
    return cursor.fetchone()[0]


def _parse_body(packet):
    """Return the intercepted response body as a Python object, decoding JSON text if needed."""
    body = packet.response.body
    if isinstance(body, (dict, list)):
        return body
    return json.loads(body)


def _extract_result(data):
    """Return the ``result`` dict of an API payload, or ``None`` if the shape is unexpected."""
    if isinstance(data, dict):
        result = data.get("result")
        if isinstance(result, dict):
            return result
    return None


def _click(page, element):
    """Click ``element`` natively, falling back to a JavaScript click if that fails."""
    try:
        element.click()
    except Exception:
        # Native click can fail (e.g. element covered or not interactable);
        # a JS-dispatched click is the best-effort fallback.
        page.run_js("arguments[0].click();", element)


def _wait_for_first_packet(page):
    """Trigger the first-page query and return its intercepted packet (falsy on timeout)."""
    query_btn = page.ele('#Search', timeout=3)
    if not query_btn:
        # ElementUI variant of the page has no #Search id.
        query_btn = page.ele(_QUERY_BTN_XPATH, timeout=3)
    if query_btn:
        _click(page, query_btn)

    packet = page.listen.wait(timeout=15)
    if not packet:
        # Fallback: reloading the page also fires the listing request.
        page.refresh()
        packet = page.listen.wait(timeout=15)
    return packet


def _find_next_button(page):
    """Locate the "next page" button, retrying up to 3 times; return ``None`` if not found."""
    for _ in range(3):
        for locator in _NEXT_BTN_LOCATORS:
            button = page.ele(locator, timeout=3)
            if button:
                return button
        time.sleep(1)
    return None


def _upsert_item(cursor, item):
    """Insert or update one work-order row.

    Returns ``"inserted"``, ``"updated"``, or ``None`` when the row was skipped
    (missing key fields) or already up to date.
    """
    wo_number = item.get("workOrdersNumber")
    line_no = item.get("lineNumber")
    mat_code = item.get("materialCode")
    if not wo_number or not mat_code:
        return None

    key = (wo_number, line_no, mat_code)
    cursor.execute(_SELECT_SQL, key)
    existing = cursor.fetchone()

    new_status = item.get("status")
    new_total_issue_number = item.get("hasIssueNumber")

    if existing is None:
        cursor.execute(_INSERT_SQL, (
            wo_number,
            line_no,
            mat_code,
            item.get("materialName"),
            item.get("materialSpecification"),
            new_status,
            item.get("unitName"),
            item.get("costPrice"),
            item.get("issueNumber"),
            new_total_issue_number,
            item.get("issueAmount"),
            item.get("issueAmountTotal"),
            item.get("executorUserName"),
            item.get("executionTime"),
            item.get("productionOrderNo"),
            item.get("warehouseName"),
            item.get("materialsUserName"),
            item.get("workOrdersRemark"),
        ))
        return "inserted"

    # Row exists: only rewrite it when status or issued quantity changed.
    # Values are compared as strings because the stored and remote types
    # may differ.
    old_status, old_total_issue_number = existing[0], existing[1]
    if (str(old_status) == str(new_status)
            and str(old_total_issue_number) == str(new_total_issue_number)):
        return None

    cursor.execute(_UPDATE_SQL, (
        new_status,
        item.get("costPrice"),
        item.get("issueNumber"),
        new_total_issue_number,
        item.get("issueAmount"),
        item.get("issueAmountTotal"),
        item.get("executorUserName"),
        item.get("executionTime"),
        item.get("warehouseName"),
        item.get("materialsUserName"),
        wo_number,
        line_no,
        mat_code,
    ))
    return "updated"


def _store_page(conn, cursor, items):
    """Upsert every item of one page, commit, and return ``(inserted, updated)`` counts."""
    inserted = 0
    updated = 0
    for item in items:
        if not isinstance(item, dict):
            continue
        outcome = _upsert_item(cursor, item)
        if outcome == "inserted":
            inserted += 1
        elif outcome == "updated":
            updated += 1
    conn.commit()
    return inserted, updated


def _run_sync(conn, page, local_count):
    """Open the work-order page and sync pages into ``conn`` until the data is caught up.

    Paging always starts at page 1 and moves forward; it stops early once
    ``_MAX_UNCHANGED_PAGES`` consecutive pages produced no insert and no update.
    """
    log("INFO", f"正在进入工单页面: {PAGE_URL}")
    page.get(PAGE_URL)

    table = page.ele("xpath://table | .el-table__body", timeout=10)
    if not table:
        log("WARN", "未加载出工单页面表格元素,继续尝试监听...")

    # Short hard delay so the page's JavaScript has finished running.
    time.sleep(2)

    log("INFO", f"开启底层数据拦截网: {API_TARGET}")
    page.listen.start(API_TARGET)

    packet = _wait_for_first_packet(page)
    if not packet:
        log("ERR", "未能拦截到第一页数据,无法获取线上总条数。")
        return

    first_result = _extract_result(_parse_body(packet))
    # ``or 0`` guards against an explicit null totalCount in the payload.
    remote_count = (first_result.get("totalCount") or 0) if first_result else 0

    log("INFO", f"🌐 线上 ERP 系统当前总条数: {remote_count} 条")

    if remote_count <= local_count:
        log("INFO", f"本地已有 {local_count} 条数据,但根据策略,我们将强制进行一轮全量更新检查...")

    log("INFO", f"🔥 准备进行全量跳页抓取...")

    start_page = 1
    end_page = math.ceil(remote_count / _PAGE_SIZE)
    log("INFO", f"🎯 增量抓取策略启动:从第 {start_page} 页向后抓取,直至遇到全为已存旧数据的页面。")

    cursor = conn.cursor()
    total_inserted = 0
    total_updated = 0
    consecutive_old_pages = 0
    current_page = start_page

    while current_page <= end_page:
        result = _extract_result(_parse_body(packet))
        if result is None:
            # Unexpected payload shape: handle the page as empty rather than
            # reusing counters from a previous page.
            log("WARN", f"第 {current_page} 页响应结构异常,按空页处理。")
            items = []
        else:
            items = result.get("items") or []

        page_inserted, page_updated = _store_page(conn, cursor, items)
        total_inserted += page_inserted
        total_updated += page_updated
        log("OK", f"第 {current_page} 页处理完毕: 新增 {page_inserted} 条, 更新 {page_updated} 条。")

        if page_inserted == 0 and page_updated == 0:
            consecutive_old_pages += 1
            log("INFO", f"⚡️ 第 {current_page} 页全为无变动的旧数据 (累计 {consecutive_old_pages} 页)")
            if consecutive_old_pages >= _MAX_UNCHANGED_PAGES:
                log("OK", f"🎉 连续 {_MAX_UNCHANGED_PAGES} 页未发现新数据或变动数据,增量抓取完成,提前结束!")
                break
        else:
            # Any insert or update resets the streak.
            consecutive_old_pages = 0

        if current_page < end_page:
            # Randomised pause between page turns.
            delay = random.uniform(1.5, 3.5)
            log("INFO", f"⏳ 停顿 {delay:.2f} 秒后点击下一页...")
            time.sleep(delay)

            next_btn = _find_next_button(page)
            if not next_btn:
                log("ERR", "重试 3 次后仍然找不到下一页按钮!")
                break

            _click(page, next_btn)
            packet = page.listen.wait(timeout=15)
            if not packet:
                log("ERR", f"第 {current_page + 1} 页请求超时!")
                break

        current_page += 1

    # Report inserts and updates together, as the message wording states.
    log("OK", f"🎉 增量同步大功告成!总计向数据库执行了 {total_inserted + total_updated} 次插入/更新操作!")


def fetch_work_orders_incremental():
    """Run one incremental sync of remote work orders into the local database.

    Errors raised while driving the page or writing rows are logged and
    swallowed; errors from initial setup (database init, local count, page
    acquisition) propagate to the caller. The database connection is always
    closed and the network listener is always stopped on exit.
    """
    log("INFO", "=== 🚀 启动生产工单查询 - 智能增量同步 ===")

    # init_db also makes sure the table structure exists.
    conn = init_db()
    try:
        local_count = get_local_count(conn)
        log("INFO", f"📦 本地数据库当前总计: {local_count} 条数据")

        page = get_page(port=9222)
        try:
            _run_sync(conn, page, local_count)
        except Exception as e:
            log("ERR", f"发生全局异常: {e}")
            traceback.print_exc()
        finally:
            if page:
                try:
                    page.listen.stop()
                except Exception:
                    # Best effort: the listener may never have been started.
                    pass
    finally:
        if conn:
            conn.close()


if __name__ == "__main__":
    fetch_work_orders_incremental()