# Source listing captured from a repository file viewer:
# datie-bom/browser_login/fetch_work_orders_incremental.py
#
# 242 lines
# 11 KiB
# Python

"""
Production work-order report - smart incremental sync script.
"""
import sys
import json
import time
import math
import random
import sqlite3
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from login import get_page, log
from config import DB_PATH
from import_to_sqlite import init_db
# Site root of the MES web application (not referenced elsewhere in the visible code).
HOME_URL = "https://yunmes.tftykj.cn/"
# Work-order query page that the sync navigates the browser to.
PAGE_URL = "https://yunmes.tftykj.cn/WorkOrdersQuery"
# Target string handed to the page's network listener to pick out the list API responses.
API_TARGET = "WorkOrdersDetailed_SearchListAll_Proxy"
def get_local_count(conn: sqlite3.Connection) -> int:
    """Return the number of rows currently stored in the ``work_orders`` table.

    Args:
        conn: Open SQLite connection to the local database.

    Returns:
        The row count reported by ``SELECT COUNT(*)``.

    Raises:
        sqlite3.OperationalError: If the ``work_orders`` table does not exist.
    """
    # Connection.execute uses a temporary cursor, so no explicit cursor is
    # left open after the call.
    (row_count,) = conn.execute("SELECT COUNT(*) FROM work_orders").fetchone()
    return row_count
# Rows per result page used to derive the page count from the remote total.
# NOTE(review): assumed to match the page size the web UI requests - confirm.
_PAGE_SIZE = 50

# Paging stops once this many consecutive pages contain no new or changed rows.
_STOP_AFTER_UNCHANGED_PAGES = 2

# Fallback locator for the query button on the ElementUI variant of the page.
_ELEMENT_UI_QUERY_XPATH = (
    'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span'
)

# `IS` rather than `=` on line_number: with `=`, a missing (NULL) line number
# never matches its stored row, so the row would be re-inserted on every run.
# For non-NULL values SQLite evaluates `IS` exactly like `=`.
_SELECT_EXISTING_SQL = """
    SELECT status, total_issue_number FROM work_orders
    WHERE work_orders_number = ? AND line_number IS ? AND material_code = ?
"""

_INSERT_SQL = """
    INSERT INTO work_orders (
        work_orders_number, line_number, material_code, material_name, material_specification,
        status, unit_name, cost_price, issue_number, total_issue_number,
        issue_amount, issue_amount_total, executor_user_name, execution_time,
        production_order_no, warehouse_name, materials_user_name, work_orders_remark
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

_UPDATE_SQL = """
    UPDATE work_orders SET
        status = ?,
        cost_price = ?,
        issue_number = ?,
        total_issue_number = ?,
        issue_amount = ?,
        issue_amount_total = ?,
        executor_user_name = ?,
        execution_time = ?,
        warehouse_name = ?,
        materials_user_name = ?
    WHERE work_orders_number = ? AND line_number IS ? AND material_code = ?
"""


def _click(page, element):
    """Click *element*; if the native click raises, dispatch the click through JavaScript."""
    try:
        element.click()
    except Exception:
        page.run_js("arguments[0].click();", element)


def _extract_result(packet):
    """Return the ``result`` dict carried by an intercepted response, or ``None`` if absent."""
    body = packet.response.body
    # The body may already be decoded; only parse it when it is still raw text.
    data = body if isinstance(body, (dict, list)) else json.loads(body)
    if isinstance(data, dict):
        result = data.get("result")
        if isinstance(result, dict):
            return result
    return None


def _find_next_button(page):
    """Locate the "next page" control, trying both UI variants up to three times."""
    for _ in range(3):
        # EasyUI pager first, then the ElementUI pager button.
        button = page.ele(
            'xpath://*[contains(@class, "pagination-next")]/ancestor::a', timeout=3
        ) or page.ele('xpath://button[contains(@class, "btn-next")]', timeout=3)
        if button:
            return button
        time.sleep(1)
    return None


def _upsert_items(cursor, items):
    """Insert unseen rows and update changed ones; return ``(inserted, updated)``.

    A row is identified by (work order number, line number, material code).
    An existing row is rewritten only when its status or total issued quantity
    differs from the incoming values (compared as strings).
    """
    inserted = 0
    updated = 0
    for item in items:
        wo_number = item.get("workOrdersNumber")
        line_no = item.get("lineNumber")
        mat_code = item.get("materialCode")
        if not wo_number or not mat_code:
            continue

        new_status = item.get("status")
        new_total_issue_number = item.get("hasIssueNumber")

        cursor.execute(_SELECT_EXISTING_SQL, (wo_number, line_no, mat_code))
        existing = cursor.fetchone()

        if not existing:
            cursor.execute(_INSERT_SQL, (
                wo_number, line_no, mat_code,
                item.get("materialName"), item.get("materialSpecification"),
                new_status, item.get("unitName"), item.get("costPrice"),
                item.get("issueNumber"), new_total_issue_number,
                item.get("issueAmount"), item.get("issueAmountTotal"),
                item.get("executorUserName"), item.get("executionTime"),
                item.get("productionOrderNo"), item.get("warehouseName"),
                item.get("materialsUserName"), item.get("workOrdersRemark"),
            ))
            inserted += 1
            continue

        old_status, old_total_issue_number = existing[0], existing[1]
        if (str(old_status) != str(new_status)
                or str(old_total_issue_number) != str(new_total_issue_number)):
            cursor.execute(_UPDATE_SQL, (
                new_status, item.get("costPrice"), item.get("issueNumber"),
                new_total_issue_number, item.get("issueAmount"),
                item.get("issueAmountTotal"), item.get("executorUserName"),
                item.get("executionTime"), item.get("warehouseName"),
                item.get("materialsUserName"),
                wo_number, line_no, mat_code,
            ))
            updated += 1
    return inserted, updated


def _sync_pages(page, conn, local_count):
    """Walk the result pages from page 1, upserting rows until the data stops changing."""
    log("INFO", f"正在进入工单页面: {PAGE_URL}")
    page.get(PAGE_URL)
    if not page.ele("xpath://table | .el-table__body", timeout=10):
        log("WARN", "未加载出工单页面表格元素,继续尝试监听...")
    # Fixed pause so the page's own JavaScript has finished before listening starts.
    time.sleep(2)

    log("INFO", f"开启底层数据拦截网: {API_TARGET}")
    page.listen.start(API_TARGET)

    # Clicking the query button triggers the request for the first page.
    query_btn = page.ele('#Search', timeout=3)
    if not query_btn:
        query_btn = page.ele(_ELEMENT_UI_QUERY_XPATH, timeout=3)
    if query_btn:
        _click(page, query_btn)

    packet = page.listen.wait(timeout=15)
    if not packet:
        # Fallback: reload the page and wait for the request it issues.
        page.refresh()
        packet = page.listen.wait(timeout=15)
    if not packet:
        log("ERR", "未能拦截到第一页数据,无法获取线上总条数。")
        return

    result = _extract_result(packet)
    # `or 0` also covers an explicit null totalCount in the response.
    remote_count = (result.get("totalCount") or 0) if result is not None else 0
    log("INFO", f"🌐 线上 ERP 系统当前总条数: {remote_count}")
    if remote_count <= local_count:
        log("INFO", f"本地已有 {local_count} 条数据,但根据策略,我们将强制进行一轮全量更新检查...")
    log("INFO", "🔥 准备进行全量跳页抓取...")

    # Always start at page 1 and walk forward; the remote total only bounds
    # the walk, the early-stop rule below normally ends it sooner.
    first_page = 1
    end_page = math.ceil(remote_count / _PAGE_SIZE)
    log("INFO", f"🎯 增量抓取策略启动:从第 {first_page} 页向后抓取,直至遇到全为已存旧数据的页面。")

    cursor = conn.cursor()
    total_inserted = 0
    total_updated = 0
    consecutive_old_pages = 0
    current_page = first_page

    while current_page <= end_page:
        if result is None:
            # Unexpected payload shape: skip it without counting it as an "old" page.
            log("WARN", f"第 {current_page} 页响应结构异常,已跳过。")
        else:
            page_inserted, page_updated = _upsert_items(cursor, result.get("items") or [])
            conn.commit()
            total_inserted += page_inserted
            total_updated += page_updated
            log("OK", f"{current_page} 页处理完毕: 新增 {page_inserted} 条, 更新 {page_updated} 条。")

            if page_inserted == 0 and page_updated == 0:
                consecutive_old_pages += 1
                log("INFO", f"⚡️ 第 {current_page} 页全为无变动的旧数据 (累计 {consecutive_old_pages} 页)")
                if consecutive_old_pages >= _STOP_AFTER_UNCHANGED_PAGES:
                    log("OK", f"🎉 连续 {_STOP_AFTER_UNCHANGED_PAGES} 页未发现新数据或变动数据,增量抓取完成,提前结束!")
                    break
            else:
                # Any insert or update resets the streak.
                consecutive_old_pages = 0

        if current_page < end_page:
            delay = random.uniform(1.5, 3.5)
            log("INFO", f"⏳ 停顿 {delay:.2f} 秒后点击下一页...")
            time.sleep(delay)

            next_btn = _find_next_button(page)
            if not next_btn:
                log("ERR", "重试 3 次后仍然找不到下一页按钮!")
                break
            _click(page, next_btn)

            packet = page.listen.wait(timeout=15)
            if not packet:
                log("ERR", f"{current_page + 1} 页请求超时!")
                break
            result = _extract_result(packet)

        current_page += 1

    # Report inserts and updates together, as the message wording promises.
    log("OK", f"🎉 增量同步大功告成!总计向数据库执行了 {total_inserted + total_updated} 次插入/更新操作!")


def fetch_work_orders_incremental():
    """Incrementally sync work-order rows from the web UI into the local SQLite DB.

    Opens the work-order query page in the browser obtained from ``get_page``,
    intercepts the list API responses, and inserts or updates rows in
    ``work_orders`` page by page. Paging ends at the last page, on a
    paging/timeout error, or after two consecutive pages without any new or
    changed row.

    Returns:
        None. Progress and errors are reported through ``log``.

    Raises:
        Exceptions from ``init_db``, ``get_local_count`` or ``get_page``
        propagate to the caller. Exceptions raised during the sync itself are
        logged with a traceback and swallowed.
    """
    log("INFO", "=== 🚀 启动生产工单查询 - 智能增量同步 ===")
    # init_db also makes sure the table structure exists.
    conn = init_db()
    page = None
    try:
        local_count = get_local_count(conn)
        log("INFO", f"📦 本地数据库当前总计: {local_count} 条数据")
        page = get_page(port=9222)
        try:
            _sync_pages(page, conn, local_count)
        except Exception as e:
            import traceback
            log("ERR", f"发生全局异常: {e}")
            traceback.print_exc()
    finally:
        # Runs on every exit path, so the connection is released even when
        # attaching to the browser fails.
        if conn is not None:
            conn.close()
        if page:
            try:
                page.listen.stop()
            except Exception:
                # Best-effort cleanup: the listener may never have been started.
                pass
if __name__ == "__main__":
    # Run one incremental sync pass only when executed as a script.
    fetch_work_orders_incremental()