抓取生产工单,抓取发料异常

This commit is contained in:
hjq
2026-06-11 19:38:16 +08:00
parent a160d5d48f
commit 94c81cdc4f
10 changed files with 160 additions and 28 deletions

View File

@@ -73,6 +73,22 @@ def fetch_report_data(page):
first_day = datetime.date(now.year, now.month, 1).strftime('%Y-%m-%d')
last_day = datetime.date(now.year, now.month, calendar.monthrange(now.year, now.month)[1]).strftime('%Y-%m-%d')
# ==== 断点续传逻辑 ====
state_file = OUTPUT_DIR / 'abnormal_sync_state.json'
start_page = 1
if state_file.exists():
try:
with open(state_file, 'r', encoding='utf-8') as f:
state = json.load(f)
if state.get('month') == f"{now.year}-{now.month}":
saved_page = state.get('current_page', 1)
if saved_page > 1:
start_page = saved_page
print(f"发现上次中断记录,准备从第 {start_page} 页恢复抓取...")
except Exception as e:
print(f"读取状态文件失败: {e}")
# ====================
print(f"设置下单日期为当月: {first_day}{last_day},并清理发料情况过滤条件...")
# 使用注入到全部 iframe 的 JS 强制执行 EasyUI 方法
@@ -121,7 +137,6 @@ def fetch_report_data(page):
}}
// 4. [提速黑科技]:强行把每页请求的数量从 50 条改为 500 条
// 找到底部的分页组件并修改它的 pageSize这样点击查询时就会一次请求 500 条
var paginations = doc.querySelectorAll('.pagination');
for(var i=0; i<paginations.length; i++) {{
try {{ win.$(paginations[i]).pagination({{pageSize: 500}}); }} catch(e) {{}}
@@ -168,20 +183,25 @@ def fetch_report_data(page):
current_page = 1
total_inserted = 0
total_pages = 1
print("开始监听网络请求,寻找 API 数据包...")
while True:
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 正在收集并解析网络数据包...")
packets = target_tab.listen.steps(timeout=5)
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 正在收集并解析网络数据包...")
try:
packets = target_tab.listen.steps(timeout=5)
except Exception as e:
print(f"❌ 监听数据包时页面发生异常 (可能是会话超时跳转): {e}")
print("♻️ 准备触发断点续传机制,重新进入菜单...")
return False
found_data = False
total_pages = 1
for p in packets:
if 'SearchCustomReportBySQL_Proxy' in p.url or 'CustomTableViewData' in p.url or 'SeachList' in p.url:
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 命中目标 URL: {p.url[:100]}...")
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 命中目标 URL: {p.url[:100]}...")
if p.method == 'POST' and p.response and p.response.body:
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 这是一个 POST 请求,且包含 response body")
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 这是一个 POST 请求,且包含 response body")
try:
body = p.response.body
data = body if isinstance(body, (dict, list)) else json.loads(body)
@@ -211,14 +231,34 @@ def fetch_report_data(page):
print(f"❌ 保存异常报表数据到数据库失败: {db_err}")
found_data = True
# 只有当我们不是处于准备跳页的初始阶段时,才将进度记录到文件
if not (current_page == 1 and start_page > 1):
try:
with open(state_file, 'w', encoding='utf-8') as f:
json.dump({
'month': f"{now.year}-{now.month}",
'current_page': current_page,
'total_pages': total_pages
}, f)
except Exception as e:
print(f"保存进度失败: {e}")
pass
else:
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 数据结构不匹配。")
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 数据结构不匹配。")
except Exception as e:
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 解析数据包出错: {e}")
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 解析数据包出错: {e}")
pass
if not found_data:
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 第 {current_page} 页等待了超时,没有拦截到匹配的报表数据...")
# 检查是否由于会话超时被系统强制跳转回首页
if "Home/Index" in target_tab.url or target_tab.url == "https://yunmes.tftykj.cn/":
print("❌ 警告:页面已跳转回首页,可能是会话超时或被强制登出。")
print(f"进度已保存 (停留在第 {current_page} 页),下次启动抓取任务将自动从中断处继续!")
return False
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 第 {current_page} 页等待了超时,没有拦截到匹配的报表数据...")
# 再给一次机会:等待 3 秒后重试
print("再等待3秒重试...")
@@ -226,8 +266,36 @@ def fetch_report_data(page):
# 重新让上面解析
continue
# ====== 触发断点续传跳页 ======
if current_page == 1 and start_page > 1:
print(f"===================================")
print(f"⏭️ 触发断点续传,跳过第 1 页,直接跳转到第 {start_page} 页...")
print(f"===================================")
current_page = start_page
target_tab.run_js(f"""
var iframes = document.querySelectorAll('iframe');
for(var j=0; j<iframes.length; j++) {{
try {{
var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
var win = iframes[j].contentWindow;
var paginations = doc.querySelectorAll('.pagination');
for(var i=0; i<paginations.length; i++) {{
try {{ win.$(paginations[i]).pagination('select', {start_page}); }} catch(e) {{}}
}}
}} catch(e) {{}}
}}
""")
time.sleep(2)
continue
# ==============================
if current_page >= total_pages:
print(f"已到达最后一页 (共 {total_pages} 页),抓取完成!")
try:
if state_file.exists():
state_file.unlink() # 抓取完毕后清除记录
except:
pass
break
print(f"准备抓取下一页 (第 {current_page + 1} 页)...")