抓取生产工单,抓取发料异常
This commit is contained in:
@@ -73,6 +73,22 @@ def fetch_report_data(page):
|
||||
first_day = datetime.date(now.year, now.month, 1).strftime('%Y-%m-%d')
|
||||
last_day = datetime.date(now.year, now.month, calendar.monthrange(now.year, now.month)[1]).strftime('%Y-%m-%d')
|
||||
|
||||
# ==== 断点续传逻辑 ====
|
||||
state_file = OUTPUT_DIR / 'abnormal_sync_state.json'
|
||||
start_page = 1
|
||||
if state_file.exists():
|
||||
try:
|
||||
with open(state_file, 'r', encoding='utf-8') as f:
|
||||
state = json.load(f)
|
||||
if state.get('month') == f"{now.year}-{now.month}":
|
||||
saved_page = state.get('current_page', 1)
|
||||
if saved_page > 1:
|
||||
start_page = saved_page
|
||||
print(f"发现上次中断记录,准备从第 {start_page} 页恢复抓取...")
|
||||
except Exception as e:
|
||||
print(f"读取状态文件失败: {e}")
|
||||
# ====================
|
||||
|
||||
print(f"设置下单日期为当月: {first_day} 至 {last_day},并清理发料情况过滤条件...")
|
||||
|
||||
# 使用注入到全部 iframe 的 JS 强制执行 EasyUI 方法
|
||||
@@ -121,7 +137,6 @@ def fetch_report_data(page):
|
||||
}}
|
||||
|
||||
// 4. [提速黑科技]:强行把每页请求的数量从 50 条改为 500 条
|
||||
// 找到底部的分页组件并修改它的 pageSize,这样点击查询时就会一次请求 500 条
|
||||
var paginations = doc.querySelectorAll('.pagination');
|
||||
for(var i=0; i<paginations.length; i++) {{
|
||||
try {{ win.$(paginations[i]).pagination({{pageSize: 500}}); }} catch(e) {{}}
|
||||
@@ -168,20 +183,25 @@ def fetch_report_data(page):
|
||||
|
||||
current_page = 1
|
||||
total_inserted = 0
|
||||
total_pages = 1
|
||||
|
||||
print("开始监听网络请求,寻找 API 数据包...")
|
||||
while True:
|
||||
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 正在收集并解析网络数据包...")
|
||||
packets = target_tab.listen.steps(timeout=5)
|
||||
|
||||
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 正在收集并解析网络数据包...")
|
||||
try:
|
||||
packets = target_tab.listen.steps(timeout=5)
|
||||
except Exception as e:
|
||||
print(f"❌ 监听数据包时页面发生异常 (可能是会话超时跳转): {e}")
|
||||
print("♻️ 准备触发断点续传机制,重新进入菜单...")
|
||||
return False
|
||||
|
||||
found_data = False
|
||||
total_pages = 1
|
||||
|
||||
for p in packets:
|
||||
if 'SearchCustomReportBySQL_Proxy' in p.url or 'CustomTableViewData' in p.url or 'SeachList' in p.url:
|
||||
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 命中目标 URL: {p.url[:100]}...")
|
||||
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 命中目标 URL: {p.url[:100]}...")
|
||||
if p.method == 'POST' and p.response and p.response.body:
|
||||
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 这是一个 POST 请求,且包含 response body")
|
||||
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 这是一个 POST 请求,且包含 response body")
|
||||
try:
|
||||
body = p.response.body
|
||||
data = body if isinstance(body, (dict, list)) else json.loads(body)
|
||||
@@ -211,14 +231,34 @@ def fetch_report_data(page):
|
||||
print(f"❌ 保存异常报表数据到数据库失败: {db_err}")
|
||||
|
||||
found_data = True
|
||||
|
||||
# 只有当我们不是处于准备跳页的初始阶段时,才将进度记录到文件
|
||||
if not (current_page == 1 and start_page > 1):
|
||||
try:
|
||||
with open(state_file, 'w', encoding='utf-8') as f:
|
||||
json.dump({
|
||||
'month': f"{now.year}-{now.month}",
|
||||
'current_page': current_page,
|
||||
'total_pages': total_pages
|
||||
}, f)
|
||||
except Exception as e:
|
||||
print(f"保存进度失败: {e}")
|
||||
pass
|
||||
|
||||
else:
|
||||
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 数据结构不匹配。")
|
||||
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 数据结构不匹配。")
|
||||
except Exception as e:
|
||||
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 解析数据包出错: {e}")
|
||||
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 解析数据包出错: {e}")
|
||||
pass
|
||||
|
||||
if not found_data:
|
||||
print(f"[{datetime.datetime.now().strftime('%H:%M:%S')}] 第 {current_page} 页等待了超时,没有拦截到匹配的报表数据...")
|
||||
# 检查是否由于会话超时被系统强制跳转回首页
|
||||
if "Home/Index" in target_tab.url or target_tab.url == "https://yunmes.tftykj.cn/":
|
||||
print("❌ 警告:页面已跳转回首页,可能是会话超时或被强制登出。")
|
||||
print(f"进度已保存 (停留在第 {current_page} 页),下次启动抓取任务将自动从中断处继续!")
|
||||
return False
|
||||
|
||||
print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] 第 {current_page} 页等待了超时,没有拦截到匹配的报表数据...")
|
||||
|
||||
# 再给一次机会等3秒
|
||||
print("再等待3秒重试...")
|
||||
@@ -226,8 +266,36 @@ def fetch_report_data(page):
|
||||
# 重新让上面解析
|
||||
continue
|
||||
|
||||
# ====== 触发断点续传跳页 ======
|
||||
if current_page == 1 and start_page > 1:
|
||||
print(f"===================================")
|
||||
print(f"⏭️ 触发断点续传,跳过第 1 页,直接跳转到第 {start_page} 页...")
|
||||
print(f"===================================")
|
||||
current_page = start_page
|
||||
target_tab.run_js(f"""
|
||||
var iframes = document.querySelectorAll('iframe');
|
||||
for(var j=0; j<iframes.length; j++) {{
|
||||
try {{
|
||||
var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
|
||||
var win = iframes[j].contentWindow;
|
||||
var paginations = doc.querySelectorAll('.pagination');
|
||||
for(var i=0; i<paginations.length; i++) {{
|
||||
try {{ win.$(paginations[i]).pagination('select', {start_page}); }} catch(e) {{}}
|
||||
}}
|
||||
}} catch(e) {{}}
|
||||
}}
|
||||
""")
|
||||
time.sleep(2)
|
||||
continue
|
||||
# ==============================
|
||||
|
||||
if current_page >= total_pages:
|
||||
print(f"已到达最后一页 (共 {total_pages} 页),抓取完成!")
|
||||
try:
|
||||
if state_file.exists():
|
||||
state_file.unlink() # 抓取完毕后清除记录
|
||||
except:
|
||||
pass
|
||||
break
|
||||
|
||||
print(f"准备抓取下一页 (第 {current_page + 1} 页)...")
|
||||
|
||||
Reference in New Issue
Block a user