332 lines
15 KiB
Python
332 lines
15 KiB
Python
import sys
|
||
import time
|
||
from pathlib import Path
|
||
import datetime
|
||
import calendar
|
||
import json
|
||
sys.path.insert(0, str(Path(__file__).parent))
|
||
from login import get_page
|
||
from config import OUTPUT_DIR
|
||
|
||
def navigate_to_report(page):
    """Open the MES home page and drill down into the anomaly report.

    Clicks through the custom-report menu entries by their visible text,
    selects the table row containing '生产工单发料异常检查报表' and presses
    the '进入自定义报表' button.

    Args:
        page: Browser page object as returned by ``login.get_page``.

    Returns:
        True when the enter-report button was clicked, False when the report
        row or the button could not be found or any step raised an exception.
    """
    print("正在打开主页...")
    page.get("https://yunmes.tftykj.cn/")
    page.wait.load_start()
    time.sleep(2)

    print("正在打开 生产工单发料异常检查报表...")
    try:
        top_menu = page.ele('text=自定义报表管理')
        if top_menu:
            print("点击第一级...")
            top_menu.click()
            time.sleep(1)

        # Second level: click every element whose text contains the menu name.
        # Some matches may not be clickable, so individual failures are
        # ignored on purpose (but Ctrl-C is no longer swallowed).
        for menu_item in page.eles('text:自定义报表管理'):
            try:
                menu_item.click()
            except Exception:
                pass
        time.sleep(1)

        # Third level: only elements whose text is exactly '自定义报表'.
        for menu_item in page.eles('text:自定义报表'):
            if menu_item.text != '自定义报表':
                continue
            try:
                menu_item.click()
                print("点击第三级...")
            except Exception:
                pass
        time.sleep(2)

        report_cell = page.ele('text:生产工单发料异常检查报表', timeout=5)
        if not report_cell:
            print("未能找到 '生产工单发料异常检查报表'")
            return False

        print("找到报表行,选中...")
        report_cell.parent('tag:tr').click()
        time.sleep(0.5)

        enter_button = page.ele('text=进入自定义报表')
        if not enter_button:
            print("未找到进入按钮。")
            return False

        print("点击进入自定义报表...")
        enter_button.click()
        time.sleep(3)
        print("成功进入报表!")
        return True
    except Exception as e:
        print(f"执行导航过程中发生异常: {e}")
        return False
|
||
|
||
# Page size assumed when no response has carried any rows yet.
_DEFAULT_PAGE_SIZE = 50

# URL fragments that identify the report-data API responses to capture.
_REPORT_URL_MARKERS = (
    'SearchCustomReportBySQL_Proxy',
    'CustomTableViewData',
    'SeachList',
)

# Clicks the search button inside whichever iframe contains it.
_CLICK_SEARCH_JS = """
var iframes = document.querySelectorAll('iframe');
var clicked = false;
for(var j=0; j<iframes.length; j++) {
    try {
        var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
        var btn = doc.querySelector('#onSearch');
        if(!btn) {
            var spans = doc.querySelectorAll('.l-btn-text');
            for(var i=0; i<spans.length; i++) {
                if(spans[i].innerText === '查询') {
                    btn = spans[i].parentNode.parentNode;
                    break;
                }
            }
        }
        if(btn) {
            btn.click();
            console.log('Clicked search button inside iframe');
            clicked = true;
        }
    } catch(e) {}
}
return clicked;
"""

# Clicks the pagination "next" button inside every iframe that has one.
_CLICK_NEXT_JS = """
var iframes = document.querySelectorAll('iframe');
for(var j=0; j<iframes.length; j++) {
    try {
        var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
        var nextBtn = doc.querySelector('.pagination-next');
        if(nextBtn && nextBtn.tagName === 'SPAN') {
            nextBtn = nextBtn.parentNode;
        }
        if(nextBtn) {
            nextBtn.click();
        }
    } catch(e) {}
}
"""


def _now_str():
    """Return the current local time formatted for log lines."""
    return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def _build_filter_js(first_day, last_day):
    """Build the JS that sets the date range, clears combos and sets page size."""
    return f"""
var iframes = document.querySelectorAll('iframe');
for(var j=0; j<iframes.length; j++) {{
    try {{
        var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
        var win = iframes[j].contentWindow;

        // 1. 自动设置下单日期为当月
        // (暴力匹配所有的 datebox,前两个一般就是开始和结束)
        var dates = doc.querySelectorAll('.datebox-f, .datetimebox-f');
        if (dates.length >= 2) {{
            try {{ win.$(dates[0]).datetimebox('setValue', '{first_day} 00:00:00'); }} catch(e) {{}}
            try {{ win.$(dates[0]).datebox('setValue', '{first_day} 00:00:00'); }} catch(e) {{}}

            try {{ win.$(dates[1]).datetimebox('setValue', '{last_day} 23:59:59'); }} catch(e) {{}}
            try {{ win.$(dates[1]).datebox('setValue', '{last_day} 23:59:59'); }} catch(e) {{}}
        }} else {{
            // 备用方案:寻找附近元素
            var allSpans = doc.querySelectorAll('span, td, th');
            for(var k=0; k<allSpans.length; k++) {{
                var txt = allSpans[k].innerText || '';
                if (txt.indexOf('下单日期(开始)') !== -1) {{
                    var input = allSpans[k].parentNode.parentNode.querySelector('.textbox-f');
                    if(input) {{
                        try {{ win.$(input).datetimebox('setValue', '{first_day} 00:00:00'); }} catch(e) {{}}
                        try {{ win.$(input).datebox('setValue', '{first_day} 00:00:00'); }} catch(e) {{}}
                    }}
                }}
                if (txt.indexOf('下单日期(结束)') !== -1) {{
                    var input = allSpans[k].parentNode.parentNode.querySelector('.textbox-f');
                    if(input) {{
                        try {{ win.$(input).datetimebox('setValue', '{last_day} 23:59:59'); }} catch(e) {{}}
                        try {{ win.$(input).datebox('setValue', '{last_day} 23:59:59'); }} catch(e) {{}}
                    }}
                }}
            }}
        }}

        // 3. 清理发料情况下拉框 (千万不能选 textbox-f,否则会把刚才填的日期清空!)
        var combos = doc.querySelectorAll('.combobox-f');
        for(var i=0; i<combos.length; i++) {{
            try {{ win.$(combos[i]).combobox('clear'); }} catch(e) {{}}
        }}

        // 4. [提速黑科技]:强行把每页请求的数量从 50 条改为 500 条
        var paginations = doc.querySelectorAll('.pagination');
        for(var i=0; i<paginations.length; i++) {{
            try {{ win.$(paginations[i]).pagination({{pageSize: 500}}); }} catch(e) {{}}
        }}
    }} catch(e) {{}}
}}
"""


def _build_jump_js(page_number):
    """Build the JS that asks every pagination widget to select *page_number*."""
    return f"""
var iframes = document.querySelectorAll('iframe');
for(var j=0; j<iframes.length; j++) {{
    try {{
        var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
        var win = iframes[j].contentWindow;
        var paginations = doc.querySelectorAll('.pagination');
        for(var i=0; i<paginations.length; i++) {{
            try {{ win.$(paginations[i]).pagination('select', {page_number}); }} catch(e) {{}}
        }}
    }} catch(e) {{}}
}}
"""


def _load_resume_page(state_file, month_key):
    """Return the page to resume from, or 1 when no usable checkpoint exists."""
    if not state_file.exists():
        return 1
    try:
        with open(state_file, 'r', encoding='utf-8') as f:
            state = json.load(f)
        if state.get('month') == month_key:
            saved_page = state.get('current_page', 1)
            # The value is later interpolated into injected JS, so accept
            # nothing but a plain integer.
            if isinstance(saved_page, int) and saved_page > 1:
                print(f"发现上次中断记录,准备从第 {saved_page} 页恢复抓取...")
                return saved_page
    except Exception as e:
        print(f"读取状态文件失败: {e}")
    return 1


def _save_progress(state_file, month_key, current_page, total_pages):
    """Write the checkpoint file; a failure is reported but never raised."""
    try:
        with open(state_file, 'w', encoding='utf-8') as f:
            json.dump({
                'month': month_key,
                'current_page': current_page,
                'total_pages': total_pages
            }, f)
    except (OSError, TypeError, ValueError) as e:
        print(f"保存进度失败: {e}")


def _total_pages(total_count, page_size):
    """Return the number of pages needed for *total_count* rows (at least 1)."""
    if total_count <= 0:
        return 1
    return (total_count + page_size - 1) // page_size


def fetch_report_data(page):
    """Query the current month's report and import every page into the database.

    Works on the browser's latest tab: injects JS to set the order-date range
    to the current month, clears combobox filters, requests 500 rows per
    page, clicks search, then captures the report API responses page by page
    and passes their rows to ``import_to_sqlite.import_abnormal_report_data``.
    Progress is checkpointed to ``OUTPUT_DIR / 'abnormal_sync_state.json'``
    so an interrupted run in the same month resumes from the saved page.

    Args:
        page: Browser page object as returned by ``login.get_page``.

    Returns:
        False when capturing packets raised or the tab was redirected to the
        home page; otherwise None (implicitly) once the last page is done.
    """
    # Give the newly opened tab time to become ready.
    time.sleep(3)
    target_tab = page.get_tab(page.latest_tab)

    # Wait for the start-date label to appear.
    target_tab.ele('text:下单日期(开始)', timeout=10)

    now = datetime.datetime.now()
    month_key = f"{now.year}-{now.month}"
    days_in_month = calendar.monthrange(now.year, now.month)[1]
    first_day = datetime.date(now.year, now.month, 1).strftime('%Y-%m-%d')
    last_day = datetime.date(now.year, now.month, days_in_month).strftime('%Y-%m-%d')

    # Resume support: pick up the page recorded by an interrupted run.
    state_file = OUTPUT_DIR / 'abnormal_sync_state.json'
    start_page = _load_resume_page(state_file, month_key)

    print(f"设置下单日期为当月: {first_day} 至 {last_day},并清理发料情况过滤条件...")

    # The form lives inside iframes, so the EasyUI calls are injected as JS.
    target_tab.run_js(_build_filter_js(first_day, last_day))

    print("日期和条件设置完成,准备查询...")

    print("正在查找并点击查询按钮...")

    # The search button is inside an iframe as well; start listening first so
    # the response to the click is captured, then click through JS.
    target_tab.listen.start()
    target_tab.run_js(_CLICK_SEARCH_JS)

    print("点击指令已发送,等待报表数据加载 (3秒)...")
    time.sleep(3)

    current_page = 1
    total_inserted = 0
    total_pages = 1
    # Largest row count seen in one response. The final page is usually
    # shorter, so using its length as the page size would inflate
    # total_pages and the loop would never reach its end condition.
    page_size = 0

    print("开始监听网络请求,寻找 API 数据包...")
    while True:
        print(f"[{_now_str()}] 正在收集并解析网络数据包...")
        try:
            # NOTE(review): steps() may return a lazy iterable (confirm in
            # the DrissionPage docs); draining it here keeps errors raised
            # while capturing inside this handler.
            packets = list(target_tab.listen.steps(timeout=5))
        except Exception as e:
            print(f"❌ 监听数据包时页面发生异常 (可能是会话超时跳转): {e}")
            print("♻️ 准备触发断点续传机制,重新进入菜单...")
            return False

        # True while page 1 has been requested only as a springboard for
        # jumping to the checkpointed page.
        awaiting_resume_jump = current_page == 1 and start_page > 1
        found_data = False

        for packet in packets:
            if not any(marker in packet.url for marker in _REPORT_URL_MARKERS):
                continue
            print(f"[{_now_str()}] 命中目标 URL: {packet.url[:100]}...")
            if not (packet.method == 'POST' and packet.response and packet.response.body):
                continue
            print(f"[{_now_str()}] 这是一个 POST 请求,且包含 response body")
            try:
                body = packet.response.body
                data = body if isinstance(body, (dict, list)) else json.loads(body)

                result = data.get('result') if isinstance(data, dict) else None
                if not (isinstance(result, dict) and 'items' in result):
                    print(f"[{_now_str()}] 数据结构不匹配。")
                    continue

                total_count = result.get('totalCount', 0)
                items = result.get('items', [])

                # Derive the page count from the largest page observed, in
                # case forcing 500 rows per page did not take effect.
                page_size = max(page_size, len(items))
                total_pages = _total_pages(total_count, page_size or _DEFAULT_PAGE_SIZE)

                print("===================================")
                print(f"✅ 成功拦截到报表数据API (第 {current_page}/{total_pages} 页)")
                print(f"✅ 数据总条数: {total_count}, 当前页条数: {len(items)}")
                print("===================================")

                # Store the rows; a database failure is reported but does
                # not stop the crawl.
                try:
                    import import_to_sqlite
                    if items:
                        inserted = import_to_sqlite.import_abnormal_report_data(items)
                        total_inserted += inserted
                        print(f"✅ 成功将本页 {inserted} 条异常报表数据存入数据库")
                except Exception as db_err:
                    print(f"❌ 保存异常报表数据到数据库失败: {db_err}")

                found_data = True

                # Do not overwrite the checkpoint while still on the
                # springboard page of a resumed run.
                if not awaiting_resume_jump:
                    _save_progress(state_file, month_key, current_page, total_pages)
            except Exception as e:
                print(f"[{_now_str()}] 解析数据包出错: {e}")

        if not found_data:
            # A redirect to the home page suggests the session timed out.
            if "Home/Index" in target_tab.url or target_tab.url == "https://yunmes.tftykj.cn/":
                print("❌ 警告:页面已跳转回首页,可能是会话超时或被强制登出。")
                print(f"进度已保存 (停留在第 {current_page} 页),下次启动抓取任务将自动从中断处继续!")
                return False

            print(f"[{_now_str()}] 第 {current_page} 页等待了超时,没有拦截到匹配的报表数据...")

            # Wait a little and poll the listener again.
            print("再等待3秒重试...")
            time.sleep(3)
            continue

        # Resumed run: page 1 has loaded, now jump to the checkpointed page.
        if awaiting_resume_jump:
            print("===================================")
            print(f"⏭️ 触发断点续传,跳过第 1 页,直接跳转到第 {start_page} 页...")
            print("===================================")
            current_page = start_page
            target_tab.run_js(_build_jump_js(start_page))
            time.sleep(2)
            continue

        if current_page >= total_pages:
            print(f"已到达最后一页 (共 {total_pages} 页),抓取完成!")
            # The crawl is complete, so the checkpoint is no longer needed.
            try:
                if state_file.exists():
                    state_file.unlink()
            except OSError as e:
                print(f"清除进度文件失败: {e}")
            break

        print(f"准备抓取下一页 (第 {current_page + 1} 页)...")
        time.sleep(1)

        # Click "next page" (again through the iframes).
        target_tab.run_js(_CLICK_NEXT_JS)

        # Give the new request time to be issued.
        time.sleep(2)
        current_page += 1

    print(f"🎉 异常报表全量抓取大功告成!总计入库: {total_inserted} 条。")
|
||
|
||
# Script entry point: attach to the browser on debug port 9222, open the
# report, and scrape it only when navigation succeeded.
if __name__ == '__main__':
    page = get_page(port=9222)
    if navigate_to_report(page):
        fetch_report_data(page)
|