抓取生产工单,抓取发料异常

This commit is contained in:
hjq
2026-06-11 15:58:56 +08:00
parent 66eecd0daa
commit 5b19790037
40 changed files with 4942 additions and 54 deletions

View File

@@ -0,0 +1,249 @@
import sys
import time
from pathlib import Path
import datetime
import calendar
import json
sys.path.insert(0, str(Path(__file__).parent))
from login import get_page
from config import OUTPUT_DIR
def navigate_to_report(page):
    """Open the MES home page and enter the work-order issue-anomaly report.

    Walks the "自定义报表管理" -> "自定义报表" menu, selects the table row of
    "生产工单发料异常检查报表" and presses "进入自定义报表".

    Args:
        page: Browser page object (the script entry point obtains it from
            ``get_page``); it must provide ``get``, ``wait``, ``ele`` and
            ``eles``.

    Returns:
        True when the "进入自定义报表" button was clicked; False when the
        report row or the button could not be found, or when any step raised
        an exception.
    """
    print("正在打开主页...")
    page.get("https://yunmes.tftykj.cn/")
    page.wait.load_start()
    time.sleep(2)
    print("正在打开 生产工单发料异常检查报表...")
    try:
        first_level = page.ele('text=自定义报表管理')
        if first_level:
            print("点击第一级...")
            first_level.click()
            time.sleep(1)

        # Second level: click every element whose text contains the menu
        # caption, which includes the entry revealed by the first click.
        for menu in page.eles('text:自定义报表管理'):
            try:
                menu.click()
            except Exception:
                # Best effort: some matches cannot be clicked; skip them.
                # (Not a bare ``except`` so Ctrl+C still interrupts the run.)
                continue
        time.sleep(1)

        # Third level: the lookup is a substring match, so keep only the
        # elements whose text is exactly the leaf caption.
        for menu in page.eles('text:自定义报表'):
            if menu.text != '自定义报表':
                continue
            try:
                menu.click()
                print("点击第三级...")
            except Exception:
                continue
        time.sleep(2)

        report_row = page.ele('text:生产工单发料异常检查报表', timeout=5)
        if not report_row:
            print("未能找到 '生产工单发料异常检查报表'")
            return False
        print("找到报表行,选中...")
        # Click the enclosing <tr> so the whole grid row becomes selected.
        report_row.parent('tag:tr').click()
        time.sleep(0.5)

        enter_btn = page.ele('text=进入自定义报表')
        if not enter_btn:
            print("未找到进入按钮。")
            return False
        print("点击进入自定义报表...")
        enter_btn.click()
        time.sleep(3)
        print("成功进入报表!")
        return True
    except Exception as e:
        print(f"执行导航过程中发生异常: {e}")
        return False
# Substrings that identify the report-data API among the captured requests.
_REPORT_URL_MARKERS = (
    'SearchCustomReportBySQL_Proxy',
    'CustomTableViewData',
    'SeachList',
)
# Page size the injected script asks the pager for; keep in sync with the
# ``pageSize`` literal inside the script in fetch_report_data.
_REQUESTED_PAGE_SIZE = 500
# Extra 3-second waits granted to a page before the crawl gives up on it.
_MAX_PAGE_RETRIES = 3


def _timestamp():
    """Return the current wall-clock time as HH:MM:SS for log prefixes."""
    return datetime.datetime.now().strftime('%H:%M:%S')


def _drain_packets(tab, idle_timeout=1):
    """Return the packets the tab's listener has captured so far, as a list.

    NOTE(review): per the DrissionPage listener documentation ``steps()`` is
    meant to be iterated and, without a timeout, keeps waiting for traffic.
    Passing ``timeout`` ends the iteration once no packet arrives within
    ``idle_timeout`` seconds; ``list()`` makes ``len()`` safe either way.
    """
    return list(tab.listen.steps(timeout=idle_timeout))


def _extract_report_result(packet):
    """Return ``(total_count, items)`` for a report-data packet, else None.

    A packet qualifies when its URL contains one of the known markers, it is
    a POST with a response body, and the decoded body looks like
    ``{"result": {"items": ..., "totalCount": ...}}``.
    """
    url = packet.url
    if not any(marker in url for marker in _REPORT_URL_MARKERS):
        return None
    print(f"[{_timestamp()}] 命中目标 URL: {url[:100]}...")

    response = packet.response
    if packet.method != 'POST' or not response or not response.body:
        return None
    print(f"[{_timestamp()}] 这是一个 POST 请求,且包含 response body")

    try:
        body = response.body
        # The body may already be decoded JSON; otherwise parse the raw text.
        data = body if isinstance(body, (dict, list)) else json.loads(body)
    except (TypeError, ValueError) as e:
        print(f"[{_timestamp()}] 解析数据包出错: {e}")
        return None

    result = data.get('result') if isinstance(data, dict) else None
    if not isinstance(result, dict) or 'items' not in result:
        print(f"[{_timestamp()}] 数据结构不匹配。")
        return None

    try:
        total_count = int(result.get('totalCount') or 0)
    except (TypeError, ValueError):
        total_count = 0
    return total_count, result.get('items') or []


def _save_items(items):
    """Store one page of rows in the database; return the inserted count.

    Failures (including a missing ``import_to_sqlite`` module) are reported
    and counted as zero so that one bad page does not abort the crawl.
    """
    if not items:
        return 0
    try:
        import import_to_sqlite
        inserted = import_to_sqlite.import_abnormal_report_data(items) or 0
    except Exception as db_err:
        print(f"❌ 保存异常报表数据到数据库失败: {db_err}")
        return 0
    print(f"✅ 成功将本页 {inserted} 条异常报表数据存入数据库")
    return inserted


def _wait_for_report_results(tab, current_page):
    """Return the report payloads captured for *current_page* ([] if none).

    Every collected batch is parsed, including the ones gathered on a retry,
    and the number of retries is bounded so unrelated background traffic
    cannot keep the crawl waiting forever.
    """
    for attempt in range(_MAX_PAGE_RETRIES + 1):
        if attempt:
            print(f"[{_timestamp()}] 第 {current_page} 页等待了3秒没有拦截到匹配的报表数据...")
            print("再等待3秒重试...")
            time.sleep(3)
        packets = _drain_packets(tab)
        print(f"[{_timestamp()}] 收集到 {len(packets)} 个网络数据包,正在解析...")
        results = [
            parsed
            for parsed in map(_extract_report_result, packets)
            if parsed is not None
        ]
        if results:
            return results
    return []


def fetch_report_data(page):
    """Query the current month's report and store every result page.

    On the most recently opened tab this sets the order-date range to the
    current month, clears the combobox filters, clicks the search button,
    then reads the report API responses from the network listener page by
    page and passes the rows to ``import_to_sqlite``.

    Args:
        page: Browser page object; its latest tab must already show the
            report (see ``navigate_to_report``).

    Returns:
        None. Progress and the total stored row count are printed.
    """
    # Give the newly opened report tab time to appear, then wait for the
    # filter label as a sign that the form has rendered.
    time.sleep(3)
    target_tab = page.get_tab(page.latest_tab)
    target_tab.ele('text:下单日期(开始)', timeout=10)

    today = datetime.date.today()
    month_days = calendar.monthrange(today.year, today.month)[1]
    first_day = today.replace(day=1).strftime('%Y-%m-%d')
    last_day = today.replace(day=month_days).strftime('%Y-%m-%d')
    print(f"设置下单日期为当月: {first_day} ~ {last_day},并清理发料情况过滤条件...")

    # The form lives inside iframes, so the EasyUI widgets are driven by a
    # script that walks every iframe of the tab.
    target_tab.run_js(f"""
        var iframes = document.querySelectorAll('iframe');
        for(var j=0; j<iframes.length; j++) {{
            try {{
                var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
                var win = iframes[j].contentWindow;
                // 1. 设置开始日期
                var startInputs = doc.querySelectorAll('.input_StartValue.datebox-f');
                if (startInputs.length > 0) {{
                    win.$(startInputs[0]).datebox('setValue', '{first_day}');
                }}
                // 2. 设置结束日期
                var endInputs = doc.querySelectorAll('.input_EndValue.datebox-f');
                if (endInputs.length > 0) {{
                    win.$(endInputs[0]).datebox('setValue', '{last_day}');
                }}
                // 3. 清理所有下拉框(包括发料情况)
                var combos = doc.querySelectorAll('.combobox-f, .textbox-f');
                for(var i=0; i<combos.length; i++) {{
                    try {{ win.$(combos[i]).combobox('clear'); }} catch(e) {{}}
                }}
                // 4. [提速黑科技]:强行把每页请求的数量从 50 条改为 500 条
                // 找到底部的分页组件并修改它的 pageSize这样点击查询时就会一次请求 500 条
                var paginations = doc.querySelectorAll('.pagination');
                for(var i=0; i<paginations.length; i++) {{
                    try {{ win.$(paginations[i]).pagination({{pageSize: 500}}); }} catch(e) {{}}
                }}
            }} catch(e) {{}}
        }}
    """)
    print("日期和条件设置完成,准备查询...")
    print("正在查找并点击查询按钮...")

    # Start capturing before the click so the search response is not missed.
    # The button is inside an iframe, hence the click is issued from script.
    target_tab.listen.start()
    clicked = target_tab.run_js("""
        var iframes = document.querySelectorAll('iframe');
        var clicked = false;
        for(var j=0; j<iframes.length; j++) {
            try {
                var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
                var btn = doc.querySelector('#onSearch');
                if(!btn) {
                    var spans = doc.querySelectorAll('.l-btn-text');
                    for(var i=0; i<spans.length; i++) {
                        if(spans[i].innerText === '查询') {
                            btn = spans[i].parentNode.parentNode;
                            break;
                        }
                    }
                }
                if(btn) {
                    btn.click();
                    console.log('Clicked search button inside iframe');
                    clicked = true;
                }
            } catch(e) {}
        }
        return clicked;
    """)
    if clicked is False:
        print("⚠️ 未在任何 iframe 中找到查询按钮,后续可能拦截不到数据。")
    print("点击指令已发送,等待报表数据加载 (3秒)...")
    time.sleep(3)

    current_page = 1
    total_inserted = 0
    page_size = _REQUESTED_PAGE_SIZE
    print("开始监听网络请求,寻找 API 数据包...")
    try:
        while True:
            results = _wait_for_report_results(target_tab, current_page)
            if not results:
                print("彻底没有数据,停止抓取。")
                break

            total_pages = 1
            for total_count, items in results:
                print("===================================")
                print(f"✅ 成功拦截到报表数据API (第 {current_page} 页)")
                print(f"✅ 数据总条数: {total_count}, 当前页条数: {len(items)}")
                print("===================================")
                # The pager override above fails silently, so trust what the
                # server actually returned: a first page shorter than the
                # total reveals the real page size.
                if current_page == 1 and 0 < len(items) < total_count:
                    page_size = len(items)
                if total_count > 0:
                    total_pages = (total_count + page_size - 1) // page_size
                else:
                    total_pages = 1
                total_inserted += _save_items(items)

            if current_page >= total_pages:
                print(f"已到达最后一页 (共 {total_pages} 页),抓取完成!")
                break

            print(f"准备抓取下一页 (第 {current_page + 1} 页)...")
            time.sleep(1)
            # The pager is inside an iframe as well, so click it from script.
            target_tab.run_js("""
                var iframes = document.querySelectorAll('iframe');
                for(var j=0; j<iframes.length; j++) {
                    try {
                        var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
                        var nextBtn = doc.querySelector('.pagination-next');
                        if(nextBtn && nextBtn.tagName === 'SPAN') {
                            nextBtn = nextBtn.parentNode;
                        }
                        if(nextBtn) {
                            nextBtn.click();
                        }
                    } catch(e) {}
                }
            """)
            # Let the next page's request go out before draining again.
            time.sleep(2)
            current_page += 1
    finally:
        # Stop capturing so packets do not pile up after the crawl.
        target_tab.listen.stop()
    print(f"🎉 异常报表全量抓取大功告成!总计入库: {total_inserted} 条。")
# Script entry point: attach to the browser on debugging port 9222, open the
# report, and crawl it only when navigation reported success.
if __name__ == '__main__':
    page = get_page(port=9222)
    if navigate_to_report(page):
        fetch_report_data(page)