Files
datie-bom/browser_login/auto_fetch_abnormal_report.py

264 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import sys
import time
from pathlib import Path
import datetime
import calendar
import json
sys.path.insert(0, str(Path(__file__).parent))
from login import get_page
from config import OUTPUT_DIR
def navigate_to_report(page):
    """Open the site home page and enter the target custom report.

    The function loads the home page, clicks through the elements labelled
    "自定义报表管理" and "自定义报表", selects the table row containing
    "生产工单发料异常检查报表" and finally clicks "进入自定义报表".

    Args:
        page: Browser page object obtained from ``login.get_page``
            (presumably a DrissionPage page; verify against ``login``).

    Returns:
        bool: True when the "进入自定义报表" button was found and clicked,
        False when the report row or the button is missing or any step
        raised an exception.
    """
    print("正在打开主页...")
    page.get("https://yunmes.tftykj.cn/")
    page.wait.load_start()
    time.sleep(2)
    print("正在打开 生产工单发料异常检查报表...")
    try:
        first_level = page.ele('text=自定义报表管理')
        if first_level:
            print("点击第一级...")
            first_level.click()
            time.sleep(1)

        # Second level: click every element whose text contains the label.
        # Individual clicks are best-effort (some matches may be hidden or
        # not clickable), so a failure is reported and the loop moves on.
        for candidate in page.eles('text:自定义报表管理'):
            try:
                candidate.click()
            except Exception as click_err:
                print(f"跳过无法点击的元素: {click_err}")
        time.sleep(1)

        # Third level: the fuzzy lookup also matches the longer labels
        # above, so only elements whose text is exactly the label count.
        for candidate in page.eles('text:自定义报表'):
            if candidate.text != '自定义报表':
                continue
            try:
                candidate.click()
                print("点击第三级...")
            except Exception as click_err:
                print(f"跳过无法点击的元素: {click_err}")
        time.sleep(2)

        report_cell = page.ele('text:生产工单发料异常检查报表', timeout=5)
        if not report_cell:
            print("未能找到 '生产工单发料异常检查报表'")
            return False

        print("找到报表行,选中...")
        # Select the whole table row that contains the report name.
        report_cell.parent('tag:tr').click()
        time.sleep(0.5)

        enter_btn = page.ele('text=进入自定义报表')
        if not enter_btn:
            print("未找到进入按钮。")
            return False

        print("点击进入自定义报表...")
        enter_btn.click()
        time.sleep(3)
        print("成功进入报表!")
        return True
    except Exception as e:
        # Top-level boundary of the navigation step: report and signal
        # failure to the caller instead of crashing the script.
        print(f"执行导航过程中发生异常: {e}")
        return False
# URL fragments that identify a report-data request. Spelling (including
# "SeachList") is kept exactly as found; presumably it mirrors the server
# endpoint names.
_REPORT_URL_MARKERS = (
    'SearchCustomReportBySQL_Proxy',
    'CustomTableViewData',
    'SeachList',
)

# Page size assumed when a response carries a total count but no items.
_FALLBACK_PAGE_SIZE = 50


def _timestamp():
    """Return the current local time as ``HH:MM:SS`` for log prefixes."""
    return datetime.datetime.now().strftime('%H:%M:%S')


def _apply_report_filters(tab, first_day, last_day):
    """Set the date range, clear combo boxes and raise the page size via JS.

    The script walks every iframe of the tab and calls the EasyUI widget
    methods through the iframe's own ``$``.
    """
    tab.run_js(f"""
        var iframes = document.querySelectorAll('iframe');
        for(var j=0; j<iframes.length; j++) {{
            try {{
                var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
                var win = iframes[j].contentWindow;
                // 1. 自动设置下单日期为当月
                // (暴力匹配所有的 datebox前两个一般就是开始和结束)
                var dates = doc.querySelectorAll('.datebox-f, .datetimebox-f');
                if (dates.length >= 2) {{
                    try {{ win.$(dates[0]).datetimebox('setValue', '{first_day} 00:00:00'); }} catch(e) {{}}
                    try {{ win.$(dates[0]).datebox('setValue', '{first_day} 00:00:00'); }} catch(e) {{}}
                    try {{ win.$(dates[1]).datetimebox('setValue', '{last_day} 23:59:59'); }} catch(e) {{}}
                    try {{ win.$(dates[1]).datebox('setValue', '{last_day} 23:59:59'); }} catch(e) {{}}
                }} else {{
                    // 备用方案:寻找附近元素
                    var allSpans = doc.querySelectorAll('span, td, th');
                    for(var k=0; k<allSpans.length; k++) {{
                        var txt = allSpans[k].innerText || '';
                        if (txt.indexOf('下单日期(开始)') !== -1) {{
                            var input = allSpans[k].parentNode.parentNode.querySelector('.textbox-f');
                            if(input) {{
                                try {{ win.$(input).datetimebox('setValue', '{first_day} 00:00:00'); }} catch(e) {{}}
                                try {{ win.$(input).datebox('setValue', '{first_day} 00:00:00'); }} catch(e) {{}}
                            }}
                        }}
                        if (txt.indexOf('下单日期(结束)') !== -1) {{
                            var input = allSpans[k].parentNode.parentNode.querySelector('.textbox-f');
                            if(input) {{
                                try {{ win.$(input).datetimebox('setValue', '{last_day} 23:59:59'); }} catch(e) {{}}
                                try {{ win.$(input).datebox('setValue', '{last_day} 23:59:59'); }} catch(e) {{}}
                            }}
                        }}
                    }}
                }}
                // 3. 清理发料情况下拉框 (千万不能选 textbox-f否则会把刚才填的日期清空)
                var combos = doc.querySelectorAll('.combobox-f');
                for(var i=0; i<combos.length; i++) {{
                    try {{ win.$(combos[i]).combobox('clear'); }} catch(e) {{}}
                }}
                // 4. [提速黑科技]:强行把每页请求的数量从 50 条改为 500 条
                // 找到底部的分页组件并修改它的 pageSize这样点击查询时就会一次请求 500 条
                var paginations = doc.querySelectorAll('.pagination');
                for(var i=0; i<paginations.length; i++) {{
                    try {{ win.$(paginations[i]).pagination({{pageSize: 500}}); }} catch(e) {{}}
                }}
            }} catch(e) {{}}
        }}
    """)


def _click_search(tab):
    """Click the search button inside any iframe; return the script's result."""
    return tab.run_js("""
        var iframes = document.querySelectorAll('iframe');
        var clicked = false;
        for(var j=0; j<iframes.length; j++) {
            try {
                var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
                var btn = doc.querySelector('#onSearch');
                if(!btn) {
                    var spans = doc.querySelectorAll('.l-btn-text');
                    for(var i=0; i<spans.length; i++) {
                        if(spans[i].innerText === '查询') {
                            btn = spans[i].parentNode.parentNode;
                            break;
                        }
                    }
                }
                if(btn) {
                    btn.click();
                    console.log('Clicked search button inside iframe');
                    clicked = true;
                }
            } catch(e) {}
        }
        return clicked;
    """)


def _click_next_page(tab):
    """Click the pagination "next" control inside every iframe that has one."""
    tab.run_js("""
        var iframes = document.querySelectorAll('iframe');
        for(var j=0; j<iframes.length; j++) {
            try {
                var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
                var nextBtn = doc.querySelector('.pagination-next');
                if(nextBtn && nextBtn.tagName === 'SPAN') {
                    nextBtn = nextBtn.parentNode;
                }
                if(nextBtn) {
                    nextBtn.click();
                }
            } catch(e) {}
        }
    """)


def _parse_report_packet(packet):
    """Return ``(total_count, items)`` from a packet body, or None if it has another shape."""
    body = packet.response.body
    data = body if isinstance(body, (dict, list)) else json.loads(body)
    if not isinstance(data, dict):
        return None
    result = data.get('result')
    if not isinstance(result, dict) or 'items' not in result:
        return None
    # A JSON null for either field is treated like a missing value.
    return result.get('totalCount') or 0, result.get('items') or []


def _store_items(items):
    """Persist one page of rows through ``import_to_sqlite``; return the inserted count."""
    if not items:
        return 0
    try:
        # Imported lazily so that a missing or broken storage module is
        # reported as a storage failure instead of aborting the capture.
        import import_to_sqlite
        inserted = import_to_sqlite.import_abnormal_report_data(items)
        print(f"✅ 成功将本页 {inserted} 条异常报表数据存入数据库")
        return inserted
    except Exception as db_err:
        print(f"❌ 保存异常报表数据到数据库失败: {db_err}")
        return 0


def fetch_report_data(page, max_empty_retries=5):
    """Query the report for the current month and store every result page.

    Steps: take the latest tab, set the order-date filters to the first and
    last day of the current month, click the search button, then read the
    captured network packets page by page and hand each page's ``items`` to
    ``import_to_sqlite.import_abnormal_report_data``.

    Args:
        page: Browser page object obtained from ``login.get_page``
            (presumably a DrissionPage page; verify against ``login``).
        max_empty_retries: How many consecutive polling rounds without a
            matching data packet are retried before the capture gives up.
            Without a bound, a missing response would loop forever.

    Returns:
        None. Progress and the total inserted row count are printed.
    """
    # Wait for the new tab to be ready
    time.sleep(3)
    target_tab = page.get_tab(page.latest_tab)
    # Wait (up to 10s) for the filter label to appear before touching the form.
    target_tab.ele('text:下单日期(开始)', timeout=10)

    today = datetime.date.today()
    days_in_month = calendar.monthrange(today.year, today.month)[1]
    first_day = today.replace(day=1).strftime('%Y-%m-%d')
    last_day = today.replace(day=days_in_month).strftime('%Y-%m-%d')
    print(f"设置下单日期为当月: {first_day} ~ {last_day},并清理发料情况过滤条件...")
    _apply_report_filters(target_tab, first_day, last_day)
    print("日期和条件设置完成,准备查询...")

    print("正在查找并点击查询按钮...")
    # The listener must be running before the click so that the response
    # triggered by the search is captured.
    target_tab.listen.start()
    completed = False
    total_inserted = 0
    try:
        if not _click_search(target_tab):
            print("⚠️ 未找到查询按钮,可能无法拦截到报表数据。")
        print("点击指令已发送,等待报表数据加载 (3秒)...")
        time.sleep(3)

        current_page = 1
        total_pages = 1
        page_size = 0  # largest page seen so far; a short last page must not shrink it
        empty_rounds = 0
        print("开始监听网络请求,寻找 API 数据包...")
        while True:
            print(f"[{_timestamp()}] 正在收集并解析网络数据包...")
            found_data = False
            for packet in target_tab.listen.steps(timeout=5):
                if not any(marker in packet.url for marker in _REPORT_URL_MARKERS):
                    continue
                print(f"[{_timestamp()}] 命中目标 URL: {packet.url[:100]}...")
                if not (packet.method == 'POST' and packet.response and packet.response.body):
                    continue
                print(f"[{_timestamp()}] 这是一个 POST 请求,且包含 response body")
                try:
                    parsed = _parse_report_packet(packet)
                except Exception as e:
                    print(f"[{_timestamp()}] 解析数据包出错: {e}")
                    continue
                if parsed is None:
                    print(f"[{_timestamp()}] 数据结构不匹配。")
                    continue

                total_count, items = parsed
                # Derive the page count from the largest page observed, so
                # the (usually shorter) final page cannot inflate it.
                page_size = max(page_size, len(items))
                effective_size = page_size or _FALLBACK_PAGE_SIZE
                if total_count > 0:
                    total_pages = (total_count + effective_size - 1) // effective_size
                else:
                    total_pages = 1
                print("===================================")
                print(f"✅ 成功拦截到报表数据API (第 {current_page}/{total_pages} 页)")
                print(f"✅ 数据总条数: {total_count}, 当前页条数: {len(items)}")
                print("===================================")
                total_inserted += _store_items(items)
                found_data = True

            if not found_data:
                empty_rounds += 1
                print(f"[{_timestamp()}] 第 {current_page} 页等待了超时,没有拦截到匹配的报表数据...")
                if empty_rounds > max_empty_retries:
                    print(f"❌ 连续 {empty_rounds} 次未拦截到第 {current_page} 页数据,停止抓取。")
                    break
                print("再等待3秒重试...")
                time.sleep(3)
                continue

            empty_rounds = 0
            if current_page >= total_pages:
                print(f"已到达最后一页 (共 {total_pages} 页),抓取完成!")
                completed = True
                break

            print(f"准备抓取下一页 (第 {current_page + 1} 页)...")
            time.sleep(1)
            _click_next_page(target_tab)
            # Give the next request time to be issued before polling again.
            time.sleep(2)
            current_page += 1
    finally:
        # Always release the network listener, even on errors.
        target_tab.listen.stop()

    if completed:
        print(f"🎉 异常报表全量抓取大功告成!总计入库: {total_inserted} 条。")
    else:
        print(f"⚠️ 异常报表抓取未完成,已入库: {total_inserted} 条。")
if __name__ == '__main__':
    # Script entry point: obtain the page via get_page on port 9222, then
    # fetch the report only if navigation reported success.
    page = get_page(port=9222)
    if navigate_to_report(page):
        fetch_report_data(page)