# datie-bom/browser_login/auto_fetch_abnormal_report.py

import sys
import time
from pathlib import Path
import datetime
import calendar
import json
sys.path.insert(0, str(Path(__file__).parent))
from login import get_page
from config import OUTPUT_DIR

def navigate_to_report(page):
    """Open the site home page and walk the menu to the work-order issue report.

    The function expands the "自定义报表管理" menu levels, opens the
    "自定义报表" entry, selects the table row whose text contains
    "生产工单发料异常检查报表" and finally clicks "进入自定义报表".

    Args:
        page: Browser page object returned by ``login.get_page``. It must
            provide ``get``, ``wait.load_start``, ``ele`` and ``eles``.

    Returns:
        True when the "进入自定义报表" button was found and clicked; False when
        the report row or the button is missing, or when any exception was
        raised while navigating (the exception is printed, not re-raised).
    """
    print("正在打开主页...")
    page.get("https://yunmes.tftykj.cn/")
    page.wait.load_start()
    time.sleep(2)

    print("正在打开 生产工单发料异常检查报表...")
    try:
        first_level = page.ele('text=自定义报表管理')
        if first_level:
            print("点击第一级...")
            first_level.click()
            time.sleep(1)

        # Locate the second level that appears once the first one is expanded.
        for menu_item in page.eles('text:自定义报表管理'):
            try:
                menu_item.click()
            except Exception:
                # Best effort: some matches may be hidden or not clickable.
                # Unlike a bare ``except``, this still lets Ctrl-C through.
                pass
        time.sleep(1)

        for menu_item in page.eles('text:自定义报表'):
            # The fuzzy locator also matches longer labels; keep exact ones.
            if menu_item.text != '自定义报表':
                continue
            try:
                menu_item.click()
                print("点击第三级...")
            except Exception:
                # Best effort, same reasoning as above.
                pass
        time.sleep(2)

        report_cell = page.ele('text:生产工单发料异常检查报表', timeout=5)
        if not report_cell:
            print("未能找到 '生产工单发料异常检查报表'")
            return False

        print("找到报表行，选中...")
        report_cell.parent('tag:tr').click()
        time.sleep(0.5)

        enter_button = page.ele('text=进入自定义报表')
        if not enter_button:
            print("未找到进入按钮。")
            return False

        print("点击进入自定义报表...")
        enter_button.click()
        time.sleep(3)
        print("成功进入报表！")
        return True
    except Exception as e:
        print(f"执行导航过程中发生异常: {e}")
        return False

# URL fragments that identify the report-data requests among captured traffic.
_REPORT_URL_MARKERS = ('SearchCustomReportBySQL_Proxy', 'CustomTableViewData', 'SeachList')

# The query button lives inside an iframe, so it is clicked from injected JS.
# The script returns true when at least one button was clicked.
_CLICK_SEARCH_JS = """
        var iframes = document.querySelectorAll('iframe');
        var clicked = false;
        for(var j=0; j<iframes.length; j++) {
            try {
                var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
                var btn = doc.querySelector('#onSearch');
                if(!btn) {
                    var spans = doc.querySelectorAll('.l-btn-text');
                    for(var i=0; i<spans.length; i++) {
                        if(spans[i].innerText === '查询') {
                            btn = spans[i].parentNode.parentNode;
                            break;
                        }
                    }
                }
                if(btn) {
                    btn.click();
                    console.log('Clicked search button inside iframe');
                    clicked = true;
                }
            } catch(e) {}
        }
        return clicked;
    """

# Clicks the pagination "next" control in every iframe that has one.
_CLICK_NEXT_PAGE_JS = """
            var iframes = document.querySelectorAll('iframe');
            for(var j=0; j<iframes.length; j++) {
                try {
                    var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
                    var nextBtn = doc.querySelector('.pagination-next');
                    if(nextBtn && nextBtn.tagName === 'SPAN') {
                        nextBtn = nextBtn.parentNode;
                    }
                    if(nextBtn) {
                        nextBtn.click();
                    }
                } catch(e) {}
            }
        """


def _timestamp():
    """Return the current local time as ``HH:MM:SS`` for log prefixes."""
    return datetime.datetime.now().strftime('%H:%M:%S')


def _build_filter_js(first_day, last_day, page_size):
    """Build the script that sets the date range, clears combos and sets the page size.

    The script runs in the top document and reaches into every iframe, calling
    the ``datebox`` / ``combobox`` / ``pagination`` jQuery plugin methods there.
    """
    return f"""
        var iframes = document.querySelectorAll('iframe');
        for(var j=0; j<iframes.length; j++) {{
            try {{
                var doc = iframes[j].contentDocument || iframes[j].contentWindow.document;
                var win = iframes[j].contentWindow;

                // 1. 设置开始日期
                var startInputs = doc.querySelectorAll('.input_StartValue.datebox-f');
                if (startInputs.length > 0) {{
                    win.$(startInputs[0]).datebox('setValue', '{first_day}');
                }}

                // 2. 设置结束日期
                var endInputs = doc.querySelectorAll('.input_EndValue.datebox-f');
                if (endInputs.length > 0) {{
                    win.$(endInputs[0]).datebox('setValue', '{last_day}');
                }}

                // 3. 清理所有下拉框（包括发料情况）
                var combos = doc.querySelectorAll('.combobox-f, .textbox-f');
                for(var i=0; i<combos.length; i++) {{
                    try {{ win.$(combos[i]).combobox('clear'); }} catch(e) {{}}
                }}

                // 4. [提速黑科技]：强行把每页请求的数量从 50 条改为 500 条
                // 找到底部的分页组件并修改它的 pageSize，这样点击查询时就会一次请求 500 条
                var paginations = doc.querySelectorAll('.pagination');
                for(var i=0; i<paginations.length; i++) {{
                    try {{ win.$(paginations[i]).pagination({{pageSize: {page_size}}}); }} catch(e) {{}}
                }}
            }} catch(e) {{}}
        }}
    """


def _drain_packets(tab, idle_timeout=1.0, max_wait=10.0):
    """Collect the packets currently captured by the tab's listener into a list.

    ``listen.steps()`` is documented by DrissionPage as a lazy iterable that,
    without a timeout, never ends; materialising it with an idle timeout gives
    a real list that supports ``len()``. ``max_wait`` caps the total time so a
    page that keeps issuing requests cannot hold the drain open indefinitely.
    """
    deadline = time.monotonic() + max_wait
    collected = []
    for packet in tab.listen.steps(timeout=idle_timeout):
        collected.append(packet)
        if time.monotonic() >= deadline:
            break
    return collected


def _parse_report_packet(packet):
    """Return ``(total_count, items)`` for a report response, or None if the shape differs.

    Raises:
        ValueError, TypeError: when the body is not valid JSON or the total
            count cannot be converted to an integer.
    """
    body = packet.response.body
    data = body if isinstance(body, (dict, list)) else json.loads(body)
    if not isinstance(data, dict):
        return None
    result = data.get('result')
    if not isinstance(result, dict) or 'items' not in result:
        return None
    items = result['items']
    if items is None:
        items = []
    if not isinstance(items, list):
        return None
    return int(result.get('totalCount') or 0), items


def _process_packets(packets, current_page, page_size):
    """Import every report response found in ``packets``.

    Returns:
        A ``(found_data, total_pages, inserted)`` tuple: whether any report
        payload was recognised, the page count derived from the last
        recognised payload (1 if none), and the number of rows stored.
    """
    found_data = False
    total_pages = 1
    inserted_total = 0

    for packet in packets:
        url = packet.url
        if not any(marker in url for marker in _REPORT_URL_MARKERS):
            continue
        print(f"[{_timestamp()}] 命中目标 URL: {url[:100]}...")

        if not (packet.method == 'POST' and packet.response and packet.response.body):
            continue
        print(f"[{_timestamp()}] 这是一个 POST 请求，且包含 response body")

        try:
            parsed = _parse_report_packet(packet)
        except (ValueError, TypeError) as e:
            print(f"[{_timestamp()}] 解析数据包出错: {e}")
            continue
        if parsed is None:
            print(f"[{_timestamp()}] 数据结构不匹配。")
            continue

        total_count, items = parsed
        print("===================================")
        print(f"✅ 成功拦截到报表数据API (第 {current_page} 页)")
        print(f"✅ 数据总条数: {total_count}, 当前页条数: {len(items)}")
        print("===================================")

        # Ceiling division; an empty report still counts as a single page.
        total_pages = (total_count + page_size - 1) // page_size if total_count > 0 else 1

        # Storing is best effort: a database failure is reported but must not
        # abort the crawl. The import stays lazy so that a missing module is
        # reported the same way.
        try:
            import import_to_sqlite
            if items:
                inserted = import_to_sqlite.import_abnormal_report_data(items)
                inserted_total += inserted
                print(f"✅ 成功将本页 {inserted} 条异常报表数据存入数据库")
        except Exception as db_err:
            print(f"❌ 保存异常报表数据到数据库失败: {db_err}")

        found_data = True

    return found_data, total_pages, inserted_total


def fetch_report_data(page, *, page_size=500):
    """Query the report for the current month and store every page of results.

    The function works on the most recently opened tab: it sets the order-date
    range to the current calendar month, clears the combo filters, raises the
    grid page size, clicks the query button and then reads the report API
    responses captured by the network listener, paging forward until the last
    page has been stored.

    Args:
        page: Browser page object returned by ``login.get_page``.
        page_size: Rows requested per page. The same value is pushed into the
            grid's pagination widget and used to compute the page count.

    Raises:
        ValueError: if ``page_size`` is not a positive integer.
    """
    page_size = int(page_size)
    if page_size <= 0:
        raise ValueError("page_size must be a positive integer")

    # Give the newly opened tab time to become ready.
    time.sleep(3)
    target_tab = page.get_tab(page.latest_tab)

    # Block (up to 10 s) until the filter label is rendered.
    target_tab.ele('text:下单日期(开始)', timeout=10)

    now = datetime.datetime.now()
    days_in_month = calendar.monthrange(now.year, now.month)[1]
    first_day = datetime.date(now.year, now.month, 1).strftime('%Y-%m-%d')
    last_day = datetime.date(now.year, now.month, days_in_month).strftime('%Y-%m-%d')

    print(f"设置下单日期为当月: {first_day} 至 {last_day}，并清理发料情况过滤条件...")

    # Inject JS into every iframe to drive the widgets directly.
    target_tab.run_js(_build_filter_js(first_day, last_day, page_size))

    print("日期和条件设置完成，准备查询...")

    print("正在查找并点击查询按钮...")

    total_inserted = 0
    target_tab.listen.start()
    try:
        # The button is inside an iframe, so it is clicked through JS rather
        # than located from the tab object.
        clicked = target_tab.run_js(_CLICK_SEARCH_JS)
        if not clicked:
            print("⚠️ 未在任何 iframe 中找到查询按钮，后续可能抓不到数据。")

        print("点击指令已发送，等待报表数据加载 (3秒)...")
        time.sleep(3)

        current_page = 1

        print("开始监听网络请求，寻找 API 数据包...")
        while True:
            packets = _drain_packets(target_tab)
            print(f"[{_timestamp()}] 收集到 {len(packets)} 个网络数据包，正在解析...")

            found_data, total_pages, inserted = _process_packets(packets, current_page, page_size)
            total_inserted += inserted

            if not found_data:
                print(f"[{_timestamp()}] 第 {current_page} 页等待了3秒，没有拦截到匹配的报表数据...")

                # One more chance: wait again and parse whatever arrived late.
                print("再等待3秒重试...")
                time.sleep(3)
                retry_packets = _drain_packets(target_tab)
                print(f"重试收集到 {len(retry_packets)} 个数据包。")
                if retry_packets:
                    found_data, total_pages, inserted = _process_packets(
                        retry_packets, current_page, page_size
                    )
                    total_inserted += inserted
                if not found_data:
                    print("彻底没有数据，停止抓取。")
                    break

            if current_page >= total_pages:
                print(f"已到达最后一页 (共 {total_pages} 页)，抓取完成！")
                break

            print(f"准备抓取下一页 (第 {current_page + 1} 页)...")
            time.sleep(1)

            # Click "next page" (again through the iframes).
            target_tab.run_js(_CLICK_NEXT_PAGE_JS)

            # Wait for the new network request.
            time.sleep(2)
            current_page += 1
    finally:
        # Always release the network listener, even if the crawl fails.
        target_tab.listen.stop()

    print(f"🎉 异常报表全量抓取大功告成！总计入库: {total_inserted} 条。")

if __name__ == '__main__':
    # Script entry point: attach to the browser on port 9222, then fetch the
    # report only if navigation to it succeeded.
    page = get_page(port=9222)
    if navigate_to_report(page):
        fetch_report_data(page)