"""Basis Quality Report - sliding time-window incremental scrape.

Replays the report's search request from inside the browser page with a
hand-built form payload, so that the "下单日期(开始)" / "下单日期(结束)" filters
are pinned to a chosen time window, and stores the returned rows as JSON.
"""

import sys
import json
import time
import random
import urllib.parse
from datetime import datetime, timedelta
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))

from login import get_page, log
from config import OUTPUT_DIR

HOME_URL = "https://yunmes.tftykj.cn/"
API_TARGET = "SearchCustomReportBySQL_Proxy"
SAVE_PATH = OUTPUT_DIR / "basis_quality_incremental.json"

# Pre-encoded filter field names. They are kept as literals (not re-quoted at
# runtime) so the request body stays byte-identical to the captured request;
# note the parentheses are intentionally left unescaped.
_ORDER_DATE = "%E4%B8%8B%E5%8D%95%E6%97%A5%E6%9C%9F"  # 下单日期
_FIELD_ORDER_DATE_END = _ORDER_DATE + "(%E7%BB%93%E6%9D%9F)"  # 下单日期(结束)
_FIELD_ORDER_DATE_START = _ORDER_DATE + "(%E5%BC%80%E5%A7%8B)"  # 下单日期(开始)


def _filter_field_params(index, field_id, encoded_name, encoded_value, enable_val):
    """Return the url-encoded parameters of one ``sqlFilter[fieldList][index]`` entry."""
    prefix = f"sqlFilter%5BfieldList%5D%5B{index}%5D"
    pairs = [
        ("id", field_id),
        ("field", encoded_name),
        ("fieldTranslate", f"%5B{encoded_name}%5D"),
        ("startValue", encoded_value),
        ("endValue", ""),
        ("compareEnum", "0"),
        ("fieldDataType", "2"),
        ("orderNumber", ""),
        ("orderType", "0"),
        ("isTimeLimit", "false"),
        ("limitLength", "0"),
        ("dateType", "1"),
        ("dateDefaultType", "0"),
        ("isSqlField", "false"),
        ("condition", "0"),
        ("getValue", ""),
        ("backgroundColor", ""),
        ("fontColor", ""),
        ("isSeachParam", "true"),
        ("defaultValue", ""),
        ("width", ""),
        ("defaultTime", ""),
        ("searchParamEnableVal", enable_val),
        ("optionMode", "0"),
    ]
    return "&".join(f"{prefix}%5B{key}%5D={value}" for key, value in pairs)


def _build_payload(page_no, encoded_start, encoded_end):
    """Build the form-encoded POST body for one result page of the report."""
    return "&".join(
        [
            f"page={page_no}&rows=50&id=80",
            _filter_field_params(0, "17647", _FIELD_ORDER_DATE_END, encoded_end, "0"),
            _filter_field_params(1, "17646", _FIELD_ORDER_DATE_START, encoded_start, "1"),
            "isAll=false",
        ]
    )


def _open_quality_report(page):
    """Load the home page and click through the side menu to the report.

    Returns:
        True when every menu element was found and clicked, False (after
        logging the missing element) otherwise.
    """
    log("INFO", f"正在回到主页起点: {HOME_URL}")
    page.get(HOME_URL)
    page.wait.load_start()
    time.sleep(2)

    menus = [
        ("进入质量报表", 'xpath://*[@id="el-collapse-content-21"]/div/div/div/div[1]/div/div/div[6]/div')
    ]

    log("INFO", "开始模拟人工点击左侧导航菜单...")
    for name, xpath in menus:
        ele = page.ele(xpath, timeout=5)
        if not ele:
            log("ERR", f"找不到菜单元素: {name}")
            return False
        try:
            ele.click()
        except Exception:
            # The native click failed; fall back to clicking through JS.
            page.run_js("arguments[0].click();", ele)

    log("OK", "✅ 成功点开质量报表界面!")
    time.sleep(2)
    return True


def _request_page(page, payload):
    """Fire the report request from the page context and wait for its packet.

    The fetch runs inside the page; the original author relied on this so the
    request carries the session's cookies/auth. Returns whatever
    ``page.listen.wait`` yields (falsy on timeout).
    """
    # json.dumps yields a correctly escaped JS string literal, so the payload
    # cannot break out of the script regardless of the characters it contains.
    fetch_js = f"""
    fetch('/api/services/TfTechApi/SQLSolution/SearchCustomReportBySQL_Proxy', {{
        method: 'POST',
        headers: {{
            'accept': 'application/json, text/javascript, */*; q=0.01',
            'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'x-requested-with': 'XMLHttpRequest'
        }},
        body: {json.dumps(payload)}
    }});
    """
    page.run_js(fetch_js)
    return page.listen.wait(timeout=15)


def _extract_items(data):
    """Return the rows of a decoded response, or None if it has no ``result``.

    ``result`` may be a dict holding an ``items`` list or the list itself; any
    other shape is treated as an empty page.
    """
    if not (isinstance(data, dict) and "result" in data):
        return None
    result = data["result"]
    if isinstance(result, dict):
        return result.get("items", []) or []
    if isinstance(result, list):
        return result
    return []


def _save_items(items):
    """Write *items* to SAVE_PATH as pretty-printed UTF-8 JSON."""
    with open(SAVE_PATH, "w", encoding="utf-8") as f:
        json.dump(items, f, ensure_ascii=False, indent=2)


def fetch_basis_quality_incremental(*, days: int = 90, max_pages: int = 3) -> None:
    """Scrape the quality report for the trailing time window and save it.

    The window runs from 00:00:00 of the day *days* days ago to 23:59:59 of
    today (local time). Pages are requested one at a time until *max_pages*
    is reached, a page comes back empty or malformed, or a request times out.
    Rows collected so far are written to SAVE_PATH even when an exception
    interrupts pagination.

    Args:
        days: Width of the sliding window in days.
        max_pages: Upper bound on pages fetched (the default of 3 is the
            original test-mode limit).

    Exceptions raised after the browser page is obtained are logged rather
    than propagated.
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    start_date_str = start_date.strftime("%Y-%m-%d 00:00:00")
    end_date_str = end_date.strftime("%Y-%m-%d 23:59:59")

    # URL-encode the timestamps for the form body.
    encoded_start = urllib.parse.quote(start_date_str)
    encoded_end = urllib.parse.quote(end_date_str)

    log("INFO", "=== 🚀 启动质量报表 - 时间滑动增量抓取 ===")
    log("INFO", f"📅 设定的滑动窗口: {start_date_str} -> {end_date_str}")

    page = get_page(port=9222)
    all_clean_items = []

    try:
        if not _open_quality_report(page):
            return

        # Start capturing responses of the report API.
        log("INFO", f"开启底层拦截网: {API_TARGET}")
        page.listen.start(API_TARGET)

        try:
            for current_page in range(1, max_pages + 1):
                # The listener cannot rewrite outgoing POST data, so the
                # request is re-issued from JS with our own payload instead.
                log("INFO", f"正在通过底层 JS Fetch 强行注入带时间窗口的请求... (页码: {current_page})")
                payload = _build_payload(current_page, encoded_start, encoded_end)

                packet = _request_page(page, payload)
                if not packet:
                    log("ERR", f"第 {current_page} 页注入请求超时或未触发,中止抓取。")
                    break

                body = packet.response.body
                data = body if isinstance(body, (dict, list)) else json.loads(body)

                items = _extract_items(data)
                if items is None:
                    log("ERR", f"第 {current_page} 页数据结构异常,中止。")
                    break
                if not items:
                    log("WARN", f"第 {current_page} 页返回了空列表,可能该时间段内无数据。")
                    break

                all_clean_items.extend(items)
                log("OK", f"第 {current_page} 页清洗完成,累计提取 {len(all_clean_items)} 条数据。")

                # Periodic checkpoint for long runs.
                if current_page % 10 == 0:
                    _save_items(all_clean_items)
        finally:
            # Runs on normal completion, on break, and when a page raised:
            # rows already collected are never thrown away.
            if all_clean_items:
                _save_items(all_clean_items)
                log("OK", f"🎉 抓取完成!总计成功提取 {len(all_clean_items)} 条数据。")
                log("OK", f"数据已保存至: {SAVE_PATH}")

    except Exception as e:
        log("ERR", f"发生全局异常: {e}")
    finally:
        try:
            page.listen.stop()
            log("INFO", "🛑 已释放浏览器监听资源。")
        except Exception:
            # Best-effort cleanup: the listener may never have been started.
            pass


if __name__ == "__main__":
    fetch_basis_quality_incremental()