"""Issue-receipt report scraper.

Replays the menu clicks that open the issue-receipt report page, intercepts
the responses of the ``WorkOrdersDetailed_SearchListAll_Proxy`` request while
clicking through the pager, and stores the extracted rows as JSON in
``SAVE_PATH``.
"""

import sys
import json
import time
import random
from pathlib import Path

# Make the sibling modules (login, config) importable when run as a script.
sys.path.insert(0, str(Path(__file__).parent))

from login import get_page, log
from config import OUTPUT_DIR

HOME_URL = "https://yunmes.tftykj.cn/"
API_TARGET = "WorkOrdersDetailed_SearchListAll_Proxy"
SAVE_PATH = OUTPUT_DIR / "issue_receipt_details_full.json"

# (output label, key read from the intercepted item), in output order.
_FIELD_MAP = (
    ("生产任务单号", "productionOrderNo"),
    ("生产物料代码", "productMaterialCode"),
    ("生产物料名称", "productMaterialName"),
    ("生产物料规格", "productMaterialSpecification"),
    ("发料单号", "workOrdersNumber"),
    ("状态", "status"),
    ("物料规格", "materialSpecification"),
    ("物料名称", "materialName"),
    ("物料代码", "materialCode"),
    ("发料数量", "issueNumber"),
    ("已发料数量", "hasIssueNumber"),
    ("金额", "amount"),
    ("成本价", "costPrice"),
    ("发料金额", "issueAmount"),
    ("生产订单备注", "productionOrderRemark"),
    ("明细备注", "detailedRemark"),
    ("单位名称", "unitName"),
    ("仓库名称", "warehouseName"),
    ("行号", "lineNumber"),
    ("发料单备注", "workOrdersRemark"),
    ("执行人名称", "executorUserName"),
    ("物料型号", "materialModel"),
    ("执行时间", "executionTime"),
    ("领料人", "materialsUserName"),
    ("生产物料型号", "productMaterialModel"),
    ("自定义字段", "customField"),
    ("部门代码", "departmentInformationCode"),
    ("部门名称", "departmentInformationName"),
    ("图片文件", "imageFile"),
    ("汇总金额", "issueAmountTotal"),
    ("物料组代码", "materialGroupCode"),
    ("物料组名称", "materialGroupName"),
    # NOTE(review): "numnber" is spelled this way in the original lookup;
    # presumably it mirrors the backend field name - confirm before "fixing".
    ("单价小数位数", "numnberOfReservedDigits"),
    ("单价进位策略", "placeMentStrategy"),
    ("单价", "price"),
    ("销售订单号", "salesOrderCode"),
)


def _click_with_js_fallback(page, ele, report_failure=False):
    """Click ``ele``; if the normal click raises, click it through JavaScript.

    Only ``Exception`` is caught so Ctrl+C / interpreter exit still propagate.
    When ``report_failure`` is true the failed normal click is logged first.
    """
    try:
        ele.click()
    except Exception as e:
        if report_failure:
            log("ERR", f"普通点击失败: {e},尝试 JS 点击...")
        page.run_js("arguments[0].click();", ele)


def _parse_result(packet):
    """Return the ``result`` dict of an intercepted packet, or ``None``.

    The response body is used as-is when it is already a dict/list, otherwise
    it is decoded with ``json.loads`` (decode errors propagate to the caller).
    ``None`` is returned when the payload is not a dict or its ``result``
    entry is missing or not a dict.
    """
    body = packet.response.body
    data = body if isinstance(body, (dict, list)) else json.loads(body)
    if not isinstance(data, dict):
        return None
    result = data.get("result")
    return result if isinstance(result, dict) else None


def _save_items(path, items):
    """Write ``items`` as JSON to ``path`` via a temp file and a rename.

    Writing to a sibling ``.tmp`` file first means an interruption mid-write
    cannot leave a truncated archive behind for the next run to load.
    """
    tmp_path = path.with_name(path.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(items, f, ensure_ascii=False, indent=2)
    tmp_path.replace(path)


def _load_archive():
    """Load previously saved rows from ``SAVE_PATH``; return ``[]`` on any failure."""
    if not SAVE_PATH.exists():
        return []
    try:
        with open(SAVE_PATH, "r", encoding="utf-8") as f:
            items = json.load(f)
        log("INFO", f"📦 已加载本地历史存档,包含 {len(items)} 条数据。")
        return items
    except Exception as e:
        # Best effort: an unreadable archive must not prevent a fresh run.
        log("WARN", f"加载本地存档失败: {e},将从空列表开始。")
        return []


def fetch_issue_receipt_details(resume_page: int = 1) -> None:
    """Scrape every page of the issue-receipt report into ``SAVE_PATH``.

    Opens the report through the navigation menus in the browser returned by
    ``get_page(port=9222)``, listens for ``API_TARGET`` responses, then keeps
    clicking the "next page" button until it is disabled, a request times
    out, or a page comes back empty or malformed. Rows are appended to the
    ones already stored in ``SAVE_PATH``, saved every 10 pages and once more
    at the end. On an unexpected exception the rows collected so far are
    written to ``issue_receipt_details_RESCUE.json`` instead.

    Args:
        resume_page: Page to jump to before paging. With the default ``1``
            the scrape starts at the first page; a larger value types that
            page number into the pager's jump box and continues from there.

    Returns:
        None. Results are written to disk and progress is reported via ``log``.
    """
    log("INFO", "=== 🚀 启动发料单报表全量数据抓取 ===")

    # Reuse the browser on port 9222; this function never closes it.
    page = get_page(port=9222)

    all_clean_items = _load_archive()

    try:
        log("INFO", f"正在回到主页起点: {HOME_URL}")
        page.get(HOME_URL)
        page.wait.load_start()
        time.sleep(2)

        menus = [
            ("第一层: 业务统计报表", 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'),
            ("第二层: 生产业务报表(推测)", 'xpath:/html/body/div[7]/div/div[1]/div/div[9]/div/p'),
            ("第三层: 发料单报表", 'xpath:/html/body/div[8]/div/div[1]/div/div[6]/div/p'),
        ]

        log("INFO", "开始模拟人工点击左侧导航菜单...")
        for name, xpath in menus:
            ele = page.ele(xpath, timeout=5)
            if not ele:
                log("ERR", f"找不到菜单元素: {name}")
                return
            _click_with_js_fallback(page, ele)
            time.sleep(1.5)

        log("OK", "✅ 成功点开发料单报表界面!")

        # Click an empty area so the fly-out menu is dismissed.
        blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div'
        blank_ele = page.ele(blank_xpath, timeout=3)
        if blank_ele:
            _click_with_js_fallback(page, blank_ele)
            time.sleep(0.5)

        log("INFO", f"开启底层数据拦截网: {API_TARGET}")
        page.listen.start(API_TARGET)

        # First wait for a request the page fires on its own.
        packet = page.listen.wait(timeout=10)

        if not packet:
            # Nothing captured: trigger the request through the query button.
            log("INFO", "尝试寻找并点击页面上的【查询】按钮...")
            query_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span'
            query_btn = page.ele(query_btn_xpath, timeout=3)
            if query_btn:
                _click_with_js_fallback(page, query_btn)
                packet = page.listen.wait(timeout=15)

        if not packet:
            log("ERR", "未能拦截到数据请求,可能网络超时或查询未触发。")
            return

        target_resume_page = resume_page

        # ---------------------------------------------------------
        # First page
        # ---------------------------------------------------------
        log("OK", f"🎉 成功拦截到第一页数据!HTTP 状态码: {packet.response.status}")
        result = _parse_result(packet)
        if result is None:
            log("ERR", "第一页返回的数据结构异常。")
            return

        total_count = result.get("totalCount", 0)
        first_page_items = result.get("items") or []
        log("OK", f"后端报告总条数: {total_count}")

        if target_resume_page <= 1:
            # Page 1 is stored only when there is no local archive yet;
            # otherwise new pages are simply accumulated onto the archive.
            if not all_clean_items:
                all_clean_items.extend(_extract_fields(item) for item in first_page_items)
                log("OK", f"第一页清洗完成,提取了 {len(first_page_items)} 条数据。")
            else:
                log("INFO", "本地已有数据,跳过第一页保存,走翻页逻辑(注意:发料单可能需要您清空旧存档才能从头抓,这里先保留累加)")
        else:
            log("INFO", f"触发断点续传,跳过第一页的数据保存。后端报告总条数: {total_count}")

        page_num = 1

        # ---------------------------------------------------------
        # Resume: jump straight to the requested page
        # ---------------------------------------------------------
        if target_resume_page > 1:
            log("INFO", f"🚀 触发断点续传机制!准备直接跳转到第 {target_resume_page} 页...")

            # Try the positional xpath first, then a generic attribute match.
            jumper_input_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/span[3]/div/div//input'
            input_ele = page.ele(jumper_input_xpath, timeout=5)
            if not input_ele:
                jumper_input_xpath = 'xpath://input[@type="number" and @aria-label="页"]'
                input_ele = page.ele(jumper_input_xpath, timeout=5)

            if input_ele:
                input_ele.clear()
                input_ele.input(str(target_resume_page))
                time.sleep(0.5)
                input_ele.input('\n')

                packet = page.listen.wait(timeout=15)
                if not packet:
                    log("ERR", "断点跳转失败,未拦截到目标页的数据请求。")
                    return

                log("OK", f"✅ 成功跳转至第 {target_resume_page} 页并截获数据!")
                page_num = target_resume_page

                # Store the rows of the page we landed on.
                result = _parse_result(packet)
                if result is not None:
                    jumped_items = result.get("items") or []
                    all_clean_items.extend(_extract_fields(item) for item in jumped_items)
                    log("OK", f"第 {page_num} 页清洗完成,累计提取 {len(all_clean_items)} 条数据。")
                else:
                    log("WARN", f"第 {page_num} 页数据结构异常,已跳过该页数据。")
            else:
                log("ERR", "找不到页码输入框,断点跳转失败,将从第 1 页继续!")
                # We really are starting from page 1 now, so apply the same
                # rule as a normal run; otherwise page 1 would be dropped.
                if not all_clean_items:
                    all_clean_items.extend(_extract_fields(item) for item in first_page_items)

        # ---------------------------------------------------------
        # Page-by-page loop
        # ---------------------------------------------------------
        while True:
            # Random pause between clicks to look less like a bot.
            delay = random.uniform(2.5, 5.5)
            log("INFO", f"⏳ 模拟真人停顿 {delay:.2f} 秒后,准备点击下一页...")
            time.sleep(delay)

            # Extra long pause every 50 pages.
            if page_num > 1 and page_num % 50 == 0:
                long_delay = random.uniform(10.0, 20.0)
                log("INFO", f"☕️ 已经连续高强度翻了 {page_num} 页,触发风控规避机制,假装喝水休息 {long_delay:.2f} 秒...")
                time.sleep(long_delay)

            next_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/button[2]'
            next_btn = page.ele(next_btn_xpath, timeout=3)
            if not next_btn:
                log("ERR", "找不到下一页按钮,尝试强制刷新页面或终止。")
                break

            # A disabled "next" button (class, attribute or aria flag) marks
            # the last page.
            class_str = str(next_btn.attr("class"))
            aria_disabled = next_btn.attr("aria-disabled")
            is_disabled_attr = next_btn.attr("disabled") is not None
            if "disabled" in class_str or is_disabled_attr or aria_disabled == "true":
                log("OK", "🏁 下一页按钮已被禁用,说明已经到达最后一页!")
                break

            page_num += 1
            log("INFO", f"正在点击【下一页】抓取第 {page_num} 页...")
            _click_with_js_fallback(page, next_btn, report_failure=True)

            # Wait for the API response of the new page.
            packet = page.listen.wait(timeout=15)
            if not packet:
                log("ERR", f"第 {page_num} 页请求超时或未触发,中止抓取。")
                break

            result = _parse_result(packet)
            if result is None:
                log("ERR", f"第 {page_num} 页数据结构异常,中止。")
                break

            items = result.get("items") or []
            if not items:
                log("WARN", f"第 {page_num} 页返回了空列表,可能已无数据。")
                break

            all_clean_items.extend(_extract_fields(item) for item in items)
            log("OK", f"第 {page_num} 页清洗完成,累计提取 {len(all_clean_items)} 条数据。")

            # Checkpoint every 10 pages.
            if page_num % 10 == 0:
                _save_items(SAVE_PATH, all_clean_items)
                log("INFO", f"💾 自动存档: 已保存 {len(all_clean_items)} 条记录至本地。")

        # Final save (the listener is stopped in the ``finally`` clause).
        if all_clean_items:
            _save_items(SAVE_PATH, all_clean_items)
            log("OK", f"🎉 全部抓取完成!总计成功提取 {len(all_clean_items)} 条数据。")
            log("OK", f"数据已保存至: {SAVE_PATH}")

    except Exception as e:
        log("ERR", f"发生全局异常: {e}")
        if all_clean_items:
            # Keep whatever was collected in a separate file so the main
            # archive is left untouched.
            rescue_path = OUTPUT_DIR / "issue_receipt_details_RESCUE.json"
            _save_items(rescue_path, all_clean_items)
            log("INFO", f"🆘 触发异常保存,抢救了 {len(all_clean_items)} 条数据。")
    finally:
        # Always stop listening; the browser itself is left open.
        try:
            page.listen.stop()
            log("INFO", "🛑 已释放浏览器监听资源,保持浏览器开启。")
        except Exception as e:
            log("WARN", f"停止监听时出错: {e}")


def _extract_fields(item):
    """Return a dict with the report columns picked out of one intercepted item.

    Each output label in ``_FIELD_MAP`` is filled with ``item.get(key)``, so
    keys absent from ``item`` yield ``None``.
    """
    return {label: item.get(key) for label, key in _FIELD_MAP}


if __name__ == "__main__":
    fetch_issue_receipt_details()