Files
datie-bom/browser_login/fetch_issue_receipt_details.py

298 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
发料单报表 - 导航测试脚本
目标: 模拟点击菜单,进入“发料单报表”页面。
"""
import sys
import json
import time
import random
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from login import get_page, log
from config import OUTPUT_DIR
# Landing page the browser is sent to before the menu navigation starts.
HOME_URL = "https://yunmes.tftykj.cn/"
# Target string handed to page.listen.start() to select the requests to capture.
API_TARGET = "WorkOrdersDetailed_SearchListAll_Proxy"
# JSON archive that is loaded at startup and rewritten during and after the crawl.
SAVE_PATH = OUTPUT_DIR / "issue_receipt_details_full.json"
def _click_with_js_fallback(page, element, log_failure=False):
    """Click *element*; if the native click raises, click it through JavaScript."""
    try:
        element.click()
    except Exception as e:
        # Only Exception is caught so Ctrl-C / SystemExit still abort the run.
        if log_failure:
            log("ERR", f"普通点击失败: {e},尝试 JS 点击...")
        page.run_js("arguments[0].click();", element)


def _parse_result(packet):
    """Return the ``result`` dict of a captured response, or None if malformed."""
    body = packet.response.body
    # The body may already be decoded; otherwise decode it as JSON text.
    data = body if isinstance(body, (dict, list)) else json.loads(body)
    if isinstance(data, dict) and isinstance(data.get("result"), dict):
        return data["result"]
    return None


def _save_items(items, path):
    """Write *items* to *path* as pretty-printed UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(items, f, ensure_ascii=False, indent=2)


def fetch_issue_receipt_details(target_resume_page=1):
    """Crawl every page of the issue-receipt report and archive the rows as JSON.

    Steps:
      1. Attach to the browser via ``get_page(port=9222)`` and load any
         existing archive at ``SAVE_PATH``.
      2. Open ``HOME_URL`` and click through three menu levels to reach the
         report page.
      3. Listen for ``API_TARGET`` responses; if none arrives, click the
         query button and wait again.
      4. Optionally jump to ``target_resume_page`` through the pager input.
      5. Click "next page" until the button is disabled, a request times out,
         or a page comes back empty or malformed, extracting each row with
         ``_extract_fields``.
      6. Rewrite ``SAVE_PATH`` every 10 pages and once more at the end. On an
         unexpected exception the rows collected so far are written to
         ``issue_receipt_details_RESCUE.json`` in ``OUTPUT_DIR`` instead.

    Args:
        target_resume_page: Page number to resume from. With the default of 1
            the crawl starts at the first page. A value above 1 skips the
            first page's rows and jumps to that page before paging on.

    Returns:
        None. Results are written to disk and progress is reported via ``log``.

    NOTE(review): when the archive already holds rows and
    ``target_resume_page`` is 1, the first page is skipped but every later
    page is still appended to the loaded rows. Clear the archive first if a
    clean re-crawl is intended.
    """
    log("INFO", "=== 🚀 启动发料单报表全量数据抓取 ===")
    # Reuse the browser on port 9222; this function never closes it.
    page = get_page(port=9222)

    all_clean_items = []
    if SAVE_PATH.exists():
        try:
            with open(SAVE_PATH, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            # Rows are appended later, so anything but a list is unusable.
            if not isinstance(loaded, list):
                raise ValueError(f"存档根节点不是列表: {type(loaded).__name__}")
            all_clean_items = loaded
            log("INFO", f"📦 已加载本地历史存档,包含 {len(all_clean_items)} 条数据。")
        except Exception as e:
            log("WARN", f"加载本地存档失败: {e},将从空列表开始。")
            all_clean_items = []

    try:
        log("INFO", f"正在回到主页起点: {HOME_URL}")
        page.get(HOME_URL)
        page.wait.load_start()
        time.sleep(2)

        menus = [
            ("第一层: 业务统计报表", 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'),
            ("第二层: 生产业务报表(推测)", 'xpath:/html/body/div[7]/div/div[1]/div/div[9]/div/p'),
            ("第三层: 发料单报表", 'xpath:/html/body/div[8]/div/div[1]/div/div[6]/div/p'),
        ]
        log("INFO", "开始模拟人工点击左侧导航菜单...")
        for name, xpath in menus:
            ele = page.ele(xpath, timeout=5)
            if not ele:
                log("ERR", f"找不到菜单元素: {name}")
                return
            _click_with_js_fallback(page, ele)
            time.sleep(1.5)
        log("OK", "✅ 成功点开发料单报表界面!")

        # Click a blank area so the menu is hidden.
        blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div'
        blank_ele = page.ele(blank_xpath, timeout=3)
        if blank_ele:
            _click_with_js_fallback(page, blank_ele)
            time.sleep(0.5)

        log("INFO", f"开启底层数据拦截网: {API_TARGET}")
        page.listen.start(API_TARGET)
        # Wait for the request the page issues on its own.
        packet = page.listen.wait(timeout=10)
        if not packet:
            log("INFO", "尝试寻找并点击页面上的【查询】按钮...")
            query_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span'
            query_btn = page.ele(query_btn_xpath, timeout=3)
            if query_btn:
                _click_with_js_fallback(page, query_btn)
            packet = page.listen.wait(timeout=15)
        if not packet:
            log("ERR", "未能拦截到数据请求,可能网络超时或查询未触发。")
            return

        # =========================================================
        # First page
        # =========================================================
        log("OK", f"🎉 成功拦截到第一页数据HTTP 状态码: {packet.response.status}")
        result = _parse_result(packet)
        if result is None:
            log("ERR", "第一页返回的数据结构异常。")
            return
        total_count = result.get("totalCount", 0)
        items = result.get("items") or []
        log("OK", f"后端报告总条数: {total_count}")
        if target_resume_page > 1:
            log("INFO", f"触发断点续传,跳过第一页的数据保存。后端报告总条数: {total_count}")
        elif all_clean_items:
            # The archive already has rows: keep them and do not re-add page 1.
            log("INFO", "本地已有数据,跳过第一页保存,走翻页逻辑(注意:发料单可能需要您清空旧存档才能从头抓,这里先保留累加)")
        else:
            all_clean_items.extend(_extract_fields(item) for item in items)
            log("OK", f"第一页清洗完成,提取了 {len(items)} 条数据。")

        page_num = 1

        # =========================================================
        # Resume: jump straight to the requested page
        # =========================================================
        if target_resume_page > 1:
            log("INFO", f"🚀 触发断点续传机制!准备直接跳转到第 {target_resume_page} 页...")
            # Look for the pager's page-number input, then try a looser XPath.
            jumper_input_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/span[3]/div/div//input'
            input_ele = page.ele(jumper_input_xpath, timeout=5)
            if not input_ele:
                jumper_input_xpath = 'xpath://input[@type="number" and @aria-label=""]'
                input_ele = page.ele(jumper_input_xpath, timeout=5)

            if input_ele:
                input_ele.clear()
                input_ele.input(str(target_resume_page))
                time.sleep(0.5)
                # A newline is typed to submit the page number.
                input_ele.input('\n')
                packet = page.listen.wait(timeout=15)
                if not packet:
                    log("ERR", "断点跳转失败,未拦截到目标页的数据请求。")
                    return
                log("OK", f"✅ 成功跳转至第 {target_resume_page} 页并截获数据!")
                page_num = target_resume_page
                # Parse and keep the rows of the page we landed on.
                result = _parse_result(packet)
                if result is not None:
                    items = result.get("items") or []
                    all_clean_items.extend(_extract_fields(item) for item in items)
                    log("OK", f"{page_num} 页清洗完成,累计提取 {len(all_clean_items)} 条数据。")
            else:
                # NOTE(review): page 1 rows were skipped above, so this
                # fallback continues paging without them.
                log("ERR", "找不到页码输入框,断点跳转失败,将从第 1 页继续!")

        # =========================================================
        # Page-by-page crawl
        # =========================================================
        while True:
            # Random pause between page turns.
            delay = random.uniform(2.5, 5.5)
            log("INFO", f"⏳ 模拟真人停顿 {delay:.2f} 秒后,准备点击下一页...")
            time.sleep(delay)
            if page_num > 1 and page_num % 50 == 0:
                # Extra pause each time the page counter hits a multiple of 50.
                long_delay = random.uniform(10.0, 20.0)
                log("INFO", f"☕️ 已经连续高强度翻了 {page_num} 页,触发风控规避机制,假装喝水休息 {long_delay:.2f} 秒...")
                time.sleep(long_delay)

            next_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/button[2]'
            next_btn = page.ele(next_btn_xpath, timeout=3)
            if not next_btn:
                log("ERR", "找不到下一页按钮,尝试强制刷新页面或终止。")
                break

            # The button counts as disabled via class, attribute or aria flag.
            class_str = str(next_btn.attr("class"))
            aria_disabled = next_btn.attr("aria-disabled")
            is_disabled_attr = next_btn.attr("disabled") is not None
            if "disabled" in class_str or is_disabled_attr or aria_disabled == "true":
                log("OK", "🏁 下一页按钮已被禁用,说明已经到达最后一页!")
                break

            page_num += 1
            log("INFO", f"正在点击【下一页】抓取第 {page_num} 页...")
            _click_with_js_fallback(page, next_btn, log_failure=True)

            # Wait for the API response of the new page.
            packet = page.listen.wait(timeout=15)
            if not packet:
                log("ERR", f"{page_num} 页请求超时或未触发,中止抓取。")
                break

            result = _parse_result(packet)
            if result is None:
                log("ERR", f"{page_num} 页数据结构异常,中止。")
                break
            items = result.get("items", [])
            if not items:
                log("WARN", f"{page_num} 页返回了空列表,可能已无数据。")
                break
            all_clean_items.extend(_extract_fields(item) for item in items)
            log("OK", f"{page_num} 页清洗完成,累计提取 {len(all_clean_items)} 条数据。")

            # Checkpoint every 10 pages.
            if page_num % 10 == 0:
                _save_items(all_clean_items, SAVE_PATH)
                log("INFO", f"💾 自动存档: 已保存 {len(all_clean_items)} 条记录至本地。")

        # Final save; the listener is stopped in the ``finally`` clause below.
        if all_clean_items:
            _save_items(all_clean_items, SAVE_PATH)
            log("OK", f"🎉 全部抓取完成!总计成功提取 {len(all_clean_items)} 条数据。")
            log("OK", f"数据已保存至: {SAVE_PATH}")
    except Exception as e:
        log("ERR", f"发生全局异常: {e}")
        if all_clean_items:
            # Keep what was collected in a separate file, leaving SAVE_PATH as is.
            rescue_path = OUTPUT_DIR / "issue_receipt_details_RESCUE.json"
            _save_items(all_clean_items, rescue_path)
            log("INFO", f"🆘 触发异常保存,抢救了 {len(all_clean_items)} 条数据。")
    finally:
        # Always stop listening; the browser itself is left open.
        try:
            page.listen.stop()
            log("INFO", "🛑 已释放浏览器监听资源,保持浏览器开启。")
        except Exception as e:
            log("WARN", f"停止监听时出错: {e}")
def _extract_fields(item):
"""提取所需的字段"""
return {
"生产任务单号": item.get("productionOrderNo"),
"生产物料代码": item.get("productMaterialCode"),
"生产物料名称": item.get("productMaterialName"),
"生产物料规格": item.get("productMaterialSpecification"),
"发料单号": item.get("workOrdersNumber"),
"状态": item.get("status"),
"物料规格": item.get("materialSpecification"),
"物料名称": item.get("materialName"),
"物料代码": item.get("materialCode"),
"发料数量": item.get("issueNumber"),
"已发料数量": item.get("hasIssueNumber"),
"金额": item.get("amount"),
"成本价": item.get("costPrice"),
"发料金额": item.get("issueAmount"),
"生产订单备注": item.get("productionOrderRemark"),
"明细备注": item.get("detailedRemark"),
"单位名称": item.get("unitName"),
"仓库名称": item.get("warehouseName"),
"行号": item.get("lineNumber"),
"发料单备注": item.get("workOrdersRemark"),
"执行人名称": item.get("executorUserName"),
"物料型号": item.get("materialModel"),
"执行时间": item.get("executionTime"),
"领料人": item.get("materialsUserName"),
"生产物料型号": item.get("productMaterialModel"),
"自定义字段": item.get("customField"),
"部门代码": item.get("departmentInformationCode"),
"部门名称": item.get("departmentInformationName"),
"图片文件": item.get("imageFile"),
"汇总金额": item.get("issueAmountTotal"),
"物料组代码": item.get("materialGroupCode"),
"物料组名称": item.get("materialGroupName"),
"单价小数位数": item.get("numnberOfReservedDigits"),
"单价进位策略": item.get("placeMentStrategy"),
"单价": item.get("price"),
"销售订单号": item.get("salesOrderCode")
}
# Run the crawl when this file is executed as a script.
if __name__ == "__main__":
    fetch_issue_receipt_details()