Files
datie-bom/browser_login/fetch_basis_quality_sample.py

110 lines
4.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
质量报表 (Basis Quality Report) - 样本抓取脚本
目标: 模拟点击菜单进入页面,拦截 BasisQualityReport_GetValueFieldListNew_Proxy 接口,提取前 5 条数据进行结构分析。
"""
import sys
import json
import time
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from login import get_page, log
from config import OUTPUT_DIR
# Start page the script loads before clicking through the navigation menu.
HOME_URL = "https://yunmes.tftykj.cn/"
# Target string passed to page.listen.start() to select the request to capture.
# NOTE(review): the module docstring mentions a different endpoint name
# (BasisQualityReport_GetValueFieldListNew_Proxy) - confirm this value.
API_TARGET = "SearchCustomReportBySQL_Proxy"
# Output file: receives the extracted sample, or the raw payload when the
# response has no "result" key.
SAVE_PATH = OUTPUT_DIR / "basis_quality_sample.json"
def fetch_basis_quality_sample():
    """Open the quality-report page and save a 5-record sample of its data.

    Steps:
      1. Load HOME_URL in the page returned by ``get_page(port=9222)``.
      2. Start listening for API_TARGET *before* clicking the menu, so the
         request fired on page entry is not missed.
      3. Click the menu entry (falling back to a JS click), then click a
         blank area on a best-effort basis.
      4. Wait up to 15 seconds for a captured packet.
      5. Write to SAVE_PATH either the first 5 records found under the
         payload's "result" key or, if that key is absent, the whole
         payload for manual inspection.

    Returns:
        None. Progress and failures are reported through ``log``; the
        result is the file written at SAVE_PATH.

    Errors are not propagated: any ``Exception`` raised along the way is
    logged, and the listener is stopped in ``finally`` on every exit path.
    """
    log("INFO", "=== 🧪 启动质量报表样本抓取 (前5条) ===")
    # NOTE(review): 9222 is presumably the browser's remote-debugging port;
    # confirm against login.get_page.
    page = get_page(port=9222)

    try:
        log("INFO", f"正在回到主页起点: {HOME_URL}")
        page.get(HOME_URL)
        page.wait.load_start()
        time.sleep(2)

        # (label for log messages, locator of the menu element to click)
        menus = [
            ("进入质量报表", 'xpath://*[@id="el-collapse-content-21"]/div/div/div/div[1]/div/div/div[6]/div')
        ]

        # The data loads as soon as the page is entered, so the listener
        # must be running BEFORE the menu click.
        log("INFO", f"开启底层数据拦截网: {API_TARGET} (提前开启,以防错过初始加载)")
        page.listen.start(API_TARGET)

        log("INFO", "开始模拟人工点击左侧导航菜单...")
        for name, xpath in menus:
            ele = page.ele(xpath, timeout=5)
            if not ele:
                log("ERR", f"找不到菜单元素: {name}")
                return
            try:
                ele.click()
            except Exception:
                # Native click failed; fall back to a JS-dispatched click.
                page.run_js("arguments[0].click();", ele)

        log("OK", "✅ 成功点开质量报表界面!")

        # Try clicking a blank area to dismiss a menu that may be covering
        # the page (may need adjusting or removing for other layouts).
        blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div'
        blank_ele = page.ele(blank_xpath, timeout=3)
        if blank_ele:
            try:
                blank_ele.click()
            except Exception:
                # Deliberately best-effort: a failed blank click must not
                # abort the capture.
                pass

        log("INFO", "等待拦截初始加载的数据包...")
        packet = page.listen.wait(timeout=15)
        if not packet:
            log("ERR", "未能拦截到数据请求,可能网络超时。")
            # The listener is stopped by the ``finally`` clause below.
            return

        # ---------------------------------------------------------
        # Data processing
        # ---------------------------------------------------------
        log("OK", f"🎉 成功拦截到数据HTTP 状态码: {packet.response.status}")
        body = packet.response.body
        # The body may already be decoded; otherwise parse it as JSON text.
        data = body if isinstance(body, (dict, list)) else json.loads(body)

        if isinstance(data, dict) and "result" in data:
            # NOTE(review): the original comment referred to curl parameters
            # {"wageCalculationPlanId":80,"basisQualityReportId":23}; the
            # response shape is not known for certain, so the checks below
            # are intentionally permissive.
            items = data["result"]
            if isinstance(items, dict) and "items" in items:
                # "result" wraps the records in an "items" field.
                items = items["items"]
            elif not isinstance(items, list):
                # Neither a list nor an items-wrapper: keep the whole
                # "result" value as a single sample entry.
                items = [items]

            log("INFO", f"本页包含 {len(items)} 条数据,准备提取前 5 条。")
            sample_items = list(items[:5])

            with open(SAVE_PATH, "w", encoding="utf-8") as f:
                json.dump(sample_items, f, ensure_ascii=False, indent=2)
            log("OK", f"💾 样本提取完成!已保存 {len(sample_items)} 条记录至: {SAVE_PATH}")
        else:
            log("ERR", "返回的数据结构中找不到 'result' 节点。")
            # Save the raw structure so it can be analysed by hand.
            with open(SAVE_PATH, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            log("INFO", f"已将原始返回数据保存至: {SAVE_PATH} 以供分析。")

    except Exception as e:
        log("ERR", f"发生全局异常: {e}")
    finally:
        try:
            page.listen.stop()
            log("INFO", "🛑 已释放浏览器监听资源。")
        except Exception:
            # Cleanup is best-effort (e.g. the listener was never started).
            pass
# Run the capture only when executed as a script, not when imported.
if __name__ == "__main__":
    fetch_basis_quality_sample()