110 lines
4.4 KiB
Python
110 lines
4.4 KiB
Python
"""
|
||
质量报表 (Basis Quality Report) - 样本抓取脚本
|
||
目标: 模拟点击菜单进入页面,拦截 BasisQualityReport_GetValueFieldListNew_Proxy 接口,提取前 5 条数据进行结构分析。
|
||
"""
|
||
import sys
|
||
import json
|
||
import time
|
||
from pathlib import Path
|
||
|
||
sys.path.insert(0, str(Path(__file__).parent))
|
||
from login import get_page, log
|
||
from config import OUTPUT_DIR
|
||
|
||
# Landing page the browser is sent to before the menu navigation starts.
HOME_URL = "https://yunmes.tftykj.cn/"
# Target handed to page.listen.start(); selects which request gets captured.
API_TARGET = "SearchCustomReportBySQL_Proxy"
# Output file for the captured sample (or for the raw response when it has
# no "result" key).
SAVE_PATH = OUTPUT_DIR / "basis_quality_sample.json"
|
||
|
||
def _click_with_js_fallback(page, ele):
    """Click *ele* normally; if that raises, click it through injected JS."""
    try:
        ele.click()
    except Exception as exc:
        # `except Exception` (not a bare except) so Ctrl-C / SystemExit are
        # not mistaken for a failed click.
        log("INFO", f"常规点击失败,改用 JS 点击: {exc}")
        page.run_js("arguments[0].click();", ele)


def _extract_sample_items(data, limit=5):
    """Return ``(total, first_items)`` taken from ``data["result"]``.

    ``result`` may be a list, a dict wrapping the list under ``"items"``, or
    anything else; any non-list payload is wrapped in a one-element list so
    that it can still be counted, sliced and dumped for inspection.
    """
    items = data["result"]
    if isinstance(items, dict) and "items" in items:
        items = items["items"]
    # Checked after unwrapping so that a non-list "items" value (e.g. null)
    # cannot break len() / slicing below.
    if not isinstance(items, list):
        items = [items]
    return len(items), items[:limit]


def _save_json(payload):
    """Write *payload* to SAVE_PATH as UTF-8 JSON, creating the folder first."""
    target = Path(SAVE_PATH)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


def fetch_basis_quality_sample():
    """Open the quality report page and save a sample of its data response.

    Steps:
      1. Attach to the browser via ``get_page(port=9222)`` and load HOME_URL.
      2. Start listening for API_TARGET *before* clicking the menu, because
         the page requests its data as soon as it opens.
      3. Click the menu entry (with a JS-click fallback), then wait up to
         15 seconds for the captured packet.
      4. Save the first 5 records found under ``"result"`` to SAVE_PATH, or
         the whole parsed response when there is no ``"result"`` key.

    Returns:
        None. Progress and failures are reported through ``log``; any
        exception raised after the page is obtained is logged, not re-raised.
    """
    log("INFO", "=== 🧪 启动质量报表样本抓取 (前5条) ===")
    page = get_page(port=9222)

    try:
        log("INFO", f"正在回到主页起点: {HOME_URL}")
        page.get(HOME_URL)
        page.wait.load_start()
        time.sleep(2)

        menus = [
            (
                "进入质量报表",
                'xpath://*[@id="el-collapse-content-21"]/div/div/div/div[1]/div/div/div[6]/div',
            ),
        ]

        # The data loads the moment the page opens, so the listener has to
        # be running before the menu is clicked.
        log("INFO", f"开启底层数据拦截网: {API_TARGET} (提前开启,以防错过初始加载)")
        page.listen.start(API_TARGET)

        log("INFO", "开始模拟人工点击左侧导航菜单...")
        for name, xpath in menus:
            ele = page.ele(xpath, timeout=5)
            if not ele:
                log("ERR", f"找不到菜单元素: {name}")
                return  # the listener is stopped by the finally clause
            _click_with_js_fallback(page, ele)

        log("OK", "✅ 成功点开质量报表界面!")

        # Best-effort click on a blank area to dismiss a menu that may be
        # covering the page (may need adjusting or removing in practice).
        blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div'
        blank_ele = page.ele(blank_xpath, timeout=3)
        if blank_ele:
            try:
                blank_ele.click()
            except Exception:
                # Purely cosmetic step; a failure here must not abort the run.
                pass

        log("INFO", "等待拦截初始加载的数据包...")
        packet = page.listen.wait(timeout=15)

        if not packet:
            log("ERR", "未能拦截到数据请求,可能网络超时。")
            return  # the listener is stopped by the finally clause

        # ---------------------------------------------------------
        # Data processing
        # ---------------------------------------------------------
        log("OK", f"🎉 成功拦截到数据!HTTP 状态码: {packet.response.status}")
        body = packet.response.body
        # The body may already be parsed; otherwise parse it as JSON text.
        data = body if isinstance(body, (dict, list)) else json.loads(body)

        if isinstance(data, dict) and "result" in data:
            # The response layout of this endpoint is not known in advance,
            # so the extraction accepts several shapes of "result".
            total, sample_items = _extract_sample_items(data, limit=5)
            log("INFO", f"本页包含 {total} 条数据,准备提取前 5 条。")

            _save_json(sample_items)
            log("OK", f"💾 样本提取完成!已保存 {len(sample_items)} 条记录至: {SAVE_PATH}")
        else:
            log("ERR", "返回的数据结构中找不到 'result' 节点。")
            # Keep the raw structure on disk so it can be analysed.
            _save_json(data)
            log("INFO", f"已将原始返回数据保存至: {SAVE_PATH} 以供分析。")

    except Exception as e:
        log("ERR", f"发生全局异常: {e}")
    finally:
        try:
            page.listen.stop()
            log("INFO", "🛑 已释放浏览器监听资源。")
        except Exception:
            # Cleanup is best-effort: the listener may never have started.
            pass
|
||
|
||
# Run the capture only when this file is executed as a script.
if __name__ == "__main__":
    fetch_basis_quality_sample()
|