新内容
This commit is contained in:
80
browser_login/auto_launcher.py
Normal file
80
browser_login/auto_launcher.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""
|
||||
ERP 数据同步器 - 自动化浏览器拉起与登录守护模块
|
||||
目标:
|
||||
1. 自动寻找本地安装的 Chrome 浏览器。
|
||||
2. 以 9222 端口和独立的用户数据目录启动(互不干扰,持久化登录状态)。
|
||||
3. 弹出 ERP 登录页面,等待用户手动登录(解决滑块验证码等问题)。
|
||||
4. 登录成功后,将浏览器挂在后台作为保活引擎。
|
||||
"""
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from DrissionPage import ChromiumOptions, ChromiumPage
|
||||
from config import DATA_DIR
|
||||
|
||||
# Dedicated Chrome user-data directory. It lives under the persistent data
# directory so the login state is not lost when the program restarts.
USER_DATA_DIR = DATA_DIR / "browser_login" / "chrome_user_data"
# URL opened in the browser for the manual login.
HOME_URL = "https://yunmes.tftykj.cn/"
|
||||
|
||||
def start_and_wait_login():
    """Launch (or attach to) Chrome on port 9222 and block until login is done.

    The browser is started with the dedicated ``USER_DATA_DIR`` profile, then
    ``HOME_URL`` is opened and the function polls every two seconds for a menu
    element that is only looked up after login.

    Returns:
        The ``ChromiumPage`` object once the menu element is found, or
        ``None`` when the browser could not be started.
    """
    print("🚀 [1/3] 正在配置内置 Chrome 浏览器引擎...")

    # Browser configuration: fixed debugging port and a dedicated profile dir.
    co = ChromiumOptions()
    co.set_local_port(9222)
    co.set_user_data_path(str(USER_DATA_DIR))
    co.ignore_certificate_errors()

    print("🌍 [2/3] 正在拉起浏览器并前往 ERP 登录页...")
    try:
        page = ChromiumPage(co)
    except Exception as e:
        print(f"❌ 启动浏览器失败,请确保电脑安装了 Chrome 浏览器!报错信息: {e}")
        return None

    page.get(HOME_URL)

    print("\n" + "="*50)
    print("👀 [等待人工介入] 请在弹出的浏览器窗口中完成登录操作!")
    print("💡 提示: 输入账号密码、通过滑块验证码,直到进入 ERP 系统主界面。")
    print("="*50 + "\n")

    # Element used as the "logged in" marker.
    # NOTE(review): the xpath is layout-dependent and may need adjusting.
    menu_xpath = 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'

    while True:
        time.sleep(2)
        try:
            menu_ele = page.ele(menu_xpath, timeout=1)
        except Exception:
            # Only ordinary errors count as "not logged in yet". A bare
            # ``except`` would also swallow KeyboardInterrupt / SystemExit
            # and make this loop impossible to interrupt.
            menu_ele = None

        if menu_ele:
            break
        print("⏳ 等待登录中...")

    print("\n✅ [3/3] 检测到登录成功!")
    print("🔒 登录状态已保存,你可以随时关闭或者最小化这个浏览器窗口。")
    print("🤖 爬虫引擎已挂载至后台,可以开始点击前端界面的【同步数据】按钮了!\n")

    return page
|
||||
|
||||
if __name__ == "__main__":
    # Running this file directly launches the browser.
    page = start_and_wait_login()
    if page:
        # Keep the script alive with an idle loop (in the real desktop app this
        # is where the Flask backend and the Webview window would be started).
        try:
            while True:
                time.sleep(10)
        except KeyboardInterrupt:
            print("👋 守护进程已退出。")
|
||||
@@ -14,11 +14,10 @@ from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from login import get_page, login, login_manual, log, dump_page_state
|
||||
from config import OUTPUT_DIR
|
||||
|
||||
BOM_PAGE_URL = "https://yunmes.tftykj.cn/MaterialBom"
|
||||
BOM_API_PATH = "MaterialBom_SearchList_Proxy"
|
||||
OUTPUT_DIR = Path(__file__).parent / "output"
|
||||
OUTPUT_DIR.mkdir(exist_ok=True)
|
||||
|
||||
|
||||
# ── 导航到 BOM 页面 ───────────────────────────────────────────────────────────
|
||||
|
||||
17
browser_login/config.py
Normal file
17
browser_login/config.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
def get_data_dir():
    """Return the directory used for persistent data (database, output files).

    For a frozen executable (``sys.frozen`` is truthy) this is the directory
    containing the executable; otherwise it is the parent of the directory
    that holds this file.
    """
    is_frozen = getattr(sys, 'frozen', False)
    if not is_frozen:
        return Path(__file__).parent.parent
    return Path(sys.executable).parent
|
||||
|
||||
# Root directory for persistent data, resolved once at import time.
DATA_DIR = get_data_dir()

# Shared output directory for the JSON files and the SQLite database.
OUTPUT_DIR = DATA_DIR / "browser_login" / "output"
# Created at import time so importers can write into it immediately.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Location of the SQLite database file.
DB_PATH = OUTPUT_DIR / "erp_data.db"
|
||||
227
browser_login/fetch_bom_cost_full_tree.py
Normal file
227
browser_login/fetch_bom_cost_full_tree.py
Normal file
@@ -0,0 +1,227 @@
|
||||
"""
|
||||
BOM 成本 - 终极树状结构抓取脚本 (全站 1400+ 父件及 5 层嵌套子件)
|
||||
目标:
|
||||
1. 抓取所有父件(成本核算表主页)
|
||||
2. 暗网请求所有父件下对应的 BOM 成本数据(扁平的 5 层数据)
|
||||
3. 实时清洗并重组为完美嵌套的 JSON 树
|
||||
"""
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import random
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from login import get_page, log
|
||||
from config import OUTPUT_DIR
|
||||
|
||||
# Page that lists the parent items (cost accounting table).
PAGE_URL = "https://yunmes.tftykj.cn/PartBomCostAccounting"
# Substring used to match the parent-list API request in the network listener.
API_PARENT = "PartBomCostAccounting_SearchList_Proxy"

# File the final nested tree is saved to.
TREE_FILE_PATH = OUTPUT_DIR / "bom_cost_full_tree_final.json"
|
||||
|
||||
def build_nested_tree(flat_items):
    """Convert a flat list of BOM rows into a nested tree.

    Each row is expected to carry a ``sonNO`` (its own key) and a
    ``_parentId`` (the ``sonNO`` of its parent). Rows without a truthy
    ``sonNO`` are skipped. If several rows share a ``sonNO`` the last one
    wins.

    Args:
        flat_items: iterable of dicts, or a falsy value.

    Returns:
        A list of root nodes. Every node has ``bomLevel``,
        ``childMaterialCode``, ``childMaterialName`` and ``usageQty``; nodes
        with children also have ``sub_items``. The helper keys ``sonNO`` and
        ``_parentId`` are removed from the result.
    """
    if not flat_items:
        return []

    nodes_by_key = {}
    for item in flat_items:
        son_no = item.get("sonNO")
        if not son_no:
            continue
        nodes_by_key[son_no] = {
            "sonNO": son_no,
            "_parentId": item.get("_parentId"),
            "bomLevel": item.get("bomLevel"),
            "childMaterialCode": item.get("childMaterialCode"),
            "childMaterialName": item.get("childMaterialName"),
            "usageQty": item.get("childrenMaterialConsumption", 1),
            "sub_items": [],
        }

    roots = []
    for node in nodes_by_key.values():
        parent_id = node["_parentId"]
        parent_node = nodes_by_key.get(parent_id) if parent_id is not None else None
        if parent_node is not None and parent_node is not node:
            parent_node["sub_items"].append(node)
        else:
            # No parent, or a parent id that matches no row (e.g. it points at
            # the top-level material). Keep the node as a root instead of
            # silently discarding it together with its whole subtree.
            roots.append(node)

    def _strip_temp_fields(node_list):
        """Remove tree-building keys and empty ``sub_items`` recursively."""
        for node in node_list:
            node.pop("sonNO", None)
            node.pop("_parentId", None)
            if node["sub_items"]:
                _strip_temp_fields(node["sub_items"])
            else:
                node.pop("sub_items", None)

    _strip_temp_fields(roots)
    return roots
|
||||
|
||||
|
||||
def _save_bom_cost_progress(parents):
    """Write ``parents`` to ``TREE_FILE_PATH`` without the request-only ids."""
    clean_save_list = []
    for p in parents:
        clean_p = dict(p)
        clean_p.pop("_id", None)
        clean_p.pop("_materialId", None)
        clean_save_list.append(clean_p)

    with open(TREE_FILE_PATH, "w", encoding="utf-8") as f:
        json.dump(clean_save_list, f, ensure_ascii=False, indent=2)


def fetch_bom_cost_tree():
    """Collect all parent items and attach a nested BOM cost tree to each.

    Phase 1 opens ``PAGE_URL``, listens for ``API_PARENT`` responses and
    clicks through the pager, collecting the parent rows.
    Phase 2 runs an in-page ``$.ajax`` request per parent, rebuilds the flat
    response with ``build_nested_tree`` and checkpoints the result to
    ``TREE_FILE_PATH`` every 10 parents and after the last one.

    Exceptions are logged, not raised.
    """
    log("INFO", "=== 🌳 启动 BOM 成本终极抓取 (多层嵌套自动重组) ===")
    page = get_page(port=9222)
    clean_parents_list = []
    listening = False

    try:
        # ---------------------------------------------------------
        # Phase 1: collect the basic parent rows
        # ---------------------------------------------------------
        log("INFO", f"正在访问安全的父件页面: {PAGE_URL}")
        page.get(PAGE_URL)
        page.wait.load_start()

        log("INFO", f"开启父件 API 网络监听: {API_PARENT}")
        page.listen.start(API_PARENT)
        listening = True
        page.refresh()

        current_page = 1
        total_records = 0
        next_btn_xpath = "xpath:/html/body/div[1]/div/div[3]/table/tbody/tr/td[10]/a/span/span[2]"

        while True:
            log("INFO", f"等待第 {current_page} 页父件 API 响应...")
            packet = page.listen.wait(timeout=20)

            if not packet:
                log("ERR", f"超时未收到第 {current_page} 页数据,父件扫荡结束。")
                break

            body = packet.response.body
            data = body if isinstance(body, (dict, list)) else json.loads(body)

            if not (isinstance(data, dict) and "result" in data):
                break

            items = data["result"].get("items", [])
            total_records = data["result"].get("totalCount", 0)

            for item in items:
                clean_parents_list.append({
                    # ``id`` is sent as partBomCostAccountingId in phase 2.
                    "_id": item.get("id"),
                    # ``parentMaterialId`` is sent as materialId in phase 2.
                    "_materialId": item.get("parentMaterialId"),
                    "parentMaterialCode": item.get("parentMaterialCode"),
                    "parentMaterialName": item.get("parentMaterialName"),
                    "bom_cost_tree": [],  # filled in during phase 2
                })

            log("OK", f"提取了 {len(items)} 个父件。总进度: {len(clean_parents_list)}/{total_records}")

            if len(clean_parents_list) >= total_records or len(items) == 0:
                break

            # Move to the next page.
            next_btn = page.ele(next_btn_xpath, timeout=5)
            if not next_btn:
                log("WARN", "未找到下一页按钮,停止翻页。")
                break

            parent_a = next_btn.parent(2)
            # ``attr`` can return None when the attribute is absent; guard it
            # so the membership test cannot raise TypeError.
            if parent_a and "disabled" in (parent_a.attr("class") or ""):
                log("INFO", "已到达最后一页。")
                break
            page.run_js("arguments[0].click();", next_btn)
            time.sleep(1.5)

            current_page += 1

        page.listen.stop()
        listening = False

        # ---------------------------------------------------------
        # Phase 2: request the cost tree of every parent
        # ---------------------------------------------------------
        log("INFO", f"=== 🚀 开始为 {len(clean_parents_list)} 个父件抓取 BOM 成本树 ===")

        js_template = """
        return new Promise((resolve, reject) => {
            if (typeof $ !== 'undefined' && $.ajax) {
                $.ajax({
                    url: '/api/services/TfTechApi/PartBom/PartBom_SearchByTreeCost',
                    type: 'POST',
                    data: {
                        materialId: MATERIAL_ID_PLACEHOLDER,
                        partBomCostAccountingId: ACCOUNTING_ID_PLACEHOLDER,
                        childMaterialCode: '',
                        childMaterialName: '',
                        childMaterialSpecification: '',
                        childMaterialModel: ''
                    },
                    headers: {
                        'referer': 'https://yunmes.tftykj.cn/PartBomCostAccounting/Detail?id=ACCOUNTING_ID_PLACEHOLDER'
                    },
                    success: function(response) {
                        resolve({status: 'success', data: response});
                    },
                    error: function(xhr, status, error) {
                        resolve({status: 'error', data: xhr.responseText || error});
                    }
                });
            } else {
                resolve({status: 'error', data: 'No jQuery'});
            }
        });
        """

        total_parents = len(clean_parents_list)
        for index, parent in enumerate(clean_parents_list):
            accounting_id = parent.get("_id")
            material_id = parent.get("_materialId")
            parent_code = parent.get("parentMaterialCode", "未知")

            # Parents lacking either id cannot be queried. They are skipped
            # here, but the checkpoint below must still run for them,
            # otherwise the final save is lost when the last parent has no ids.
            if accounting_id and material_id:
                log("INFO", f"[{index+1}/{total_parents}] 正在请求 BOM 成本树 (Code: {parent_code})...")

                js_code = js_template.replace("MATERIAL_ID_PLACEHOLDER", str(material_id)).replace("ACCOUNTING_ID_PLACEHOLDER", str(accounting_id))
                result = page.run_js(js_code)

                if result and result.get('status') == 'success':
                    data = result.get('data')
                    if isinstance(data, str):
                        try:
                            data = json.loads(data)
                        except ValueError:
                            # Not JSON: leave the string as is; the shape
                            # check below then ignores it.
                            pass

                    if isinstance(data, dict) and "result" in data:
                        flat_items = data["result"]
                        if isinstance(flat_items, list):
                            parent["bom_cost_tree"] = build_nested_tree(flat_items)
                            log("OK", f"  └── 成功重组了一棵包含 {len(flat_items)} 个节点的多层树。")
                else:
                    log("ERR", f"  └── 请求失败: {result.get('data') if result else '未知错误'}")

                time.sleep(random.uniform(0.3, 0.7))

            if (index + 1) % 10 == 0 or (index + 1) == total_parents:
                _save_bom_cost_progress(clean_parents_list[:index+1])
                log("INFO", f"💾 进度已实时保存至 JSON ({index+1}/{total_parents})")

        log("OK", f"=== 🏆 终极 BOM 成本多层树状抓取完成!文件路径: {TREE_FILE_PATH} ===")

    except Exception as e:
        log("ERR", f"发生异常: {e}")
    finally:
        # Stop the listener if an exception interrupted phase 1.
        if listening:
            try:
                page.listen.stop()
            except Exception as e:
                log("WARN", f"停止网络监听失败: {e}")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the BOM cost tree fetch.
    fetch_bom_cost_tree()
|
||||
217
browser_login/fetch_receipt_details_full.py
Normal file
217
browser_login/fetch_receipt_details_full.py
Normal file
@@ -0,0 +1,217 @@
|
||||
"""
|
||||
收货明细报表 - 全量分页抓取 (精简字段模式)
|
||||
目标: 模拟点击菜单,过滤 11 个核心字段,并循环点击下一页,直到所有数据抓取完毕。
|
||||
"""
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import random
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from login import get_page, log
|
||||
from config import OUTPUT_DIR
|
||||
|
||||
# Start page from which the menu navigation begins.
HOME_URL = "https://yunmes.tftykj.cn/"
# Substring used to match the receipt-details API request in the listener.
API_TARGET = "ReceiptDetailsCheck_SearchList_Proxy"
# Output file for the cleaned full export.
SAVE_PATH = OUTPUT_DIR / "receipt_details_full_clean.json"
|
||||
|
||||
def _clean_receipt_item(item):
    """Map one raw API row to the reduced record that is saved to JSON."""
    planned_qty = item.get("convertPlannedPurchaseQuantity")
    if planned_qty is None:
        planned_qty = item.get("plannedPurchaseQuantity")
    goods_qty = item.get("convertGoodsQuantity")
    if goods_qty is None:
        goods_qty = item.get("goodsQuantity")

    return {
        "采购订单号": item.get("purchaseOrderCode"),
        "行号": item.get("rowsNum"),
        "物料代码": item.get("materialCode"),
        "物料名称": item.get("materialName"),
        "物料规格": item.get("materialSpecification"),
        "仓库代码": item.get("warehouseCode"),
        "仓库名称": item.get("warehouseName"),
        "供应商代码": item.get("supplierCode"),
        "供应商名称": item.get("supplierName"),
        "单位名称": item.get("unitName"),
        "转换单位": item.get("convertUnitName"),
        "收货单价": item.get("receivePrice"),
        "收货时间": item.get("receiptTime"),
        "进货数量": planned_qty,
        "收货数量": goods_qty,
        "收货总金额": item.get("receiveAmount"),
    }


def _click_receipt_element(page, ele):
    """Click ``ele`` normally; fall back to a JavaScript click on error."""
    try:
        ele.click()
    except Exception:
        page.run_js("arguments[0].click();", ele)


def fetch_receipt_details_full():
    """Export every page of the receipt-details report to ``SAVE_PATH``.

    Navigates from ``HOME_URL`` through three menu levels, listens for
    ``API_TARGET`` responses, then clicks "next page" until the button is
    disabled, missing, or a page is empty/invalid. Records are checkpointed
    every 10 pages and saved at the end. On an exception the records
    collected so far are written to ``receipt_details_RESCUE.json``.
    """
    log("INFO", "=== 🚚 启动收货明细报表全量抓取 (精简字段模式) ===")
    page = get_page(port=9222)
    all_clean_items = []
    listening = False

    try:
        log("INFO", f"正在回到主页起点: {HOME_URL}")
        page.get(HOME_URL)
        page.wait.load_start()
        time.sleep(2)

        menus = [
            ("第一层: 业务统计报表", 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'),
            ("第二层: 采购业务报表", 'xpath:/html/body/div[7]/div/div[1]/div/div[4]/div/p'),
            ("第三层: 收货明细报表", 'xpath:/html/body/div[8]/div/div[1]/div/div[4]/div/p')
        ]

        log("INFO", "开始模拟人工点击左侧导航菜单...")
        for name, xpath in menus:
            ele = page.ele(xpath, timeout=5)
            if not ele:
                log("ERR", f"找不到菜单元素: {name}")
                return
            _click_receipt_element(page, ele)
            time.sleep(1.5)

        log("OK", "✅ 成功点开收货明细报表界面!")

        # Click an empty area to hide the menu.
        blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div'
        blank_ele = page.ele(blank_xpath, timeout=3)
        if blank_ele:
            _click_receipt_element(page, blank_ele)
            time.sleep(0.5)

        log("INFO", f"开启底层数据拦截网: {API_TARGET}")
        page.listen.start(API_TARGET)
        listening = True

        packet = page.listen.wait(timeout=10)

        if not packet:
            # No request was seen: trigger one with the query button.
            log("INFO", "尝试寻找并点击页面上的【查询】按钮...")
            query_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span'
            query_btn = page.ele(query_btn_xpath, timeout=3)

            if query_btn:
                _click_receipt_element(page, query_btn)
            packet = page.listen.wait(timeout=15)

        if not packet:
            log("ERR", "未能拦截到第一页数据,可能网络超时或查询未触发。")
            return  # the listener is stopped in ``finally``

        # ---------------------------------------------------------
        # First page
        # ---------------------------------------------------------
        log("OK", f"🎉 成功拦截到第一页数据!HTTP: {packet.response.status}")
        body = packet.response.body
        data = body if isinstance(body, (dict, list)) else json.loads(body)

        total_count = 0
        if isinstance(data, dict) and "result" in data:
            total_count = data["result"].get("totalCount", 0)
            items = data["result"].get("items", [])
            all_clean_items.extend(_clean_receipt_item(item) for item in items)
            log("OK", f"第一页清洗完成,提取了 {len(items)} 条数据。后端报告总条数: {total_count}")

        page_num = 1

        # ---------------------------------------------------------
        # Remaining pages
        # ---------------------------------------------------------
        next_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/button[2]'

        while True:
            # Random pause between page turns (2.5 to 5.5 seconds).
            delay = random.uniform(2.5, 5.5)
            log("INFO", f"⏳ 模拟真人停顿 {delay:.2f} 秒后,准备点击下一页...")
            time.sleep(delay)

            # Extra 10-20 second pause every 50 pages.
            if page_num > 1 and page_num % 50 == 0:
                long_delay = random.uniform(10.0, 20.0)
                log("INFO", f"☕️ 已经连续高强度翻了 {page_num} 页,触发风控规避机制,假装喝水休息 {long_delay:.2f} 秒...")
                time.sleep(long_delay)

            next_btn = page.ele(next_btn_xpath, timeout=5)
            if not next_btn:
                log("ERR", "找不到下一页按钮,翻页中止。")
                break

            # A disabled button marks the last page.
            class_str = str(next_btn.attr("class"))
            aria_disabled = next_btn.attr("aria-disabled")
            is_disabled_attr = next_btn.attr("disabled") is not None

            if "disabled" in class_str or is_disabled_attr or aria_disabled == "true":
                log("OK", "🏁 下一页按钮已被禁用,说明已经到达最后一页!")
                break

            page_num += 1
            log("INFO", f"正在点击【下一页】抓取第 {page_num} 页...")

            try:
                next_btn.click()
            except Exception as e:
                log("ERR", f"普通点击失败: {e},尝试 JS 点击...")
                page.run_js("arguments[0].click();", next_btn)

            # Wait for the API response of the new page.
            packet = page.listen.wait(timeout=15)
            if not packet:
                log("ERR", f"第 {page_num} 页请求超时或未触发,中止抓取。")
                break

            body = packet.response.body
            data = body if isinstance(body, (dict, list)) else json.loads(body)

            if not (isinstance(data, dict) and "result" in data):
                log("ERR", f"第 {page_num} 页数据结构异常,中止。")
                break

            items = data["result"].get("items", [])
            if not items:
                log("WARN", f"第 {page_num} 页返回了空列表,可能已无数据。")
                break

            all_clean_items.extend(_clean_receipt_item(item) for item in items)
            log("OK", f"第 {page_num} 页清洗完成,累计提取 {len(all_clean_items)} 条数据。")

            # Checkpoint every 10 pages so a crash loses little data.
            if page_num % 10 == 0:
                with open(SAVE_PATH, "w", encoding="utf-8") as f:
                    json.dump(all_clean_items, f, ensure_ascii=False, indent=2)
                log("INFO", f"💾 自动存档: 已保存 {len(all_clean_items)} 条记录至本地。")

        page.listen.stop()
        listening = False

        # ---------------------------------------------------------
        # Final save
        # ---------------------------------------------------------
        if all_clean_items:
            with open(SAVE_PATH, "w", encoding="utf-8") as f:
                json.dump(all_clean_items, f, ensure_ascii=False, indent=2)
            log("OK", f"🎉 全部抓取完成!总计成功提取 {len(all_clean_items)} 条数据。")
            log("OK", f"数据已保存至: {SAVE_PATH}")

    except Exception as e:
        log("ERR", f"发生全局异常: {e}")
        # Try to rescue what has been collected so far.
        if all_clean_items:
            rescue_path = OUTPUT_DIR / "receipt_details_RESCUE.json"
            with open(rescue_path, "w", encoding="utf-8") as f:
                json.dump(all_clean_items, f, ensure_ascii=False, indent=2)
            log("INFO", f"🆘 触发异常保存,抢救了 {len(all_clean_items)} 条数据。")
    finally:
        # Stop the listener on early-return and exception paths.
        if listening:
            try:
                page.listen.stop()
            except Exception as e:
                log("WARN", f"停止网络监听失败: {e}")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the full receipt-details export.
    fetch_receipt_details_full()
|
||||
252
browser_login/fetch_receipt_details_incremental.py
Normal file
252
browser_login/fetch_receipt_details_incremental.py
Normal file
@@ -0,0 +1,252 @@
|
||||
"""
|
||||
收货明细报表 - 智能增量同步脚本
|
||||
目标:
|
||||
1. 自动连接本地 SQLite 数据库查询当前存量。
|
||||
2. 进入 ERP 系统截获第一页 API,提取系统总条数。
|
||||
3. 精准计算需要跳转的起始页码,并在前端页面自动完成跳转。
|
||||
4. 仅提取新增页面的数据,内存去重后插入 SQLite,绝不重复抓取历史数据。
|
||||
"""
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import math
|
||||
import random
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from login import get_page, log
|
||||
from config import DB_PATH
|
||||
|
||||
# Start page from which the menu navigation begins.
HOME_URL = "https://yunmes.tftykj.cn/"
# Substring used to match the receipt-details API request in the listener.
API_TARGET = "ReceiptDetailsCheck_SearchList_Proxy"
|
||||
|
||||
def get_local_count(conn):
    """Return the number of rows currently stored in ``receipt_details``."""
    cur = conn.cursor()
    cur.execute("SELECT COUNT(*) FROM receipt_details")
    (row_count,) = cur.fetchone()
    return row_count
|
||||
|
||||
def item_exists(cursor, item):
    """Tell whether a detail row is already stored in ``receipt_details``.

    A row is identified by the combination of purchase order code, row
    number and material code taken from ``item``.
    """
    lookup_key = (
        item.get("purchaseOrderCode"),
        item.get("rowsNum"),
        item.get("materialCode"),
    )
    cursor.execute('''
        SELECT 1 FROM receipt_details
        WHERE purchase_order_code = ? AND row_no = ? AND material_code = ?
    ''', lookup_key)
    match = cursor.fetchone()
    return match is not None
|
||||
|
||||
def _upsert_receipt_detail(cursor, item):
    """Insert ``item`` into ``receipt_details`` or update the matching row.

    A row matches on (purchase_order_code, row_no, material_code). On a match
    only the quantities, price and amount are refreshed.

    Returns:
        True when a new row was inserted, False when an existing row was
        updated.
    """
    po_code = item.get("purchaseOrderCode")
    row_no = item.get("rowsNum")
    mat_code = item.get("materialCode")

    cursor.execute('SELECT id FROM receipt_details WHERE purchase_order_code = ? AND row_no = ? AND material_code = ?', (po_code, row_no, mat_code))
    existing_record = cursor.fetchone()

    p_qty = item.get("convertPlannedPurchaseQuantity") if item.get("convertPlannedPurchaseQuantity") is not None else item.get("plannedPurchaseQuantity")
    r_qty = item.get("convertGoodsQuantity") if item.get("convertGoodsQuantity") is not None else item.get("goodsQuantity")

    if existing_record:
        cursor.execute('''
            UPDATE receipt_details
            SET purchase_qty = ?, receive_qty = ?, receive_price = ?, total_amount = ?
            WHERE id = ?
        ''', (p_qty, r_qty, item.get("receivePrice"), item.get("receiveAmount"), existing_record[0]))
        return False

    cursor.execute('''
        INSERT INTO receipt_details (
            purchase_order_code, row_no, material_code, material_name,
            material_specification, warehouse_code, warehouse_name,
            supplier_code, supplier_name, unit_name, conversion_unit,
            receive_price, receipt_time,
            purchase_qty, receive_qty, total_amount
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', (
        po_code,
        row_no,
        mat_code,
        item.get("materialName"),
        item.get("materialSpecification"),
        item.get("warehouseCode"),
        item.get("warehouseName"),
        item.get("supplierCode"),
        item.get("supplierName"),
        item.get("unitName"),
        item.get("convertUnitName"),
        item.get("receivePrice"),
        item.get("receiptTime"),
        p_qty,
        r_qty,
        item.get("receiveAmount")
    ))
    return True


def _click_or_js_click(page, ele):
    """Click ``ele`` normally; fall back to a JavaScript click on error."""
    try:
        ele.click()
    except Exception:
        page.run_js("arguments[0].click();", ele)


def fetch_receipt_details_incremental():
    """Sync only the report pages that are new relative to the local DB.

    Compares the local row count with the ``totalCount`` of the first
    intercepted ``API_TARGET`` response, jumps to the first page that is not
    fully stored locally (50 rows per page assumed) and upserts every row
    from there to the last page. Each page is committed separately.

    Exceptions raised while scraping are logged, not raised. The database
    connection is always closed.
    """
    log("INFO", "=== 🚀 启动收货明细报表 - 智能增量同步 ===")

    if not DB_PATH.exists():
        log("ERR", f"找不到数据库文件: {DB_PATH},请先执行全量导入!")
        return

    conn = sqlite3.connect(DB_PATH)
    try:
        local_count = get_local_count(conn)
        log("INFO", f"📦 本地数据库当前总计: {local_count} 条数据")
        page = get_page(port=9222)
    except Exception:
        # Do not leak the connection when setup fails; the error still
        # propagates to the caller as before.
        conn.close()
        raise

    # Stop the listener only if it was started. Several early returns happen
    # before ``listen.start``.
    # NOTE(review): stopping a never-started listener may raise; verify
    # against the DrissionPage version in use.
    listening = False
    page_size = 50  # rows per page in the report UI

    try:
        log("INFO", f"正在回到主页起点: {HOME_URL}")
        page.get(HOME_URL)
        page.wait.load_start()
        time.sleep(2)

        menus = [
            ("第一层: 业务统计报表", 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'),
            ("第二层: 采购业务报表", 'xpath:/html/body/div[7]/div/div[1]/div/div[4]/div/p'),
            ("第三层: 收货明细报表", 'xpath:/html/body/div[8]/div/div[1]/div/div[4]/div/p')
        ]

        log("INFO", "模拟点击左侧导航菜单...")
        for name, xpath in menus:
            ele = page.ele(xpath, timeout=5)
            if not ele:
                log("ERR", f"找不到菜单元素: {name}")
                return
            _click_or_js_click(page, ele)
            time.sleep(1.5)

        log("OK", "✅ 成功点开收货明细报表界面!")

        # Click an empty area to hide the menu.
        blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div'
        blank_ele = page.ele(blank_xpath, timeout=3)
        if blank_ele:
            _click_or_js_click(page, blank_ele)
            time.sleep(0.5)

        log("INFO", f"开启底层数据拦截网: {API_TARGET}")
        page.listen.start(API_TARGET)
        listening = True

        packet = page.listen.wait(timeout=10)
        if not packet:
            # No request was seen: trigger one with the query button.
            query_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span'
            query_btn = page.ele(query_btn_xpath, timeout=3)
            if query_btn:
                _click_or_js_click(page, query_btn)
            packet = page.listen.wait(timeout=15)

        if not packet:
            log("ERR", "未能拦截到第一页数据,无法获取线上总条数。")
            return

        body = packet.response.body
        data = body if isinstance(body, (dict, list)) else json.loads(body)

        remote_count = 0
        if isinstance(data, dict) and "result" in data:
            remote_count = data["result"].get("totalCount", 0)

        log("INFO", f"🌐 线上 ERP 系统当前总条数: {remote_count} 条")

        if remote_count <= local_count:
            log("OK", "🎉 本地数据已是最新状态,无需抓取!")
            return

        new_items_count = remote_count - local_count
        log("INFO", f"🔥 发现新增数据: {new_items_count} 条!准备进行增量跳页抓取...")

        # Example: 37584 local rows -> 751 full pages, so start at page 752.
        start_page = local_count // page_size + 1
        end_page = math.ceil(remote_count / page_size)

        log("INFO", f"🎯 智能跳页计算完毕:直接跳转至第 {start_page} 页 (目标到 {end_page} 页)")

        # Jump to the start page through the pager's input box.
        if start_page > 1:
            jumper_input_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/span[3]/div/div//input'
            input_ele = page.ele(jumper_input_xpath, timeout=5)

            if not input_ele:
                jumper_input_xpath = 'xpath://input[@type="number" and @aria-label="页"]'
                input_ele = page.ele(jumper_input_xpath, timeout=5)

            if not input_ele:
                log("ERR", "找不到页码输入框,增量跳转失败!")
                return

            input_ele.clear()
            input_ele.input(str(start_page))
            time.sleep(0.5)
            input_ele.input('\n')

            # Wait for the response of the page we jumped to.
            packet = page.listen.wait(timeout=15)
            if not packet:
                log("ERR", "跳转失败,未拦截到目标页的数据请求。")
                return
            log("OK", f"✅ 成功跳转至第 {start_page} 页并截获数据!")

        # ---------------------------------------------------------
        # Store the rows of the new pages
        # ---------------------------------------------------------
        current_page = start_page
        cursor = conn.cursor()
        total_inserted = 0
        next_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/button[2]'

        while current_page <= end_page:
            body = packet.response.body
            data = body if isinstance(body, (dict, list)) else json.loads(body)

            # Counts inserted AND updated rows, as in the original log line.
            inserted_this_page = 0
            if isinstance(data, dict) and "result" in data:
                for item in data["result"].get("items", []):
                    if _upsert_receipt_detail(cursor, item):
                        total_inserted += 1
                    inserted_this_page += 1

            conn.commit()
            log("OK", f"第 {current_page} 页处理完毕,成功入库 {inserted_this_page} 条新数据。")

            # Click on to the next page while there is one.
            if current_page < end_page:
                delay = random.uniform(1.5, 3.5)
                log("INFO", f"⏳ 停顿 {delay:.2f} 秒后点击下一页...")
                time.sleep(delay)

                next_btn = page.ele(next_btn_xpath, timeout=5)
                if not next_btn:
                    log("ERR", "找不到下一页按钮!")
                    break

                _click_or_js_click(page, next_btn)

                packet = page.listen.wait(timeout=15)
                if not packet:
                    log("ERR", f"第 {current_page + 1} 页请求超时!")
                    break

            current_page += 1

        log("OK", f"🎉 增量同步大功告成!总计入库 {total_inserted} 条全新数据!")

    except Exception as e:
        log("ERR", f"发生全局异常: {e}")
    finally:
        conn.close()
        if listening:
            try:
                page.listen.stop()
            except Exception as e:
                log("WARN", f"停止网络监听失败: {e}")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the incremental receipt-details sync.
    fetch_receipt_details_incremental()
|
||||
227
browser_login/import_to_sqlite.py
Normal file
227
browser_login/import_to_sqlite.py
Normal file
@@ -0,0 +1,227 @@
|
||||
import sqlite3
|
||||
import json
|
||||
from pathlib import Path
|
||||
import os
|
||||
from config import OUTPUT_DIR, DB_PATH
|
||||
|
||||
# JSON export of the receipt details to import.
RECEIPT_JSON = OUTPUT_DIR / "receipt_details_full_clean.json"
# JSON export of the nested BOM cost tree to import.
BOM_JSON = OUTPUT_DIR / "bom_cost_full_tree_final.json"
|
||||
|
||||
def init_db():
    """Open the database at ``DB_PATH`` and create the schema.

    ``receipt_details`` and its indexes are created only if missing.
    ``bom_child`` and ``bom_parent`` are dropped and recreated on every call,
    so any BOM rows already stored are discarded.

    Returns:
        The open ``sqlite3`` connection, with the schema changes committed.

    Raises:
        sqlite3.Error: if a statement fails; the connection is closed first.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        # Receipt details table.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS receipt_details (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                purchase_order_code TEXT,
                row_no INTEGER,
                material_code TEXT,
                material_name TEXT,
                material_specification TEXT,
                warehouse_code TEXT,
                warehouse_name TEXT,
                supplier_code TEXT,
                supplier_name TEXT,
                unit_name TEXT,
                conversion_unit TEXT,
                receive_price REAL,
                receipt_time TEXT,
                purchase_qty REAL,
                receive_qty REAL,
                total_amount REAL
            )
        ''')

        # Indexes that speed up queries on the receipt details table.
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_receipt_material_code ON receipt_details(material_code)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_receipt_supplier_name ON receipt_details(supplier_name)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_receipt_time ON receipt_details(receipt_time)')

        # BOM tables are rebuilt from scratch. The child table is dropped
        # first because it references the parent table.
        cursor.execute('DROP TABLE IF EXISTS bom_child')
        cursor.execute('DROP TABLE IF EXISTS bom_parent')

        # BOM cost table (parent items).
        cursor.execute('''
            CREATE TABLE bom_parent (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                parent_material_code TEXT UNIQUE,
                parent_material_name TEXT
            )
        ''')

        # BOM cost table (child details). The tree is stored as an adjacency
        # list: every node records the id of its parent node.
        cursor.execute('''
            CREATE TABLE bom_child (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                parent_material_code TEXT,  -- 归属的最顶层父件
                node_material_code TEXT,
                node_material_name TEXT,
                bom_level INTEGER,
                parent_node_id INTEGER,     -- 指向上一级子件的 id,如果是一级子件则为空
                usage_qty REAL DEFAULT 1.0,
                FOREIGN KEY(parent_material_code) REFERENCES bom_parent(parent_material_code),
                FOREIGN KEY(parent_node_id) REFERENCES bom_child(id)
            )
        ''')

        cursor.execute('CREATE INDEX IF NOT EXISTS idx_bom_child_parent_code ON bom_child(parent_material_code)')
        cursor.execute('CREATE INDEX IF NOT EXISTS idx_bom_child_node_code ON bom_child(node_material_code)')

        conn.commit()
    except Exception:
        # The caller never receives the connection on failure, so close it
        # here instead of leaking it, then re-raise the original error.
        conn.close()
        raise
    return conn
|
||||
|
||||
def import_receipt_details(conn):
    """Rebuild the ``receipt_details`` table from the receipt-detail JSON export.

    Reads ``RECEIPT_JSON``, drops and recreates ``receipt_details`` together
    with its three lookup indexes, inserts one row per JSON record and commits.

    Args:
        conn: An open ``sqlite3`` connection to the target database.

    Returns:
        None. When ``RECEIPT_JSON`` does not exist a message is printed and
        the database is left untouched.
    """
    if not RECEIPT_JSON.exists():
        print(f"找不到收货明细文件: {RECEIPT_JSON}")
        return

    print("开始导入收货明细数据...")
    with open(RECEIPT_JSON, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Single source of truth for (table column, JSON key) pairs, so the column
    # list, the placeholders and the bound values can never drift apart.
    field_map = (
        ("purchase_order_code", "采购订单号"),
        ("row_no", "行号"),
        ("material_code", "物料代码"),
        ("material_name", "物料名称"),
        ("material_specification", "物料规格"),
        ("warehouse_code", "仓库代码"),
        ("warehouse_name", "仓库名称"),
        ("supplier_code", "供应商代码"),
        ("supplier_name", "供应商名称"),
        ("unit_name", "单位名称"),
        ("conversion_unit", "转换单位"),
        ("receive_price", "收货单价"),
        ("receipt_time", "收货时间"),
        ("purchase_qty", "进货数量"),
        ("receive_qty", "收货数量"),
        ("total_amount", "收货总金额"),
    )

    # Build every row BEFORE touching the database: a malformed record then
    # fails here, while the previously imported table is still intact.
    # NOTE(review): with sqlite3's default transaction handling the DROP below
    # is likely applied immediately rather than rolled back on error - confirm.
    rows = [
        tuple(item.get(json_key) for _, json_key in field_map)
        for item in data
    ]

    cursor = conn.cursor()
    # Drop and recreate so that repeated runs start clean and pick up any
    # change to the table structure.
    cursor.execute('DROP TABLE IF EXISTS receipt_details')
    cursor.execute('''
        CREATE TABLE receipt_details (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            purchase_order_code TEXT,
            row_no INTEGER,
            material_code TEXT,
            material_name TEXT,
            material_specification TEXT,
            warehouse_code TEXT,
            warehouse_name TEXT,
            supplier_code TEXT,
            supplier_name TEXT,
            unit_name TEXT,
            conversion_unit TEXT,
            receive_price REAL,
            receipt_time TEXT,
            purchase_qty REAL,
            receive_qty REAL,
            total_amount REAL
        )
    ''')
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_receipt_material_code ON receipt_details(material_code)')
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_receipt_supplier_name ON receipt_details(supplier_name)')
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_receipt_time ON receipt_details(receipt_time)')

    columns = ", ".join(column for column, _ in field_map)
    placeholders = ", ".join("?" for _ in field_map)
    # One batched call instead of one execute() per record.
    cursor.executemany(
        f'INSERT INTO receipt_details ({columns}) VALUES ({placeholders})',
        rows,
    )

    conn.commit()
    print(f"成功导入 {len(rows)} 条收货明细数据!")
|
||||
|
||||
def _insert_bom_tree(cursor, parent_material_code, tree_nodes, parent_node_id=None):
|
||||
"""递归插入 BOM 树节点"""
|
||||
for node in tree_nodes:
|
||||
# 提取当前节点信息
|
||||
node_code = node.get("childMaterialCode")
|
||||
node_name = node.get("childMaterialName")
|
||||
bom_level = node.get("bomLevel")
|
||||
usage_qty = float(node.get("usageQty") or 1.0)
|
||||
|
||||
# 插入当前节点
|
||||
cursor.execute('''
|
||||
INSERT INTO bom_child (
|
||||
parent_material_code, node_material_code, node_material_name, bom_level, parent_node_id, usage_qty
|
||||
) VALUES (?, ?, ?, ?, ?, ?)
|
||||
''', (parent_material_code, node_code, node_name, bom_level, parent_node_id, usage_qty))
|
||||
|
||||
# 获取刚插入的节点 ID,作为其子节点的 parent_node_id
|
||||
current_node_id = cursor.lastrowid
|
||||
|
||||
# 如果有子节点,递归插入
|
||||
sub_items = node.get("sub_items", [])
|
||||
if sub_items:
|
||||
_insert_bom_tree(cursor, parent_material_code, sub_items, current_node_id)
|
||||
|
||||
def import_bom_data(conn):
    """Reload ``bom_parent`` / ``bom_child`` from the BOM cost JSON export.

    Existing rows in both tables are deleted, then every JSON entry with a
    non-empty ``parentMaterialCode`` is inserted as a parent and its
    ``bom_cost_tree`` is inserted through ``_insert_bom_tree``. The work is
    committed at the end and a summary line is printed.

    Args:
        conn: An open ``sqlite3`` connection whose BOM tables already exist.

    Returns:
        None. When ``BOM_JSON`` does not exist a message is printed and
        nothing is modified.
    """
    if not BOM_JSON.exists():
        print(f"找不到 BOM 成本文件: {BOM_JSON}")
        return

    print("开始导入 BOM 成本数据...")
    with open(BOM_JSON, 'r', encoding='utf-8') as f:
        data = json.load(f)

    cursor = conn.cursor()
    # Wipe previous contents, children first.
    cursor.execute('DELETE FROM bom_child')
    cursor.execute('DELETE FROM bom_parent')

    imported_parents = 0
    for entry in data:
        code = entry.get("parentMaterialCode")
        # Entries without a parent code are ignored.
        if not code:
            continue
        name = entry.get("parentMaterialName")

        try:
            cursor.execute('''
                INSERT INTO bom_parent (parent_material_code, parent_material_name)
                VALUES (?, ?)
            ''', (code, name))
            imported_parents += 1

            # Walk this parent's tree; its first-level nodes have no parent node.
            subtree = entry.get("bom_cost_tree", [])
            if subtree:
                _insert_bom_tree(cursor, code, subtree, parent_node_id=None)
        except sqlite3.IntegrityError:
            print(f"警告: 父件重复 {code},跳过")

    conn.commit()

    # Report how many child nodes ended up in the table.
    child_count = cursor.execute('SELECT COUNT(*) FROM bom_child').fetchone()[0]
    print(f"成功导入 {imported_parents} 个 BOM 父件,包含 {child_count} 个子件节点!")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: create the schema, then load both JSON exports.
    print(f"数据库文件将保存在: {DB_PATH}")
    conn = init_db()
    try:
        import_receipt_details(conn)
        import_bom_data(conn)
    finally:
        # Release the database handle even when an import step raises.
        conn.close()
    print("全部导入完成!你可以使用 SQLite 客户端连接 erp_data.db 查看数据。")
|
||||
51
browser_login/keep_alive.py
Normal file
51
browser_login/keep_alive.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""
|
||||
ERP 浏览器保活服务 (手工填写账号密码专用)
|
||||
|
||||
运行此脚本后,在弹出的 Chrome 浏览器中手工登录。
|
||||
登录成功后,不要关闭终端和浏览器。
|
||||
其他抓取脚本(指定同一端口)就可以直接复用这个已经登录的浏览器实例了!
|
||||
"""
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
# 引入现有的登录模块
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
from login import get_page, login_manual, log
|
||||
|
||||
def main(port=9222):
    """Open the browser, wait for a manual login, then keep the session alive.

    After a successful login the function blocks in a sleep loop until the
    user presses Ctrl+C. The browser is closed exactly once on every exit
    path: login failure, Ctrl+C, or an unexpected error.

    Args:
        port: Port passed to ``get_page``; the scraping scripts are expected
            to use the same one. Defaults to 9222, the value that was
            previously hard-coded.
    """
    log("INFO", f"=== 🚀 启动浏览器保活服务 (端口: {port}) ===")
    # Every scraping script is meant to attach to this same port.
    page = get_page(port=port)

    try:
        log("INFO", "等待您在弹出的浏览器中完成手工登录...")

        # Per the original author's note, login_manual polls until the login
        # is detected; a falsy result is treated as timeout/failure here.
        if not login_manual(page):
            log("ERR", "❌ 登录超时或失败,保活服务即将退出。")
            return

        log("OK", "✅ 登录成功!浏览器已进入保活状态。")
        log("INFO", "==================================================")
        log("INFO", "⚠️ 请勿关闭此终端窗口和弹出的 Chrome 浏览器!")
        log("INFO", "👉 现在您可以新开一个终端,去运行其他的抓取脚本了。")
        log("INFO", "🛑 如果要结束保活关闭浏览器,请在此终端按 Ctrl+C。")
        log("INFO", "==================================================")

        # Idle forever; only Ctrl+C (or an error) leaves this loop.
        while True:
            time.sleep(10)

    except KeyboardInterrupt:
        log("INFO", "接收到退出信号 (Ctrl+C),正在关闭浏览器...")
    except Exception as e:
        log("ERR", f"发生异常: {e}")
    finally:
        # Single cleanup point; replaces the three duplicated quit() calls and
        # avoids calling quit() a second time when the first attempt raised.
        page.quit()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user