From c5232bccc142df2fc8a4b4a5aef144ce1284b1e3 Mon Sep 17 00:00:00 2001 From: hjq <770690987@qq.com> Date: Fri, 12 Jun 2026 11:09:15 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8A=93=E5=8F=96=E7=94=9F=E4=BA=A7=E5=B7=A5?= =?UTF-8?q?=E5=8D=95=EF=BC=8C=E6=8A=93=E5=8F=96=E5=8F=91=E6=96=99=E5=BC=82?= =?UTF-8?q?=E5=B8=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- browser_login/fetch_receipt_details_full.py | 107 +++++--- .../fetch_receipt_details_incremental.py | 251 ++++++++++-------- web_ui/app.py | 4 + 3 files changed, 208 insertions(+), 154 deletions(-) diff --git a/browser_login/fetch_receipt_details_full.py b/browser_login/fetch_receipt_details_full.py index a87aa61..fdb20b0 100644 --- a/browser_login/fetch_receipt_details_full.py +++ b/browser_login/fetch_receipt_details_full.py @@ -13,7 +13,7 @@ from login import get_page, log from config import OUTPUT_DIR HOME_URL = "https://yunmes.tftykj.cn/" -API_TARGET = "ReceiptDetailsCheck_SearchList_Proxy" +API_TARGET = "ReceiptDetailsCheckFinace_SearchList" SAVE_PATH = OUTPUT_DIR / "receipt_details_full_clean.json" def fetch_receipt_details_full(): @@ -32,53 +32,76 @@ def fetch_receipt_details_full(): all_clean_items = [] try: - log("INFO", f"正在回到主页起点: {HOME_URL}") - page.get(HOME_URL) + TARGET_URL = "https://yunmes.tftykj.cn/ReceiptDetailsCheckFinace" + log("INFO", f"正在直接访问目标页面: {TARGET_URL}") + page.get(TARGET_URL) page.wait.load_start() time.sleep(2) - menus = [ - ("第一层: 业务统计报表", 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'), - ("第二层: 财务业务报表", 'text:财务业务报表'), - ("第三层: 财务收货明细报表", 'text:财务收货明细报表') - ] - - log("INFO", "开始模拟人工点击左侧导航菜单...") - for name, xpath in menus: - ele = page.ele(xpath, timeout=5) - if ele: - try: ele.click() - except: page.run_js("arguments[0].click();", ele) - time.sleep(1.5) - else: - log("ERR", f"找不到菜单元素: {name}") - return - - log("OK", "✅ 成功点开收货明细报表界面!") - - # 点击空白处隐藏菜单 - blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div' - blank_ele = page.ele(blank_xpath, timeout=3) - if blank_ele: - try: blank_ele.click() - except: page.run_js("arguments[0].click();", blank_ele) - time.sleep(0.5) + # 等待数据表格区域出现 + table = page.ele("xpath://table | .el-table__body", timeout=15) + if table: + log("OK", "✅ 成功打开财务收货明细报表界面!") + else: + log("WARN", "表格元素未找到,继续执行") log("INFO", f"开启底层数据拦截网: {API_TARGET}") page.listen.start(API_TARGET) - packet = page.listen.wait(timeout=10) + # 为了能够获取当月的数据,强制设置时间为当月第一天到最后一天,并清理其他条件 + import datetime, calendar + now = datetime.datetime.now() + first_day = datetime.date(now.year, now.month, 1).strftime('%Y-%m-%d') + last_day = datetime.date(now.year, now.month, calendar.monthrange(now.year, now.month)[1]).strftime('%Y-%m-%d') - if not packet: - log("INFO", "尝试寻找并点击页面上的【查询】按钮...") - query_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span' - query_btn = page.ele(query_btn_xpath, timeout=3) + log("INFO", f"正在自动设置查询时间范围: {first_day} 至 {last_day}") + + page.run_js(f""" + try {{ + var dates = document.querySelectorAll('.datebox-f, .datetimebox-f, .el-date-editor input'); + if (dates.length >= 2) {{ + dates[0].value = '{first_day}'; + dates[1].value = '{last_day}'; + dates[0].dispatchEvent(new Event('input', {{ bubbles: true }})); + dates[0].dispatchEvent(new Event('change', {{ bubbles: true }})); + dates[1].dispatchEvent(new Event('input', {{ bubbles: true }})); + dates[1].dispatchEvent(new Event('change', {{ bubbles: true }})); + }} + }} catch(e) {{ console.log(e); }} + """) + time.sleep(1) + + # 寻找并点击页面上的【查询】按钮 + log("INFO", "尝试寻找并点击页面上的【查询】按钮...") + + # 使用 DrissionPage 内置选择器尝试寻找 + query_btn = page.ele('text=查询', timeout=3) + if not query_btn: + query_btn = page.ele('xpath://button[contains(., "查询")]', timeout=3) - if query_btn: - try: query_btn.click() - except: page.run_js("arguments[0].click();", query_btn) - packet = page.listen.wait(timeout=15) + if query_btn: + try: query_btn.click() + except: page.run_js("arguments[0].click();", query_btn) + else: + log("WARN", "常规选择器找不到查询按钮,尝试使用全局 JS 强行寻找...") + # 暴力兜底:通过 JS 遍历所有按钮和链接点击 + clicked = page.run_js(""" + var btns = document.querySelectorAll('button, a, .l-btn, .el-button'); + for(var i=0; i 0: + target_resume_page = max(1, len(all_clean_items) // 50) + # 截断已有数据,防止与即将重新抓取的页数重叠导致重复 + all_clean_items = all_clean_items[:(target_resume_page - 1) * 50] total_count = 0 if isinstance(data, dict) and "result" in data: diff --git a/browser_login/fetch_receipt_details_incremental.py b/browser_login/fetch_receipt_details_incremental.py index f3fa0c9..9e525be 100644 --- a/browser_login/fetch_receipt_details_incremental.py +++ b/browser_login/fetch_receipt_details_incremental.py @@ -13,6 +13,8 @@ import subprocess import math import random import sqlite3 +import datetime +import calendar from pathlib import Path sys.path.insert(0, str(Path(__file__).parent)) @@ -20,7 +22,7 @@ from login import get_page, log from config import DB_PATH HOME_URL = "https://yunmes.tftykj.cn/" -API_TARGET = "ReceiptDetailsCheck_SearchList_Proxy" +API_TARGET = "ReceiptDetailsCheckFinace_SearchList" def get_local_count(conn): """获取本地数据库已有的总记录数""" @@ -54,50 +56,76 @@ def fetch_receipt_details_incremental(): page = get_page(port=9222) try: - log("INFO", f"正在回到主页起点: {HOME_URL}") - page.get(HOME_URL) + TARGET_URL = "https://yunmes.tftykj.cn/ReceiptDetailsCheckFinace" + log("INFO", f"正在直接访问目标页面: {TARGET_URL}") + page.get(TARGET_URL) page.wait.load_start() time.sleep(2) - menus = [ - ("第一层: 业务统计报表", 'xpath://*[@id="app"]/div/div[1]/div[1]/div[2]/div/div[1]/div/div[10]/div/p'), - ("第二层: 采购业务报表", 'xpath:/html/body/div[7]/div/div[1]/div/div[4]/div/p'), - ("第三层: 收货明细报表", 'xpath:/html/body/div[8]/div/div[1]/div/div[4]/div/p') - ] - - log("INFO", "模拟点击左侧导航菜单...") - for name, xpath in menus: - ele = page.ele(xpath, timeout=5) - if ele: - try: ele.click() - except: page.run_js("arguments[0].click();", ele) - time.sleep(1.5) - else: - log("ERR", f"找不到菜单元素: {name}") - return - - log("OK", "✅ 成功点开收货明细报表界面!") - - # 隐藏菜单 - blank_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[1]/div[2]/div[2]/div/div[1]/div' - blank_ele = page.ele(blank_xpath, timeout=3) - if blank_ele: - try: blank_ele.click() - except: page.run_js("arguments[0].click();", blank_ele) - time.sleep(0.5) + # 等待数据表格区域出现 + table = page.ele("xpath://table | .el-table__body", timeout=15) + if table: + log("OK", "✅ 成功打开财务收货明细报表界面!") + else: + log("WARN", "表格元素未找到,继续执行") log("INFO", f"开启底层数据拦截网: {API_TARGET}") page.listen.start(API_TARGET) - packet = page.listen.wait(timeout=10) - if not packet: - query_btn_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[1]/div/button[1]/span' - query_btn = page.ele(query_btn_xpath, timeout=3) - if query_btn: - try: query_btn.click() - except: page.run_js("arguments[0].click();", query_btn) - packet = page.listen.wait(timeout=15) + # 为了能够获取当月的数据,强制设置时间为当月第一天到最后一天,并清理其他条件 + now = datetime.datetime.now() + first_day = datetime.date(now.year, now.month, 1).strftime('%Y-%m-%d') + last_day = datetime.date(now.year, now.month, calendar.monthrange(now.year, now.month)[1]).strftime('%Y-%m-%d') + + log("INFO", f"正在自动设置查询时间范围: {first_day} 至 {last_day}") + + page.run_js(f""" + try {{ + var dates = document.querySelectorAll('.datebox-f, .datetimebox-f, .el-date-editor input'); + if (dates.length >= 2) {{ + // 这里适配 ElementUI 或 EasyUI 的日期输入框 + dates[0].value = '{first_day}'; + dates[1].value = '{last_day}'; + // 触发 input 和 change 事件让 Vue/React 感知到值的改变 + dates[0].dispatchEvent(new Event('input', {{ bubbles: true }})); + dates[0].dispatchEvent(new Event('change', {{ bubbles: true }})); + dates[1].dispatchEvent(new Event('input', {{ bubbles: true }})); + dates[1].dispatchEvent(new Event('change', {{ bubbles: true }})); + }} + }} catch(e) {{ console.log(e); }} + """) + time.sleep(1) + + # 寻找并点击页面上的【查询】按钮,不再盲目等待刷新 + log("INFO", "尝试寻找并点击页面上的【查询】按钮...") + + # 使用 DrissionPage 内置选择器尝试寻找 + query_btn = page.ele('text=查询', timeout=3) + if not query_btn: + query_btn = page.ele('xpath://button[contains(., "查询")]', timeout=3) + + if query_btn: + try: query_btn.click() + except: page.run_js("arguments[0].click();", query_btn) + else: + log("WARN", "常规选择器找不到查询按钮,尝试使用全局 JS 强行寻找...") + # 暴力兜底:通过 JS 遍历所有按钮和链接点击 + clicked = page.run_js(""" + var btns = document.querySelectorAll('button, a, .l-btn, .el-button'); + for(var i=0; i 1: - jumper_input_xpath = 'xpath://*[@id="app"]/div/div[1]/div[2]/div[2]/div[1]/div[2]/div/div[2]/div[1]/span[3]/div/div//input' - input_ele = page.ele(jumper_input_xpath, timeout=5) - - if not input_ele: - jumper_input_xpath = 'xpath://input[@type="number" and @aria-label="页"]' - input_ele = page.ele(jumper_input_xpath, timeout=5) - - if input_ele: - input_ele.clear() - input_ele.input(str(start_page)) - time.sleep(0.5) - input_ele.input('\n') - - # 等待跳转后的数据响应 - packet = page.listen.wait(timeout=15) - if not packet: - log("ERR", "跳转失败,未拦截到目标页的数据请求。") - return - log("OK", f"✅ 成功跳转至第 {start_page} 页并截获数据!") - else: - log("ERR", "找不到页码输入框,增量跳转失败!") - return + log("INFO", f"🎯 准备逐页抓取并比对入库,共需处理 {end_page} 页...") # ========================================================= - # 开始处理新增页面的数据并入库 + # 开始处理数据并比对入库 # ========================================================= - current_page = start_page + current_page = 1 cursor = conn.cursor() total_inserted = 0 + total_updated = 0 while current_page <= end_page: - body = packet.response.body - data = body if isinstance(body, (dict, list)) else json.loads(body) + # 如果是第一页,直接处理已有的 packet,不需要点击下一页 + if current_page > 1: + delay = random.uniform(1.5, 3.5) + log("INFO", f"⏳ 停顿 {delay:.2f} 秒后准备获取第 {current_page} 页...") + time.sleep(delay) + + next_btn = None + for _ in range(3): + # 优先使用 pagination-next,如果不行再尝试其他类名 + next_btn = page.ele('xpath://*[contains(@class, "pagination-next")]', timeout=3) + if not next_btn: + next_btn = page.ele('xpath://button[contains(@class, "btn-next")]', timeout=3) + if next_btn: + break + time.sleep(1) + + if not next_btn: + next_btn = page.ele('xpath://i[contains(@class, "el-icon-arrow-right")]/parent::button', timeout=3) + + if not next_btn: + log("ERR", "找不到下一页按钮,可能页面异常或已到底部,停止抓取。") + break + + # 检查按钮是否被禁用 + class_str = str(next_btn.attr("class")) + aria_disabled = next_btn.attr("aria-disabled") + is_disabled_attr = next_btn.attr("disabled") is not None + + # 如果这个按钮外部包着一个
  • 或者是其他容器,也要检查它的父元素是不是 disabled + parent_class_str = "" + try: + parent_ele = next_btn.parent() + parent_class_str = str(parent_ele.attr("class")) + except: + pass + + if "disabled" in class_str or "disabled" in parent_class_str or is_disabled_attr or aria_disabled == "true": + log("OK", "🏁 下一页按钮已被禁用,说明已经到达最后一页!") + break + + try: + # 尝试 JS 点击(翻页按钮有时会被其他浮层遮挡,JS 点击最稳妥) + page.run_js("arguments[0].click();", next_btn) + except Exception as e: + log("ERR", f"JS 点击下一页失败: {e},尝试普通点击...") + next_btn.click() + + packet = page.listen.wait(timeout=15) + if not packet: + log("ERR", f"第 {current_page} 页请求超时或未触发,中止抓取。") + break + + body = packet.response.body + data = body if isinstance(body, (dict, list)) else json.loads(body) inserted_this_page = 0 + updated_this_page = 0 + if isinstance(data, dict) and "result" in data: items = data["result"].get("items", []) @@ -170,11 +221,10 @@ def fetch_receipt_details_incremental(): row_no = item.get("rowsNum") mat_code = item.get("materialCode") - # 检查是否存在,如果存在则更新数量和金额,不存在则插入 - cursor.execute('SELECT id FROM receipt_details WHERE purchase_order_code = ? AND row_no = ? AND material_code = ?', (po_code, row_no, mat_code)) + # 检查是否存在,根据采购订单号和物料代码进行双条件比对 + cursor.execute('SELECT id FROM receipt_details WHERE purchase_order_code = ? AND material_code = ?', (po_code, mat_code)) existing_record = cursor.fetchone() - # 进货数量(件数)永远只取原始的 plannedPurchaseQuantity,不取转换后的 p_qty = item.get("plannedPurchaseQuantity") r_qty = item.get("convertGoodsQuantity") if item.get("convertGoodsQuantity") is not None else item.get("goodsQuantity") @@ -184,8 +234,8 @@ def fetch_receipt_details_incremental(): SET purchase_qty = ?, receive_qty = ?, receive_price = ?, total_amount = ? WHERE id = ? ''', (p_qty, r_qty, item.get("receivePrice"), item.get("receiveAmount"), existing_record[0])) - # 算作更新,为了记录日志 - inserted_this_page += 1 + updated_this_page += 1 + total_updated += 1 else: cursor.execute(''' INSERT INTO receipt_details ( @@ -217,41 +267,14 @@ def fetch_receipt_details_incremental(): total_inserted += 1 conn.commit() - log("OK", f"第 {current_page} 页处理完毕,成功截获 {inserted_this_page} 条数据并存入数据库。") - - # 还有下一页则继续点击 - if current_page < end_page: - delay = random.uniform(1.5, 3.5) - log("INFO", f"⏳ 停顿 {delay:.2f} 秒后点击下一页...") - time.sleep(delay) + log("OK", f"第 {current_page} 页处理完毕,新增 {inserted_this_page} 条,更新 {updated_this_page} 条。") + else: + log("ERR", f"第 {current_page} 页数据结构异常。") + break - # 同步全量脚本的优化:重试机制与兼容的类名匹配 - next_btn = None - for _ in range(3): - next_btn = page.ele('xpath://button[contains(@class, "btn-next")]', timeout=3) - if next_btn: - break - time.sleep(1) - - # 备用定位方式:直接找右箭头图标所在的按钮 - if not next_btn: - next_btn = page.ele('xpath://i[contains(@class, "el-icon-arrow-right")]/parent::button', timeout=3) - - if next_btn: - try: next_btn.click() - except: page.run_js("arguments[0].click();", next_btn) - - packet = page.listen.wait(timeout=15) - if not packet: - log("ERR", f"第 {current_page + 1} 页请求超时!") - break - else: - log("ERR", "重试 3 次后仍然找不到下一页按钮!") - break - current_page += 1 - log("OK", f"🎉 增量同步大功告成!总计向数据库执行了 {total_inserted} 次插入/更新操作!") + log("OK", f"🎉 增量抓取全部结束!总计新增 {total_inserted} 条,更新 {total_updated} 条。") except Exception as e: log("ERR", f"发生全局异常: {e}") diff --git a/web_ui/app.py b/web_ui/app.py index b4de997..f44b2da 100644 --- a/web_ui/app.py +++ b/web_ui/app.py @@ -171,6 +171,7 @@ def get_receipts(): supplier_name = request.args.get('supplier_name', '').strip() material_name = request.args.get('material_name', '').strip() po_code = request.args.get('po_code', '').strip() + material_code = request.args.get('material_code', '').strip() conn = get_db_connection() @@ -187,6 +188,9 @@ def get_receipts(): if po_code: query_conditions.append("purchase_order_code LIKE ?") params.append(f"%{po_code}%") + if material_code: + query_conditions.append("material_code LIKE ?") + params.append(f"%{material_code}%") where_clause = "" if query_conditions: