BOM发料对比
This commit is contained in:
29
Dockerfile
29
Dockerfile
@@ -9,22 +9,28 @@ ENV DEBIAN_FRONTEND=noninteractive
|
|||||||
|
|
||||||
# Debian 12 (Bookworm) 的 apt 源文件变成了 /etc/apt/sources.list.d/debian.sources
|
# Debian 12 (Bookworm) 的 apt 源文件变成了 /etc/apt/sources.list.d/debian.sources
|
||||||
# 这里替换源以加速下载,并安装必要的系统依赖
|
# 这里替换源以加速下载,并安装必要的系统依赖
|
||||||
# 必须安装:Xvfb(虚拟屏幕), Chromium(浏览器核心), 中文字体(防乱码)
|
# Linux 生产环境优先使用 Google Chrome Stable,避免 Chromium 在
|
||||||
|
# DevTools WebSocket 握手阶段与 DrissionPage 出现兼容性问题。
|
||||||
RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources 2>/dev/null || \
|
RUN sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources 2>/dev/null || \
|
||||||
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list 2>/dev/null || true && \
|
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list 2>/dev/null || true && \
|
||||||
apt-get update && \
|
apt-get update && \
|
||||||
apt-get install -y --no-install-recommends \
|
apt-get install -y --no-install-recommends \
|
||||||
xvfb \
|
ca-certificates \
|
||||||
xauth \
|
curl \
|
||||||
chromium \
|
gnupg \
|
||||||
chromium-driver \
|
|
||||||
fonts-wqy-zenhei \
|
fonts-wqy-zenhei \
|
||||||
tzdata \
|
tzdata \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& install -m 0755 -d /etc/apt/keyrings && \
|
||||||
|
curl -fsSL https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /etc/apt/keyrings/google-chrome.gpg && \
|
||||||
|
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/google-chrome.gpg] https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list && \
|
||||||
|
apt-get update && \
|
||||||
|
apt-get install -y --no-install-recommends google-chrome-stable && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# 设置时区为中国上海
|
# 设置时区为中国上海
|
||||||
ENV TZ=Asia/Shanghai
|
ENV TZ=Asia/Shanghai
|
||||||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
|
||||||
|
ENV CHROME_BIN=/usr/bin/google-chrome
|
||||||
|
|
||||||
# 复制依赖清单并安装 Python 库
|
# 复制依赖清单并安装 Python 库
|
||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
@@ -38,9 +44,8 @@ COPY . .
|
|||||||
EXPOSE 5050
|
EXPOSE 5050
|
||||||
|
|
||||||
# 启动脚本:
|
# 启动脚本:
|
||||||
# 1. 清理可能因异常重启遗留的虚拟屏幕锁文件(防止 xvfb 报错退出)
|
# 1. 切换到 web_ui 目录执行 gunicorn
|
||||||
# 2. 切换到 web_ui 目录执行 gunicorn
|
# 2. 浏览器自动化服务必须单 worker 运行,避免多个 Gunicorn 进程同时抢占 DevTools 会话
|
||||||
# 3. 使用 xvfb-run -a 自动分配空闲的虚拟屏幕
|
# 3. Headless Chrome 已足够,无需再叠加 Xvfb,减少 Linux 初始化链路的不确定性
|
||||||
# 4. 浏览器自动化服务必须单 worker 运行,避免多个 Gunicorn 进程同时抢占 Chromium DevTools 端口
|
# 4. 使用 gthread 提升单进程下的并发响应能力
|
||||||
# 5. 使用 gthread 提升单进程下的并发响应能力
|
CMD sh -c "cd web_ui && gunicorn -w 1 --threads 8 --worker-class gthread -b 0.0.0.0:5050 --access-logfile - --timeout 120 app:app"
|
||||||
CMD sh -c "rm -f /tmp/.X*-lock && cd web_ui && xvfb-run -a --server-args='-screen 0 1920x1080x24' gunicorn -w 1 --threads 8 --worker-class gthread -b 0.0.0.0:5050 --access-logfile - --timeout 120 app:app"
|
|
||||||
|
|||||||
@@ -74,7 +74,8 @@ if btn:
|
|||||||
print("数据已保存至:", path)
|
print("数据已保存至:", path)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pass
|
import traceback
|
||||||
|
print(f"发生全局异常: {e}\n{traceback.format_exc()}")
|
||||||
|
|
||||||
print("没有找到匹配的数据")
|
print("没有找到匹配的数据")
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -314,7 +314,8 @@ def fetch_issue_receipt_incremental():
|
|||||||
log("OK", f"🎉 发料单增量同步大功告成!总计新增了 {total_inserted} 条记录入库!")
|
log("OK", f"🎉 发料单增量同步大功告成!总计新增了 {total_inserted} 条记录入库!")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log("ERR", f"发生全局异常: {e}")
|
import traceback
|
||||||
|
log("ERR", f"发生全局异常: {e}\n{traceback.format_exc()}")
|
||||||
finally:
|
finally:
|
||||||
if 'conn' in locals() and conn:
|
if 'conn' in locals() and conn:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|||||||
@@ -3,8 +3,11 @@ ERP 登录模块 - DrissionPage
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import shutil
|
||||||
import datetime
|
import datetime
|
||||||
|
import subprocess
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
@@ -29,6 +32,11 @@ def is_docker_env() -> bool:
|
|||||||
return os.path.exists("/.dockerenv")
|
return os.path.exists("/.dockerenv")
|
||||||
|
|
||||||
|
|
||||||
|
def is_linux_env() -> bool:
    """Return True when the current interpreter runs on a Linux platform."""
    platform_name = sys.platform
    return platform_name.startswith("linux")
|
||||||
|
|
||||||
|
|
||||||
def get_docker_tmp_root() -> Path:
|
def get_docker_tmp_root() -> Path:
|
||||||
"""
|
"""
|
||||||
指定 DrissionPage 在 Docker 中的临时根目录。
|
指定 DrissionPage 在 Docker 中的临时根目录。
|
||||||
@@ -39,6 +47,57 @@ def get_docker_tmp_root() -> Path:
|
|||||||
return tmp_root
|
return tmp_root
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_browser_path() -> str:
    """
    Resolve the browser executable path in one place.

    Lookup order:
      1. The environment variables DRISSION_BROWSER_PATH, CHROME_BIN and
         BROWSER_PATH (in that order); the first one whose stripped value
         exists on disk wins.
      2. Well-known locations. On Linux these are fixed /usr/bin paths with
         Google Chrome listed before Chromium; elsewhere the macOS Chrome
         app-bundle binary is tried first, then whatever `shutil.which`
         finds on PATH.

    Google Chrome Stable is preferred on Linux production hosts to avoid
    compatibility problems between Chromium and DrissionPage during the
    DevTools WebSocket handshake.

    Returns:
        The first existing path, or an empty string when nothing was found.
    """

    def _first_existing(paths) -> str:
        # Empty strings stand for "unset" / "not on PATH" and are skipped.
        for path in paths:
            if path and os.path.exists(path):
                return path
        return ""

    env_names = ("DRISSION_BROWSER_PATH", "CHROME_BIN", "BROWSER_PATH")
    from_env = _first_existing([os.getenv(name, "").strip() for name in env_names])
    if from_env:
        return from_env

    if is_linux_env():
        well_known = [
            "/usr/bin/google-chrome",
            "/usr/bin/google-chrome-stable",
            "/usr/bin/chromium",
            "/usr/bin/chromium-browser",
        ]
    else:
        well_known = ["/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"]
        well_known.extend(
            [
                shutil.which(command) or ""
                for command in ("google-chrome", "chromium", "chromium-browser")
            ]
        )

    return _first_existing(well_known)
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup_debug_port(address: str) -> None:
    """
    Best-effort kill of stale browser processes bound to a DevTools port.

    The port is taken from the text after the last ":" in ``address``
    (e.g. "127.0.0.1:9222"). Every process that ``lsof`` reports for that
    TCP port is sent SIGKILL, except the calling process itself.

    Args:
        address: DevTools address in "host:port" form. Empty values, values
            without ":" and values whose port part is not purely numeric are
            ignored.

    Returns:
        None. Failures (``lsof`` not installed, process already gone,
        insufficient permission) are swallowed on purpose: this is a cleanup
        step that must never mask the original browser start-up error.
    """
    import signal

    if not address or ":" not in address:
        return

    debug_port = address.rsplit(":", 1)[-1].strip()
    # Only a plain numeric port may reach the external command; anything else
    # is a malformed address and must not be executed.
    if not debug_port.isdigit():
        return

    try:
        # Argument list instead of a shell pipeline: no shell parsing of the
        # address, and no dependency on GNU-only `xargs -r`.
        listing = subprocess.run(
            ["lsof", "-ti", f"tcp:{debug_port}"],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
            text=True,
            check=False,
        )
    except OSError:
        # lsof is not available on this host; nothing can be cleaned up.
        return

    own_pid = os.getpid()
    for token in listing.stdout.split():
        if not token.isdigit():
            continue
        pid = int(token)
        # lsof also lists processes holding a client connection to the port,
        # which can include this very process; never kill ourselves.
        if pid == own_pid:
            continue
        try:
            os.kill(pid, signal.SIGKILL)
        except OSError:
            # Process already exited or we lack permission: keep going.
            continue
|
||||||
|
|
||||||
|
|
||||||
# ── 日志 ──────────────────────────────────────────────────────────────────────
|
# ── 日志 ──────────────────────────────────────────────────────────────────────
|
||||||
def log(level: str, msg: str):
|
def log(level: str, msg: str):
|
||||||
icons = {"INFO": "ℹ️ ", "OK": "✅", "WARN": "⚠️ ", "ERR": "❌"}
|
icons = {"INFO": "ℹ️ ", "OK": "✅", "WARN": "⚠️ ", "ERR": "❌"}
|
||||||
@@ -63,41 +122,38 @@ def dump_page_state(page: ChromiumPage, label: str = ""):
|
|||||||
# ── 浏览器 ────────────────────────────────────────────────────────────────────
|
# ── 浏览器 ────────────────────────────────────────────────────────────────────
|
||||||
def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
|
def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
|
||||||
co = ChromiumOptions()
|
co = ChromiumOptions()
|
||||||
# 强制在生产环境下使用无头模式
|
|
||||||
is_docker = is_docker_env()
|
is_docker = is_docker_env()
|
||||||
|
browser_path = resolve_browser_path()
|
||||||
|
effective_headless = headless or is_docker
|
||||||
|
|
||||||
if headless or is_docker:
|
if effective_headless:
|
||||||
co.set_argument("--headless=new")
|
co.set_argument("--headless=new")
|
||||||
co.set_argument("--disable-gpu") # Docker 无头模式下强烈建议禁用 GPU,防止 404 和渲染崩溃
|
co.set_argument("--disable-gpu")
|
||||||
|
|
||||||
co.set_argument("--disable-blink-features=AutomationControlled")
|
co.set_argument("--disable-blink-features=AutomationControlled")
|
||||||
co.set_argument("--no-sandbox")
|
co.set_argument("--no-sandbox")
|
||||||
co.set_argument("--disable-dev-shm-usage") # 防止 Docker 共享内存耗尽导致浏览器崩溃
|
co.set_argument("--disable-dev-shm-usage")
|
||||||
co.set_argument("--disable-software-rasterizer") # 配合无头模式禁用软件光栅化器
|
co.set_argument("--disable-software-rasterizer")
|
||||||
co.set_argument("--remote-allow-origins=*") # 解决 Docker 下 websocket 404 问题
|
co.set_argument("--remote-allow-origins=*")
|
||||||
co.set_argument("--remote-debugging-address=127.0.0.1")
|
co.set_argument("--remote-debugging-address=127.0.0.1")
|
||||||
co.set_argument("--disable-web-security")
|
co.set_argument("--disable-web-security")
|
||||||
co.set_argument("--ignore-certificate-errors")
|
co.set_argument("--ignore-certificate-errors")
|
||||||
co.set_argument("--proxy-server=direct://") # 禁用代理
|
co.set_argument("--proxy-server=direct://")
|
||||||
co.set_argument("--proxy-bypass-list=*")
|
co.set_argument("--proxy-bypass-list=*")
|
||||||
co.set_argument("--window-size=1440,900")
|
co.set_argument("--window-size=1440,900")
|
||||||
|
|
||||||
|
if browser_path:
|
||||||
|
co.set_browser_path(browser_path)
|
||||||
|
log("INFO", f"选用浏览器内核: {browser_path}")
|
||||||
|
else:
|
||||||
|
log("WARN", "未解析到明确浏览器路径,将使用 DrissionPage 默认浏览器发现逻辑。")
|
||||||
|
|
||||||
if is_docker:
|
if is_docker:
|
||||||
# Docker 生产环境:由 DrissionPage 自动分配独立端口和 profile,避免僵尸会话导致 404
|
|
||||||
tmp_root = get_docker_tmp_root()
|
tmp_root = get_docker_tmp_root()
|
||||||
co.set_tmp_path(str(tmp_root))
|
co.set_tmp_path(str(tmp_root))
|
||||||
co.auto_port(True)
|
co.auto_port(True)
|
||||||
log("INFO", f"Docker Drission 临时目录: {tmp_root}")
|
log("INFO", f"Docker Drission 临时目录: {tmp_root}")
|
||||||
# 很多 Debian/Ubuntu 系统的 Chromium 实际上是通过 wrapper 脚本调用的
|
|
||||||
# 直接指定确切的执行路径,防止 DrissionPage 底层启动失败
|
|
||||||
if os.path.exists('/usr/bin/chromium'):
|
|
||||||
co.set_browser_path('/usr/bin/chromium')
|
|
||||||
elif os.path.exists('/usr/bin/chromium-browser'):
|
|
||||||
co.set_browser_path('/usr/bin/chromium-browser')
|
|
||||||
elif os.path.exists('/usr/bin/google-chrome'):
|
|
||||||
co.set_browser_path('/usr/bin/google-chrome')
|
|
||||||
else:
|
else:
|
||||||
# 本地开发环境:使用固定端口,方便复用
|
|
||||||
co.set_local_port(port)
|
co.set_local_port(port)
|
||||||
|
|
||||||
# #region debug-point A:drission-target
|
# #region debug-point A:drission-target
|
||||||
@@ -122,6 +178,18 @@ def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
|
|||||||
page = ChromiumPage(opt)
|
page = ChromiumPage(opt)
|
||||||
return page
|
return page
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
actual_address = opt.address or f"127.0.0.1:{port}"
|
||||||
|
log("WARN", f"[DEBUG] ChromiumPage 初始化失败: {e},尝试清理地址 {actual_address} 后重试...")
|
||||||
|
try:
|
||||||
|
cleanup_debug_port(actual_address)
|
||||||
|
time.sleep(1)
|
||||||
|
page = ChromiumPage(opt)
|
||||||
|
log("OK", "[DEBUG] 清理后重试成功!")
|
||||||
|
return page
|
||||||
|
except Exception as retry_e:
|
||||||
|
log("ERR", f"[DEBUG] 清理后重试依然失败: {retry_e}")
|
||||||
|
e = retry_e
|
||||||
|
|
||||||
# #region debug-point B:devtools-http-probe
|
# #region debug-point B:devtools-http-probe
|
||||||
if opt.address:
|
if opt.address:
|
||||||
for endpoint in ("json/version", "json/list"):
|
for endpoint in ("json/version", "json/list"):
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ services:
|
|||||||
- TZ=Asia/Shanghai
|
- TZ=Asia/Shanghai
|
||||||
- ENABLE_BACKGROUND_SCHEDULER=1
|
- ENABLE_BACKGROUND_SCHEDULER=1
|
||||||
- DRISSION_TMP_ROOT=/tmp
|
- DRISSION_TMP_ROOT=/tmp
|
||||||
|
- CHROME_BIN=/usr/bin/google-chrome
|
||||||
volumes:
|
volumes:
|
||||||
# 既然用 Git 拉取了完整代码,直接用相对路径挂载更优雅
|
# 既然用 Git 拉取了完整代码,直接用相对路径挂载更优雅
|
||||||
# 直接挂载整个 output 文件夹,里面的 erp_data.db 自动持久化
|
# 直接挂载整个 output 文件夹,里面的 erp_data.db 自动持久化
|
||||||
|
|||||||
Reference in New Issue
Block a user