BOM发料对比

This commit is contained in:
hjq
2026-06-26 16:22:46 +08:00
parent db105d210a
commit 872caec9c7

View File

@@ -9,6 +9,7 @@ import shutil
import json
import datetime
import subprocess
import urllib.request
from pathlib import Path
from dotenv import load_dotenv
from DrissionPage import ChromiumPage, ChromiumOptions
@@ -49,26 +50,56 @@ def patch_drission_ws_handshake() -> None:
return
def resilient_create_connection(address, **kwargs):
    """Open the DevTools WebSocket, trying several handshake header sets in turn.

    Proxies are force-disabled for the connection and ``localhost`` in the
    target URL is rewritten to ``127.0.0.1``. Each candidate option set is
    tried in order; a ``WebSocketBadStatusException`` (handshake rejected)
    moves on to the next candidate, while any other exception aborts the
    attempts immediately.

    Args:
        address: WebSocket URL of the DevTools endpoint, e.g.
            ``ws://127.0.0.1:9222/devtools/browser/<id>``.
        **kwargs: Options forwarded to the wrapped ``create_connection``.
            The ``header`` and ``suppress_origin`` entries are overridden by
            each candidate.

    Returns:
        Whatever ``raw_ws_create_connection`` returns for the first candidate
        that connects.

    Raises:
        Exception: The last error seen once every candidate was rejected, or
            the first non-handshake error.
    """
    from urllib.parse import urlsplit

    base_kwargs = dict(kwargs)

    # Force proxies off so the request cannot be redirected by network rules
    # inside the container.
    base_kwargs["http_no_proxy"] = ["127.0.0.1", "localhost", "::1"]
    base_kwargs["http_proxy_host"] = None
    base_kwargs["http_proxy_port"] = None

    # Extract the target port so a valid Host header can be built. urlsplit
    # copes with IPv6 literals and scheme-less "host:port" addresses, which a
    # plain split(":") does not.
    try:
        parsed_port = urlsplit(address if "://" in address else f"//{address}").port
    except ValueError:
        parsed_port = None
    port = str(parsed_port) if parsed_port is not None else "9222"

    # Per the original author's note: Chrome 149 tightened the DevTools
    # security checks, and the Host header must be an IP address or localhost
    # and must include the port number.
    # NOTE(review): websocket-client also accepts dedicated `host=` / `origin=`
    # options; confirm whether sending Host/Origin through `header` results in
    # duplicated headers on the wire.
    candidate_kwargs = [
        # Strategy 1: the most standard loopback combination, with port.
        {
            **base_kwargs,
            "suppress_origin": False,
            "header": [f"Host: 127.0.0.1:{port}", "Origin: http://127.0.0.1"],
        },
        # Strategy 2: the most bare-bones connection, similar to curl.
        {
            **base_kwargs,
            "suppress_origin": True,
            "header": [],
        },
        # Strategy 3: present as localhost, with port.
        {
            **base_kwargs,
            "suppress_origin": False,
            "header": [f"Host: localhost:{port}", "Origin: http://localhost"],
        },
    ]

    # Always dial 127.0.0.1, because localhost may resolve abnormally inside
    # Docker. The rewritten URL is the same for every candidate.
    target_url = address.replace("localhost", "127.0.0.1")

    last_err = None
    for candidate in candidate_kwargs:
        try:
            return raw_ws_create_connection(target_url, **candidate)
        except WebSocketBadStatusException as ws_err:
            # Handshake rejected: remember it and try the next header set.
            last_err = ws_err
        except Exception as other_err:
            # Anything else is not a header problem; stop retrying.
            last_err = other_err
            break
    raise last_err
if is_linux_env():
dp_driver_module.create_connection = resilient_create_connection
@@ -131,7 +162,7 @@ def cleanup_debug_port(address: str) -> None:
return
debug_port = address.rsplit(":", 1)[-1]
# 1. 杀掉占用端口的僵尸进程
subprocess.run(
f"lsof -ti tcp:{debug_port} | xargs -r kill -9",
@@ -139,7 +170,7 @@ def cleanup_debug_port(address: str) -> None:
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
# 2. 如果在 Linux 环境,彻底清理残留的 DrissionPage 锁文件,防止 "Failed to create SingletonLock"
if is_linux_env():
subprocess.run(
@@ -161,6 +192,45 @@ def clear_drission_singletons() -> None:
BrowserDriver.BROWSERS.clear()
def probe_devtools_endpoints(address: str, log_output: bool = True) -> dict:
    """Probe the DevTools HTTP endpoints and return their parsed JSON payloads.

    Requests ``/json/version`` and ``/json/list`` on ``address``. The probe is
    best-effort: a failure on one endpoint is optionally logged and leaves
    that entry as ``None``; it never raises.

    Args:
        address: ``host:port`` of the DevTools HTTP server. A falsy value
            skips probing entirely.
        log_output: When true, log the HTTP status, the first 1000 characters
            of each response body, and any probe failure.

    Returns:
        A dict with keys ``"version"`` and ``"list"``, each holding the decoded
        JSON payload or ``None`` when that endpoint could not be read.
    """
    result = {"version": None, "list": None}
    if not address:
        return result

    # An empty ProxyHandler mapping disables proxy auto-detection, so the
    # probe is not routed through a proxy configured via environment
    # variables. This matches the no-proxy handling used for the DevTools
    # WebSocket connection elsewhere in this file.
    opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))

    endpoint_map = {"version": "json/version", "list": "json/list"}
    for key, endpoint in endpoint_map.items():
        url = f"http://{address}/{endpoint}"
        try:
            with opener.open(url, timeout=2) as resp:
                status = resp.status
                body = resp.read().decode("utf-8", errors="replace")
            if log_output:
                log("WARN", f"[DEBUG] DevTools 探测 {url} -> HTTP {status}")
                log("WARN", f"[DEBUG] DevTools 响应体 {endpoint}: {body[:1000]}")
            result[key] = json.loads(body)
        except Exception as probe_err:
            # Deliberately broad: connection errors, HTTP errors and invalid
            # JSON are all treated as "endpoint unavailable".
            if log_output:
                log("WARN", f"[DEBUG] DevTools 探测失败 {url}: {probe_err}")
    return result
def get_first_page_ws_address(devtools_payload: dict) -> str:
    """Return the WebSocket URL of the first usable page/webview tab.

    Args:
        devtools_payload: Mapping whose ``"list"`` entry holds the decoded
            ``/json/list`` response (a list of tab descriptors). A payload
            that is not a dict, or whose ``"list"`` is missing or not a list,
            is treated as having no tabs.

    Returns:
        The ``webSocketDebuggerUrl`` of the first tab whose ``type`` is
        ``"page"`` or ``"webview"``, whose URL does not start with
        ``devtools://`` and which has a non-empty ``webSocketDebuggerUrl``;
        an empty string when no such tab exists.
    """
    if not isinstance(devtools_payload, dict):
        return ""

    tabs = devtools_payload.get("list") or []
    if not isinstance(tabs, list):
        return ""

    for tab in tabs:
        if not isinstance(tab, dict):
            continue
        if tab.get("type") not in ("page", "webview"):
            continue
        # Skip DevTools' own front-end pages.
        if str(tab.get("url", "")).startswith("devtools://"):
            continue
        ws_url = tab.get("webSocketDebuggerUrl")
        if ws_url:
            return ws_url
    return ""
# ── 日志 ──────────────────────────────────────────────────────────────────────
def log(level: str, msg: str):
icons = {"INFO": " ", "OK": "", "WARN": "⚠️ ", "ERR": ""}
@@ -206,6 +276,7 @@ def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
co.set_argument("--disable-software-rasterizer")
co.set_argument("--remote-allow-origins=*")
co.set_argument("--remote-debugging-address=127.0.0.1")
co.set_argument("--disable-web-security")
co.set_argument("--ignore-certificate-errors")
co.set_argument("--proxy-server=direct://")
co.set_argument("--proxy-bypass-list=*")
@@ -250,6 +321,22 @@ def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
actual_address = opt.address or f"127.0.0.1:{port}"
log("WARN", f"[DEBUG] ChromiumPage 初始化失败: {e}")
devtools_payload = probe_devtools_endpoints(actual_address, log_output=True) if opt.address else {"version": None, "list": None}
fallback_page_ws = get_first_page_ws_address(devtools_payload)
if is_linux_env() and fallback_page_ws and "Handshake status 404 Not Found" in str(e):
log("WARN", f"[DEBUG] Browser WS 握手失败,尝试降级连接 Page WS: {fallback_page_ws}")
try:
clear_drission_singletons()
fallback_co = ChromiumOptions()
fallback_co.set_address(fallback_page_ws)
page = ChromiumPage(fallback_co)
log("OK", "[DEBUG] 已通过 Page WS 降级连接成功。")
return page
except Exception as ws_only_e:
log("ERR", f"[DEBUG] Page WS 降级连接失败: {ws_only_e}")
e = ws_only_e
log("WARN", f"[DEBUG] 尝试清理地址 {actual_address} 后重试...")
try:
clear_drission_singletons()
@@ -261,7 +348,11 @@ def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
except Exception as retry_e:
log("ERR", f"[DEBUG] 清理后重试依然失败: {retry_e}")
e = retry_e
# #region debug-point B:devtools-http-probe
if not opt.address:
log("WARN", "[DEBUG] DevTools 探测跳过address 为空")
# #endregion
log("ERR", f"浏览器初始化失败: {e}")
raise
@@ -386,9 +477,5 @@ if __name__ == "__main__":
except KeyboardInterrupt:
log("INFO", "用户中断")
finally:
try:
if page:
page.quit()
except Exception:
pass
page.quit()
log("INFO", "浏览器已关闭")