BOM发料对比
This commit is contained in:
@@ -9,6 +9,7 @@ import shutil
|
||||
import json
|
||||
import datetime
|
||||
import subprocess
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
from DrissionPage import ChromiumPage, ChromiumOptions
|
||||
@@ -49,26 +50,56 @@ def patch_drission_ws_handshake() -> None:
|
||||
return
|
||||
|
||||
def resilient_create_connection(address, **kwargs):
|
||||
kwargs = dict(kwargs)
|
||||
# 终极暴力破解法:移除所有可能导致 Chrome 安全校验失败的头
|
||||
base_kwargs = dict(kwargs)
|
||||
|
||||
# 禁止 websocket-client 使用代理
|
||||
kwargs["http_no_proxy"] = [
|
||||
"127.0.0.1",
|
||||
"localhost",
|
||||
"::1"
|
||||
# 强制禁用代理,防止请求被容器内的网络规则重定向
|
||||
no_proxy_hosts = ["127.0.0.1", "localhost", "::1"]
|
||||
base_kwargs["http_no_proxy"] = no_proxy_hosts
|
||||
base_kwargs["http_proxy_host"] = None
|
||||
base_kwargs["http_proxy_port"] = None
|
||||
|
||||
# 提取目标端口,用于构造合法的 Host 头
|
||||
try:
|
||||
port = address.split(":")[2].split("/")[0]
|
||||
except IndexError:
|
||||
port = "9222"
|
||||
|
||||
# Chrome 149 增强了 DevTools 的安全校验。
|
||||
# Host header 必须是 IP 地址或 localhost,且必须包含端口号!
|
||||
candidate_kwargs = [
|
||||
# 策略1:最标准的 localhost 组合,带端口
|
||||
{
|
||||
**base_kwargs,
|
||||
"suppress_origin": False,
|
||||
"header": [f"Host: 127.0.0.1:{port}", "Origin: http://127.0.0.1"]
|
||||
},
|
||||
# 策略2:最原始、最干净的连接方式(类似 curl)
|
||||
{
|
||||
**base_kwargs,
|
||||
"suppress_origin": True,
|
||||
"header": []
|
||||
},
|
||||
# 策略3:伪装成 localhost 带端口
|
||||
{
|
||||
**base_kwargs,
|
||||
"suppress_origin": False,
|
||||
"header": [f"Host: localhost:{port}", "Origin: http://localhost"]
|
||||
}
|
||||
]
|
||||
kwargs["http_proxy_host"] = None
|
||||
kwargs["http_proxy_port"] = None
|
||||
|
||||
# 删除 DrissionPage 可能传入的 header
|
||||
kwargs.pop("header", None)
|
||||
kwargs.pop("host", None)
|
||||
kwargs.pop("origin", None)
|
||||
|
||||
return raw_ws_create_connection(
|
||||
address.replace("localhost", "127.0.0.1"),
|
||||
**kwargs
|
||||
)
|
||||
last_err = None
|
||||
for candidate in candidate_kwargs:
|
||||
try:
|
||||
# 强制使用 127.0.0.1,因为在 Docker 内 localhost 可能解析异常
|
||||
target_url = address.replace("localhost", "127.0.0.1")
|
||||
return raw_ws_create_connection(target_url, **candidate)
|
||||
except WebSocketBadStatusException as ws_err:
|
||||
last_err = ws_err
|
||||
except Exception as other_err:
|
||||
last_err = other_err
|
||||
break
|
||||
raise last_err
|
||||
|
||||
if is_linux_env():
|
||||
dp_driver_module.create_connection = resilient_create_connection
|
||||
@@ -131,7 +162,7 @@ def cleanup_debug_port(address: str) -> None:
|
||||
return
|
||||
|
||||
debug_port = address.rsplit(":", 1)[-1]
|
||||
|
||||
|
||||
# 1. 杀掉占用端口的僵尸进程
|
||||
subprocess.run(
|
||||
f"lsof -ti tcp:{debug_port} | xargs -r kill -9",
|
||||
@@ -139,7 +170,7 @@ def cleanup_debug_port(address: str) -> None:
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
|
||||
|
||||
# 2. 如果在 Linux 环境,彻底清理残留的 DrissionPage 锁文件,防止 "Failed to create SingletonLock"
|
||||
if is_linux_env():
|
||||
subprocess.run(
|
||||
@@ -161,6 +192,45 @@ def clear_drission_singletons() -> None:
|
||||
BrowserDriver.BROWSERS.clear()
|
||||
|
||||
|
||||
def probe_devtools_endpoints(address: str, log_output: bool = True) -> dict:
    """Probe the DevTools HTTP endpoints and return their parsed JSON.

    Requests ``http://{address}/json/version`` and
    ``http://{address}/json/list`` with a 2-second timeout each and
    decodes the bodies as JSON.

    Args:
        address: ``host:port`` of the remote-debugging endpoint. A falsy
            value skips probing entirely.
        log_output: When true, log the HTTP status, the first 1000
            characters of each response body, and any probe failure.

    Returns:
        A dict with the keys ``"version"`` and ``"list"``. Each value is
        the decoded JSON payload of the matching endpoint, or ``None``
        when that endpoint could not be fetched or parsed.
    """
    result = {"version": None, "list": None}
    if not address:
        return result

    # An empty proxy map makes urllib ignore http_proxy/HTTP_PROXY from the
    # environment, so the probe always talks to the debug port directly
    # instead of being routed through a configured proxy.
    direct_opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))

    endpoint_map = {"version": "json/version", "list": "json/list"}
    for key, endpoint in endpoint_map.items():
        url = f"http://{address}/{endpoint}"
        try:
            with direct_opener.open(url, timeout=2) as resp:
                status = resp.status
                body = resp.read().decode("utf-8", errors="replace")
            if log_output:
                log("WARN", f"[DEBUG] DevTools 探测 {url} -> HTTP {status}")
                log("WARN", f"[DEBUG] DevTools 响应体 {endpoint}: {body[:1000]}")
            result[key] = json.loads(body)
        except Exception as probe_err:
            # Best-effort diagnostics: a failed probe must never mask the
            # error the caller is already handling, so the entry simply
            # stays None.
            if log_output:
                log("WARN", f"[DEBUG] DevTools 探测失败 {url}: {probe_err}")
    return result
|
||||
|
||||
|
||||
def get_first_page_ws_address(devtools_payload: dict) -> str:
    """Return the WebSocket URL of the first usable page/webview tab.

    Args:
        devtools_payload: Dict whose ``"list"`` entry holds the decoded
            ``/json/list`` response (a list of tab dicts), or a falsy
            value when that response is unavailable.

    Returns:
        The ``webSocketDebuggerUrl`` of the first entry that is a dict,
        has ``type`` ``"page"`` or ``"webview"``, does not have a
        ``devtools://`` URL, and carries a non-empty WebSocket URL.
        Returns ``""`` when no entry qualifies or ``"list"`` is not a list.
    """
    entries = devtools_payload.get("list") or []
    if not isinstance(entries, list):
        return ""

    for entry in entries:
        if not isinstance(entry, dict):
            continue
        if entry.get("type") not in ("page", "webview"):
            continue
        # Skip tabs that are themselves DevTools front-ends.
        if str(entry.get("url", "")).startswith("devtools://"):
            continue
        ws_url = entry.get("webSocketDebuggerUrl")
        if ws_url:
            return ws_url
    return ""
|
||||
|
||||
|
||||
# ── 日志 ──────────────────────────────────────────────────────────────────────
|
||||
def log(level: str, msg: str):
|
||||
icons = {"INFO": "ℹ️ ", "OK": "✅", "WARN": "⚠️ ", "ERR": "❌"}
|
||||
@@ -206,6 +276,7 @@ def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
|
||||
co.set_argument("--disable-software-rasterizer")
|
||||
co.set_argument("--remote-allow-origins=*")
|
||||
co.set_argument("--remote-debugging-address=127.0.0.1")
|
||||
co.set_argument("--disable-web-security")
|
||||
co.set_argument("--ignore-certificate-errors")
|
||||
co.set_argument("--proxy-server=direct://")
|
||||
co.set_argument("--proxy-bypass-list=*")
|
||||
@@ -250,6 +321,22 @@ def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
|
||||
actual_address = opt.address or f"127.0.0.1:{port}"
|
||||
log("WARN", f"[DEBUG] ChromiumPage 初始化失败: {e}")
|
||||
|
||||
devtools_payload = probe_devtools_endpoints(actual_address, log_output=True) if opt.address else {"version": None, "list": None}
|
||||
fallback_page_ws = get_first_page_ws_address(devtools_payload)
|
||||
|
||||
if is_linux_env() and fallback_page_ws and "Handshake status 404 Not Found" in str(e):
|
||||
log("WARN", f"[DEBUG] Browser WS 握手失败,尝试降级连接 Page WS: {fallback_page_ws}")
|
||||
try:
|
||||
clear_drission_singletons()
|
||||
fallback_co = ChromiumOptions()
|
||||
fallback_co.set_address(fallback_page_ws)
|
||||
page = ChromiumPage(fallback_co)
|
||||
log("OK", "[DEBUG] 已通过 Page WS 降级连接成功。")
|
||||
return page
|
||||
except Exception as ws_only_e:
|
||||
log("ERR", f"[DEBUG] Page WS 降级连接失败: {ws_only_e}")
|
||||
e = ws_only_e
|
||||
|
||||
log("WARN", f"[DEBUG] 尝试清理地址 {actual_address} 后重试...")
|
||||
try:
|
||||
clear_drission_singletons()
|
||||
@@ -261,7 +348,11 @@ def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
|
||||
except Exception as retry_e:
|
||||
log("ERR", f"[DEBUG] 清理后重试依然失败: {retry_e}")
|
||||
e = retry_e
|
||||
|
||||
|
||||
# #region debug-point B:devtools-http-probe
|
||||
if not opt.address:
|
||||
log("WARN", "[DEBUG] DevTools 探测跳过:address 为空")
|
||||
# #endregion
|
||||
log("ERR", f"浏览器初始化失败: {e}")
|
||||
raise
|
||||
|
||||
@@ -386,9 +477,5 @@ if __name__ == "__main__":
|
||||
except KeyboardInterrupt:
|
||||
log("INFO", "用户中断")
|
||||
finally:
|
||||
try:
|
||||
if page:
|
||||
page.quit()
|
||||
except Exception:
|
||||
pass
|
||||
page.quit()
|
||||
log("INFO", "浏览器已关闭")
|
||||
|
||||
Reference in New Issue
Block a user