BOM发料对比
This commit is contained in:
@@ -9,6 +9,7 @@ import shutil
|
|||||||
import json
|
import json
|
||||||
import datetime
|
import datetime
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import urllib.request
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from DrissionPage import ChromiumPage, ChromiumOptions
|
from DrissionPage import ChromiumPage, ChromiumOptions
|
||||||
@@ -49,26 +50,56 @@ def patch_drission_ws_handshake() -> None:
|
|||||||
return
|
return
|
||||||
|
|
||||||
def resilient_create_connection(address, **kwargs):
    """Open the DevTools WebSocket, retrying with different handshake headers.

    Proxy use is disabled and ``localhost`` in ``address`` is rewritten to
    ``127.0.0.1`` before connecting. Three handshake variants are tried in
    order: a ``WebSocketBadStatusException`` moves on to the next variant,
    while any other exception propagates immediately.

    Args:
        address: WebSocket URL of the DevTools endpoint.
        **kwargs: Options forwarded to ``raw_ws_create_connection``. The
            proxy options and the ``host`` / ``origin`` / ``suppress_origin``
            / ``header`` values chosen here override caller-supplied ones.

    Returns:
        The result of ``raw_ws_create_connection`` for the first variant
        that connects.

    Raises:
        WebSocketBadStatusException: If every variant is rejected during
            the handshake; the last rejection is re-raised.
    """
    from urllib.parse import urlsplit

    base_kwargs = dict(kwargs)

    # Force-disable proxies so the request cannot be redirected by network
    # rules inside the container.
    base_kwargs["http_no_proxy"] = ["127.0.0.1", "localhost", "::1"]
    base_kwargs["http_proxy_host"] = None
    base_kwargs["http_proxy_port"] = None

    # Extract the target port to build a valid Host value. urlsplit copes
    # with IPv6 literals and missing ports, unlike splitting on ":".
    try:
        port = urlsplit(address).port
    except ValueError:
        port = None
    if port is None:
        port = 9222

    # Always connect to 127.0.0.1: inside Docker, "localhost" may resolve
    # unexpectedly. Computed once because it is the same for every variant.
    target_url = address.replace("localhost", "127.0.0.1")

    # Original author's note: Chrome 149 tightened the DevTools security
    # check; the Host header must be an IP address or localhost and must
    # include the port.
    #
    # Host/Origin are supplied through websocket-client's ``host`` and
    # ``origin`` options rather than the ``header`` list: entries in
    # ``header`` are appended after the library's own Host/Origin lines and
    # would produce duplicate headers in the handshake request.
    candidate_overrides = (
        # Strategy 1: 127.0.0.1 with explicit port.
        {
            "suppress_origin": False,
            "host": f"127.0.0.1:{port}",
            "origin": "http://127.0.0.1",
            "header": [],
        },
        # Strategy 2: the plainest possible connection (curl-like), no
        # Origin header and no extra headers.
        {
            "suppress_origin": True,
            "header": [],
        },
        # Strategy 3: present as localhost with explicit port.
        {
            "suppress_origin": False,
            "host": f"localhost:{port}",
            "origin": "http://localhost",
            "header": [],
        },
    )

    last_err = None
    for overrides in candidate_overrides:
        try:
            return raw_ws_create_connection(
                target_url, **{**base_kwargs, **overrides}
            )
        except WebSocketBadStatusException as ws_err:
            # Handshake rejected: remember the error and try the next variant.
            last_err = ws_err
    raise last_err
|
||||||
|
|
||||||
if is_linux_env():
|
if is_linux_env():
|
||||||
dp_driver_module.create_connection = resilient_create_connection
|
dp_driver_module.create_connection = resilient_create_connection
|
||||||
@@ -161,6 +192,45 @@ def clear_drission_singletons() -> None:
|
|||||||
BrowserDriver.BROWSERS.clear()
|
BrowserDriver.BROWSERS.clear()
|
||||||
|
|
||||||
|
|
||||||
|
def probe_devtools_endpoints(address: str, log_output: bool = True) -> dict:
    """Probe the DevTools HTTP endpoints and return their parsed JSON bodies.

    Requests ``http://{address}/json/version`` and
    ``http://{address}/json/list`` with a 2-second timeout each.

    Args:
        address: ``host:port`` of the DevTools HTTP server. A falsy value
            skips probing entirely.
        log_output: When true, log the HTTP status, the first 1000
            characters of each body, and any probe failure.

    Returns:
        A dict with keys ``"version"`` and ``"list"``. Each value is the
        decoded JSON of the corresponding endpoint, or ``None`` when the
        request or the JSON parsing failed.
    """
    result = {"version": None, "list": None}
    if not address:
        return result

    # An empty ProxyHandler mapping disables proxy auto-detection, so a
    # http_proxy environment variable cannot reroute this loopback probe.
    opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))

    endpoint_map = {"version": "json/version", "list": "json/list"}
    for key, endpoint in endpoint_map.items():
        url = f"http://{address}/{endpoint}"
        try:
            with opener.open(url, timeout=2) as resp:
                status = resp.status
                body = resp.read().decode("utf-8", errors="replace")
            if log_output:
                log("WARN", f"[DEBUG] DevTools 探测 {url} -> HTTP {status}")
                log("WARN", f"[DEBUG] DevTools 响应体 {endpoint}: {body[:1000]}")
            result[key] = json.loads(body)
        except Exception as probe_err:
            # Deliberately broad: this is a best-effort diagnostic and must
            # never raise; the failed endpoint simply stays None.
            if log_output:
                log("WARN", f"[DEBUG] DevTools 探测失败 {url}: {probe_err}")
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def get_first_page_ws_address(devtools_payload: dict) -> str:
    """Return the WebSocket URL of the first usable page/webview target.

    Args:
        devtools_payload: Dict whose ``"list"`` entry holds the decoded
            ``/json/list`` response (a list of target dicts).

    Returns:
        The ``webSocketDebuggerUrl`` of the first entry whose ``type`` is
        ``"page"`` or ``"webview"``, whose ``url`` does not start with
        ``devtools://``, and which has a non-empty ``webSocketDebuggerUrl``.
        Returns ``""`` when the payload is not a dict, the list is missing
        or malformed, or no entry qualifies.
    """
    # A failed probe may hand over something other than a dict; treat that
    # the same as "no targets" instead of raising AttributeError.
    if not isinstance(devtools_payload, dict):
        return ""

    tabs = devtools_payload.get("list") or []
    if not isinstance(tabs, list):
        return ""

    for tab in tabs:
        if not isinstance(tab, dict):
            continue
        if tab.get("type") not in ("page", "webview"):
            continue
        # Skip DevTools' own front-end pages.
        if str(tab.get("url", "")).startswith("devtools://"):
            continue
        ws_url = tab.get("webSocketDebuggerUrl")
        if ws_url:
            return ws_url
    return ""
|
||||||
|
|
||||||
|
|
||||||
# ── 日志 ──────────────────────────────────────────────────────────────────────
|
# ── 日志 ──────────────────────────────────────────────────────────────────────
|
||||||
def log(level: str, msg: str):
|
def log(level: str, msg: str):
|
||||||
icons = {"INFO": "ℹ️ ", "OK": "✅", "WARN": "⚠️ ", "ERR": "❌"}
|
icons = {"INFO": "ℹ️ ", "OK": "✅", "WARN": "⚠️ ", "ERR": "❌"}
|
||||||
@@ -206,6 +276,7 @@ def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
|
|||||||
co.set_argument("--disable-software-rasterizer")
|
co.set_argument("--disable-software-rasterizer")
|
||||||
co.set_argument("--remote-allow-origins=*")
|
co.set_argument("--remote-allow-origins=*")
|
||||||
co.set_argument("--remote-debugging-address=127.0.0.1")
|
co.set_argument("--remote-debugging-address=127.0.0.1")
|
||||||
|
co.set_argument("--disable-web-security")
|
||||||
co.set_argument("--ignore-certificate-errors")
|
co.set_argument("--ignore-certificate-errors")
|
||||||
co.set_argument("--proxy-server=direct://")
|
co.set_argument("--proxy-server=direct://")
|
||||||
co.set_argument("--proxy-bypass-list=*")
|
co.set_argument("--proxy-bypass-list=*")
|
||||||
@@ -250,6 +321,22 @@ def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
|
|||||||
actual_address = opt.address or f"127.0.0.1:{port}"
|
actual_address = opt.address or f"127.0.0.1:{port}"
|
||||||
log("WARN", f"[DEBUG] ChromiumPage 初始化失败: {e}")
|
log("WARN", f"[DEBUG] ChromiumPage 初始化失败: {e}")
|
||||||
|
|
||||||
|
devtools_payload = probe_devtools_endpoints(actual_address, log_output=True) if opt.address else {"version": None, "list": None}
|
||||||
|
fallback_page_ws = get_first_page_ws_address(devtools_payload)
|
||||||
|
|
||||||
|
if is_linux_env() and fallback_page_ws and "Handshake status 404 Not Found" in str(e):
|
||||||
|
log("WARN", f"[DEBUG] Browser WS 握手失败,尝试降级连接 Page WS: {fallback_page_ws}")
|
||||||
|
try:
|
||||||
|
clear_drission_singletons()
|
||||||
|
fallback_co = ChromiumOptions()
|
||||||
|
fallback_co.set_address(fallback_page_ws)
|
||||||
|
page = ChromiumPage(fallback_co)
|
||||||
|
log("OK", "[DEBUG] 已通过 Page WS 降级连接成功。")
|
||||||
|
return page
|
||||||
|
except Exception as ws_only_e:
|
||||||
|
log("ERR", f"[DEBUG] Page WS 降级连接失败: {ws_only_e}")
|
||||||
|
e = ws_only_e
|
||||||
|
|
||||||
log("WARN", f"[DEBUG] 尝试清理地址 {actual_address} 后重试...")
|
log("WARN", f"[DEBUG] 尝试清理地址 {actual_address} 后重试...")
|
||||||
try:
|
try:
|
||||||
clear_drission_singletons()
|
clear_drission_singletons()
|
||||||
@@ -262,6 +349,10 @@ def get_page(headless: bool = False, port: int = 9222) -> ChromiumPage:
|
|||||||
log("ERR", f"[DEBUG] 清理后重试依然失败: {retry_e}")
|
log("ERR", f"[DEBUG] 清理后重试依然失败: {retry_e}")
|
||||||
e = retry_e
|
e = retry_e
|
||||||
|
|
||||||
|
# #region debug-point B:devtools-http-probe
|
||||||
|
if not opt.address:
|
||||||
|
log("WARN", "[DEBUG] DevTools 探测跳过:address 为空")
|
||||||
|
# #endregion
|
||||||
log("ERR", f"浏览器初始化失败: {e}")
|
log("ERR", f"浏览器初始化失败: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
@@ -386,9 +477,5 @@ if __name__ == "__main__":
|
|||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
log("INFO", "用户中断")
|
log("INFO", "用户中断")
|
||||||
finally:
|
finally:
|
||||||
try:
|
|
||||||
if page:
|
|
||||||
page.quit()
|
page.quit()
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
log("INFO", "浏览器已关闭")
|
log("INFO", "浏览器已关闭")
|
||||||
|
|||||||
Reference in New Issue
Block a user