Initial upload for secondary development
This commit is contained in:
190
chatlog_fastAPI/routers/files.py
Normal file
190
chatlog_fastAPI/routers/files.py
Normal file
@@ -0,0 +1,190 @@
|
||||
import asyncio
import glob
import hashlib
import mimetypes
import os
import re
import shutil
import sqlite3
import tempfile
from pathlib import Path
from urllib.parse import quote

import httpx
from fastapi import APIRouter, HTTPException, Query
from fastapi.responses import FileResponse, StreamingResponse

from config import settings
from services.chatlog_client import chatlog_client
|
||||
|
||||
# Router for the file-download endpoints; its routes are served under /api/files.
router = APIRouter(prefix="/api/files", tags=["files"])
|
||||
|
||||
|
||||
# Explicit MIME types for office / CAD documents, keyed by lowercase file
# extension (leading dot included). These take priority over whatever the
# platform's ``mimetypes`` database would report for the same extension.
OFFICE_MEDIA_TYPES = {
    # Spreadsheets
    ".xls": "application/vnd.ms-excel",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    # Presentations
    ".ppt": "application/vnd.ms-powerpoint",
    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    # Text documents
    ".doc": "application/msword",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    # Other
    ".pdf": "application/pdf",
    ".dwg": "application/acad",
}
|
||||
|
||||
|
||||
def _connect_hardlink_db(hardlink_db: Path) -> sqlite3.Connection:
    """Open a private snapshot copy of *hardlink_db* and return a connection to it.

    chatlog may keep hardlink.db open. Copying a snapshot avoids transient
    "unable to open database file" errors on Windows while keeping reads safe.

    The snapshot lives in the system temp directory. Its name is derived from
    this process id, a digest of the source path and the source's mtime, so:

    * different hardlink databases never share (or overwrite) a snapshot;
    * an unchanged database is copied once and reused by later calls;
    * a changed database gets a fresh snapshot, and older snapshots of the
      same source made by this process are removed instead of piling up.

    Args:
        hardlink_db: Path to the live hardlink database.

    Returns:
        A connection to the snapshot with ``row_factory`` set to
        ``sqlite3.Row``. The caller owns it and must close it.

    Raises:
        OSError: If the source cannot be stat'ed or copied.
        sqlite3.Error: If the snapshot cannot be opened.
    """
    # One stat() call: the name and the freshness check then describe the
    # same state of the source file.
    source_stat = hardlink_db.stat()
    source_tag = hashlib.sha256(str(hardlink_db.resolve()).encode("utf-8")).hexdigest()[:12]

    tmp_dir = Path(tempfile.gettempdir())
    prefix = f"chatlab_hardlink_{os.getpid()}_{source_tag}_"
    snapshot = tmp_dir / f"{prefix}{source_stat.st_mtime_ns}.db"

    try:
        # A size mismatch means a previous copy was interrupted.
        is_current = snapshot.stat().st_size == source_stat.st_size
    except OSError:
        is_current = False

    if not is_current:
        shutil.copy2(hardlink_db, snapshot)
        # Drop outdated snapshots of this same source. ``prefix`` contains
        # only [a-z0-9_], so it is safe to embed in a glob pattern.
        # NOTE: snapshots left behind by earlier processes (other pids) are
        # not touched here.
        for stale in tmp_dir.glob(f"{prefix}*.db"):
            if stale == snapshot:
                continue
            try:
                stale.unlink()
            except OSError:
                # Best effort: e.g. still open elsewhere, which blocks
                # deletion on Windows.
                pass

    con = sqlite3.connect(snapshot)
    con.row_factory = sqlite3.Row
    return con
|
||||
|
||||
|
||||
def _safe_download_name(name: str, fallback: str) -> str:
    """Return a bare, header-safe file name for a download.

    The value comes from a request parameter and is later placed in a
    ``Content-Disposition`` header and used to search the disk, so it is
    reduced to a plain file name:

    * ASCII control characters (CR, LF, NUL, tab, DEL, ...) are removed;
    * any directory part is dropped, for both ``/`` and ``\\`` separators;
    * surrounding whitespace is stripped;
    * ``.`` and ``..`` are rejected.

    Args:
        name: Requested file name; may be empty.
        fallback: Value to use when *name* is empty or nothing usable remains.

    Returns:
        The cleaned file name, or *fallback* when the cleaned name is empty.
    """
    raw = name or fallback or ""
    # Control characters can split or corrupt the response header.
    printable = "".join(ch for ch in raw if ch >= " " and ch != "\x7f")
    # Keep only the last path component, whichever separator style was used.
    base = printable.replace("\\", "/").rsplit("/", 1)[-1].strip()
    if base in {".", ".."}:
        base = ""
    return base or fallback
|
||||
|
||||
|
||||
def _content_disposition(filename: str) -> str:
    """Build an ``attachment`` Content-Disposition header value for *filename*.

    Two forms of the name are emitted:

    * ``filename="..."`` - an ASCII-only fallback in which every run of
      characters outside ``[A-Za-z0-9._-]`` becomes a single ``_``
      (``download`` if nothing is left);
    * ``filename*=UTF-8''...`` - the real name, UTF-8 percent-encoded.

    Args:
        filename: The download name; may contain non-ASCII characters.

    Returns:
        The header value as a string.
    """
    # safe="" so that "/" is percent-encoded too: it is not a legal bare
    # character in an RFC 5987 ext-value, and quote() leaves it alone by default.
    encoded = quote(filename, safe="")
    ascii_fallback = re.sub(r"[^A-Za-z0-9._-]+", "_", filename) or "download"
    return f"attachment; filename=\"{ascii_fallback}\"; filename*=UTF-8''{encoded}"
|
||||
|
||||
|
||||
def _guess_media_type(filename: str, fallback: str = "") -> str:
    """Pick the media type to report for *filename*.

    Sources are tried in order and the first non-empty answer wins:

    1. the ``OFFICE_MEDIA_TYPES`` table, keyed by the lowercase extension;
    2. the standard ``mimetypes`` guess for the file name;
    3. the caller-supplied *fallback*;
    4. ``application/octet-stream``.
    """
    suffix = Path(filename or "").suffix.lower()

    from_table = OFFICE_MEDIA_TYPES.get(suffix)
    if from_table:
        return from_table

    guessed = mimetypes.guess_type(filename)[0]
    if guessed:
        return guessed

    if fallback:
        return fallback
    return "application/octet-stream"
|
||||
|
||||
|
||||
async def _proxy_chatlog_file(md5: str, filename: str = ""):
    """Fetch a file from the chatlog service and wrap it as a response.

    Requests ``{settings.chatlog_base_url}/file/{md5}`` (md5 percent-encoded)
    with a 30 second timeout, following redirects and ignoring proxy settings
    from the environment (``trust_env=False``).

    Args:
        md5: Identifier of the file in chatlog.
        filename: Download name; when non-empty it drives the
            Content-Disposition header and the media-type guess.

    Returns:
        A ``StreamingResponse`` carrying the fully buffered upstream body,
        or ``None`` when the request raised, the status was not 200, or the
        body was exactly the ``"media not found"`` marker.
    """
    target = f"{settings.chatlog_base_url}/file/{quote(md5, safe='')}"

    try:
        async with httpx.AsyncClient(timeout=30, trust_env=False, follow_redirects=True) as client:
            upstream = await client.get(target)
    except Exception:
        # Any failure here just means "chatlog cannot serve it"; the caller
        # is expected to try another source.
        return None

    if upstream.status_code != 200:
        return None
    payload = upstream.content
    if payload == b'"media not found"':
        return None

    response_headers = {
        "Content-Length": str(len(payload)),
        "X-ChatLab-File-Source": "chatlog",
    }
    if filename:
        response_headers["Content-Disposition"] = _content_disposition(filename)

    upstream_type = upstream.headers.get("content-type") or ""
    return StreamingResponse(
        iter([payload]),
        media_type=_guess_media_type(filename, upstream_type),
        headers=response_headers,
    )
|
||||
|
||||
|
||||
def _xwechat_roots_from_hardlink_db(hardlink_db: Path) -> list[Path]:
    """Return the directories to search for WeChat files, in priority order.

    The list is built from:

    1. a Windows path ending in ``xwechat_files`` extracted from the
       ``ValueStdStr`` of the ``db_info`` row whose ``Key`` is ``'uuid'``
       (presumably where the account's data lives - confirm against the
       database schema);
    2. ``~/xwechat_files``;
    3. ``~/Documents/WeChat Files``.

    Entries that differ only by letter case are collapsed, keeping the first.
    The directories are not checked for existence here.

    Args:
        hardlink_db: Path to the hardlink database to inspect.

    Returns:
        The de-duplicated candidate roots. Reading the database is best
        effort: any failure only skips the first candidate.
    """
    raw = ""
    try:
        con = _connect_hardlink_db(hardlink_db)
        try:
            row = con.execute("SELECT ValueStdStr FROM db_info WHERE Key='uuid'").fetchone()
        finally:
            # Always release the handle on the snapshot file.
            con.close()
        raw = (row["ValueStdStr"] if row else "") or ""
    except Exception:
        # Deliberately broad: a missing or unreadable database must not stop
        # the search of the default locations below.
        raw = ""

    roots: list[Path] = []
    if raw:
        # Drive-letter path up to and including "xwechat_files"; "|" cannot
        # appear inside the match.
        found = re.search(r"([A-Za-z]:\\[^|]+?xwechat_files)", raw)
        if found:
            roots.append(Path(found.group(1)))

    roots.append(Path.home() / "xwechat_files")
    roots.append(Path.home() / "Documents" / "WeChat Files")

    # Case-insensitive de-duplication that keeps the first spelling and order.
    unique: dict[str, Path] = {}
    for root in roots:
        unique.setdefault(str(root).lower(), root)
    return list(unique.values())
|
||||
|
||||
|
||||
def _find_local_file(hardlink_db: Path, md5: str, requested_name: str = "") -> Path | None:
|
||||
try:
|
||||
con = _connect_hardlink_db(hardlink_db)
|
||||
row = con.execute(
|
||||
"""
|
||||
SELECT md5, file_name, file_size, dir1, dir2
|
||||
FROM file_hardlink_info_v4
|
||||
WHERE md5=?
|
||||
ORDER BY _rowid_ DESC
|
||||
LIMIT 1
|
||||
""",
|
||||
(md5,),
|
||||
).fetchone()
|
||||
except Exception:
|
||||
row = None
|
||||
if not row:
|
||||
return None
|
||||
|
||||
names = [requested_name, row["file_name"]]
|
||||
names = [n for n in names if n]
|
||||
size = int(row["file_size"] or 0)
|
||||
roots = _xwechat_roots_from_hardlink_db(hardlink_db)
|
||||
|
||||
for root in roots:
|
||||
if not root.exists():
|
||||
continue
|
||||
for name in names:
|
||||
for candidate in root.rglob(name):
|
||||
try:
|
||||
if candidate.is_file() and (not size or candidate.stat().st_size == size):
|
||||
return candidate
|
||||
except Exception:
|
||||
continue
|
||||
if size:
|
||||
# Fallback by size in the common file store. This is intentionally limited
|
||||
# to msg/file to avoid scanning unrelated huge trees for every request.
|
||||
for file_root in root.glob("*/msg/file"):
|
||||
if not file_root.exists():
|
||||
continue
|
||||
for candidate in file_root.rglob("*"):
|
||||
try:
|
||||
if candidate.is_file() and candidate.stat().st_size == size:
|
||||
if not names or candidate.name in names:
|
||||
return candidate
|
||||
except Exception:
|
||||
continue
|
||||
return None
|
||||
|
||||
|
||||
# Disk lookups used to run inline on the event loop, i.e. one at a time.
# They now run in a worker thread; this lock keeps them one at a time so the
# lookup helpers are not suddenly exercised concurrently.
_LOCAL_LOOKUP_LOCK = asyncio.Lock()


@router.get("/{md5}")
async def get_file(md5: str, filename: str = Query("")):
    """Serve the file identified by *md5* as a download.

    Sources are tried in order:

    1. the chatlog service, via ``_proxy_chatlog_file``;
    2. the local disk, via ``_find_local_file``, for every existing database
       listed under ``"media"`` by ``chatlog_client.get_db_paths()``.

    Args:
        md5: 8 to 64 hexadecimal characters (surrounding whitespace ignored).
        filename: Optional download name; *md5* is used when it is empty.

    Returns:
        The proxied response, or a ``FileResponse`` for the local file. The
        ``X-ChatLab-File-Source`` header tells which source answered.

    Raises:
        HTTPException: 400 if *md5* is malformed; 404 if no source has the file.
    """
    md5 = md5.strip()
    if not re.fullmatch(r"[0-9a-fA-F]{8,64}", md5):
        raise HTTPException(400, "文件 md5 不合法")

    filename = _safe_download_name(filename, md5)
    proxied = await _proxy_chatlog_file(md5, filename)
    if proxied:
        return proxied

    db_paths = await chatlog_client.get_db_paths()
    hardlink_paths = db_paths.get("media") or []
    for raw_path in hardlink_paths:
        hardlink_db = Path(raw_path)
        if not hardlink_db.exists():
            continue

        # The lookup copies a database and walks directory trees - blocking
        # work that would stall every other request if it ran on the loop.
        async with _LOCAL_LOOKUP_LOCK:
            local_file = await asyncio.to_thread(_find_local_file, hardlink_db, md5, filename)
        if not local_file:
            continue

        download_name = filename or local_file.name
        return FileResponse(
            path=str(local_file),
            filename=download_name,
            media_type=_guess_media_type(download_name),
            headers={
                "Content-Disposition": _content_disposition(download_name),
                "Content-Length": str(local_file.stat().st_size),
                "X-ChatLab-File-Source": "local-hardlink",
            },
        )

    raise HTTPException(404, "原文件未找到，可能未解密或已清理")
|
||||
Reference in New Issue
Block a user