Initial upload for secondary development

2026-06-08 19:00:03 +08:00
commit b913b8c78c
81 changed files with 27139 additions and 0 deletions
--- a/chatlog_fastAPI/services/__init__.py
+++ b/chatlog_fastAPI/services/__init__.py
--- a/chatlog_fastAPI/services/ai_client.py
+++ b/chatlog_fastAPI/services/ai_client.py
@@ -0,0 +1,31 @@
+import httpx
+from openai import AsyncOpenAI
+
+from services.runtime_settings import get_ai_settings
+
+# Caches keyed by (ai_base_url, ai_api_key). get_openai_client() empties both
+# whenever it is asked for a key that is not cached, so they hold at most the
+# client pair for the most recently used settings.
+_client_cache: dict[tuple[str, str], AsyncOpenAI] = {}
+_http_client_cache: dict[tuple[str, str], httpx.AsyncClient] = {}
+
+
+async def get_openai_client() -> tuple[AsyncOpenAI, dict]:
+    """Return the cached OpenAI client together with the current AI settings.
+
+    Clients are keyed by ``(ai_base_url, ai_api_key)``. When the settings yield
+    a key that is not cached, every cached HTTP client is closed, both caches
+    are emptied, and a new client pair is created for that key.
+    """
+    settings = await get_ai_settings()
+    base_url = settings.get("ai_base_url")
+    api_key = settings.get("ai_api_key")
+    cache_key = (base_url or "", api_key or "")
+
+    if cache_key in _client_cache:
+        return _client_cache[cache_key], settings
+
+    # First call or changed settings: discard everything built for other keys.
+    for stale in _http_client_cache.values():
+        await stale.aclose()
+    _client_cache.clear()
+    _http_client_cache.clear()
+
+    transport = httpx.AsyncClient(timeout=httpx.Timeout(600.0, connect=30.0))
+    _http_client_cache[cache_key] = transport
+    _client_cache[cache_key] = AsyncOpenAI(
+        api_key=api_key or "missing",
+        base_url=base_url,
+        http_client=transport,
+    )
+    return _client_cache[cache_key], settings
--- a/chatlog_fastAPI/services/chatlog_client.py
+++ b/chatlog_fastAPI/services/chatlog_client.py
@@ -0,0 +1,203 @@
+import httpx
+import asyncio
+from typing import List
+from config import settings
+
+
+class ChatlogHTTPError(RuntimeError):
+    """HTTP error status returned by chatlog, with the request and error detail."""
+
+    def __init__(self, status_code: int, method: str, path: str, detail: str):
+        message = f"chatlog HTTP {status_code}: {method} {path} body={detail!r}"
+        super().__init__(message)
+        self.status_code, self.method = status_code, method
+        self.path, self.detail = path, detail
+
+
+class MessageIndexNotReady(RuntimeError):
+    """Raised when chatlog has sessions but its message time index is not usable yet.
+
+    ChatlogClient.get_messages raises it when the retried request still returns
+    a 404 whose detail contains "time range not found".
+    """
+
+
+class ChatlogClient:
+    """Async client for the chatlog HTTP API rooted at ``settings.chatlog_base_url``.
+
+    Each request uses its own short-lived ``httpx.AsyncClient`` created with
+    ``trust_env=False``. ``_get``/``_post`` raise ``ChatlogHTTPError`` for HTTP
+    error statuses and ``RuntimeError`` for timeouts and any other failure.
+    """
+
+    # Pause before the single retry that get_messages() makes after a 404.
+    _RETRY_DELAY_SECONDS = 0.2
+    # User-facing text carried by MessageIndexNotReady.
+    _INDEX_NOT_READY_MESSAGE = (
+        "自动解密仍在处理消息库，请稍后刷新聊天记录；如果长时间为空，请在微信里打开该聊天并翻看历史消息。"
+    )
+
+    def __init__(self):
+        self.base = settings.chatlog_base_url
+        # None = contact DB file not looked up yet; "" = lookup failed or was empty.
+        self._contact_db_file = None
+
+    async def _request(
+        self,
+        method: str,
+        path: str,
+        *,
+        timeout: float,
+        params: dict | None = None,
+        body: dict | None = None,
+    ) -> dict:
+        """Send one request and return the decoded JSON body.
+
+        Raises:
+            ChatlogHTTPError: chatlog answered with an HTTP error status.
+            RuntimeError: the request timed out or failed in any other way
+                (including a response body that is not valid JSON).
+        """
+        try:
+            async with httpx.AsyncClient(timeout=timeout, trust_env=False) as client:
+                r = await client.request(method, f"{self.base}{path}", params=params, json=body)
+                r.raise_for_status()
+                return r.json()
+        except httpx.TimeoutException as e:
+            raise RuntimeError(f"chatlog timeout: {method} {path}") from e
+        except httpx.HTTPStatusError as e:
+            detail = self._response_detail(e.response)
+            raise ChatlogHTTPError(e.response.status_code, method, path, detail) from e
+        except Exception as e:
+            raise RuntimeError(f"chatlog request failed: {e}") from e
+
+    async def _get(self, path: str, params: dict, timeout: float = 30.0) -> dict:
+        """GET ``path`` with query ``params``; see ``_request`` for the errors raised."""
+        return await self._request("GET", path, params=params, timeout=timeout)
+
+    async def _post(self, path: str, body: dict, timeout: float = 30.0) -> dict:
+        """POST ``body`` as JSON to ``path``; see ``_request`` for the errors raised."""
+        return await self._request("POST", path, body=body, timeout=timeout)
+
+    def _response_detail(self, response: httpx.Response) -> str:
+        """Extract a readable error detail from an error response.
+
+        For a JSON object the ``error`` field, then ``detail``, then the whole
+        object is used; other JSON is stringified; non-JSON falls back to the
+        raw response text.
+        """
+        try:
+            body = response.json()
+            if isinstance(body, dict):
+                return str(body.get("error") or body.get("detail") or body)
+            return str(body)
+        except Exception:
+            return response.text
+
+    async def get_messages(
+        self,
+        talker: str,
+        time: str = "",
+        sender: str = "",
+        keyword: str = "",
+        min_seq: int = 0,
+        limit: int = 100,
+        offset: int = 0,
+    ) -> dict:
+        """Fetch messages of ``talker`` from ``/api/v1/chatlog``.
+
+        An empty ``time`` is replaced by the range ``1970-01-01,2099-12-31``.
+        A 404 whose detail contains "not found" is retried once after a short
+        pause, because chatlog sometimes reports a valid window as missing
+        while it is warming up.
+
+        Returns:
+            The response dict, or ``{"total": ..., "items": ...}`` wrapped
+            around a non-dict response. When a plain "not found" 404 repeats on
+            the retry, an empty result is returned.
+
+        Raises:
+            MessageIndexNotReady: the retry still reports "time range not found".
+            ChatlogHTTPError: any other HTTP error.
+            RuntimeError: timeout or other request failure.
+        """
+        params: dict = {
+            "talker": talker,
+            "limit": limit,
+            "offset": offset,
+            "format": "json",
+            "time": time if time else "1970-01-01,2099-12-31",
+        }
+        if sender:
+            params["sender"] = sender
+        if keyword:
+            params["keyword"] = keyword
+        if min_seq > 0:
+            params["min_seq"] = min_seq
+
+        try:
+            data = await self._get("/api/v1/chatlog", params)
+        except ChatlogHTTPError as first_error:
+            first_detail = first_error.detail.lower()
+            if first_error.status_code != 404 or "not found" not in first_detail:
+                raise
+            # Only a first failure that was a plain "not found" may end as an
+            # empty result; a "time range not found" first failure never does.
+            index_missing_first = "time range not found" in first_detail
+            await asyncio.sleep(self._RETRY_DELAY_SECONDS)
+            try:
+                data = await self._get("/api/v1/chatlog", params)
+            except ChatlogHTTPError as retry_error:
+                retry_detail = retry_error.detail.lower()
+                if retry_error.status_code == 404:
+                    if "time range not found" in retry_detail:
+                        raise MessageIndexNotReady(self._INDEX_NOT_READY_MESSAGE) from retry_error
+                    if not index_missing_first and "not found" in retry_detail:
+                        return {"total": 0, "items": []}
+                raise
+        if isinstance(data, dict):
+            return data
+        return {"total": len(data), "items": data}
+
+    async def get_message(self, talker: str, seq: int) -> dict | None:
+        """Fetch one message by sequence number; return ``None`` on HTTP 404.
+
+        Raises:
+            RuntimeError: timeout, any other HTTP error status, or request failure.
+        """
+        try:
+            async with httpx.AsyncClient(timeout=10.0, trust_env=False) as client:
+                r = await client.get(
+                    f"{self.base}/api/v1/chatlog/message",
+                    params={"talker": talker, "seq": seq},
+                )
+                if r.status_code == 404:
+                    return None
+                r.raise_for_status()
+                return r.json()
+        except httpx.TimeoutException as e:
+            raise RuntimeError("chatlog timeout: get_message") from e
+        except Exception as e:
+            raise RuntimeError(f"chatlog request failed: {e}") from e
+
+    async def get_messages_batch(self, talker: str, seqs: list[int]) -> dict:
+        """POST ``talker`` and ``seqs`` to ``/api/v1/chatlog/batch``."""
+        return await self._post("/api/v1/chatlog/batch", {"talker": talker, "seqs": seqs})
+
+    async def get_chatrooms(self, keyword: str = "", limit: int = 100, offset: int = 0) -> dict:
+        """List chatrooms, optionally filtered by ``keyword``."""
+        params: dict = {"limit": limit, "offset": offset, "format": "json"}
+        if keyword:
+            params["keyword"] = keyword
+        return await self._get("/api/v1/chatroom", params, timeout=10.0)
+
+    async def get_contacts(self, keyword: str = "", limit: int = 100, offset: int = 0) -> dict:
+        """List contacts, optionally filtered by ``keyword``."""
+        params: dict = {"limit": limit, "offset": offset, "format": "json"}
+        if keyword:
+            params["keyword"] = keyword
+        return await self._get("/api/v1/contact", params, timeout=10.0)
+
+    async def get_chatroom_members(self, talker: str, time: str = "") -> dict:
+        """Fetch the members of chatroom ``talker``; ``time`` is sent only when non-empty."""
+        params: dict = {"talker": talker}
+        if time:
+            params["time"] = time
+        return await self._get("/api/v1/chatroom/members", params)
+
+    async def get_sessions(self, keyword: str = "", limit: int = 500) -> list:
+        """Return the session list.
+
+        A list response is returned as is; for a dict response the ``items``
+        entry is used, then ``data``, then an empty list.
+        """
+        params: dict = {"limit": limit, "format": "json"}
+        if keyword:
+            params["keyword"] = keyword
+        data = await self._get("/api/v1/session", params, timeout=15.0)
+        if isinstance(data, list):
+            return data
+        return data.get("items", data.get("data", []))
+
+    async def get_avatar_url(self, wxid: str) -> str:
+        """Best-effort avatar lookup for ``wxid``; returns ``""`` on any failure.
+
+        The contact DB file name is fetched from ``/api/v1/db`` on first use and
+        cached on the instance. The small avatar URL is preferred over the big one.
+        """
+        if self._contact_db_file is None:
+            try:
+                db_list = await self._get("/api/v1/db", {})
+                self._contact_db_file = (db_list.get("contact") or [""])[0]
+            except Exception:
+                # NOTE(review): a failed lookup is cached as "" as well, so it is
+                # never retried on this instance - confirm that is intended.
+                self._contact_db_file = ""
+        if not self._contact_db_file:
+            return ""
+        # The query endpoint takes raw SQL text, so the value cannot be bound as
+        # a parameter; single quotes are doubled to keep it inside the literal.
+        safe_wxid = wxid.replace("'", "''")
+        sql = f"SELECT small_head_url, big_head_url FROM contact WHERE username='{safe_wxid}' LIMIT 1"
+        params = {"group": "contact", "file": self._contact_db_file, "sql": sql}
+        try:
+            rows = await self._get("/api/v1/db/query", params, timeout=5.0)
+            if rows:
+                return rows[0].get("small_head_url") or rows[0].get("big_head_url") or ""
+        except Exception:
+            # Avatars are optional decoration: any failure means "no avatar".
+            return ""
+        return ""
+
+    async def get_db_paths(self) -> dict:
+        """Return the ``/api/v1/db`` response, or ``{}`` when it is not a dict."""
+        data = await self._get("/api/v1/db", {}, timeout=10.0)
+        return data if isinstance(data, dict) else {}
+
+
+# Module-level ChatlogClient instance created at import time.
+chatlog_client = ChatlogClient()
--- a/chatlog_fastAPI/services/chatlog_context.py
+++ b/chatlog_fastAPI/services/chatlog_context.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+
+
+@dataclass
+class ChatlogContext:
+    """Description of the chatlog instance this service is working with.
+
+    Every field has a default, so ``ChatlogContext()`` is valid. Instances are
+    built by update_chatlog_context() from a payload dict.
+    """
+
+    account: str = ""
+    # _read_voice_resource_status() looks for
+    # <work_dir>/db_storage/message/message_resource.db under this directory.
+    work_dir: str = ""
+    data_dir: str = ""
+    platform: str = "windows"
+    # presumably the WeChat major version - TODO confirm against the caller
+    version: int = 4
+    chatlog_exe: str = ""
+    chatlog_version: str = ""
+
+
+# Current context for this process; update_chatlog_context() rebinds it to a
+# freshly built ChatlogContext.
+_context = ChatlogContext()
+
+
+def update_chatlog_context(payload: dict) -> dict:
+    """Replace the module-level context with values read from ``payload``.
+
+    The directory, executable and chatlog-version fields accept a camelCase or
+    a snake_case key (camelCase is tried first). Missing or falsy values fall
+    back to the field defaults. Returns the new context as a plain dict.
+    """
+    global _context
+
+    def pick(*names: str, default: object = "") -> object:
+        # First truthy value among the candidate keys, otherwise the default.
+        for name in names:
+            found = payload.get(name)
+            if found:
+                return found
+        return default
+
+    fields = {
+        "account": str(pick("account")),
+        "work_dir": str(pick("workDir", "work_dir")),
+        "data_dir": str(pick("dataDir", "data_dir")),
+        "platform": str(pick("platform", default="windows")),
+        "version": int(pick("version", default=4)),
+        "chatlog_exe": str(pick("chatlogExe", "chatlog_exe")),
+        "chatlog_version": str(pick("chatlogVersion", "chatlog_version")),
+    }
+    _context = ChatlogContext(**fields)
+    return get_chatlog_context()
+
+
+def get_chatlog_context() -> dict:
+    """Return the current context as a new dict built with ``dataclasses.asdict``."""
+    snapshot = asdict(_context)
+    return snapshot
--- a/chatlog_fastAPI/services/fts.py
+++ b/chatlog_fastAPI/services/fts.py
@@ -0,0 +1,25 @@
+import jieba
+import re
+
+def tokenize(text: str) -> str:
+    """Segment ``text`` with jieba and return the pieces joined by single spaces."""
+    pieces = jieba.cut(text)
+    return " ".join(pieces)
+
+
+def build_match_query(text: str, limit: int = 12) -> str:
+    """Turn free text into an FTS5 MATCH expression of quoted terms joined by OR.
+
+    Tokens without any word character, the FTS5 keywords AND/OR/NOT/NEAR
+    (any letter case) and repeated tokens are dropped. Collection stops once
+    ``limit`` terms have been gathered. Returns ``""`` when no term survives.
+    """
+    reserved = {"AND", "OR", "NOT", "NEAR"}
+    # Insertion-ordered map: raw token -> its double-quoted form.
+    quoted: dict[str, str] = {}
+    for raw in tokenize(text or "").split():
+        word = raw.strip()
+        if not word:
+            continue
+        if re.search(r"\w", word, flags=re.UNICODE) is None:
+            continue
+        if word.upper() in reserved or word in quoted:
+            continue
+        # Embedded double quotes are doubled so the term stays one string.
+        quoted[word] = '"' + word.replace('"', '""') + '"'
+        if len(quoted) >= limit:
+            break
+    return " OR ".join(quoted.values())
--- a/chatlog_fastAPI/services/media_parser.py
+++ b/chatlog_fastAPI/services/media_parser.py
@@ -0,0 +1,142 @@
+import base64
+import logging
+
+import httpx
+from fastapi import HTTPException
+
+from services.ai_client import get_openai_client
+from services.media_resolver import resolve_media
+from services.runtime_settings import get_ai_settings
+
+# Logger named after this module.
+log = logging.getLogger(__name__)
+
+
+async def _get_ai_client():
+    """Return whatever ``get_openai_client()`` returns (client and settings)."""
+    client_and_settings = await get_openai_client()
+    return client_and_settings
+
+
+async def parse_media(kind: str, key: str) -> dict:
+    """Turn one chatlog media object into text.
+
+    Args:
+        kind: "voice", "image" or "video".
+        key: chatlog media key.
+
+    Returns:
+        ``{"text": ...}`` holding the voice transcript or the visual description.
+
+    Raises:
+        HTTPException: 400 for an unsupported kind or an empty key; 503 when
+            the API key or the model needed for this kind is not configured.
+    """
+    if kind not in {"voice", "image", "video"}:
+        raise HTTPException(400, "不支持的媒体类型")
+    if not key:
+        raise HTTPException(400, "媒体 key 不能为空")
+
+    is_voice = kind == "voice"
+    ai = await get_ai_settings()
+    if not ai.get("ai_api_key"):
+        raise HTTPException(503, "AI 服务未配置，请在设置页填写 AI API Key")
+    if is_voice:
+        if not ai.get("voice_model"):
+            raise HTTPException(503, "语音模型未配置，请在设置页填写语音模型名称，例如 paraformer-v2")
+    elif not ai.get("vision_model"):
+        # kind was validated above, so anything that is not voice is image/video.
+        raise HTTPException(503, "视觉模型未配置，请在设置页填写视觉模型名称，例如 qwen-vl-plus")
+
+    media = await resolve_media(kind, key)
+    if is_voice:
+        text = await _parse_voice(media.bytes, media.content_type)
+    else:
+        text = await _parse_visual(kind, media.bytes, media.content_type)
+    return {"text": text}
+
+
+async def _parse_voice(media_bytes: bytes, content_type: str) -> str:
+    """Transcribe audio through the DashScope asynchronous transcription API.
+
+    The audio is submitted inline as a base64 ``data:`` URI; the task is then
+    polled once per second, up to 30 times.
+
+    Args:
+        media_bytes: Raw audio bytes.
+        content_type: Content type reported for the audio; it selects the MIME
+            type of the data URI and falls back to ``audio/mpeg``.
+
+    Returns:
+        The transcript, or "（识别结果为空）" when the task succeeds without text.
+
+    Raises:
+        HTTPException: 500 when submission fails, no task id comes back, the
+            task ends as FAILED/CANCELLED, or polling runs out.
+    """
+    import asyncio  # imported once here instead of on every poll iteration
+
+    b64_audio = base64.b64encode(media_bytes).decode()
+    audio_ct = content_type.lower()
+    # First matching marker wins; "x-silk" needs no entry because it contains "silk".
+    mime_by_marker = (
+        ("silk", "audio/silk"),
+        ("amr", "audio/amr"),
+        ("ogg", "audio/ogg"),
+        ("opus", "audio/ogg"),
+        ("wav", "audio/wav"),
+    )
+    audio_mime = next(
+        (mime for marker, mime in mime_by_marker if marker in audio_ct),
+        "audio/mpeg",
+    )
+
+    data_uri = f"data:{audio_mime};base64,{b64_audio}"
+    _, ai = await _get_ai_client()
+    asr_headers = {
+        "Authorization": f"Bearer {ai['ai_api_key']}",
+        "Content-Type": "application/json",
+    }
+
+    async with httpx.AsyncClient(timeout=60) as http:
+        submit = await http.post(
+            "https://dashscope.aliyuncs.com/api/v1/services/audio/asr/transcription",
+            headers={**asr_headers, "X-DashScope-Async": "enable"},
+            json={
+                "model": ai["voice_model"],
+                "input": {"file_urls": [data_uri]},
+                "parameters": {"language_hints": ["zh", "en"]},
+            },
+            timeout=30,
+        )
+        try:
+            submit_data = submit.json()
+        except ValueError:
+            # A non-JSON body (e.g. a gateway error page) must not hide the
+            # failure behind a decode error; keep a preview as the message.
+            submit_data = {"message": submit.text[:500]}
+        if submit.status_code not in (200, 201):
+            raise HTTPException(500, f"提交识别任务失败: {submit_data.get('message', submit_data)}")
+
+        task_id = submit_data.get("output", {}).get("task_id")
+        if not task_id:
+            raise HTTPException(500, f"未获取到 task_id: {submit_data}")
+
+        for _ in range(30):
+            await asyncio.sleep(1)
+            poll = await http.get(
+                f"https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}",
+                headers=asr_headers,
+                timeout=10,
+            )
+            poll_data = poll.json()
+            status = poll_data.get("output", {}).get("task_status", "")
+            if status == "SUCCEEDED":
+                results = poll_data.get("output", {}).get("results", [])
+                log.info("[media_parser] ASR SUCCEEDED results: %s", results)
+                if not results:
+                    return "（识别结果为空）"
+                trans_url = results[0].get("transcription_url", "")
+                if trans_url:
+                    # The transcript is stored in a separate document.
+                    trans_resp = await http.get(trans_url, timeout=10)
+                    trans_data = trans_resp.json()
+                    log.info("[media_parser] transcription_url content: %s", str(trans_data)[:500])
+                    transcripts = trans_data.get("transcripts", [])
+                    text = transcripts[0].get("text", "") if transcripts else ""
+                else:
+                    text = results[0].get("transcription", "")
+                return text or "（识别结果为空）"
+            if status in ("FAILED", "CANCELLED"):
+                raise HTTPException(500, f"识别任务失败: {poll_data.get('output', {}).get('message', status)}")
+
+    raise HTTPException(500, "语音识别超时（30秒）")
+
+
+async def _parse_visual(kind: str, media_bytes: bytes, content_type: str) -> str:
+    """Ask the configured vision model for a short Chinese description.
+
+    The bytes are embedded as a base64 data URL. The MIME type is PNG or WebP
+    when the content type mentions it, otherwise JPEG. Returns the model's
+    reply, or ``""`` when the reply has no content.
+    """
+    lowered = content_type.lower()
+    mime = "image/jpeg"
+    for marker in ("png", "webp"):
+        if marker in lowered:
+            mime = f"image/{marker}"
+            break
+    encoded = base64.b64encode(media_bytes).decode()
+    data_url = f"data:{mime};base64,{encoded}"
+
+    if kind == "image":
+        prompt = "请用中文简洁描述这张图片的内容。"
+    else:
+        prompt = "请用中文简洁描述这个视频截图的内容。"
+
+    user_message = {
+        "role": "user",
+        "content": [
+            {"type": "image_url", "image_url": {"url": data_url}},
+            {"type": "text", "text": prompt},
+        ],
+    }
+    client, ai = await _get_ai_client()
+    completion = await client.chat.completions.create(
+        model=ai["vision_model"],
+        messages=[user_message],
+        max_tokens=300,
+    )
+    return completion.choices[0].message.content or ""
--- a/chatlog_fastAPI/services/media_resolver.py
+++ b/chatlog_fastAPI/services/media_resolver.py
@@ -0,0 +1,174 @@
+from __future__ import annotations
+
+import logging
+import sqlite3
+from dataclasses import dataclass
+from pathlib import Path
+
+import httpx
+from fastapi import HTTPException
+
+from config import settings
+from services.chatlog_context import get_chatlog_context
+
+# Logger named after this module.
+log = logging.getLogger(__name__)
+
+
+@dataclass
+class ResolvedMedia:
+    """One media object downloaded from chatlog, as returned by resolve_media()."""
+
+    # Raw response body.
+    bytes: bytes
+    # Response content-type header; resolve_media() uses
+    # "application/octet-stream" when the header is absent.
+    content_type: str
+    # URL the media was fetched from.
+    url: str
+
+
+def _media_url(kind: str, key: str, thumb: bool = False) -> str:
+    """Build ``<chatlog_base_url>/<kind>/<key>``, adding ``?thumb=1`` when ``thumb`` is set."""
+    suffix = "?thumb=1" if thumb else ""
+    return f"{settings.chatlog_base_url}/{kind}/{key}{suffix}"
+
+
+def _read_voice_resource_status(key: str) -> dict:
+    """Report what the decrypted resource DB knows about voice message ``key``.
+
+    Reads ``<work_dir>/db_storage/message/message_resource.db`` read-only.
+    The result always has ``checked``; when it is false, ``reason`` says why
+    (no work dir, DB file missing, or read failure). When it is true,
+    ``found`` tells whether a MessageResourceInfo row exists, and a found row
+    comes with its MessageResourceDetail rows under ``resources``.
+    """
+    work_dir = get_chatlog_context().get("work_dir") or ""
+    if not work_dir:
+        return {"checked": False, "reason": "missing_work_dir"}
+
+    db_path = Path(work_dir).joinpath("db_storage", "message", "message_resource.db")
+    shown_path = str(db_path)
+    if not db_path.exists():
+        return {"checked": False, "reason": "message_resource_db_missing", "path": shown_path}
+
+    try:
+        # mode=ro: this is a diagnostic, it must never modify the database.
+        conn = sqlite3.connect(f"file:{db_path.as_posix()}?mode=ro", uri=True)
+        conn.row_factory = sqlite3.Row
+        try:
+            info_cursor = conn.execute(
+                "SELECT * FROM MessageResourceInfo WHERE message_svr_id=?",
+                (int(key),),
+            )
+            info = info_cursor.fetchone()
+            if info:
+                message_id = info["message_id"]
+                detail_cursor = conn.execute(
+                    "SELECT type,size,status,data_index FROM MessageResourceDetail WHERE message_id=?",
+                    (message_id,),
+                )
+                resources = [dict(row) for row in detail_cursor.fetchall()]
+                return {
+                    "checked": True,
+                    "found": True,
+                    "path": shown_path,
+                    "message_id": info["message_id"],
+                    "resources": resources,
+                }
+            return {
+                "checked": True,
+                "found": False,
+                "path": shown_path,
+                "message": "当前已解密资源库里没有这条语音的媒体资源记录",
+            }
+        finally:
+            conn.close()
+    except Exception as exc:
+        # Includes a non-numeric key: int(key) is evaluated inside the try.
+        return {"checked": False, "reason": "resource_db_read_failed", "error": str(exc), "path": shown_path}
+
+
+def _download_failure_message(kind: str, key: str, status_code: int | None, body: str = "") -> str:
+    """Build the user-facing message for a failed chatlog media download.
+
+    Voice gets a dedicated explanation that mentions the HTTP status when one
+    is known. Other kinds report the HTTP status, or ``body`` (or
+    "unknown error") when there is no status. ``key`` is accepted but unused.
+    """
+    if kind != "voice":
+        reason = f"HTTP {status_code}" if status_code else (body or "unknown error")
+        return f"从 chatlog 下载媒体失败: {reason}"
+
+    status_note = f"（chatlog /voice 返回 HTTP {status_code}）" if status_code else ""
+    return (
+        f"底层语音文件未读取成功{status_note}。请先确认已安装新版程序并重新识别当前微信账号；"
+        "如果仍失败，说明当前 chatlog 版本还不能解析该 WeChat 4.x 语音资源。"
+    )
+
+
+async def diagnose_media(kind: str, key: str) -> dict:
+    """Probe the chatlog media endpoint and return a diagnostics dict.
+
+    The dict always holds the request description (kind, key, url, base URL,
+    chatlog context) and ``ok``. After a response it also holds the status
+    code, content type and content length; failures add ``error``. For voice
+    the resource DB status is attached under ``resource_db``.
+
+    Raises:
+        HTTPException: 400 for an unsupported kind or an empty key.
+    """
+    if kind not in {"voice", "image", "video"}:
+        raise HTTPException(400, "不支持的媒体类型")
+    if not key:
+        raise HTTPException(400, "媒体 key 不能为空")
+
+    wants_thumb = kind in {"image", "video"}
+    url = _media_url(kind, key, thumb=wants_thumb)
+    result = {
+        "ok": False,
+        "kind": kind,
+        "key": key,
+        "url": url,
+        "chatlog_base_url": settings.chatlog_base_url,
+        "chatlog_context": get_chatlog_context(),
+    }
+
+    async with httpx.AsyncClient(timeout=20, trust_env=False, follow_redirects=True) as client:
+        try:
+            resp = await client.get(url)
+            failed = resp.status_code >= 400
+            result["status_code"] = resp.status_code
+            result["content_type"] = resp.headers.get("content-type", "")
+            result["content_length"] = len(resp.content or b"")
+            result["ok"] = resp.status_code < 400 and bool(resp.content)
+            if failed:
+                preview = resp.text[:500]
+                result["error"] = _download_failure_message(kind, key, resp.status_code, preview)
+                result["response_preview"] = preview
+            elif not resp.content:
+                result["error"] = "chatlog 返回了空媒体文件"
+        except Exception as exc:
+            result["error"] = f"无法连接 chatlog 媒体接口: {exc}"
+            result["exception"] = str(exc)
+
+    if kind == "voice":
+        result["resource_db"] = _read_voice_resource_status(key)
+    return result
+
+
+async def resolve_media(kind: str, key: str) -> ResolvedMedia:
+    """Download one media object from chatlog.
+
+    Images and videos are requested as thumbnails. Redirects are followed.
+
+    Raises:
+        HTTPException: 400 for an unsupported kind or an empty key; 502 with a
+            ``{"message", "diagnostics"}`` detail when the download fails or
+            returns an empty body. Download failures are chained to the
+            underlying exception.
+    """
+    if kind not in {"voice", "image", "video"}:
+        raise HTTPException(400, "不支持的媒体类型")
+    if not key:
+        raise HTTPException(400, "媒体 key 不能为空")
+
+    url = _media_url(kind, key, thumb=kind in {"image", "video"})
+
+    async def bad_gateway(message: str, log_format: str) -> HTTPException:
+        # Collect diagnostics, log them, and build the 502 for the caller to raise.
+        diagnostics = await diagnose_media(kind, key)
+        log.warning(log_format, diagnostics)
+        return HTTPException(502, {"message": message, "diagnostics": diagnostics})
+
+    async with httpx.AsyncClient(timeout=60, trust_env=False, follow_redirects=True) as client:
+        try:
+            resp = await client.get(url)
+            resp.raise_for_status()
+        except httpx.HTTPStatusError as exc:
+            message = _download_failure_message(
+                kind, key, exc.response.status_code, exc.response.text[:500]
+            )
+            raise await bad_gateway(message, "[media_resolver] media download failed: %s") from exc
+        except Exception as exc:
+            message = _download_failure_message(kind, key, None, str(exc))
+            raise await bad_gateway(message, "[media_resolver] media download exception: %s") from exc
+
+    if not resp.content:
+        # A success status with an empty body is treated as a failed download.
+        raise await bad_gateway("chatlog 返回了空媒体文件", "[media_resolver] media download empty: %s")
+
+    return ResolvedMedia(
+        bytes=resp.content,
+        content_type=resp.headers.get("content-type", "application/octet-stream"),
+        url=url,
+    )
--- a/chatlog_fastAPI/services/message_formatter.py
+++ b/chatlog_fastAPI/services/message_formatter.py
@@ -0,0 +1,253 @@
+import html
+import json
+import re
+import xml.etree.ElementTree as ET
+from typing import Any
+
+
+# Maximum number of characters clean_message_text() keeps before appending "...".
+QUOTE_CONTENT_LIMIT = 600
+
+
+def extract_contents(item: dict) -> dict:
+    """Return the first truthy ``contents``/``Contents`` value, or ``{}`` if it is not a dict."""
+    for field in ("contents", "Contents"):
+        candidate = item.get(field)
+        if candidate:
+            return candidate if isinstance(candidate, dict) else {}
+    return {}
+
+
+def clean_message_text(value: Any) -> str:
+    """Normalize a value to display text.
+
+    HTML entities are unescaped, surrounding whitespace is trimmed, whitespace
+    runs collapse to one space, and text longer than ``QUOTE_CONTENT_LIMIT``
+    is cut to that length with "..." appended. Falsy values become ``""``.
+    """
+    unescaped = html.unescape(str(value or "")).strip()
+    collapsed = re.sub(r"\s+", " ", unescaped)
+    if len(collapsed) <= QUOTE_CONTENT_LIMIT:
+        return collapsed
+    return collapsed[:QUOTE_CONTENT_LIMIT] + "..."
+
+
+def _local_name(tag: str) -> str:
+    """Return the part of ``tag`` after its last ``}``, or all of it if there is none."""
+    _, _, local = tag.rpartition("}")
+    return local
+
+
+def _safe_int(value: Any) -> int | None:
+    """Parse ``value`` as an int via its stripped string form; ``None`` if that fails.
+
+    ``None`` and ``""`` yield ``None`` without attempting a parse.
+    """
+    if value in (None, ""):
+        return None
+    try:
+        parsed = int(str(value).strip())
+    except Exception:
+        return None
+    else:
+        return parsed
+
+
+def _first(data: dict, *keys: str) -> Any:
+    """Return the first ``data[key]`` that is neither ``None`` nor ``""``, else ``None``."""
+    usable = (found for found in map(data.get, keys) if found not in (None, ""))
+    return next(usable, None)
+
+
+def _has_quote_indicator(data: dict) -> bool:
+    """Tell whether any key of ``data`` (compared as a string) names a quote field."""
+    indicators = frozenset(
+        (
+            "quote",
+            "refermsg",
+            "referMsg",
+            "refer",
+            "recordInfo",
+            "recordinfo",
+            "fromusr",
+            "fromUser",
+            "chatusr",
+            "chatUser",
+            "displayname",
+            "displayName",
+            "referContent",
+            "svrid",
+            "newmsgid",
+            "newMsgId",
+        )
+    )
+    return not indicators.isdisjoint(str(key) for key in data)
+
+
+def _decode_json(value: str) -> Any:
+    """Decode ``value`` as JSON, returning ``None`` when decoding fails for any reason."""
+    try:
+        decoded = json.loads(value)
+    except Exception:
+        decoded = None
+    return decoded
+
+
+def _xml_node_text(node: ET.Element, names: set[str]) -> str:
+    """Return the stripped text of the first element whose local name is in ``names``.
+
+    ``node`` and all its descendants are scanned in document order; elements
+    whose text is empty after stripping are skipped. Returns ``""`` if none match.
+    """
+    for element in node.iter():
+        if _local_name(element.tag) not in names:
+            continue
+        joined = "".join(element.itertext()).strip()
+        if joined:
+            return joined
+    return ""
+
+
+def _quote_from_xml(value: str) -> dict | None:
+    """Extract a quote from XML text that contains a ``refermsg`` element.
+
+    The text is HTML-unescaped first. If it does not parse as a document, it
+    is parsed again wrapped in ``<root>``. Returns the normalized quote, or
+    ``None`` when the text is not XML, has no ``refermsg`` element, or the
+    quote has no content.
+    """
+    text = html.unescape(value or "").strip()
+    if "<" not in text or ">" not in text:
+        return None
+
+    # NOTE(review): this parses message text with the stdlib ElementTree
+    # parser - confirm the input is trusted enough for that.
+    root = None
+    for candidate in (text, f"<root>{text}</root>"):
+        try:
+            root = ET.fromstring(candidate)
+            break
+        except Exception:
+            continue
+    if root is None:
+        return None
+
+    refer_node = next(
+        (node for node in root.iter() if _local_name(node.tag).lower() == "refermsg"),
+        None,
+    )
+    if refer_node is None:
+        return None
+
+    fields = {
+        "sender": _xml_node_text(refer_node, {"fromusr", "chatusr", "sender"}),
+        "sender_name": _xml_node_text(refer_node, {"displayname", "nickname", "fromnickname"}),
+        "content": _xml_node_text(refer_node, {"content", "title", "desc"}),
+        "type": _safe_int(_xml_node_text(refer_node, {"type"})),
+        "seq": _safe_int(_xml_node_text(refer_node, {"seq", "msgid", "newmsgid", "svrid"})),
+    }
+    return _normalize_quote(fields)
+
+
+def _find_quote_payload(value: Any, allow_plain_text: bool = False) -> dict | None:
+    """Search ``value`` recursively for a quoted-message payload.
+
+    Strings are tried as JSON (when they start with ``{`` or ``[``), then as
+    XML containing a ``refermsg`` element, and finally - only when
+    ``allow_plain_text`` is true - taken verbatim as the quote content.
+    Lists yield the first hit among their items. Dicts are searched under the
+    known quote keys first, then normalized directly, then searched through
+    all of their values.
+
+    Returns the normalized quote dict, or ``None`` when nothing is found.
+    """
+    if value in (None, ""):
+        return None
+
+    if isinstance(value, str):
+        text = value.strip()
+        if not text:
+            return None
+        # Only attempt JSON decoding when the text looks like an object or array.
+        decoded = _decode_json(text) if text[:1] in ("{", "[") else None
+        if decoded is not None:
+            return _find_quote_payload(decoded, allow_plain_text=allow_plain_text)
+        xml_quote = _quote_from_xml(text)
+        if xml_quote:
+            return xml_quote
+        if allow_plain_text:
+            return _normalize_quote({"content": text})
+        return None
+
+    if isinstance(value, list):
+        for item in value:
+            quote = _find_quote_payload(item, allow_plain_text=allow_plain_text)
+            if quote:
+                return quote
+        return None
+
+    if not isinstance(value, dict):
+        return None
+
+    # A value stored under an explicit quote key may be plain text.
+    for key in ("quote", "refermsg", "referMsg", "refer", "recordInfo", "recordinfo"):
+        if key in value:
+            quote = _find_quote_payload(value.get(key), allow_plain_text=True)
+            if quote:
+                return quote
+
+    # Treat the dict itself as the quote only if the caller allows it or its
+    # keys look like quote fields.
+    quote = _normalize_quote(value) if allow_plain_text or _has_quote_indicator(value) else None
+    if quote:
+        return quote
+
+    # Last resort: look inside every value, without accepting plain text.
+    for nested in value.values():
+        quote = _find_quote_payload(nested, allow_plain_text=False)
+        if quote:
+            return quote
+    return None
+
+
+def _normalize_quote(data: dict) -> dict | None:
+    """Map a raw quote dict onto the canonical quote shape.
+
+    Each output field is taken from the first usable key among its known
+    spellings. Returns ``None`` when no content can be found; otherwise a dict
+    with ``sender``, ``sender_name``, ``content``, ``type`` and ``seq``.
+    """
+    content_keys = (
+        "content",
+        "Content",
+        "text",
+        "title",
+        "desc",
+        "digest",
+        "displayContent",
+        "referContent",
+    )
+    sender_keys = ("sender", "Sender", "fromusr", "fromUser", "chatusr", "chatUser", "from")
+    name_keys = (
+        "sender_name",
+        "senderName",
+        "SenderName",
+        "displayname",
+        "displayName",
+        "nickname",
+        "nickName",
+    )
+    type_keys = ("type", "Type", "msgType", "subType")
+    seq_keys = ("seq", "Seq", "sort_seq", "msgid", "msgId", "newmsgid", "newMsgId", "svrid")
+
+    content = clean_message_text(_first(data, *content_keys))
+    if not content:
+        # A quote without content carries nothing worth showing.
+        return None
+
+    normalized = {
+        "sender": clean_message_text(_first(data, *sender_keys)),
+        "sender_name": clean_message_text(_first(data, *name_keys)),
+        "content": content,
+        "type": _safe_int(_first(data, *type_keys)),
+        "seq": _safe_int(_first(data, *seq_keys)),
+    }
+    return normalized
+
+
+def extract_quote(item: dict | None) -> dict | None:
+    """Find the quoted message attached to ``item``, if any.
+
+    Explicit quote fields on the item and in its contents are searched first
+    and may hold plain text. After them ``contents["appmsg"]`` and the
+    item's ``content``/``Content`` are searched, where plain text does not
+    count. Returns the first quote found, or ``None``.
+    """
+    if not isinstance(item, dict):
+        return None
+
+    contents = extract_contents(item)
+    # Pairs of (source, allow_plain_text), in search order.
+    candidates = [(item.get(name), True) for name in ("quote", "Quote", "refer", "recordInfo")]
+    candidates += [
+        (contents.get(name), True)
+        for name in ("quote", "refer", "refermsg", "referMsg", "recordInfo", "recordinfo")
+    ]
+    candidates += [
+        (contents.get("appmsg"), False),
+        (item.get("content"), False),
+        (item.get("Content"), False),
+    ]
+
+    for source, plain_ok in candidates:
+        found = _find_quote_payload(source, allow_plain_text=plain_ok)
+        if found:
+            return found
+    return None
+
+
+def attach_quote(item: dict) -> dict:
+    """Store the extracted quote (or ``None``) under ``item["quote"]`` and return ``item``."""
+    found = extract_quote(item)
+    item["quote"] = found
+    return item
+
+
+def quote_to_text(quote: dict | None) -> str:
+    """Render a quote as one line of text; an empty or missing quote gives ``""``.
+
+    The sender name is preferred over the sender id, with "未知" as the last
+    resort. The sequence number is shown only when it is truthy.
+    """
+    if not quote:
+        return ""
+    who = quote.get("sender_name") or quote.get("sender") or "未知"
+    seq = quote.get("seq")
+    if seq:
+        label = f"[引用消息 seq={seq}]"
+    else:
+        label = "[引用消息]"
+    body = quote.get("content") or ""
+    return f"{label} {who}: {body}".strip()
+
+
+def append_quote_text(base_text: str, item: dict) -> str:
+    """Join the stripped ``base_text`` and the item's quote line with "；".
+
+    Either part is left out when it is empty, so the result may be ``""``.
+    """
+    pieces = []
+    stripped = base_text.strip() if base_text else ""
+    if stripped:
+        pieces.append(stripped)
+    quote_line = quote_to_text(extract_quote(item))
+    if quote_line:
+        pieces.append(quote_line)
+    return "；".join(pieces)
--- a/chatlog_fastAPI/services/report_learning.py
+++ b/chatlog_fastAPI/services/report_learning.py
@@ -0,0 +1,139 @@
+import re
+import aiosqlite
+
+from services.fts import build_match_query
+
+# Default number of curated example reports included in the learning context.
+MAX_EXAMPLES = 3
+# Default per-example character cap applied by _compact().
+MAX_EXAMPLE_CHARS = 1800
+# Character budget for the whole formatted context (heading plus example blocks).
+MAX_CONTEXT_CHARS = 5200
+
+
+def _compact(text: str, limit: int = MAX_EXAMPLE_CHARS) -> str:
+    """Trim ``text``, collapse runs of 3+ newlines to two, and cap its length.
+
+    When the cleaned text exceeds ``limit`` characters it is cut at ``limit``,
+    right-stripped, and suffixed with a newline plus ``...``.
+    """
+    cleaned = (text or "").strip()
+    collapsed = re.sub(r"\n{3,}", "\n\n", cleaned)
+    if len(collapsed) > limit:
+        return collapsed[:limit].rstrip() + "\n..."
+    return collapsed
+
+
+def _format_examples(rows: list[aiosqlite.Row], purpose: str) -> str:
+    """Format curated report rows into one prompt-ready reference text.
+
+    The text starts with a heading chosen by ``purpose`` and is followed by one
+    numbered block per row. Blocks stop being added as soon as the next one
+    would push the running total past ``MAX_CONTEXT_CHARS``. Returns an empty
+    string when ``rows`` is empty.
+    """
+    if not rows:
+        return ""
+
+    headings = {
+        "topic": "历史人工修订报告参考（用于学习话题命名和分类口径）",
+        "summary": "历史人工修订报告参考（只学习结构、措辞和关注点，不得照抄历史事实）",
+    }
+    heading = headings.get(purpose, "历史人工修订报告参考")
+
+    parts = [heading]
+    used = len(heading)
+    for number, row in enumerate(rows, 1):
+        # Prefer the group's display name, then its talker, then the raw group id.
+        group_label = row["group_name"] or row["talker"] or row["group_id"]
+        block = "".join(
+            [
+                f"\n\n--- 示例 {number} ---\n",
+                f"群聊：{group_label}\n",
+                f"话题标题：{row['title']}\n",
+                f"报告内容：\n{_compact(row['content'])}",
+            ]
+        )
+        if used + len(block) > MAX_CONTEXT_CHARS:
+            break
+        parts.append(block)
+        used += len(block)
+    return "".join(parts).strip()
+
+
+async def build_report_learning_context(
+    db: aiosqlite.Connection,
+    *,
+    group_id: int | None,
+    query: str = "",
+    exclude_topic_id: int | None = None,
+    purpose: str = "summary",
+    limit: int = MAX_EXAMPLES,
+) -> str:
+    """Collect up to ``limit`` curated reports and format them as learning context.
+
+    Only ``knowledge_docs`` rows with a non-NULL ``curated_at`` are considered.
+    Candidates are gathered in three stages, each one only filling the slots the
+    previous stages left empty:
+
+    1. reports whose topic belongs to ``group_id`` (skipped when it is None);
+    2. reports matching the FTS query built from ``query`` (skipped when
+       ``build_match_query`` returns a falsy value);
+    3. any curated report.
+
+    Stages 2 and 3 sort rows of ``group_id`` first and over-fetch three times
+    the remaining slots so that already-selected documents can be skipped.
+    ``exclude_topic_id`` removes one topic from every stage. The chosen rows are
+    rendered by ``_format_examples`` for the given ``purpose``.
+    """
+    exclude_sql = ""
+    params: list[object] = []
+    if exclude_topic_id is not None:
+        exclude_sql, params = " AND t.id<>?", [exclude_topic_id]
+
+    selected: list[aiosqlite.Row] = []
+    seen_doc_ids: set[int] = set()
+
+    def _absorb(candidates) -> None:
+        # Append unseen documents in order, stopping once `limit` rows are held.
+        for candidate in candidates:
+            doc_id = int(candidate["id"])
+            if doc_id in seen_doc_ids:
+                continue
+            selected.append(candidate)
+            seen_doc_ids.add(doc_id)
+            if len(selected) >= limit:
+                break
+
+    # Stage 1: curated reports from the same group.
+    if group_id is not None:
+        async with db.execute(
+            f"""
+            SELECT k.id, k.content, k.updated_at, t.id AS topic_id, t.title, t.group_id,
+                   g.name AS group_name, g.talker
+            FROM knowledge_docs k
+            JOIN topics t ON t.id = k.topic_id
+            LEFT JOIN groups g ON g.id = t.group_id
+            WHERE k.curated_at IS NOT NULL
+              AND t.group_id=?
+              {exclude_sql}
+            ORDER BY k.curated_at DESC, k.updated_at DESC
+            LIMIT ?
+            """,
+            [group_id, *params, limit],
+        ) as cursor:
+            same_group_rows = await cursor.fetchall()
+        selected.extend(same_group_rows)
+        seen_doc_ids.update(int(found["id"]) for found in same_group_rows)
+
+    # Stage 2: curated reports matching the full-text query.
+    if len(selected) < limit:
+        open_slots = limit - len(selected)
+        fts_query = build_match_query(query or "")
+        if fts_query:
+            async with db.execute(
+                f"""
+                SELECT k.id, k.content, k.updated_at, t.id AS topic_id, t.title, t.group_id,
+                       g.name AS group_name, g.talker
+                FROM knowledge_docs k
+                JOIN topics t ON t.id = k.topic_id
+                LEFT JOIN groups g ON g.id = t.group_id
+                WHERE k.curated_at IS NOT NULL
+                  AND k.id IN (SELECT doc_id FROM knowledge_fts WHERE knowledge_fts MATCH ?)
+                  {exclude_sql}
+                ORDER BY CASE WHEN t.group_id=? THEN 0 ELSE 1 END,
+                         k.curated_at DESC,
+                         k.updated_at DESC
+                LIMIT ?
+                """,
+                [fts_query, *params, group_id or -1, open_slots * 3],
+            ) as cursor:
+                matched_rows = await cursor.fetchall()
+            _absorb(matched_rows)
+
+    # Stage 3: fall back to any curated report.
+    if len(selected) < limit:
+        open_slots = limit - len(selected)
+        async with db.execute(
+            f"""
+            SELECT k.id, k.content, k.updated_at, t.id AS topic_id, t.title, t.group_id,
+                   g.name AS group_name, g.talker
+            FROM knowledge_docs k
+            JOIN topics t ON t.id = k.topic_id
+            LEFT JOIN groups g ON g.id = t.group_id
+            WHERE k.curated_at IS NOT NULL
+              {exclude_sql}
+            ORDER BY CASE WHEN t.group_id=? THEN 0 ELSE 1 END,
+                     k.curated_at DESC,
+                     k.updated_at DESC
+            LIMIT ?
+            """,
+            [*params, group_id or -1, open_slots * 3],
+        ) as cursor:
+            fallback_rows = await cursor.fetchall()
+        _absorb(fallback_rows)
+
+    return _format_examples(selected[:limit], purpose)
--- a/chatlog_fastAPI/services/runtime_settings.py
+++ b/chatlog_fastAPI/services/runtime_settings.py
@@ -0,0 +1,45 @@
+import logging
+import aiosqlite
+from config import settings as default_settings
+from database import get_active_db_path
+
+log = logging.getLogger(__name__)
+
+_cache: dict | None = None
+
+
+def invalidate_cache() -> None:
+    """Drop the cached settings so the next ``get_ai_settings()`` call rebuilds them."""
+    global _cache
+    _cache = None
+
+
+async def get_ai_settings() -> dict:
+    """Return the effective AI settings, reading ``app_settings`` on a cache miss.
+
+    The result starts from built-in defaults and is overridden by every
+    non-empty ``app_settings`` row whose key is one of the known setting names.
+    A successful read is cached at module level until ``invalidate_cache()`` is
+    called.
+
+    If the database cannot be read, the failure is logged and the defaults are
+    returned *without* being cached, so a transient error (locked database,
+    table not created yet) does not pin empty credentials until the next
+    invalidation.
+    """
+    global _cache
+    if _cache is not None:
+        return _cache
+
+    # ai_base_url keeps its configured default (an Alibaba Cloud endpoint in
+    # OpenAI-compatible format); every other field must be set by the user on
+    # the settings page.
+    result = {
+        "ai_base_url": default_settings.ai_base_url,
+        "ai_api_key": "",
+        "ai_model": "",
+        "summary_model": "",
+        "vision_model": "",
+        "voice_model": "",
+        "topic_analysis_prompt": "",
+    }
+
+    try:
+        path = get_active_db_path()
+        async with aiosqlite.connect(path) as db:
+            db.row_factory = aiosqlite.Row
+            async with db.execute("SELECT key, value FROM app_settings") as cur:
+                rows = await cur.fetchall()
+            for row in rows:
+                # Unknown keys and empty values never override a default.
+                if row["key"] in result and row["value"]:
+                    result[row["key"]] = row["value"]
+    except Exception as e:
+        log.warning("Failed to read runtime settings: %s", e)
+        # Best effort: serve the defaults now, but retry the read next time.
+        return result
+
+    _cache = result
+    return result
--- a/chatlog_fastAPI/services/summary_engine.py
+++ b/chatlog_fastAPI/services/summary_engine.py
@@ -0,0 +1,476 @@
+"""
+售后报告生成引擎
+- 从 topic_messages 拿到所有 msg_seq
+- 通过 chatlog batch 接口批量拉回消息原文
+- 用配置的总结模型生成 Markdown 售后事件报告
+- 写入 knowledge_docs + knowledge_fts（jieba 分词）
+"""
+
+import asyncio
+import logging
+import json
+import aiosqlite
+from urllib.parse import quote
+
+from database import get_active_db_path
+from services.ai_client import get_openai_client
+from services.fts import tokenize
+from services.message_formatter import append_quote_text, extract_contents, extract_quote
+from services.report_learning import build_report_learning_context
+
+log = logging.getLogger(__name__)
+
+# Number of message seqs requested from chatlog per batch call.
+CHATLOG_BATCH_SIZE = 80
+# Upper bound, in seconds, on the summary LLM call (enforced via asyncio.timeout).
+SUMMARY_LLM_TIMEOUT_SECONDS = 300
+
+
+async def _get_client():
+    """Return whatever ``get_openai_client()`` yields (the client and its settings)."""
+    client_and_settings = await get_openai_client()
+    return client_and_settings
+
+
+def _message_line(item: dict, fallback_seq: int = 0) -> tuple[int, str] | None:
+    """Build ``(seq, "[time] sender: text")`` for a message, or None.
+
+    None is returned for an empty item and for a message whose rendered text is
+    empty. ``fallback_seq`` is used when the item carries no seq of its own.
+    """
+    if not item:
+        return None
+
+    def _pick(*keys: str):
+        # First truthy value among the given keys, else None.
+        for key in keys:
+            value = item.get(key)
+            if value:
+                return value
+        return None
+
+    seq = _pick("seq", "Seq", "sort_seq") or fallback_seq or 0
+    time_str = _pick("create_time", "time", "CreateTime") or ""
+    sender = _pick("sender_name", "senderName", "SenderName", "sender", "Sender") or ""
+    content = _message_text(item)
+    if content:
+        return int(seq), f"[{time_str}] {sender}: {content}"
+    return None
+
+
+def _message_meta(item: dict, fallback_seq: int = 0) -> dict:
+    """Extract ``seq``, ``time``, ``sender`` and ``type`` from a message item.
+
+    Each field is taken from the first truthy spelling of its key. ``seq`` falls
+    back to ``fallback_seq`` and then 0, and is converted with ``int``; ``type``
+    defaults to 1.
+    """
+
+    def _pick(*keys: str):
+        # First truthy value among the given keys, else None.
+        for key in keys:
+            value = item.get(key)
+            if value:
+                return value
+        return None
+
+    seq = int(_pick("seq", "Seq", "sort_seq") or fallback_seq or 0)
+    when = _pick("create_time", "time", "CreateTime") or ""
+    who = _pick("sender_name", "senderName", "SenderName", "sender", "Sender") or ""
+    msg_type = _pick("type", "Type") or 1
+    return {"seq": seq, "time": when, "sender": who, "type": msg_type}
+
+
+def _extract_contents(item: dict) -> dict:
+    """Module-local alias for ``message_formatter.extract_contents``."""
+    contents = extract_contents(item)
+    return contents
+
+
+def _message_text(item: dict) -> str:
+    """Render a message as prompt text, including link-card fields and its quote.
+
+    Raw content that looks like markup (starts with ``<``) is dropped when a
+    quote can be extracted from the item, since the quote is appended
+    separately. A message with a link title is rendered as a ``[链接卡片]``
+    line made of title, description, source and URL, followed by the remaining
+    content when it is not already one of those parts.
+    """
+    content = item.get("content") or item.get("Content") or ""
+    contents = _extract_contents(item)
+    looks_like_markup = isinstance(content, str) and content.lstrip().startswith("<")
+    if looks_like_markup and extract_quote(item):
+        content = ""
+
+    title = contents.get("title") or item.get("link_title") or ""
+    if not title:
+        return append_quote_text(content, item)
+
+    desc = contents.get("desc") or item.get("link_desc") or ""
+    source = contents.get("sourceName") or contents.get("source_name") or item.get("link_source") or ""
+    url = contents.get("url") or item.get("link_url") or ""
+
+    card = [f"[链接卡片] {title}"]
+    if desc:
+        card.append(desc)
+    if source:
+        card.append(f"来源：{source}")
+    if url:
+        card.append(f"URL：{url}")
+    # Avoid repeating content that is identical to one of the card parts.
+    if content and content not in card:
+        card.append(content)
+    return append_quote_text("；".join(card), item)
+
+
+def _extract_image_key(item: dict) -> str:
+    """Return the image identifier of a message with backslashes turned into ``/``.
+
+    The extracted contents are checked first (``rawmd5``, ``md5``, ``path``),
+    then the item itself (``media_key``, ``mediaKey``, ``image_path``). An empty
+    string is returned when none of them holds a truthy value.
+    """
+    contents = _extract_contents(item)
+    lookups = (
+        (contents, "rawmd5"),
+        (contents, "md5"),
+        (contents, "path"),
+        (item, "media_key"),
+        (item, "mediaKey"),
+        (item, "image_path"),
+    )
+    key = ""
+    for source, name in lookups:
+        value = source.get(name)
+        if value:
+            key = value
+            break
+    return str(key).replace("\\", "/")
+
+
+def _is_image_message(item: dict) -> bool:
+    """Tell whether the message's ``type``/``Type`` field converts to the integer 3.
+
+    Any failure while reading or converting the field counts as "not an image".
+    """
+    try:
+        type_code = int(item.get("type") or item.get("Type") or 0)
+    except Exception:
+        return False
+    return type_code == 3
+
+
+def _media_path(kind: str, key: str) -> str:
+    """Build ``/<kind>/<key>`` with every ``/``-separated key segment URL-quoted."""
+    encoded_segments = [quote(segment) for segment in key.split("/")]
+    prefix = f"/{kind}/"
+    return prefix + "/".join(encoded_segments)
+
+
+def _image_url(key: str) -> str:
+    """Return the image media path for ``key`` with the ``?thumb=1`` query appended."""
+    base = _media_path("image", key)
+    return f"{base}?thumb=1"
+
+
+def _collect_image_evidence(messages: list[dict]) -> tuple[list[dict], list[dict]]:
+    """Split the image messages into displayable images and failures.
+
+    Non-image messages are ignored. Every entry carries the message metadata;
+    displayable images add ``key`` and ``url``, while images without any file
+    identifier are reported with an empty ``url`` and a ``reason``.
+    """
+    images: list[dict] = []
+    failures: list[dict] = []
+
+    for item in filter(_is_image_message, messages):
+        meta = _message_meta(item)
+        key = _extract_image_key(item)
+        if key:
+            images.append({**meta, "key": key, "url": _image_url(key)})
+        else:
+            failures.append({**meta, "url": "", "reason": "图片无法展示，缺少图片文件标识"})
+
+    return images, failures
+
+
+def _image_evidence_context(images: list[dict], failures: list[dict]) -> str:
+    """Describe the collected image evidence as plain text for the LLM prompt.
+
+    Displayable images and failed images each get their own titled list; a
+    section is omitted when its list is empty, so the result may be ``""``.
+    """
+    lines: list[str] = []
+    if images:
+        lines.append("系统将作为原始材料插入报告的现场图片：")
+        lines.extend(
+            f"- [{shown['time']}] {shown['sender']} seq={shown['seq']} url={shown['url']}"
+            for shown in images
+        )
+    if failures:
+        lines.append("无法展示的图片清单：")
+        for failed in failures:
+            link = ""
+            if failed.get("url"):
+                link = f"，查看图片：{failed['url']}"
+            lines.append(
+                f"- [{failed['time']}] {failed['sender']} seq={failed['seq']}：{failed['reason']}{link}"
+            )
+    return "\n".join(lines)
+
+
+def _image_success_markdown(images: list[dict]) -> str:
+    """Render displayable images as a ``### 现场图片`` Markdown section.
+
+    Each image contributes an image line, a source line and a blank separator.
+    Returns ``""`` when there are no images.
+    """
+    if not images:
+        return ""
+    blocks = ["### 现场图片"]
+    for img in images:
+        when, who = img["time"], img["sender"]
+        alt = f"现场图片 - {when} {who}".strip()
+        blocks += [
+            f"![{alt}]({img['url']})",
+            f"来源：{when} {who} seq={img['seq']}",
+            "",
+        ]
+    return "\n".join(blocks).strip()
+
+
+def _image_failure_markdown(failures: list[dict]) -> str:
+    """Render images that cannot be shown as a ``## 图片展示提示`` bullet list.
+
+    A "view image" link is appended to an entry only when it has a truthy
+    ``url``. Returns ``""`` when there are no failures.
+    """
+    if not failures:
+        return ""
+
+    def _bullet(failed: dict) -> str:
+        link = f"，查看图片：{failed['url']}" if failed.get("url") else ""
+        return f"- [{failed['time']}] {failed['sender']} seq={failed['seq']}：{failed['reason']}{link}"
+
+    return "\n".join(["## 图片展示提示", *(_bullet(failed) for failed in failures)])
+
+
+def _insert_after_heading(content: str, heading: str, addition: str) -> str:
+    """Insert ``addition`` into Markdown ``content`` right below ``heading``.
+
+    Placement rules, in order:
+    - the first line equal to ``heading`` (ignoring surrounding whitespace)
+      exists: ``addition`` goes directly beneath it;
+    - otherwise, a line starting with ``# `` exists: ``heading`` and
+      ``addition`` go beneath the first such line;
+    - otherwise both are put in front of the whole content.
+
+    An empty ``addition`` leaves ``content`` untouched.
+    """
+    if not addition:
+        return content
+    lines = content.splitlines()
+
+    def _splice(index: int, inserted: list[str]) -> str:
+        # Put `inserted` right after lines[index] and re-join the document.
+        return "\n".join(lines[: index + 1] + inserted + lines[index + 1 :]).strip()
+
+    heading_at = next((i for i, line in enumerate(lines) if line.strip() == heading), None)
+    if heading_at is not None:
+        return _splice(heading_at, ["", addition, ""])
+
+    title_at = next((i for i, line in enumerate(lines) if line.startswith("# ")), None)
+    if title_at is not None:
+        return _splice(title_at, ["", heading, "", addition, ""])
+
+    return f"{heading}\n\n{addition}\n\n{content}".strip()
+
+
+def _merge_image_sections(content: str, successes: list[dict], failures: list[dict]) -> str:
+    """Add the image sections to a generated report.
+
+    Displayable images are inserted below the ``## 关键聊天依据`` heading; the
+    failure notice, when there is one, is appended at the end of the report.
+    """
+    success_md = _image_success_markdown(successes)
+    merged = _insert_after_heading(content, "## 关键聊天依据", success_md)
+    failure_md = _image_failure_markdown(failures)
+    if not failure_md:
+        return merged.strip()
+    return f"{merged.rstrip()}\n\n{failure_md}".strip()
+
+
+def _line_from_snapshot(raw: str | None, fallback_seq: int) -> str | None:
+    """Render a stored JSON message snapshot as one chat line, or None.
+
+    None is returned when the snapshot is missing, is not valid JSON, does not
+    decode to a JSON object, or produces no text. ``fallback_seq`` is passed on
+    for snapshots that carry no seq of their own.
+    """
+    if not raw:
+        return None
+    try:
+        item = json.loads(raw)
+    except (TypeError, ValueError, RecursionError):
+        return None
+    # Valid JSON that is not an object (a list, string, number...) has no
+    # message fields to read; without this guard the `.get` lookups below
+    # would raise AttributeError.
+    if not isinstance(item, dict):
+        return None
+    line = _message_line(item, fallback_seq)
+    return line[1] if line else None
+
+# Report skeleton and output rules appended to the end of the summary prompt.
+# It is rendered with str.format(title=...), so {title} is the only placeholder
+# and any other literal brace added here would have to be doubled.
+MARKDOWN_TEMPLATE = """\
+# {title}
+
+请按聊天记录中的实际内容生成一份【具体售后问题点】报告，不要照抄固定字段，也不要输出占位文案。
+
+必须围绕以下结构组织，按内容决定是否保留章节，不要输出空章节：
+## 问题摘要
+## 关键聊天依据
+## 当前处理状态
+## 是否解决
+## AI 建议/解决方法
+
+输出规则：
+- 只写聊天记录中能直接识别或合理归纳的信息。
+- 没有识别到的客户、门店、联系人、合同、订单、物流、日期、价格、原因等信息直接省略。
+- 不要写“未从聊天记录中识别”“待补充”“未知”“无”等占位内容。
+- “是否解决”只能从聊天记录判断，取值限定为：已解决、未解决、处理中、待确认。
+- 如果聊天内容不足以形成明确售后问题点，仍然按当前话题内容整理，但用更保守的“待确认”结论。
+- “AI 建议/解决方法”必须放在文档下方，并附注：注：此方法由 AI 生成，仅供参考，请以人工复核和现场实际情况为准。
+- 只输出 Markdown 报告，不要输出这些规则本身。
+"""
+
+
+async def _mark_summarize_failed(topic_id: int, task_id: int | None, error: str):
+    """Best-effort: flag the topic (and its task, if any) as failed.
+
+    Sets the topic status to ``error`` and, when ``task_id`` is given, stores
+    the error message and a zero progress on the ``ai_tasks`` row. Database
+    errors while doing so are logged and swallowed.
+    """
+    path = get_active_db_path()
+    message = error or "AI 报告生成失败"
+    try:
+        async with aiosqlite.connect(path) as db:
+            topic_args = (topic_id,)
+            await db.execute(
+                "UPDATE topics SET status = 'error', updated_at = CURRENT_TIMESTAMP WHERE id = ?",
+                topic_args,
+            )
+            if task_id is not None:
+                progress_json = json.dumps({"processed": 0, "total": 1})
+                await db.execute(
+                    """
+                    UPDATE ai_tasks
+                    SET status='error', progress=?, error=?, updated_at=CURRENT_TIMESTAMP
+                    WHERE id=?
+                    """,
+                    (progress_json, message, task_id),
+                )
+            await db.commit()
+    except Exception as exc:
+        log.warning(f"[summarize] 标记失败状态失败 topic={topic_id} task={task_id}: {exc}")
+
+
+async def _run_summarize_impl(topic_id: int, topic: dict, task_id: int | None = None):
+    """
+    Generate or update the Markdown after-sales incident report for a topic.
+    Called by POST /api/topics/{id}/summarize (manual trigger).
+
+    topic: read for its "title" (required) and "group_id" (optional) entries.
+    task_id: when provided, the status and progress of that ai_tasks row are
+    updated along the way.
+
+    Flow: mark the topic as processing, load its message seqs, fetch the
+    message texts from chatlog (falling back to the stored JSON snapshots),
+    collect image evidence, build the prompt, call the summary model, then
+    store the report in knowledge_docs / knowledge_fts and mark the topic as
+    completed. Every handled failure sets the topic status to 'error', reports
+    the error on the task and returns without raising.
+    """
+    path = get_active_db_path()
+
+    async def _update_task(status: str, processed: int = 0, total: int = 1, error: str = ""):
+        """Helper: update the status and progress of the ai_tasks row (no-op without task_id)."""
+        if task_id is None:
+            return
+        try:
+            # Uses its own short-lived connection, separate from the main `db` below.
+            async with aiosqlite.connect(path) as _db:
+                _db.row_factory = aiosqlite.Row
+                await _db.execute(
+                    """
+                    UPDATE ai_tasks
+                    SET status=?, progress=?, error=?, updated_at=CURRENT_TIMESTAMP
+                    WHERE id=?
+                    """,
+                    (status, json.dumps({"processed": processed, "total": total}), error or None, task_id)
+                )
+                await _db.commit()
+        except Exception as e:
+            # Task bookkeeping is best effort; it must not abort the summary itself.
+            log.warning(f"[summarize] 更新 task {task_id} 失败: {e}")
+    # NOTE(review): `path` was already assigned above; this second lookup looks redundant.
+    path = get_active_db_path()
+    async with aiosqlite.connect(path) as db:
+        db.row_factory = aiosqlite.Row
+
+        # Mark the topic as processing
+        await db.execute("UPDATE topics SET status = 'processing', updated_at = CURRENT_TIMESTAMP WHERE id = ?", (topic_id,))
+        await db.commit()
+        await _update_task("running", 0, 1)
+
+        # 1. Load every message seq of the topic together with the group talker
+        async with db.execute(
+            """
+            SELECT tm.msg_seq, tm.talker, tm.message_json
+            FROM topic_messages tm
+            WHERE tm.topic_id = ?
+            ORDER BY tm.msg_seq
+            """,
+            (topic_id,),
+        ) as cur:
+            msg_rows = await cur.fetchall()
+
+        if not msg_rows:
+            log.warning(f"[summarize] topic={topic_id} 没有消息，跳过")
+            error = "该话题没有关联消息，无法生成 AI 报告"
+            await db.execute("UPDATE topics SET status = 'error', updated_at = CURRENT_TIMESTAMP WHERE id = ?", (topic_id,))
+            await db.commit()
+            await _update_task("error", 0, 1, error)
+            return
+
+        seqs = [r["msg_seq"] for r in msg_rows]
+        # topic_messages.talker stores the group ID (chatlog calls it "talker")
+        group_talker = msg_rows[0]["talker"]
+
+        # 2. Fetch the original messages from chatlog, CHATLOG_BATCH_SIZE seqs per request
+        from services.chatlog_client import chatlog_client
+        messages_text: list[str] = []
+        message_items: dict[int, dict] = {}
+
+        fetched_lines: dict[int, str] = {}
+        for i in range(0, len(seqs), CHATLOG_BATCH_SIZE):
+            chunk_seqs = seqs[i: i + CHATLOG_BATCH_SIZE]
+            try:
+                result = await chatlog_client.get_messages_batch(group_talker, chunk_seqs)
+                for m in result.get("items", []):
+                    meta = _message_meta(m)
+                    if meta["seq"]:
+                        message_items[meta["seq"]] = m
+                    line = _message_line(m)
+                    if line:
+                        fetched_lines[line[0]] = line[1]
+            except Exception as e:
+                # A failed batch is only logged; its messages fall back to snapshots below.
+                log.error(f"[summarize] batch 拉取失败 topic={topic_id}: {e}")
+
+        # Assemble the chat lines in topic order, preferring freshly fetched text
+        # and falling back to the JSON snapshot stored in topic_messages.
+        for r in msg_rows:
+            seq = int(r["msg_seq"])
+            if seq in fetched_lines:
+                messages_text.append(fetched_lines[seq])
+                continue
+            snap_raw = r["message_json"] if "message_json" in r.keys() else None
+            if seq not in message_items and snap_raw:
+                try:
+                    snap_item = json.loads(snap_raw)
+                    if isinstance(snap_item, dict):
+                        message_items[seq] = snap_item
+                except Exception:
+                    pass
+            snap_line = _line_from_snapshot(snap_raw, seq)
+            if snap_line:
+                messages_text.append(snap_line)
+
+        image_successes, image_failures = _collect_image_evidence(
+            [message_items[seq] for seq in seqs if seq in message_items]
+        )
+
+        if not messages_text and not image_successes and not image_failures:
+            log.warning(f"[summarize] topic={topic_id} 从 chatlog 获取到 0 条有效消息")
+            error = "未能从 chatlog 获取到有效消息，无法生成 AI 报告"
+            await db.execute("UPDATE topics SET status = 'error', updated_at = CURRENT_TIMESTAMP WHERE id = ?", (topic_id,))
+            await db.commit()
+            await _update_task("error", 0, 1, error)
+            return
+
+        chat_text = "\n".join(messages_text) if messages_text else "无文字消息，仅有图片或媒体证据。"
+        image_context = _image_evidence_context(image_successes, image_failures)
+        # Only the title and the first 2000 characters of chat text feed the example lookup.
+        learning_context = await build_report_learning_context(
+            db,
+            group_id=topic.get("group_id"),
+            query=f"{topic.get('title', '')}\n{chat_text[:2000]}",
+            exclude_topic_id=topic_id,
+            purpose="summary",
+        )
+
+        # 3. Build the prompt
+        template_filled = MARKDOWN_TEMPLATE.format(title=topic["title"])
+        prompt = (
+            f"售后问题点话题：{topic['title']}\n\n"
+            f"以下是该售后问题点关联的完整微信群聊天记录（按时间顺序）：\n\n"
+            f"{chat_text}\n\n"
+            f"以下是系统将插入报告的现场图片信息（如有）：\n\n{image_context or '无现场图片。'}\n\n"
+            "请根据上述聊天记录输出一份 Markdown 报告。\n"
+            "报告要求：\n"
+            "1. 保持售后问题点口径，优先提炼问题现象、涉及产品/部件、现场材料、处理过程和处理结果。\n"
+            "2. 只能使用聊天记录中能直接识别或合理归纳的信息，不要编造客户、合同、订单、物流、日期、价格、原因或处理结果。\n"
+            "3. 不要输出空字段、空项目、空章节、空表格；某个章节没有有效内容时整段省略。\n"
+            "4. 「是否解决」必须写在文档中，并使用：已解决 / 未解决 / 处理中 / 待确认。\n"
+            "5. 「AI 建议/解决方法」必须写在文档中，且在段末附上固定注释：注：此方法由 AI 生成，仅供参考，请以人工复核和现场实际情况为准。\n"
+            "6. 如果聊天内容不足以形成明确售后问题点，也不要编造结论；只按聊天中已有事实给出保守的待确认判断。\n"
+            "7. 图片会由系统作为「现场图片」原始材料插入「关键聊天依据」；你不要猜测图片内容，也不要自行输出图片 Markdown 或图片说明。\n"
+            "8. 如果聊天文字中有人描述图片内容，可以引用这些文字；但不要根据图片本身编造故障细节。\n"
+            "9. 聊天记录中的「[引用消息]」属于当前回复的上下文证据，可以用于理解被回复的问题和处理过程。\n"
+            "10. 只输出 Markdown 报告，不要输出模板说明或额外解释。\n\n"
+            f"以下是本企业报告库中人工修订过的历史报告示例（如有）。请只学习它们的栏目结构、措辞风格、问题关注点和结论表达方式；不得复制历史事实、客户名、设备状态或处理结果到当前报告：\n\n{learning_context or '暂无可学习的人工修订报告。'}\n\n"
+            f"{template_filled}"
+        )
+
+        # 4. Call the LLM
+        try:
+            _client, _ai = await _get_client()
+            async with asyncio.timeout(SUMMARY_LLM_TIMEOUT_SECONDS):
+                resp = await _client.chat.completions.create(
+                    model=_ai["summary_model"],
+                    messages=[
+                        {
+                            "role": "system",
+                            "content": (
+                                "你是资深售后运营与设备服务工程师，负责根据微信群聊天记录整理具体售后问题点报告。"
+                                "你必须忠实依据聊天记录，只输出已识别到的有效信息，缺失信息直接省略，不得编造。"
+                                "你要在文档中明确给出是否解决结论，并给出 AI 建议/解决方法和免责声明。只输出 Markdown 报告，不要有任何额外说明。"
+                            ),
+                        },
+                        {"role": "user", "content": prompt},
+                    ],
+                    temperature=0.2,
+                )
+            content = resp.choices[0].message.content.strip()
+            # Images are merged in by code; the prompt tells the model not to emit them.
+            content = _merge_image_sections(content, image_successes, image_failures)
+        except TimeoutError:
+            error = "AI 报告生成超时，请检查模型/API或稍后重试"
+            log.error(f"[summarize] LLM 调用超时 topic={topic_id}")
+            await db.execute("UPDATE topics SET status = 'error', updated_at = CURRENT_TIMESTAMP WHERE id = ?", (topic_id,))
+            await db.commit()
+            await _update_task("error", 0, 1, error)
+            return
+        except Exception as e:
+            log.error(f"[summarize] LLM 调用失败 topic={topic_id}: {e}", exc_info=True)
+            await db.execute("UPDATE topics SET status = 'error', updated_at = CURRENT_TIMESTAMP WHERE id = ?", (topic_id,))
+            await db.commit()
+            await _update_task("error", 0, 1, str(e) or "LLM 调用失败")
+            return
+
+        # 5. Write the report into knowledge_docs (update the existing row or insert a new one)
+        async with db.execute(
+            "SELECT id FROM knowledge_docs WHERE topic_id = ?", (topic_id,)
+        ) as cur:
+            existing = await cur.fetchone()
+
+        if existing:
+            doc_id = existing["id"]
+            await db.execute(
+                "UPDATE knowledge_docs SET content = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?",
+                (content, doc_id),
+            )
+        else:
+            await db.execute(
+                "INSERT INTO knowledge_docs (topic_id, content) VALUES (?, ?)",
+                (topic_id, content),
+            )
+            async with db.execute("SELECT last_insert_rowid() AS id") as cur:
+                doc_id = (await cur.fetchone())["id"]
+
+        # 6. Refresh the FTS entry (delete first, then insert the tokenized text)
+        await db.execute("DELETE FROM knowledge_fts WHERE doc_id = ?", (doc_id,))
+        await db.execute(
+            "INSERT INTO knowledge_fts (doc_id, title, content) VALUES (?, ?, ?)",
+            (doc_id, tokenize(topic["title"]), tokenize(content)),
+        )
+
+        # The document, its FTS entry and the topic status are committed together.
+        await db.execute("UPDATE topics SET status = 'completed', updated_at = CURRENT_TIMESTAMP WHERE id = ?", (topic_id,))
+        await db.commit()
+        await _update_task("done", 1, 1)
+        log.info(f"[summarize] topic={topic_id} doc={doc_id} 生成完成（{len(content)} 字符）")
+
+
+async def run_summarize(topic_id: int, topic: dict, task_id: int | None = None):
+    """Run the summary job and convert any uncaught exception into a failed state.
+
+    Exceptions escaping ``_run_summarize_impl`` are logged with their traceback
+    and recorded on the topic/task through ``_mark_summarize_failed``; they are
+    not re-raised.
+    """
+    try:
+        await _run_summarize_impl(topic_id, topic, task_id)
+        return
+    except Exception as exc:
+        # Some exceptions stringify to ""; fall back to the class name then.
+        error = str(exc) or exc.__class__.__name__
+        log.error(f"[summarize] 未捕获异常 topic={topic_id}: {error}", exc_info=True)
+        await _mark_summarize_failed(topic_id, task_id, error)
--- a/chatlog_fastAPI/services/topic_engine.py
+++ b/chatlog_fastAPI/services/topic_engine.py