import re

import aiosqlite

from services.fts import build_match_query

MAX_EXAMPLES = 3
MAX_EXAMPLE_CHARS = 1800
MAX_CONTEXT_CHARS = 5200

# Shared SELECT/JOIN prefix for every lookup below. It restricts results to
# knowledge docs whose ``curated_at`` is set; callers append further
# ``AND ...`` conditions, an ORDER BY and a LIMIT.
_CURATED_DOCS_SELECT = """
    SELECT k.id, k.content, k.updated_at,
           t.id AS topic_id, t.title, t.group_id,
           g.name AS group_name, g.talker
    FROM knowledge_docs k
    JOIN topics t ON t.id = k.topic_id
    LEFT JOIN groups g ON g.id = t.group_id
    WHERE k.curated_at IS NOT NULL
"""

# Ordering used by the cross-group lookups: rows of the requested group first,
# then most recently curated / updated.
_SAME_GROUP_FIRST_ORDER = """
    ORDER BY CASE WHEN t.group_id=? THEN 0 ELSE 1 END,
             k.curated_at DESC, k.updated_at DESC
    LIMIT ?
"""


def _compact(text: str, limit: int = MAX_EXAMPLE_CHARS) -> str:
    """Strip *text*, collapse runs of 3+ newlines to 2, and truncate it.

    ``None`` or empty input yields ``""``. When the cleaned text is longer
    than *limit* characters it is cut to *limit*, right-stripped, and suffixed
    with ``"\\n..."`` (so the result may exceed *limit* by the marker length).
    """
    text = re.sub(r"\n{3,}", "\n\n", (text or "").strip())
    if len(text) <= limit:
        return text
    return text[:limit].rstrip() + "\n..."


def _format_examples(rows: list[aiosqlite.Row], purpose: str) -> str:
    """Render *rows* as a headed list of numbered example blocks.

    *purpose* selects the heading (``"topic"`` or ``"summary"``; anything else
    gets a generic heading). Each row must expose ``group_name``, ``talker``,
    ``group_id``, ``title`` and ``content``. Blocks are appended in order
    until the next one would push the total past ``MAX_CONTEXT_CHARS``.

    Returns ``""`` when *rows* is empty.
    """
    if not rows:
        return ""
    heading = {
        "topic": "历史人工修订报告参考(用于学习话题命名和分类口径)",
        "summary": "历史人工修订报告参考(只学习结构、措辞和关注点,不得照抄历史事实)",
    }.get(purpose, "历史人工修订报告参考")
    parts = [heading]
    total = len(heading)
    for idx, row in enumerate(rows, 1):
        block = (
            f"\n\n--- 示例 {idx} ---\n"
            f"群聊:{row['group_name'] or row['talker'] or row['group_id']}\n"
            f"话题标题:{row['title']}\n"
            f"报告内容:\n{_compact(row['content'])}"
        )
        # Stop at the first block that does not fit; later (lower-priority)
        # rows are not considered even if they are shorter.
        if total + len(block) > MAX_CONTEXT_CHARS:
            break
        parts.append(block)
        total += len(block)
    return "".join(parts).strip()


def _take_unseen(
    rows: list[aiosqlite.Row],
    selected: list[aiosqlite.Row],
    seen_doc_ids: set[int],
    limit: int,
) -> None:
    """Append rows not yet in *seen_doc_ids* to *selected*, up to *limit*.

    Both *selected* and *seen_doc_ids* are updated in place; row order is
    preserved.
    """
    for row in rows:
        if len(selected) >= limit:
            break
        doc_id = int(row["id"])
        if doc_id in seen_doc_ids:
            continue
        selected.append(row)
        seen_doc_ids.add(doc_id)


async def build_report_learning_context(
    db: aiosqlite.Connection,
    *,
    group_id: int | None,
    query: str = "",
    exclude_topic_id: int | None = None,
    purpose: str = "summary",
    limit: int = MAX_EXAMPLES,
) -> str:
    """Build a prompt snippet of curated reports to use as examples.

    Up to *limit* curated knowledge docs are collected in three passes, each
    pass only filling the slots the previous ones left empty:

    1. the most recently curated docs of *group_id* (skipped when it is
       ``None``);
    2. docs matching *query* in ``knowledge_fts`` (skipped when
       ``build_match_query`` returns a falsy value);
    3. any curated doc.

    Passes 2 and 3 rank docs of *group_id* ahead of other groups. The topic
    *exclude_topic_id*, when given, is left out of every pass.

    Args:
        db: Open aiosqlite connection. Rows are read by column name, so the
            connection is presumably configured with a name-addressable row
            factory — confirm at the call site.
        group_id: Group whose docs are preferred, or ``None`` for no
            preference.
        query: Free text handed to ``build_match_query`` for pass 2.
        exclude_topic_id: Topic id to exclude, or ``None``.
        purpose: Heading selector passed to ``_format_examples``.
        limit: Maximum number of examples; values ``<= 0`` yield ``""``.

    Returns:
        The formatted examples, or ``""`` when nothing was selected.
    """
    # SQLite treats a negative LIMIT as "no upper bound", and a negative slice
    # bound would then drop rows from the end, so reject non-positive limits.
    if limit <= 0:
        return ""

    exclude_sql = ""
    exclude_params: list[object] = []
    if exclude_topic_id is not None:
        exclude_sql = " AND t.id<>?"
        exclude_params.append(exclude_topic_id)

    # Sentinel bound into the ORDER BY when no group is preferred. An explicit
    # None check keeps group_id=0 consistent with the pass-1 condition.
    preferred_group = group_id if group_id is not None else -1

    selected: list[aiosqlite.Row] = []
    seen_doc_ids: set[int] = set()

    # Pass 1: newest curated docs of the requested group.
    if group_id is not None:
        async with db.execute(
            f"""
            {_CURATED_DOCS_SELECT}
              AND t.group_id=?
              {exclude_sql}
            ORDER BY k.curated_at DESC, k.updated_at DESC
            LIMIT ?
            """,
            [group_id, *exclude_params, limit],
        ) as cur:
            _take_unseen(await cur.fetchall(), selected, seen_doc_ids, limit)

    # Pass 2: full-text matches for the query.
    if len(selected) < limit:
        fts_query = build_match_query(query or "")
        if fts_query:
            # Every already-selected doc may reappear at the top of this
            # result, so fetch enough rows to still fill the open slots.
            fetch_count = (limit - len(selected)) + len(seen_doc_ids)
            async with db.execute(
                f"""
                {_CURATED_DOCS_SELECT}
                  AND k.id IN (
                      SELECT doc_id FROM knowledge_fts WHERE knowledge_fts MATCH ?
                  )
                  {exclude_sql}
                {_SAME_GROUP_FIRST_ORDER}
                """,
                [fts_query, *exclude_params, preferred_group, fetch_count],
            ) as cur:
                _take_unseen(await cur.fetchall(), selected, seen_doc_ids, limit)

    # Pass 3: any curated doc.
    if len(selected) < limit:
        fetch_count = (limit - len(selected)) + len(seen_doc_ids)
        async with db.execute(
            f"""
            {_CURATED_DOCS_SELECT}
              {exclude_sql}
            {_SAME_GROUP_FIRST_ORDER}
            """,
            [*exclude_params, preferred_group, fetch_count],
        ) as cur:
            _take_unseen(await cur.fetchall(), selected, seen_doc_ids, limit)

    return _format_examples(selected[:limit], purpose)