Initial upload for secondary development
This commit is contained in:
139
chatlog_fastAPI/services/report_learning.py
Normal file
139
chatlog_fastAPI/services/report_learning.py
Normal file
@@ -0,0 +1,139 @@
|
||||
import re
|
||||
import aiosqlite
|
||||
|
||||
from services.fts import build_match_query
|
||||
|
||||
# Default value of the ``limit`` argument of build_report_learning_context,
# i.e. how many historical reports are offered as examples.
MAX_EXAMPLES: int = 3

# Default per-example character cap used by _compact before its truncation
# marker is appended.
MAX_EXAMPLE_CHARS: int = 1800

# Character budget that _format_examples enforces across the heading and all
# example blocks.
MAX_CONTEXT_CHARS: int = 5200
|
||||
|
||||
|
||||
def _compact(text: str, limit: int = MAX_EXAMPLE_CHARS) -> str:
    """Normalise *text* and cut it down to roughly *limit* characters.

    Leading/trailing whitespace is stripped and any run of three or more
    newlines collapses to exactly two. A falsy *text* (``None`` or ``""``)
    yields an empty string.

    When the cleaned text is longer than *limit*, it is sliced to *limit*
    characters, right-stripped, and a newline followed by ``...`` is appended
    as a truncation marker, so the result can be up to four characters longer
    than *limit*.
    """
    cleaned = (text or "").strip()
    cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
    if len(cleaned) > limit:
        return cleaned[:limit].rstrip() + "\n..."
    return cleaned
|
||||
|
||||
|
||||
def _format_examples(rows: list[aiosqlite.Row], purpose: str) -> str:
    """Render *rows* as a heading followed by numbered example blocks.

    Args:
        rows: Result rows exposing ``group_name``, ``talker``, ``group_id``,
            ``title`` and ``content`` by key.
        purpose: Selects the heading; ``"topic"`` and ``"summary"`` have
            dedicated headings, anything else gets the generic one.

    Returns:
        ``""`` when *rows* is empty. Otherwise the heading plus as many
        example blocks (in order) as fit within ``MAX_CONTEXT_CHARS``;
        formatting stops at the first block that would exceed the budget.
    """
    if not rows:
        return ""

    headings = {
        "topic": "历史人工修订报告参考(用于学习话题命名和分类口径)",
        "summary": "历史人工修订报告参考(只学习结构、措辞和关注点,不得照抄历史事实)",
    }
    title_line = headings.get(purpose, "历史人工修订报告参考")

    sections = [title_line]
    # Track what is left of the budget rather than a running total.
    budget_left = MAX_CONTEXT_CHARS - len(title_line)

    for number, row in enumerate(rows, start=1):
        # Prefer the readable group name, then the talker, then the raw id.
        group_label = row["group_name"] or row["talker"] or row["group_id"]
        section = "".join(
            (
                f"\n\n--- 示例 {number} ---\n",
                f"群聊:{group_label}\n",
                f"话题标题:{row['title']}\n",
                f"报告内容:\n{_compact(row['content'])}",
            )
        )
        if len(section) > budget_left:
            break
        sections.append(section)
        budget_left -= len(section)

    return "".join(sections).strip()
|
||||
|
||||
|
||||
# Shared SELECT/JOIN prefix for every curated-report lookup. Callers append
# further AND-conditions followed by ORDER BY / LIMIT clauses.
_CURATED_DOCS_SQL = """
    SELECT k.id, k.content, k.updated_at, t.id AS topic_id, t.title, t.group_id,
           g.name AS group_name, g.talker
    FROM knowledge_docs k
    JOIN topics t ON t.id = k.topic_id
    LEFT JOIN groups g ON g.id = t.group_id
    WHERE k.curated_at IS NOT NULL
"""

# Ordering used by the fallback phases: rows of the preferred group first,
# then most recently curated / updated. Binds two parameters: group id, limit.
_PREFERRED_GROUP_ORDER_SQL = """
    ORDER BY CASE WHEN t.group_id=? THEN 0 ELSE 1 END,
             k.curated_at DESC,
             k.updated_at DESC
    LIMIT ?
"""


async def _append_unseen_rows(
    db: aiosqlite.Connection,
    sql: str,
    params: list[object],
    selected: list[aiosqlite.Row],
    seen_doc_ids: set[int],
    limit: int,
) -> None:
    """Run *sql* and append not-yet-selected rows until *selected* holds *limit* rows."""
    async with db.execute(sql, params) as cur:
        rows = await cur.fetchall()
        for row in rows:
            if len(selected) >= limit:
                break
            doc_id = int(row["id"])
            if doc_id in seen_doc_ids:
                continue
            selected.append(row)
            seen_doc_ids.add(doc_id)


async def build_report_learning_context(
    db: aiosqlite.Connection,
    *,
    group_id: int | None,
    query: str = "",
    exclude_topic_id: int | None = None,
    purpose: str = "summary",
    limit: int = MAX_EXAMPLES,
) -> str:
    """Build a prompt snippet of curated historical reports to learn from.

    Only ``knowledge_docs`` rows with a non-NULL ``curated_at`` are considered.
    Up to *limit* documents are gathered in three phases, each phase running
    only while fewer than *limit* documents have been collected:

    1. Most recently curated documents of *group_id* (skipped when
       *group_id* is ``None``).
    2. Documents matching ``build_match_query(query)`` in ``knowledge_fts``
       (skipped when that helper returns a falsy value), same group first.
    3. Any curated document, same group first, then most recent.

    Args:
        db: Open aiosqlite connection.
        group_id: Group whose reports are preferred, or ``None`` for no
            preference.
        query: Free text handed to ``build_match_query`` for the FTS phase.
        exclude_topic_id: Topic whose documents must never be returned.
        purpose: Passed through to ``_format_examples`` to choose the heading.
        limit: Maximum number of examples; values ``<= 0`` yield ``""``.

    Returns:
        The text produced by ``_format_examples`` for the selected rows, or
        ``""`` when nothing was selected.
    """
    # SQLite treats a negative LIMIT as "no limit", so never let a
    # non-positive value reach the queries.
    if limit <= 0:
        return ""

    exclude_sql = ""
    exclude_params: list[object] = []
    if exclude_topic_id is not None:
        exclude_sql = " AND t.id<>?"
        exclude_params.append(exclude_topic_id)

    # -1 stands in for "no preferred group" in the ORDER BY; test against
    # None explicitly so that a group id of 0 is still honoured.
    preferred_group = -1 if group_id is None else group_id

    selected: list[aiosqlite.Row] = []
    seen_doc_ids: set[int] = set()

    # Phase 1: newest curated reports from the same group.
    if group_id is not None:
        await _append_unseen_rows(
            db,
            f"{_CURATED_DOCS_SQL} AND t.group_id=?{exclude_sql}"
            " ORDER BY k.curated_at DESC, k.updated_at DESC LIMIT ?",
            [group_id, *exclude_params, limit],
            selected,
            seen_doc_ids,
            limit,
        )

    # Phase 2: full-text matches, same group first.
    if len(selected) < limit:
        fts_query = build_match_query(query or "")
        if fts_query:
            # Every already-selected doc may reappear at the top of this
            # result, so fetch enough rows to still find `remaining` new ones.
            fetch_limit = (limit - len(selected)) + len(seen_doc_ids)
            await _append_unseen_rows(
                db,
                f"{_CURATED_DOCS_SQL}"
                " AND k.id IN (SELECT doc_id FROM knowledge_fts"
                " WHERE knowledge_fts MATCH ?)"
                f"{exclude_sql}{_PREFERRED_GROUP_ORDER_SQL}",
                [fts_query, *exclude_params, preferred_group, fetch_limit],
                selected,
                seen_doc_ids,
                limit,
            )

    # Phase 3: any curated report, same group first, then most recent.
    if len(selected) < limit:
        fetch_limit = (limit - len(selected)) + len(seen_doc_ids)
        await _append_unseen_rows(
            db,
            f"{_CURATED_DOCS_SQL}{exclude_sql}{_PREFERRED_GROUP_ORDER_SQL}",
            [*exclude_params, preferred_group, fetch_limit],
            selected,
            seen_doc_ids,
            limit,
        )

    return _format_examples(selected[:limit], purpose)
|
||||
Reference in New Issue
Block a user