Initial upload for secondary development

This commit is contained in:
2026-06-08 19:00:03 +08:00
commit b913b8c78c
81 changed files with 27139 additions and 0 deletions

301
chatlog_fastAPI/database.py Normal file
View File

@@ -0,0 +1,301 @@
import aiosqlite
import asyncio
import httpx
import logging
import time
from pathlib import Path
from config import settings
log = logging.getLogger(__name__)
_data_db_dir = Path(settings.db_path).resolve().parent
_data_db_dir.mkdir(parents=True, exist_ok=True)
_current_db_path = str(Path(settings.db_path).resolve())
_initialized_dbs = set()
_resolved_wxid: str | None = None
_wxid_last_resolved: float = 0.0
_WXID_TTL = 60.0 # 60 秒后强制重新检测,确保账号切换能被感知
STALE_SUMMARIZE_ERROR = "AI 报告生成任务超过 15 分钟未完成,已自动标记为失败,可重新生成"
def _db_path_for_wxid(wxid: str) -> str:
if wxid and wxid != "default":
safe = "".join(c for c in wxid if c.isalnum() or c in ("_", "-"))
return str((_data_db_dir / f"knowledge_{safe}.db").resolve())
return str(Path(settings.db_path).resolve())
def reset_wxid_cache():
global _resolved_wxid, _wxid_last_resolved
_resolved_wxid = None
_wxid_last_resolved = 0.0
async def get_current_wxid(force: bool = False):
global _resolved_wxid, _wxid_last_resolved
now = time.time()
# 已有有效缓存且未超时,直接返回
if (
not force
and _resolved_wxid
and _resolved_wxid != "default"
and (now - _wxid_last_resolved) < _WXID_TTL
):
return _resolved_wxid
# 重新解析当前 wxid
base = settings.chatlog_base_url
async with httpx.AsyncClient(trust_env=False, timeout=10.0) as client:
try:
r = await client.get(f"{base}/api/v1/chatlog", params={"talker": "filehelper", "limit": 100, "time": "1970-01-01,2099-12-31", "format": "json"})
if r.status_code == 200:
data = r.json()
for msg in data.get("items", []):
if msg.get("isSelf"):
_resolved_wxid = msg.get("sender")
_wxid_last_resolved = time.time()
return _resolved_wxid
except Exception:
pass
try:
r = await client.get(f"{base}/api/v1/chatroom", params={"limit": 10, "format": "json"})
if r.status_code == 200:
rooms = r.json().get("items", [])
for room in rooms:
room_id = room.get("name")
r2 = await client.get(f"{base}/api/v1/chatlog", params={"talker": room_id, "limit": 50, "time": "1970-01-01,2099-12-31", "format": "json"})
if r2.status_code == 200:
data2 = r2.json()
for msg in data2.get("items", []):
if msg.get("isSelf"):
_resolved_wxid = msg.get("sender")
_wxid_last_resolved = time.time()
return _resolved_wxid
except Exception:
pass
if force:
reset_wxid_cache()
return "default"
async def update_db_path(force: bool = False):
global _current_db_path
wxid = await get_current_wxid(force=force)
new_path = _db_path_for_wxid(wxid)
if new_path != _current_db_path:
log.info(f"Switching database to {new_path}")
_current_db_path = new_path
await init_db(new_path)
return _current_db_path
def get_active_db_path():
return _current_db_path
async def get_db():
path = get_active_db_path()
if path not in _initialized_dbs:
await init_db(path)
async with aiosqlite.connect(path) as db:
db.row_factory = aiosqlite.Row
yield db
async def init_db(path=None):
if path is None:
path = get_active_db_path()
async with aiosqlite.connect(path) as db:
await db.executescript("""
CREATE TABLE IF NOT EXISTS groups (
id INTEGER PRIMARY KEY,
talker TEXT UNIQUE NOT NULL,
name TEXT,
analysis_prompt TEXT DEFAULT '',
cursor_seq INTEGER DEFAULT 0,
initialized INTEGER DEFAULT 0,
poll_interval INTEGER DEFAULT 300,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS topics (
id INTEGER PRIMARY KEY,
group_id INTEGER REFERENCES groups(id),
title TEXT NOT NULL,
source TEXT DEFAULT 'manual',
status TEXT DEFAULT 'pending',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS topic_messages (
topic_id INTEGER REFERENCES topics(id),
msg_seq INTEGER,
talker TEXT,
added_by TEXT DEFAULT 'ai',
message_json TEXT,
PRIMARY KEY (topic_id, msg_seq)
);
CREATE TABLE IF NOT EXISTS knowledge_docs (
id INTEGER PRIMARY KEY,
topic_id INTEGER UNIQUE REFERENCES topics(id),
content TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
curated_at DATETIME
);
CREATE VIRTUAL TABLE IF NOT EXISTS knowledge_fts USING fts5(
doc_id UNINDEXED,
title,
content
);
CREATE TABLE IF NOT EXISTS ai_tasks (
id INTEGER PRIMARY KEY,
group_id INTEGER REFERENCES groups(id),
type TEXT,
status TEXT,
progress TEXT,
error TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS app_settings (
key TEXT PRIMARY KEY,
value TEXT NOT NULL DEFAULT ''
);
""")
await db.execute(
"""
UPDATE ai_tasks
SET status='error', error=?, updated_at=CURRENT_TIMESTAMP
WHERE type='summarize'
AND status='running'
AND datetime(updated_at) <= datetime('now', '-15 minutes')
""",
(STALE_SUMMARIZE_ERROR,),
)
await db.execute(
"""
UPDATE topics
SET status='error', updated_at=CURRENT_TIMESTAMP
WHERE status='processing'
AND datetime(updated_at) <= datetime('now', '-15 minutes')
"""
)
await db.commit()
async with db.execute("PRAGMA table_info(topic_messages)") as cur:
topic_message_cols = {row[1] for row in await cur.fetchall()}
if "message_json" not in topic_message_cols:
await db.execute("ALTER TABLE topic_messages ADD COLUMN message_json TEXT")
await db.commit()
log.info(f"[init_db] added topic_messages.message_json in {path}")
async with db.execute("PRAGMA table_info(groups)") as cur:
group_cols = {row[1] for row in await cur.fetchall()}
if "analysis_prompt" not in group_cols:
await db.execute("ALTER TABLE groups ADD COLUMN analysis_prompt TEXT DEFAULT ''")
await db.commit()
log.info(f"[init_db] added groups.analysis_prompt in {path}")
async with db.execute("PRAGMA table_info(topics)") as cur:
topic_cols = {row[1] for row in await cur.fetchall()}
if "source" not in topic_cols:
await db.execute("ALTER TABLE topics ADD COLUMN source TEXT DEFAULT 'manual'")
await db.execute(
"""
UPDATE topics
SET source = CASE
WHEN EXISTS (
SELECT 1 FROM topic_messages tm
WHERE tm.topic_id = topics.id AND tm.added_by = 'user'
) THEN 'manual'
WHEN EXISTS (
SELECT 1 FROM topic_messages tm
WHERE tm.topic_id = topics.id AND COALESCE(tm.added_by, 'ai') = 'ai'
) THEN 'ai'
ELSE 'manual'
END
"""
)
await db.commit()
log.info(f"[init_db] added topics.source in {path}")
async with db.execute("PRAGMA table_info(knowledge_docs)") as cur:
knowledge_cols = {row[1] for row in await cur.fetchall()}
if "curated_at" not in knowledge_cols:
await db.execute("ALTER TABLE knowledge_docs ADD COLUMN curated_at DATETIME")
await db.execute(
"""
UPDATE knowledge_docs
SET curated_at = updated_at
WHERE updated_at IS NOT NULL
AND created_at IS NOT NULL
AND updated_at > created_at
"""
)
await db.commit()
log.info(f"[init_db] added knowledge_docs.curated_at in {path}")
# 迁移 topics 表到 AUTOINCREMENT防止 SQLite rowid 复用导致旧 knowledge_docs
# 被新建话题"接"上(跨群串报告的根因)。每次启动检测一次,已迁移则跳过。
async with db.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='sqlite_sequence'"
) as cur:
has_seq_tbl = await cur.fetchone() is not None
needs_migrate = True
if has_seq_tbl:
async with db.execute(
"SELECT 1 FROM sqlite_sequence WHERE name='topics'"
) as cur:
if await cur.fetchone():
needs_migrate = False
if needs_migrate:
await db.executescript("""
BEGIN;
CREATE TABLE topics_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
group_id INTEGER REFERENCES groups(id),
title TEXT NOT NULL,
source TEXT DEFAULT 'manual',
status TEXT DEFAULT 'pending',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
INSERT INTO topics_new (id, group_id, title, source, status, created_at, updated_at)
SELECT id, group_id, title, COALESCE(source, 'manual'), status, created_at, updated_at FROM topics;
DROP TABLE topics;
ALTER TABLE topics_new RENAME TO topics;
COMMIT;
""")
await db.execute(
"INSERT OR REPLACE INTO sqlite_sequence(name, seq) "
"SELECT 'topics', COALESCE(MAX(id), 0) FROM topics"
)
await db.commit()
log.info(f"[init_db] migrated topics table to AUTOINCREMENT in {path}")
# 孤儿数据清理:删除 topic_id 不存在于 topics 的 knowledge_docs 及其 FTS。
# 历史上删群时遗漏过这两张表,需要每次启动幂等修复。
await db.execute("""
DELETE FROM knowledge_fts WHERE doc_id IN (
SELECT id FROM knowledge_docs
WHERE topic_id NOT IN (SELECT id FROM topics)
)
""")
await db.execute("""
DELETE FROM knowledge_docs
WHERE topic_id NOT IN (SELECT id FROM topics)
""")
# 错绑数据清理doc 创建时间早于其指向的 topic 创建时间,说明 doc 是历史残留、
# topic 是后建的rowid 复用doc 应清掉。合法 doc 必然在 topic 之后生成。
await db.execute("""
DELETE FROM knowledge_fts WHERE doc_id IN (
SELECT d.id FROM knowledge_docs d
JOIN topics t ON t.id = d.topic_id
WHERE d.created_at < t.created_at
)
""")
await db.execute("""
DELETE FROM knowledge_docs WHERE id IN (
SELECT d.id FROM knowledge_docs d
JOIN topics t ON t.id = d.topic_id
WHERE d.created_at < t.created_at
)
""")
await db.commit()
_initialized_dbs.add(path)