Files
get_wechat/chatlog_fastAPI/database.py

302 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import aiosqlite
import asyncio
import httpx
import logging
import time
from pathlib import Path
from config import settings
log = logging.getLogger(__name__)
_data_db_dir = Path(settings.db_path).resolve().parent
_data_db_dir.mkdir(parents=True, exist_ok=True)
_current_db_path = str(Path(settings.db_path).resolve())
_initialized_dbs = set()
_resolved_wxid: str | None = None
_wxid_last_resolved: float = 0.0
_WXID_TTL = 60.0 # 60 秒后强制重新检测,确保账号切换能被感知
STALE_SUMMARIZE_ERROR = "AI 报告生成任务超过 15 分钟未完成,已自动标记为失败,可重新生成"
def _db_path_for_wxid(wxid: str) -> str:
if wxid and wxid != "default":
safe = "".join(c for c in wxid if c.isalnum() or c in ("_", "-"))
return str((_data_db_dir / f"knowledge_{safe}.db").resolve())
return str(Path(settings.db_path).resolve())
def reset_wxid_cache():
global _resolved_wxid, _wxid_last_resolved
_resolved_wxid = None
_wxid_last_resolved = 0.0
async def get_current_wxid(force: bool = False):
global _resolved_wxid, _wxid_last_resolved
now = time.time()
# 已有有效缓存且未超时,直接返回
if (
not force
and _resolved_wxid
and _resolved_wxid != "default"
and (now - _wxid_last_resolved) < _WXID_TTL
):
return _resolved_wxid
# 重新解析当前 wxid
base = settings.chatlog_base_url
async with httpx.AsyncClient(trust_env=False, timeout=10.0) as client:
try:
r = await client.get(f"{base}/api/v1/chatlog", params={"talker": "filehelper", "limit": 100, "time": "1970-01-01,2099-12-31", "format": "json"})
if r.status_code == 200:
data = r.json()
for msg in data.get("items", []):
if msg.get("isSelf"):
_resolved_wxid = msg.get("sender")
_wxid_last_resolved = time.time()
return _resolved_wxid
except Exception:
pass
try:
r = await client.get(f"{base}/api/v1/chatroom", params={"limit": 10, "format": "json"})
if r.status_code == 200:
rooms = r.json().get("items", [])
for room in rooms:
room_id = room.get("name")
r2 = await client.get(f"{base}/api/v1/chatlog", params={"talker": room_id, "limit": 50, "time": "1970-01-01,2099-12-31", "format": "json"})
if r2.status_code == 200:
data2 = r2.json()
for msg in data2.get("items", []):
if msg.get("isSelf"):
_resolved_wxid = msg.get("sender")
_wxid_last_resolved = time.time()
return _resolved_wxid
except Exception:
pass
if force:
reset_wxid_cache()
return "default"
async def update_db_path(force: bool = False):
global _current_db_path
wxid = await get_current_wxid(force=force)
new_path = _db_path_for_wxid(wxid)
if new_path != _current_db_path:
log.info(f"Switching database to {new_path}")
_current_db_path = new_path
await init_db(new_path)
return _current_db_path
def get_active_db_path():
return _current_db_path
async def get_db():
path = get_active_db_path()
if path not in _initialized_dbs:
await init_db(path)
async with aiosqlite.connect(path) as db:
db.row_factory = aiosqlite.Row
yield db
async def init_db(path=None):
if path is None:
path = get_active_db_path()
async with aiosqlite.connect(path) as db:
await db.executescript("""
CREATE TABLE IF NOT EXISTS groups (
id INTEGER PRIMARY KEY,
talker TEXT UNIQUE NOT NULL,
name TEXT,
analysis_prompt TEXT DEFAULT '',
cursor_seq INTEGER DEFAULT 0,
initialized INTEGER DEFAULT 0,
poll_interval INTEGER DEFAULT 300,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS topics (
id INTEGER PRIMARY KEY,
group_id INTEGER REFERENCES groups(id),
title TEXT NOT NULL,
source TEXT DEFAULT 'manual',
status TEXT DEFAULT 'pending',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS topic_messages (
topic_id INTEGER REFERENCES topics(id),
msg_seq INTEGER,
talker TEXT,
added_by TEXT DEFAULT 'ai',
message_json TEXT,
PRIMARY KEY (topic_id, msg_seq)
);
CREATE TABLE IF NOT EXISTS knowledge_docs (
id INTEGER PRIMARY KEY,
topic_id INTEGER UNIQUE REFERENCES topics(id),
content TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
curated_at DATETIME
);
CREATE VIRTUAL TABLE IF NOT EXISTS knowledge_fts USING fts5(
doc_id UNINDEXED,
title,
content
);
CREATE TABLE IF NOT EXISTS ai_tasks (
id INTEGER PRIMARY KEY,
group_id INTEGER REFERENCES groups(id),
type TEXT,
status TEXT,
progress TEXT,
error TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS app_settings (
key TEXT PRIMARY KEY,
value TEXT NOT NULL DEFAULT ''
);
""")
await db.execute(
"""
UPDATE ai_tasks
SET status='error', error=?, updated_at=CURRENT_TIMESTAMP
WHERE type='summarize'
AND status='running'
AND datetime(updated_at) <= datetime('now', '-15 minutes')
""",
(STALE_SUMMARIZE_ERROR,),
)
await db.execute(
"""
UPDATE topics
SET status='error', updated_at=CURRENT_TIMESTAMP
WHERE status='processing'
AND datetime(updated_at) <= datetime('now', '-15 minutes')
"""
)
await db.commit()
async with db.execute("PRAGMA table_info(topic_messages)") as cur:
topic_message_cols = {row[1] for row in await cur.fetchall()}
if "message_json" not in topic_message_cols:
await db.execute("ALTER TABLE topic_messages ADD COLUMN message_json TEXT")
await db.commit()
log.info(f"[init_db] added topic_messages.message_json in {path}")
async with db.execute("PRAGMA table_info(groups)") as cur:
group_cols = {row[1] for row in await cur.fetchall()}
if "analysis_prompt" not in group_cols:
await db.execute("ALTER TABLE groups ADD COLUMN analysis_prompt TEXT DEFAULT ''")
await db.commit()
log.info(f"[init_db] added groups.analysis_prompt in {path}")
async with db.execute("PRAGMA table_info(topics)") as cur:
topic_cols = {row[1] for row in await cur.fetchall()}
if "source" not in topic_cols:
await db.execute("ALTER TABLE topics ADD COLUMN source TEXT DEFAULT 'manual'")
await db.execute(
"""
UPDATE topics
SET source = CASE
WHEN EXISTS (
SELECT 1 FROM topic_messages tm
WHERE tm.topic_id = topics.id AND tm.added_by = 'user'
) THEN 'manual'
WHEN EXISTS (
SELECT 1 FROM topic_messages tm
WHERE tm.topic_id = topics.id AND COALESCE(tm.added_by, 'ai') = 'ai'
) THEN 'ai'
ELSE 'manual'
END
"""
)
await db.commit()
log.info(f"[init_db] added topics.source in {path}")
async with db.execute("PRAGMA table_info(knowledge_docs)") as cur:
knowledge_cols = {row[1] for row in await cur.fetchall()}
if "curated_at" not in knowledge_cols:
await db.execute("ALTER TABLE knowledge_docs ADD COLUMN curated_at DATETIME")
await db.execute(
"""
UPDATE knowledge_docs
SET curated_at = updated_at
WHERE updated_at IS NOT NULL
AND created_at IS NOT NULL
AND updated_at > created_at
"""
)
await db.commit()
log.info(f"[init_db] added knowledge_docs.curated_at in {path}")
# 迁移 topics 表到 AUTOINCREMENT防止 SQLite rowid 复用导致旧 knowledge_docs
# 被新建话题"接"上(跨群串报告的根因)。每次启动检测一次,已迁移则跳过。
async with db.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='sqlite_sequence'"
) as cur:
has_seq_tbl = await cur.fetchone() is not None
needs_migrate = True
if has_seq_tbl:
async with db.execute(
"SELECT 1 FROM sqlite_sequence WHERE name='topics'"
) as cur:
if await cur.fetchone():
needs_migrate = False
if needs_migrate:
await db.executescript("""
BEGIN;
CREATE TABLE topics_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
group_id INTEGER REFERENCES groups(id),
title TEXT NOT NULL,
source TEXT DEFAULT 'manual',
status TEXT DEFAULT 'pending',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
INSERT INTO topics_new (id, group_id, title, source, status, created_at, updated_at)
SELECT id, group_id, title, COALESCE(source, 'manual'), status, created_at, updated_at FROM topics;
DROP TABLE topics;
ALTER TABLE topics_new RENAME TO topics;
COMMIT;
""")
await db.execute(
"INSERT OR REPLACE INTO sqlite_sequence(name, seq) "
"SELECT 'topics', COALESCE(MAX(id), 0) FROM topics"
)
await db.commit()
log.info(f"[init_db] migrated topics table to AUTOINCREMENT in {path}")
# 孤儿数据清理:删除 topic_id 不存在于 topics 的 knowledge_docs 及其 FTS。
# 历史上删群时遗漏过这两张表,需要每次启动幂等修复。
await db.execute("""
DELETE FROM knowledge_fts WHERE doc_id IN (
SELECT id FROM knowledge_docs
WHERE topic_id NOT IN (SELECT id FROM topics)
)
""")
await db.execute("""
DELETE FROM knowledge_docs
WHERE topic_id NOT IN (SELECT id FROM topics)
""")
# 错绑数据清理doc 创建时间早于其指向的 topic 创建时间,说明 doc 是历史残留、
# topic 是后建的rowid 复用doc 应清掉。合法 doc 必然在 topic 之后生成。
await db.execute("""
DELETE FROM knowledge_fts WHERE doc_id IN (
SELECT d.id FROM knowledge_docs d
JOIN topics t ON t.id = d.topic_id
WHERE d.created_at < t.created_at
)
""")
await db.execute("""
DELETE FROM knowledge_docs WHERE id IN (
SELECT d.id FROM knowledge_docs d
JOIN topics t ON t.id = d.topic_id
WHERE d.created_at < t.created_at
)
""")
await db.commit()
_initialized_dbs.add(path)