feat(api): 添加万川平台模型配置获取和同步功能
- 新增 getWanchuanModelConfig 函数,按模型编码获取平台模型配置
- 新增 syncWanchuanModelToSettings 函数,从万川平台拉取模型配置并写入后端 AI 设置
- 支持按用途分多个模型编码(generic/vision/voice)分别同步配置
- 配置失败时跳过对应字段,不影响其他模型同步

feat(settings): 重构AI模型配置界面支持多模块分组
- 将AI配置按话题分析、报告生成、视觉、语音四个模块分组展示
- 每个模块独立配置接口地址、密钥和模型名称
- 添加从万川平台获取配置的按钮和同步功能
- 优化配置状态指示和错误提示信息

refactor(config): 扩展AI配置支持独立的语音视觉报告网关
- 新增 voice_base_url/voice_api_key 配置项
- 新增 vision_base_url/vision_api_key 配置项
- 新增 summary_base_url/summary_api_key 配置项
- 留空时回退到 ai_base_url/ai_api_key 兼容单网关场景

refactor(http): 统一使用共享HTTP客户端减少连接开销
- 替换各处 httpx.AsyncClient 为 shared_client
- 在 lifespan 中正确关闭共享客户端资源
- 优化 get_current_wxid 和 health 检查中的HTTP请求

refactor(ai): 按用途缓存AI客户端支持不同网关配置
- 重构 get_openai_client 支持按(base_url, api_key)缓存
- 新增 get_client_for 函数按用途获取对应客户端
- 支持语音、视觉、报告等不同用途使用独立网关和密钥
478 lines
20 KiB
Python
478 lines
20 KiB
Python
"""
|
||
售后报告生成引擎
|
||
- 从 topic_messages 拿到所有 msg_seq
|
||
- 通过 chatlog batch 接口批量拉回消息原文
|
||
- 用配置的总结模型生成 Markdown 售后事件报告
|
||
- 写入 knowledge_docs + knowledge_fts(jieba 分词)
|
||
"""
|
||
|
||
import asyncio
|
||
import logging
|
||
import json
|
||
import aiosqlite
|
||
from urllib.parse import quote
|
||
|
||
from database import get_active_db_path
|
||
from services.ai_client import get_client_for
|
||
from services.fts import tokenize
|
||
from services.message_formatter import append_quote_text, extract_contents, extract_quote
|
||
from services.report_learning import build_report_learning_context
|
||
|
||
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Number of message seqs requested from chatlog per batch call.
CHATLOG_BATCH_SIZE = 80
# Upper bound, in seconds, applied to the report-generation LLM call.
SUMMARY_LLM_TIMEOUT_SECONDS = 300
|
||
|
||
|
||
async def _get_client():
    """Return whatever ``get_client_for("summary")`` yields for report generation.

    Report generation goes through the dedicated ``summary_base_url`` /
    ``summary_api_key`` gateway, which falls back to the ``ai_*`` settings
    when left empty.  The caller in this module unpacks the result as a
    ``(client, settings)`` pair.
    """
    summary_client = await get_client_for("summary")
    return summary_client
|
||
|
||
|
||
def _message_line(item: dict, fallback_seq: int = 0) -> tuple[int, str] | None:
|
||
if not item:
|
||
return None
|
||
seq = item.get("seq") or item.get("Seq") or item.get("sort_seq") or fallback_seq or 0
|
||
time_str = item.get("create_time") or item.get("time") or item.get("CreateTime") or ""
|
||
sender = (
|
||
item.get("sender_name")
|
||
or item.get("senderName")
|
||
or item.get("SenderName")
|
||
or item.get("sender")
|
||
or item.get("Sender")
|
||
or ""
|
||
)
|
||
content = _message_text(item)
|
||
if not content:
|
||
return None
|
||
return int(seq), f"[{time_str}] {sender}: {content}"
|
||
|
||
|
||
def _message_meta(item: dict, fallback_seq: int = 0) -> dict:
|
||
return {
|
||
"seq": int(item.get("seq") or item.get("Seq") or item.get("sort_seq") or fallback_seq or 0),
|
||
"time": item.get("create_time") or item.get("time") or item.get("CreateTime") or "",
|
||
"sender": (
|
||
item.get("sender_name")
|
||
or item.get("senderName")
|
||
or item.get("SenderName")
|
||
or item.get("sender")
|
||
or item.get("Sender")
|
||
or ""
|
||
),
|
||
"type": item.get("type") or item.get("Type") or 1,
|
||
}
|
||
|
||
|
||
def _extract_contents(item: dict) -> dict:
    """Return the structured contents of ``item`` as produced by ``extract_contents``."""
    contents = extract_contents(item)
    return contents
|
||
|
||
|
||
def _message_text(item: dict) -> str:
    """Build the text shown for one message in the report prompt.

    Plain messages yield their ``content`` / ``Content``.  Messages that
    carry a link title are rendered as a link card: title, then optional
    description, source and URL, then the raw content if it is not already
    one of those parts, joined with ``;``.  In both cases the result is
    passed through ``append_quote_text`` together with ``item``.
    """
    raw_text = item.get("content") or item.get("Content") or ""
    contents = _extract_contents(item)

    # A body that starts with "<" is dropped when the message also carries a
    # quote (per ``extract_quote``); only the quote text is kept in that case.
    looks_like_markup = isinstance(raw_text, str) and raw_text.lstrip().startswith("<")
    if looks_like_markup and extract_quote(item):
        raw_text = ""

    title = contents.get("title") or item.get("link_title") or ""
    if not title:
        return append_quote_text(raw_text, item)

    description = contents.get("desc") or item.get("link_desc") or ""
    source = contents.get("sourceName") or contents.get("source_name") or item.get("link_source") or ""
    url = contents.get("url") or item.get("link_url") or ""

    segments = [f"[链接卡片] {title}"]
    if description:
        segments.append(description)
    if source:
        segments.append(f"来源:{source}")
    if url:
        segments.append(f"URL:{url}")
    if raw_text and raw_text not in segments:
        segments.append(raw_text)
    return append_quote_text(";".join(segments), item)
|
||
|
||
|
||
def _extract_image_key(item: dict) -> str:
    """Return the image file key of a message, or ``""`` when none is found.

    Candidates are tried in order: ``rawmd5``, ``md5`` and ``path`` from the
    extracted contents, then ``media_key``, ``mediaKey`` and ``image_path``
    from the message itself.  Backslashes in the chosen key are replaced
    with forward slashes.
    """
    contents = _extract_contents(item)
    lookups = (
        (contents, "rawmd5"),
        (contents, "md5"),
        (contents, "path"),
        (item, "media_key"),
        (item, "mediaKey"),
        (item, "image_path"),
    )
    # Lazy scan: stops at the first truthy value, like an ``or`` chain.
    chosen = next((value for source, name in lookups if (value := source.get(name))), "")
    return str(chosen).replace("\\", "/")
|
||
|
||
|
||
def _is_image_message(item: dict) -> bool:
|
||
try:
|
||
return int(item.get("type") or item.get("Type") or 0) == 3
|
||
except Exception:
|
||
return False
|
||
|
||
|
||
def _media_path(kind: str, key: str) -> str:
|
||
return f"/{kind}/" + "/".join(quote(part) for part in key.split("/"))
|
||
|
||
|
||
def _image_url(key: str) -> str:
    """Return the thumbnail URL for an image key: its ``/image/`` media path plus ``?thumb=1``."""
    return _media_path("image", key) + "?thumb=1"
|
||
|
||
|
||
def _collect_image_evidence(messages: list[dict]) -> tuple[list[dict], list[dict]]:
|
||
images: list[dict] = []
|
||
failures: list[dict] = []
|
||
|
||
for item in messages:
|
||
if not _is_image_message(item):
|
||
continue
|
||
meta = _message_meta(item)
|
||
key = _extract_image_key(item)
|
||
if not key:
|
||
failures.append({**meta, "url": "", "reason": "图片无法展示,缺少图片文件标识"})
|
||
continue
|
||
|
||
url = _image_url(key)
|
||
images.append({**meta, "key": key, "url": url})
|
||
|
||
return images, failures
|
||
|
||
|
||
def _image_evidence_context(images: list[dict], failures: list[dict]) -> str:
|
||
lines: list[str] = []
|
||
if images:
|
||
lines.append("系统将作为原始材料插入报告的现场图片:")
|
||
for img in images:
|
||
lines.append(f"- [{img['time']}] {img['sender']} seq={img['seq']} url={img['url']}")
|
||
if failures:
|
||
lines.append("无法展示的图片清单:")
|
||
for img in failures:
|
||
link = f",查看图片:{img['url']}" if img.get("url") else ""
|
||
lines.append(f"- [{img['time']}] {img['sender']} seq={img['seq']}:{img['reason']}{link}")
|
||
return "\n".join(lines)
|
||
|
||
|
||
def _image_success_markdown(images: list[dict]) -> str:
|
||
if not images:
|
||
return ""
|
||
blocks = ["### 现场图片"]
|
||
for img in images:
|
||
alt = f"现场图片 - {img['time']} {img['sender']}".strip()
|
||
blocks.extend(
|
||
[
|
||
f"",
|
||
f"来源:{img['time']} {img['sender']} seq={img['seq']}",
|
||
"",
|
||
]
|
||
)
|
||
return "\n".join(blocks).strip()
|
||
|
||
|
||
def _image_failure_markdown(failures: list[dict]) -> str:
|
||
if not failures:
|
||
return ""
|
||
lines = ["## 图片展示提示"]
|
||
for img in failures:
|
||
link = f",查看图片:{img['url']}" if img.get("url") else ""
|
||
lines.append(f"- [{img['time']}] {img['sender']} seq={img['seq']}:{img['reason']}{link}")
|
||
return "\n".join(lines)
|
||
|
||
|
||
def _insert_after_heading(content: str, heading: str, addition: str) -> str:
|
||
if not addition:
|
||
return content
|
||
lines = content.splitlines()
|
||
for i, line in enumerate(lines):
|
||
if line.strip() == heading:
|
||
return "\n".join(lines[: i + 1] + ["", addition, ""] + lines[i + 1 :]).strip()
|
||
for i, line in enumerate(lines):
|
||
if line.startswith("# "):
|
||
return "\n".join(lines[: i + 1] + ["", heading, "", addition, ""] + lines[i + 1 :]).strip()
|
||
return f"{heading}\n\n{addition}\n\n{content}".strip()
|
||
|
||
|
||
def _merge_image_sections(content: str, successes: list[dict], failures: list[dict]) -> str:
    """Merge the image sections into a generated report.

    The displayable-image section is inserted below the ``## 关键聊天依据``
    heading (via ``_insert_after_heading``); the failure section, when not
    empty, is appended at the end.  The result is stripped.
    """
    gallery = _image_success_markdown(successes)
    merged = _insert_after_heading(content, "## 关键聊天依据", gallery)
    notice = _image_failure_markdown(failures)
    if not notice:
        return merged.strip()
    return f"{merged.rstrip()}\n\n{notice}".strip()
|
||
|
||
|
||
def _line_from_snapshot(raw: str | None, fallback_seq: int) -> str | None:
|
||
if not raw:
|
||
return None
|
||
try:
|
||
item = json.loads(raw)
|
||
except Exception:
|
||
return None
|
||
line = _message_line(item, fallback_seq)
|
||
return line[1] if line else None
|
||
|
||
# Report skeleton and output rules appended to the summarize prompt.
# ``{title}`` is filled with ``str.format``; the backslash after the opening
# quotes keeps the text from starting with an empty line.
MARKDOWN_TEMPLATE = """\
# {title}

请按聊天记录中的实际内容生成一份【具体售后问题点】报告,不要照抄固定字段,也不要输出占位文案。

必须围绕以下结构组织,按内容决定是否保留章节,不要输出空章节:
## 问题摘要
## 关键聊天依据
## 当前处理状态
## 是否解决
## AI 建议/解决方法

输出规则:
- 只写聊天记录中能直接识别或合理归纳的信息。
- 没有识别到的客户、门店、联系人、合同、订单、物流、日期、价格、原因等信息直接省略。
- 不要写“未从聊天记录中识别”“待补充”“未知”“无”等占位内容。
- “是否解决”只能从聊天记录判断,取值限定为:已解决、未解决、处理中、待确认。
- 如果聊天内容不足以形成明确售后问题点,仍然按当前话题内容整理,但用更保守的“待确认”结论。
- “AI 建议/解决方法”必须放在文档下方,并附注:注:此方法由 AI 生成,仅供参考,请以人工复核和现场实际情况为准。
- 只输出 Markdown 报告,不要输出这些规则本身。
"""
|
||
|
||
|
||
async def _mark_summarize_failed(topic_id: int, task_id: int | None, error: str):
    """Record a failed summarize run in the database (best effort).

    Sets the topic status to ``error`` and, when ``task_id`` is given, sets
    the ``ai_tasks`` row to ``error`` with a reset progress and the error
    text (a default message is used when ``error`` is empty).  Database
    errors are logged as warnings and not raised.
    """
    db_path = get_active_db_path()
    reason = error or "AI 报告生成失败"
    try:
        async with aiosqlite.connect(db_path) as conn:
            await conn.execute(
                "UPDATE topics SET status = 'error', updated_at = CURRENT_TIMESTAMP WHERE id = ?",
                (topic_id,),
            )
            if task_id is not None:
                reset_progress = json.dumps({"processed": 0, "total": 1})
                await conn.execute(
                    """
                    UPDATE ai_tasks
                    SET status='error', progress=?, error=?, updated_at=CURRENT_TIMESTAMP
                    WHERE id=?
                    """,
                    (reset_progress, reason, task_id),
                )
            # Both updates are committed together.
            await conn.commit()
    except Exception as exc:
        log.warning(f"[summarize] 标记失败状态失败 topic={topic_id} task={task_id}: {exc}")
|
||
|
||
|
||
async def _run_summarize_impl(topic_id: int, topic: dict, task_id: int | None = None):
    """
    Generate or refresh the Markdown after-sales incident report of a topic.

    Triggered manually through ``POST /api/topics/{id}/summarize``.

    Steps: mark the topic ``processing``; load its rows from
    ``topic_messages``; fetch the message bodies from chatlog in batches,
    falling back to the stored ``message_json`` snapshot; build the prompt;
    call the summary model; upsert ``knowledge_docs`` and ``knowledge_fts``;
    mark the topic ``completed``.

    Every handled failure marks the topic ``error`` and, when ``task_id`` is
    given, stores the error text on the ``ai_tasks`` row.

    Args:
        topic_id: id of the topic to summarize.
        topic: topic record; ``title`` is read by key, ``group_id`` via ``get``.
        task_id: optional ``ai_tasks`` id whose status and progress are updated.
    """
    path = get_active_db_path()

    async def _update_task(status: str, processed: int = 0, total: int = 1, error: str = ""):
        """Best-effort update of the ai_tasks status/progress; no-op without task_id."""
        if task_id is None:
            return
        try:
            async with aiosqlite.connect(path) as _db:
                await _db.execute(
                    """
                    UPDATE ai_tasks
                    SET status=?, progress=?, error=?, updated_at=CURRENT_TIMESTAMP
                    WHERE id=?
                    """,
                    (status, json.dumps({"processed": processed, "total": total}), error or None, task_id),
                )
                await _db.commit()
        except Exception as e:
            log.warning(f"[summarize] 更新 task {task_id} 失败: {e}")

    async def _fail(db, error: str):
        """Mark the topic as errored, commit, then record the error on the task."""
        await db.execute(
            "UPDATE topics SET status = 'error', updated_at = CURRENT_TIMESTAMP WHERE id = ?",
            (topic_id,),
        )
        await db.commit()
        await _update_task("error", 0, 1, error)

    async with aiosqlite.connect(path) as db:
        db.row_factory = aiosqlite.Row

        # Flag the topic as being processed.
        await db.execute(
            "UPDATE topics SET status = 'processing', updated_at = CURRENT_TIMESTAMP WHERE id = ?",
            (topic_id,),
        )
        await db.commit()
        await _update_task("running", 0, 1)

        # 1. Load every message seq of the topic together with the group talker.
        async with db.execute(
            """
            SELECT tm.msg_seq, tm.talker, tm.message_json
            FROM topic_messages tm
            WHERE tm.topic_id = ?
            ORDER BY tm.msg_seq
            """,
            (topic_id,),
        ) as cur:
            msg_rows = await cur.fetchall()

        if not msg_rows:
            log.warning(f"[summarize] topic={topic_id} 没有消息,跳过")
            await _fail(db, "该话题没有关联消息,无法生成 AI 报告")
            return

        seqs = [r["msg_seq"] for r in msg_rows]
        # topic_messages.talker holds the group id (chatlog calls it "talker").
        group_talker = msg_rows[0]["talker"]

        # 2. Fetch the message bodies from chatlog, CHATLOG_BATCH_SIZE seqs per call.
        from services.chatlog_client import chatlog_client

        message_items: dict[int, dict] = {}
        fetched_lines: dict[int, str] = {}
        for i in range(0, len(seqs), CHATLOG_BATCH_SIZE):
            chunk_seqs = seqs[i : i + CHATLOG_BATCH_SIZE]
            try:
                result = await chatlog_client.get_messages_batch(group_talker, chunk_seqs)
                for m in result.get("items") or []:
                    # Skip malformed entries instead of losing the rest of the batch.
                    if not isinstance(m, dict):
                        continue
                    meta = _message_meta(m)
                    if meta["seq"]:
                        message_items[meta["seq"]] = m
                    line = _message_line(m)
                    if line:
                        fetched_lines[line[0]] = line[1]
            except Exception as e:
                # A failed batch is logged; its messages fall back to snapshots below.
                log.error(f"[summarize] batch 拉取失败 topic={topic_id}: {e}")

        messages_text: list[str] = []
        # Seqs normalised to int so they match the int keys of message_items.
        ordered_seqs: list[int] = []
        for r in msg_rows:
            seq = int(r["msg_seq"])
            ordered_seqs.append(seq)
            if seq in fetched_lines:
                messages_text.append(fetched_lines[seq])
                continue

            # Fall back to the snapshot stored next to the topic message.
            snap_item = None
            snap_raw = r["message_json"]
            if snap_raw:
                try:
                    snap_item = json.loads(snap_raw)
                except (TypeError, ValueError):
                    snap_item = None
            if not isinstance(snap_item, dict):
                continue
            # A fetched item wins; the snapshot only fills a gap.
            message_items.setdefault(seq, snap_item)
            snap_line = _message_line(snap_item, seq)
            if snap_line:
                messages_text.append(snap_line[1])

        image_successes, image_failures = _collect_image_evidence(
            [message_items[seq] for seq in ordered_seqs if seq in message_items]
        )

        if not messages_text and not image_successes and not image_failures:
            log.warning(f"[summarize] topic={topic_id} 从 chatlog 获取到 0 条有效消息")
            await _fail(db, "未能从 chatlog 获取到有效消息,无法生成 AI 报告")
            return

        chat_text = "\n".join(messages_text) if messages_text else "无文字消息,仅有图片或媒体证据。"
        image_context = _image_evidence_context(image_successes, image_failures)
        learning_context = await build_report_learning_context(
            db,
            group_id=topic.get("group_id"),
            query=f"{topic.get('title', '')}\n{chat_text[:2000]}",
            exclude_topic_id=topic_id,
            purpose="summary",
        )

        # 3. Build the prompt.
        template_filled = MARKDOWN_TEMPLATE.format(title=topic["title"])
        prompt = (
            f"售后问题点话题:{topic['title']}\n\n"
            f"以下是该售后问题点关联的完整微信群聊天记录(按时间顺序):\n\n"
            f"{chat_text}\n\n"
            f"以下是系统将插入报告的现场图片信息(如有):\n\n{image_context or '无现场图片。'}\n\n"
            "请根据上述聊天记录输出一份 Markdown 报告。\n"
            "报告要求:\n"
            "1. 保持售后问题点口径,优先提炼问题现象、涉及产品/部件、现场材料、处理过程和处理结果。\n"
            "2. 只能使用聊天记录中能直接识别或合理归纳的信息,不要编造客户、合同、订单、物流、日期、价格、原因或处理结果。\n"
            "3. 不要输出空字段、空项目、空章节、空表格;某个章节没有有效内容时整段省略。\n"
            "4. 「是否解决」必须写在文档中,并使用:已解决 / 未解决 / 处理中 / 待确认。\n"
            "5. 「AI 建议/解决方法」必须写在文档中,且在段末附上固定注释:注:此方法由 AI 生成,仅供参考,请以人工复核和现场实际情况为准。\n"
            "6. 如果聊天内容不足以形成明确售后问题点,也不要编造结论;只按聊天中已有事实给出保守的待确认判断。\n"
            "7. 图片会由系统作为「现场图片」原始材料插入「关键聊天依据」;你不要猜测图片内容,也不要自行输出图片 Markdown 或图片说明。\n"
            "8. 如果聊天文字中有人描述图片内容,可以引用这些文字;但不要根据图片本身编造故障细节。\n"
            "9. 聊天记录中的「[引用消息]」属于当前回复的上下文证据,可以用于理解被回复的问题和处理过程。\n"
            "10. 只输出 Markdown 报告,不要输出模板说明或额外解释。\n\n"
            f"以下是本企业报告库中人工修订过的历史报告示例(如有)。请只学习它们的栏目结构、措辞风格、问题关注点和结论表达方式;不得复制历史事实、客户名、设备状态或处理结果到当前报告:\n\n{learning_context or '暂无可学习的人工修订报告。'}\n\n"
            f"{template_filled}"
        )

        # 4. Call the LLM.
        try:
            _client, _ai = await _get_client()
            async with asyncio.timeout(SUMMARY_LLM_TIMEOUT_SECONDS):
                resp = await _client.chat.completions.create(
                    model=_ai["summary_model"],
                    messages=[
                        {
                            "role": "system",
                            "content": (
                                "你是资深售后运营与设备服务工程师,负责根据微信群聊天记录整理具体售后问题点报告。"
                                "你必须忠实依据聊天记录,只输出已识别到的有效信息,缺失信息直接省略,不得编造。"
                                "你要在文档中明确给出是否解决结论,并给出 AI 建议/解决方法和免责声明。只输出 Markdown 报告,不要有任何额外说明。"
                            ),
                        },
                        {"role": "user", "content": prompt},
                    ],
                    temperature=0.2,
                )
            # The message content may be None; an empty answer is reported as
            # a failure rather than stored as an empty report.
            content = (resp.choices[0].message.content or "").strip()
            if not content:
                raise ValueError("AI 模型返回内容为空")
            content = _merge_image_sections(content, image_successes, image_failures)
        except TimeoutError:
            log.error(f"[summarize] LLM 调用超时 topic={topic_id}")
            await _fail(db, "AI 报告生成超时,请检查模型/API或稍后重试")
            return
        except Exception as e:
            log.error(f"[summarize] LLM 调用失败 topic={topic_id}: {e}", exc_info=True)
            await _fail(db, str(e) or "LLM 调用失败")
            return

        # 5. Upsert the report into knowledge_docs.
        async with db.execute(
            "SELECT id FROM knowledge_docs WHERE topic_id = ?", (topic_id,)
        ) as cur:
            existing = await cur.fetchone()

        if existing:
            doc_id = existing["id"]
            await db.execute(
                "UPDATE knowledge_docs SET content = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?",
                (content, doc_id),
            )
        else:
            await db.execute(
                "INSERT INTO knowledge_docs (topic_id, content) VALUES (?, ?)",
                (topic_id, content),
            )
            async with db.execute("SELECT last_insert_rowid() AS id") as cur:
                doc_id = (await cur.fetchone())["id"]

        # 6. Refresh the FTS row (delete, then insert the tokenized text).
        await db.execute("DELETE FROM knowledge_fts WHERE doc_id = ?", (doc_id,))
        await db.execute(
            "INSERT INTO knowledge_fts (doc_id, title, content) VALUES (?, ?, ?)",
            (doc_id, tokenize(topic["title"]), tokenize(content)),
        )

        await db.execute(
            "UPDATE topics SET status = 'completed', updated_at = CURRENT_TIMESTAMP WHERE id = ?",
            (topic_id,),
        )
        await db.commit()
        await _update_task("done", 1, 1)
        log.info(f"[summarize] topic={topic_id} doc={doc_id} 生成完成({len(content)} 字符)")
|
||
|
||
|
||
async def run_summarize(topic_id: int, topic: dict, task_id: int | None = None):
    """Run the summarize pipeline for a topic and never let an error escape.

    Delegates to ``_run_summarize_impl``.  Any exception it raises is logged
    with its traceback and recorded through ``_mark_summarize_failed``, using
    the exception text or, when that is empty, the exception class name.
    """
    try:
        await _run_summarize_impl(topic_id, topic, task_id)
    except Exception as exc:
        reason = str(exc) or type(exc).__name__
        log.error(f"[summarize] 未捕获异常 topic={topic_id}: {reason}", exc_info=True)
        await _mark_summarize_failed(topic_id, task_id, reason)
|