feat(api): 添加万川平台模型配置获取和同步功能 - 新增 getWanchuanModelConfig 函数,按模型编码获取平台模型配置 - 新增 syncWanchuanModelToSettings 函数,从万川平台拉取模型配置并写入后端 AI 设置 - 支持按用途分多个模型编码(generic/vision/voice)分别同步配置 - 配置失败时跳过对应字段,不影响其他模型同步 feat(settings): 重构AI模型配置界面支持多模块分组 - 将AI配置按话题分析、报告生成、视觉、语音四个模块分组展示 - 每个模块独立配置接口地址、密钥和模型名称 - 添加从万川平台获取配置的按钮和同步功能 - 优化配置状态指示和错误提示信息 refactor(config): 扩展AI配置支持独立的语音视觉报告网关 - 新增 voice_base_url/voice_api_key 配置项 - 新增 vision_base_url/vision_api_key 配置项 - 新增 summary_base_url/summary_api_key 配置项 - 留空时回退到 ai_base_url/ai_api_key 兼容单网关场景 refactor(http): 统一使用共享HTTP客户端减少连接开销 - 替换各处 httpx.AsyncClient 为 shared_client - 在 lifespan 中正确关闭共享客户端资源 - 优化 get_current_wxid 和 health 检查中的HTTP请求 refactor(ai): 按用途缓存AI客户端支持不同网关配置 - 重构 get_openai_client 支持按(base_url, api_key)缓存 - 新增 get_client_for 函数按用途获取对应客户端 - 支持语音、视觉、报告等不同用途使用独立网关和密钥 ```
183 lines
7.8 KiB
Python
183 lines
7.8 KiB
Python
import asyncio
|
||
import base64
|
||
import logging
|
||
|
||
import httpx
|
||
from fastapi import HTTPException
|
||
|
||
from services.ai_client import get_client_for
|
||
from services.media_resolver import resolve_media
|
||
from services.runtime_settings import get_ai_settings
|
||
|
||
log = logging.getLogger(__name__)
|
||
|
||
# Default gateway for asynchronous voice ASR (Alibaba Cloud DashScope); used when voice_base_url is empty.
|
||
# The submit/poll sub-paths are appended by the code, so the configured URL only needs to go as far as .../api/v1.
|
||
DEFAULT_ASR_BASE_URL = "https://dashscope.aliyuncs.com/api/v1"
|
||
|
||
|
||
async def parse_media(kind: str, key: str) -> dict:
    """Parse one chatlog media object into text.

    Args:
        kind: Media type; one of "voice", "image", or "video".
        key: Chatlog media key, passed on to ``resolve_media``.

    Returns:
        A dict of the form ``{"text": <transcribed or described text>}``.

    Raises:
        HTTPException: 400 when ``kind`` is unsupported or ``key`` is empty;
            503 when no API key or model name is configured for the
            requested kind.
    """
    if kind not in {"voice", "image", "video"}:
        raise HTTPException(400, "不支持的媒体类型")
    if not key:
        raise HTTPException(400, "媒体 key 不能为空")

    ai = await get_ai_settings()

    def _is_set(name: str) -> bool:
        # Whitespace-only values count as missing: the voice path strips
        # settings before sending them, so a blank-looking value would
        # otherwise pass this check and reach the upstream API empty.
        value = ai.get(name)
        return bool(value) and bool(str(value).strip())

    # Voice and vision each have their own key; either may fall back to the
    # global ai_api_key when its dedicated key is not set.
    if kind == "voice":
        if not (_is_set("voice_api_key") or _is_set("ai_api_key")):
            raise HTTPException(503, "AI 服务未配置,请在设置页填写语音密钥或 AI API Key")
        if not _is_set("voice_model"):
            raise HTTPException(503, "语音模型未配置,请在设置页填写语音模型名称,例如 paraformer-v2")
    else:
        # kind was validated above, so this branch is "image" or "video".
        if not (_is_set("vision_api_key") or _is_set("ai_api_key")):
            raise HTTPException(503, "AI 服务未配置,请在设置页填写视觉密钥或 AI API Key")
        if not _is_set("vision_model"):
            raise HTTPException(503, "视觉模型未配置,请在设置页填写视觉模型名称,例如 qwen-vl-plus")

    media = await resolve_media(kind, key)
    if kind == "voice":
        return {"text": await _parse_voice(media.bytes, media.content_type)}
    return {"text": await _parse_visual(kind, media.bytes, media.content_type)}
|
||
|
||
|
||
def _audio_mime(content_type: str) -> str:
|
||
"""由 chatlog 返回的 content_type 推断音频 MIME(用于 data URI)。"""
|
||
ct = content_type.lower()
|
||
if "silk" in ct or "x-silk" in ct:
|
||
return "audio/silk"
|
||
if "amr" in ct:
|
||
return "audio/amr"
|
||
if "ogg" in ct or "opus" in ct:
|
||
return "audio/ogg"
|
||
if "wav" in ct:
|
||
return "audio/wav"
|
||
return "audio/mpeg"
|
||
|
||
|
||
def _asr_json(resp: httpx.Response, url: str) -> dict:
    """Decode an ASR response as a JSON object, failing with diagnostics.

    When the body is not JSON (empty response, HTML error page, gateway 404),
    calling ``.json()`` directly would raise a decode error that hides the
    real cause. This helper raises an ``HTTPException`` carrying the HTTP
    status code and a snippet of the body instead, which makes problems such
    as a base URL mistakenly set to ``compatible-mode/v1`` easy to spot.

    Args:
        resp: The HTTP response to decode.
        url: The requested URL; only used in the error message.

    Returns:
        The decoded JSON object.

    Raises:
        HTTPException: 500 when the body is not valid JSON, or is valid JSON
            but not an object (callers rely on ``.get``).
    """
    try:
        data = resp.json()
    except ValueError as exc:
        # JSON decode errors (and undecodable bytes) are ValueError
        # subclasses; anything else is a genuine bug and should surface.
        body = (resp.text or "").strip()[:300]
        raise HTTPException(
            500,
            f"ASR 接口返回非 JSON (HTTP {resp.status_code}) @ {url}:{body or '(空响应)'}。"
            "请检查语音接口地址是否为异步 ASR 网关(如 .../api/v1)及密钥是否正确。",
        ) from exc
    if not isinstance(data, dict):
        # A JSON list/string/number would break every caller's .get() with
        # an AttributeError; report it here with context instead.
        raise HTTPException(
            500,
            f"ASR 接口返回的 JSON 不是对象 (HTTP {resp.status_code}) @ {url}:{str(data)[:300]}",
        )
    return data
|
||
|
||
|
||
async def _parse_voice(media_bytes: bytes, content_type: str) -> str:
    """Transcribe audio via the Alibaba Cloud asynchronous ASR protocol.

    Flow: submit a task, poll it once per second (up to 30 polls), then read
    the result, either inline or from the ``transcription_url`` it points to.

    Endpoints are derived from the settings:
      * base   = ``voice_base_url``; when empty the default native gateway
                 ``DEFAULT_ASR_BASE_URL`` is used (never ``ai_base_url``).
      * submit = ``{base}/services/audio/asr/transcription``
      * poll   = ``{base}/tasks/{task_id}``
      * key    = ``voice_api_key``, falling back to ``ai_api_key``.

    Args:
        media_bytes: Raw audio bytes; sent as a base64 data URI.
        content_type: Content type of the audio, used to choose the data
            URI MIME type.

    Returns:
        The recognized text, or "(识别结果为空)" when nothing was recognized.

    Raises:
        HTTPException: 500 when submission fails, no task id is returned,
            the task ends FAILED/CANCELLED, a response is not a JSON object,
            or the task has not finished after 30 polls.
    """
    ai = await get_ai_settings()
    # Strip each value BEFORE applying the fallback: configuration or sync
    # can carry stray whitespace (a leading space in api_key has caused auth
    # failures), and a whitespace-only value must not block the fallback.
    #
    # NOTE: async ASR lives on the native gateway (/api/v1). It is a
    # different service from ai_base_url (the OpenAI-compatible chat endpoint
    # under .../compatible-mode/...), so an empty voice_base_url falls back
    # to DEFAULT_ASR_BASE_URL and never to ai_base_url, which would build a
    # .../compatible-mode/.../asr URL and return 404.
    base = (ai.get("voice_base_url") or "").strip().rstrip("/") or DEFAULT_ASR_BASE_URL
    api_key = (ai.get("voice_api_key") or "").strip() or (ai.get("ai_api_key") or "").strip()
    voice_model = (ai.get("voice_model") or "").strip()

    b64_audio = base64.b64encode(media_bytes).decode()
    data_uri = f"data:{_audio_mime(content_type)};base64,{b64_audio}"
    asr_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    submit_url = f"{base}/services/audio/asr/transcription"
    async with httpx.AsyncClient(timeout=60) as http:
        submit = await http.post(
            submit_url,
            headers={**asr_headers, "X-DashScope-Async": "enable"},
            json={
                "model": voice_model,
                "input": {"file_urls": [data_uri]},
                "parameters": {"language_hints": ["zh", "en"]},
            },
            timeout=30,
        )
        submit_data = _asr_json(submit, submit_url)
        if submit.status_code not in (200, 201):
            raise HTTPException(500, f"提交识别任务失败 (HTTP {submit.status_code}): {submit_data.get('message', submit_data)}")

        # "output" may be missing or null; treat both as an empty object.
        task_id = (submit_data.get("output") or {}).get("task_id")
        if not task_id:
            raise HTTPException(500, f"未获取到 task_id: {submit_data}")

        poll_url = f"{base}/tasks/{task_id}"
        for _ in range(30):
            await asyncio.sleep(1)
            poll = await http.get(poll_url, headers=asr_headers, timeout=10)
            poll_data = _asr_json(poll, poll_url)
            output = poll_data.get("output") or {}
            status = output.get("task_status", "")

            if status == "SUCCEEDED":
                results = output.get("results") or []
                log.info("[media_parser] ASR SUCCEEDED results: %s", results)
                if not results:
                    return "(识别结果为空)"
                first = results[0]
                trans_url = first.get("transcription_url", "")
                if trans_url:
                    # The transcript is stored at a separate URL; it is
                    # fetched without the Authorization header.
                    trans_resp = await http.get(trans_url, timeout=10)
                    # Decode with the same helper as submit/poll so a
                    # non-JSON body yields a diagnosable HTTPException.
                    trans_data = _asr_json(trans_resp, trans_url)
                    log.info("[media_parser] transcription_url content: %s", str(trans_data)[:500])
                    transcripts = trans_data.get("transcripts") or []
                    text = transcripts[0].get("text", "") if transcripts else ""
                else:
                    text = first.get("transcription", "")
                return text or "(识别结果为空)"

            if status in ("FAILED", "CANCELLED"):
                raise HTTPException(500, f"识别任务失败: {output.get('message', status)}")

    raise HTTPException(500, "语音识别超时(30秒)")
|
||
|
||
|
||
async def _parse_visual(kind: str, media_bytes: bytes, content_type: str) -> str:
    """Ask the vision model for a short Chinese description of the media.

    The bytes are embedded as a base64 ``data:`` URL and sent in a single
    user message together with a text prompt.

    Args:
        kind: "image" selects the image prompt; any other value selects the
            video-screenshot prompt.
        media_bytes: Raw media bytes.
            NOTE(review): for "video" these bytes are still labelled as an
            image MIME type, so presumably a still frame is supplied;
            confirm against ``resolve_media``.
        content_type: Content type of the media; "png" and "webp" are
            recognized, everything else (including empty or ``None``) is
            labelled ``image/jpeg``.

    Returns:
        The model's reply text, or "" when the reply is empty or the
        response contains no choices.
    """
    # Tolerate a missing content type instead of failing on None.lower().
    ct = (content_type or "").lower()
    if "png" in ct:
        mime = "image/png"
    elif "webp" in ct:
        mime = "image/webp"
    else:
        mime = "image/jpeg"

    b64 = base64.b64encode(media_bytes).decode()
    data_url = f"data:{mime};base64,{b64}"
    if kind == "image":
        prompt = "请用中文简洁描述这张图片的内容。"
    else:
        prompt = "请用中文简洁描述这个视频截图的内容。"

    client, ai = await get_client_for("vision")
    resp_ai = await client.chat.completions.create(
        model=ai["vision_model"],
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": data_url}},
                    {"type": "text", "text": prompt},
                ],
            }
        ],
        max_tokens=300,
    )
    # An empty "choices" list would raise IndexError below; treat it as an
    # empty description instead.
    if not resp_ai.choices:
        return ""
    return resp_ai.choices[0].message.content or ""
|