feat(auto-reply): 优化自动回复逻辑和知识库功能
- 将默认回复详细程度从"detailed"调整为"medium",前后端保持一致
- 新增话题切换检测逻辑,当用户主动要求换话题时提供引导回复
- 优化上下文处理机制,仅在指代型追问时注入历史对话,避免模型复读旧内容
- 改进知识库检索逻辑,区分自包含问题和指代型问题的上下文需求
- 完善知识库完整性指令,确保回复详细程度与知识展开程度一致
- 重构知识库重建逻辑,支持递归扫描子目录中的文件,修复索引为空的问题
- 增强素材匹配算法,引入强信号检测机制,避免仅凭模糊匹配误发素材
- 新增素材开场白AI生成功能,支持图片、视频、文档等类型智能描述
- 改进知识库重建通知,显示具体的文件数、分片数及失败统计信息
This commit is contained in:
@@ -191,6 +191,12 @@ func (e *AutoReplyEngine) processJob(job AutoReplyJob) {
|
||||
return
|
||||
}
|
||||
e.rememberUserMessage(msg)
|
||||
if isPureTopicSwitchMessage(msg.Content) {
|
||||
if err := e.replyTextWithTimings(msg, topicSwitchGuidanceAnswer(), "topic_switch_guidance", nil, currentTimings()); err != nil {
|
||||
e.setLastErrorWithScope(autoReplyErrorScopeRecords, "topic switch reply failed: "+err.Error())
|
||||
}
|
||||
return
|
||||
}
|
||||
if answer, ok := greetingAnswer(msg.Content); ok {
|
||||
if err := sendAutoReplyText(uint32(msg.ClientID), msg.ConversationID, answer); err != nil {
|
||||
e.handoffWithTimings(msg, "send_greeting_failed: "+err.Error(), nil, currentTimings())
|
||||
|
||||
@@ -46,7 +46,7 @@ func (e *AutoReplyEngine) getConfig() config.AutoReplyConfig {
|
||||
cfg.AI.MaxTokens = 700
|
||||
}
|
||||
if strings.TrimSpace(cfg.AI.ReplyDetail) == "" {
|
||||
cfg.AI.ReplyDetail = "detailed"
|
||||
cfg.AI.ReplyDetail = "medium"
|
||||
}
|
||||
if cfg.Knowledge.TopK <= 0 {
|
||||
cfg.Knowledge.TopK = 3
|
||||
@@ -69,8 +69,8 @@ func (e *AutoReplyEngine) askAI(question string, hits []KnowledgeChunk, msg auto
|
||||
return nil, fmt.Errorf("AI模型未配置")
|
||||
}
|
||||
systemPrompt := buildAutoReplySystemPrompt(cfg)
|
||||
msg.ContextText = e.recentContextPrompt(msg, 6)
|
||||
userPrompt := buildAutoReplyUserPrompt(question, hits, msg, cfg.ReplyPolicy.UnknownAnswerToken)
|
||||
msg.ContextText = e.contextPromptForQuestion(question, msg)
|
||||
userPrompt := buildAutoReplyUserPrompt(question, hits, msg, cfg.ReplyPolicy.UnknownAnswerToken, cfg)
|
||||
switch strings.ToLower(strings.TrimSpace(cfg.AI.Provider)) {
|
||||
case "local", "ollama":
|
||||
return callOllamaChat(cfg.AI, systemPrompt, userPrompt)
|
||||
@@ -88,7 +88,7 @@ func (e *AutoReplyEngine) askGeneralAI(question string, msg autoReplyMessage) (*
|
||||
return nil, fmt.Errorf("AI模型未配置")
|
||||
}
|
||||
systemPrompt := buildGeneralAutoReplySystemPrompt(cfg)
|
||||
msg.ContextText = e.recentContextPrompt(msg, 6)
|
||||
msg.ContextText = e.contextPromptForQuestion(question, msg)
|
||||
userPrompt := buildGeneralAutoReplyUserPrompt(question, msg)
|
||||
switch strings.ToLower(strings.TrimSpace(cfg.AI.Provider)) {
|
||||
case "local", "ollama":
|
||||
@@ -137,7 +137,7 @@ func buildAutoReplySystemPrompt(cfg config.AutoReplyConfig) string {
|
||||
if token == "" {
|
||||
token = "NO_ANSWER"
|
||||
}
|
||||
return prependAISystemPrompt(cfg, "你是企业微信客服。请基于提供的知识库片段,用自然亲切的语气回答客户问题。"+replyDetailInstruction(cfg)+"如果知识库里有详细内容,请完整展开说明,不要只列标题。知识库不足以确定答案时,只输出 "+token+"。不要编造政策、价格、承诺、库存或物流时效。客户要求人工、投诉、退款、合同、发票、赔偿或价格特殊审批时,也只输出 "+token+"。")
|
||||
return prependAISystemPrompt(cfg, "你是企业微信客服。请基于提供的知识库片段,用自然亲切的语气回答客户问题。"+replyDetailInstruction(cfg)+knowledgeCompletenessInstruction(cfg)+"知识库不足以确定答案时,只输出 "+token+"。不要编造政策、价格、承诺、库存或物流时效。客户要求人工、投诉、退款、合同、发票、赔偿或价格特殊审批时,也只输出 "+token+"。")
|
||||
}
|
||||
|
||||
func buildGeneralAutoReplySystemPrompt(cfg config.AutoReplyConfig) string {
|
||||
@@ -184,12 +184,26 @@ func replyDetailInstruction(cfg config.AutoReplyConfig) string {
|
||||
case "concise":
|
||||
return "回复简洁直接,1-2句话说清楚核心内容即可。"
|
||||
case "medium":
|
||||
return "回复适度详细,2-4句话,说明关键信息和注意事项。"
|
||||
return "回复适度详细,2-4句话,说明关键信息和注意事项,不要罗列大段条目。"
|
||||
default:
|
||||
return "回复详细充分,把知识库的相关内容完整说清楚,让客户能理解具体情况。语气要自然,像真人对话一样,不要用模板化的官方表达。"
|
||||
}
|
||||
}
|
||||
|
||||
// knowledgeCompletenessInstruction 控制"知识库片段要展开到多细"。
|
||||
// 这条指令必须与 replyDetailInstruction 一致,否则会出现"选了中等却仍写长文"的矛盾:
|
||||
// detailed 才要求完整展开;concise/medium 只挑与问题最相关的部分作答,避免又慢又被 max_tokens 截断。
|
||||
func knowledgeCompletenessInstruction(cfg config.AutoReplyConfig) string {
|
||||
switch strings.ToLower(strings.TrimSpace(cfg.AI.ReplyDetail)) {
|
||||
case "concise":
|
||||
return "只回答客户这一句问的内容,挑知识库里最相关的一点说清楚,不要把整段资料都搬出来。"
|
||||
case "medium":
|
||||
return "只针对客户当前的问题作答,从知识库里挑最相关的关键信息,不要把不相关的条目也一并列出。"
|
||||
default:
|
||||
return "如果知识库里有详细内容,请完整展开说明,不要只列标题。"
|
||||
}
|
||||
}
|
||||
|
||||
func effectiveReplyMaxTokens(cfg config.AIConfig) int {
|
||||
maxTokens := cfg.MaxTokens
|
||||
switch strings.ToLower(strings.TrimSpace(cfg.ReplyDetail)) {
|
||||
@@ -220,7 +234,7 @@ func buildGeneralAutoReplyUserPrompt(question string, msg autoReplyMessage) stri
|
||||
b.WriteString("\n客户问题:")
|
||||
b.WriteString(question)
|
||||
if contextText := strings.TrimSpace(msg.ContextText); contextText != "" {
|
||||
b.WriteString("\n\n最近对话上下文:\n")
|
||||
b.WriteString("\n\n最近对话上下文(仅供理解称呼和承接,请只回答“客户问题”那一句,不要主动延续之前的话题):\n")
|
||||
b.WriteString(contextText)
|
||||
}
|
||||
b.WriteString("\n请直接给客户一条友好、可发送的回复。")
|
||||
@@ -253,7 +267,7 @@ func buildNonTextAutoReplyUserPrompt(msg autoReplyMessage) string {
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func buildAutoReplyUserPrompt(question string, hits []KnowledgeChunk, msg autoReplyMessage, noAnswerToken string) string {
|
||||
func buildAutoReplyUserPrompt(question string, hits []KnowledgeChunk, msg autoReplyMessage, noAnswerToken string, cfg config.AutoReplyConfig) string {
|
||||
noAnswerToken = strings.TrimSpace(noAnswerToken)
|
||||
if noAnswerToken == "" {
|
||||
noAnswerToken = "NO_ANSWER"
|
||||
@@ -268,14 +282,16 @@ func buildAutoReplyUserPrompt(question string, hits []KnowledgeChunk, msg autoRe
|
||||
b.WriteString("\n客户问题:")
|
||||
b.WriteString(question)
|
||||
if contextText := strings.TrimSpace(msg.ContextText); contextText != "" {
|
||||
b.WriteString("\n\n最近对话上下文:\n")
|
||||
b.WriteString("\n\n最近对话上下文(仅供理解称呼和承接,请只回答“客户问题”那一句,不要主动延续之前的话题):\n")
|
||||
b.WriteString(contextText)
|
||||
}
|
||||
b.WriteString("\n\n知识库片段:\n")
|
||||
for i, hit := range compactKnowledgeHitsForAI(hits) {
|
||||
b.WriteString(fmt.Sprintf("[%d] 来源:%s 分数:%.3f\n%s\n\n", i+1, hit.Source, hit.Score, hit.Content))
|
||||
}
|
||||
b.WriteString("请基于上面的知识库片段回答客户问题。如果片段中有详细说明(比如具体步骤、标准、要求等),请完整地告诉客户,不要只列出标题。用自然的口语化表达,避免生硬的书面语。")
|
||||
b.WriteString("请基于上面的知识库片段回答客户问题。")
|
||||
b.WriteString(knowledgeCompletenessInstruction(cfg))
|
||||
b.WriteString("用自然的口语化表达,避免生硬的书面语。")
|
||||
if isGenericProductQuery(question) {
|
||||
b.WriteString("如果客户询问全部产品、产品线或产品总览,请根据片段中能确定的内容整理产品/产品线清单;只列能确定的产品,不要说“knowledge库”“根据知识库”“知识库内容无法确定具体产品”,不要输出空的 Markdown 列表或连续星号。")
|
||||
}
|
||||
|
||||
@@ -149,6 +149,15 @@ func (e *AutoReplyEngine) previousUserQuestion(msg autoReplyMessage) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// contextPromptForQuestion 仅在当前问题是指代型追问时,才把历史对话注入 AI prompt。
|
||||
// 自包含问题(含“换话题”)不带历史,避免模型顺着旧话题继续答、甚至逐字复读上一条。
|
||||
func (e *AutoReplyEngine) contextPromptForQuestion(question string, msg autoReplyMessage) string {
|
||||
if !questionReferencesContext(question) {
|
||||
return ""
|
||||
}
|
||||
return e.recentContextPrompt(msg, 6)
|
||||
}
|
||||
|
||||
func (e *AutoReplyEngine) recentContextPrompt(msg autoReplyMessage, maxEntries int) string {
|
||||
entries := e.contextEntriesForMessage(msg)
|
||||
if len(entries) == 0 {
|
||||
@@ -184,14 +193,85 @@ func (e *AutoReplyEngine) recentContextPrompt(msg autoReplyMessage, maxEntries i
|
||||
}
|
||||
|
||||
func (e *AutoReplyEngine) contextualSearchText(question string, msg autoReplyMessage) string {
|
||||
contextText := e.recentContextPrompt(msg, 6)
|
||||
question = strings.TrimSpace(question)
|
||||
// 只有“指代型追问”(如“它多少钱”“刚才那个再说说”)才需要把历史对话拼进检索 query。
|
||||
// 自包含问题(如“今天星期几”“换个话题”)一旦带上历史,会把上一个话题的知识高分召回,
|
||||
// 导致顺着旧话题继续答,甚至在 temperature=0 时逐字复读上一条回复。
|
||||
if !questionReferencesContext(question) {
|
||||
return question
|
||||
}
|
||||
contextText := e.recentContextPrompt(msg, 6)
|
||||
if contextText == "" {
|
||||
return question
|
||||
}
|
||||
return contextText + "\n当前问题:" + question
|
||||
}
|
||||
|
||||
// topicSwitchPhrases lists common ways a customer asks to change the topic.
// The phrases themselves carry no new topic.
var topicSwitchPhrases = []string{
	"换个话题", "换一个话题", "换个问题", "换一个问题", "换话题", "换个方向",
	"聊点别的", "说点别的", "聊别的", "说别的", "不聊这个", "不说这个", "别聊这个", "别说这个",
}
|
||||
|
||||
// strongAnaphoraTokens are tokens that essentially only appear when the
// customer is following up on earlier conversation. Any hit marks the
// question as anaphoric, regardless of its length.
var strongAnaphoraTokens = []string{
	"它", "它们", "这个", "那个", "这款", "那款", "这种", "那种", "这台", "那台",
	"上面", "刚才", "刚刚", "接着", "继续", "还有没有", "还有别的", "展开说",
	"详细说", "多说点", "再说说", "具体点", "上一个", "上一条", "前面说", "之前说", "你说的",
}
|
||||
|
||||
// weakAnaphoraTokens are weaker referring words. They only count as a
// follow-up signal in very short questions, so that a complete question that
// carries its own subject is not misclassified as anaphoric.
var weakAnaphoraTokens = []string{"这", "那", "它", "这是", "那是", "这些", "那些"}
|
||||
|
||||
// questionReferencesContext 判断当前问题是否依赖上文(指代型追问)。
|
||||
func questionReferencesContext(question string) bool {
|
||||
text := normalizeGreetingText(question)
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
for _, token := range strongAnaphoraTokens {
|
||||
if strings.Contains(text, normalizeGreetingText(token)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
if len([]rune(text)) <= 8 {
|
||||
for _, token := range weakAnaphoraTokens {
|
||||
if strings.Contains(text, normalizeGreetingText(token)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// isPureTopicSwitchMessage 判断消息是否“只是要求换话题”而没有带上新的问题。
|
||||
// 这类消息应给一句干净的引导,不能把上一个话题的知识或回复拖过来。
|
||||
func isPureTopicSwitchMessage(content string) bool {
|
||||
text := normalizeGreetingText(content)
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
matched := false
|
||||
for _, phrase := range topicSwitchPhrases {
|
||||
normalized := normalizeGreetingText(phrase)
|
||||
if strings.Contains(text, normalized) {
|
||||
text = strings.ReplaceAll(text, normalized, "")
|
||||
matched = true
|
||||
}
|
||||
}
|
||||
if !matched {
|
||||
return false
|
||||
}
|
||||
// 去掉切换短语和常见语气词后,几乎没有剩余内容,才算“纯换话题”。
|
||||
text = strings.Trim(text, "吧把了啊呀呢嘛吗好的我们咱们来")
|
||||
return len([]rune(text)) <= 2
|
||||
}
|
||||
|
||||
// topicSwitchGuidanceAnswer returns the fixed guidance reply sent when the
// customer only asks to change the topic.
func topicSwitchGuidanceAnswer() string {
	return "好的,那咱们聊点别的。您还想了解些什么,直接发我就行。"
}
|
||||
|
||||
func (e *AutoReplyEngine) contextEntriesForMessage(msg autoReplyMessage) []autoReplyContextEntry {
|
||||
key := e.contextKeyForMessage(msg)
|
||||
e.mu.Lock()
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"fmt"
|
||||
"html"
|
||||
"io"
|
||||
"io/fs"
|
||||
"math"
|
||||
"os"
|
||||
"os/exec"
|
||||
@@ -111,19 +112,20 @@ func (e *AutoReplyEngine) rebuildKnowledgeIndex() (*KnowledgeIndex, error) {
|
||||
if err := os.MkdirAll(root, 0755); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
entries, err := os.ReadDir(root)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() {
|
||||
continue
|
||||
// 递归遍历子目录(filepath.WalkDir):知识库常按分类分文件夹组织
|
||||
// (如 01_产品与设备/、03_售后支持/01_故障排查/),与素材扫描保持一致。
|
||||
// 仅扫根目录会漏掉所有子目录文件,导致索引为空、向量召回失败。
|
||||
walkErr := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return nil // 单个条目出错跳过,不中断整体重建
|
||||
}
|
||||
ext := strings.ToLower(filepath.Ext(entry.Name()))
|
||||
if !isRootKnowledgeFile(entry.Name(), ext, allowed, cfg.Knowledge.IndexPath, cfg.Retrieval.EmbeddingIndexPath) {
|
||||
continue
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
ext := strings.ToLower(filepath.Ext(d.Name()))
|
||||
if !isRootKnowledgeFile(d.Name(), ext, allowed, cfg.Knowledge.IndexPath, cfg.Retrieval.EmbeddingIndexPath) {
|
||||
return nil
|
||||
}
|
||||
path := filepath.Join(root, entry.Name())
|
||||
chunks, err := parseKnowledgeFile(path, root)
|
||||
if err != nil {
|
||||
var warning knowledgeParseWarning
|
||||
@@ -133,15 +135,19 @@ func (e *AutoReplyEngine) rebuildKnowledgeIndex() (*KnowledgeIndex, error) {
|
||||
}
|
||||
} else {
|
||||
idx.FailedFiles = append(idx.FailedFiles, fmt.Sprintf("%s: %v", path, err))
|
||||
continue
|
||||
return nil
|
||||
}
|
||||
}
|
||||
if len(chunks) == 0 {
|
||||
idx.FailedFiles = append(idx.FailedFiles, fmt.Sprintf("%s: 未读取到可索引内容", path))
|
||||
continue
|
||||
return nil
|
||||
}
|
||||
idx.FileCount++
|
||||
idx.Chunks = append(idx.Chunks, chunks...)
|
||||
return nil
|
||||
})
|
||||
if walkErr != nil {
|
||||
return nil, walkErr
|
||||
}
|
||||
idx.LastIndexedAt = time.Now().Unix()
|
||||
indexPath := resolveAutoReplyPath(cfg.Knowledge.IndexPath)
|
||||
|
||||
@@ -71,6 +71,53 @@ func TestRebuildKnowledgeIndexCountsOnlyRootKnowledgeFiles(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestRebuildKnowledgeIndexScansSubdirectories 锁住递归扫描行为:
|
||||
// 知识库按分类分文件夹组织时(文件在子目录里),重建必须把子目录里的文件
|
||||
// 一并索引。这是“重置索引后向量仍为空”那个问题的根因回归测试。
|
||||
func TestRebuildKnowledgeIndexScansSubdirectories(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// 根目录故意不放任何知识文件,全部放进多层子目录。
|
||||
files := map[string]string{
|
||||
filepath.Join("01_产品", "数控机床", "VMC850规格.md"): "VMC850 立式加工中心,主轴转速 8000rpm。",
|
||||
filepath.Join("03_售后", "故障排查", "常见故障.md"): "报警 E01 表示伺服过载,请检查负载。",
|
||||
filepath.Join("readme.txt"): "", // 空文件,应进 FailedFiles 不计入 FileCount
|
||||
}
|
||||
for rel, content := range files {
|
||||
full := filepath.Join(dir, rel)
|
||||
if err := os.MkdirAll(filepath.Dir(full), 0755); err != nil {
|
||||
t.Fatalf("mkdir for %s: %v", rel, err)
|
||||
}
|
||||
if err := os.WriteFile(full, []byte(content), 0644); err != nil {
|
||||
t.Fatalf("write %s: %v", rel, err)
|
||||
}
|
||||
}
|
||||
|
||||
cfg := config.NewDefaultAutoReplyConfig()
|
||||
cfg.Knowledge.Directory = dir
|
||||
cfg.Knowledge.IndexPath = filepath.Join(dir, "index.json")
|
||||
cfg.Retrieval.EmbeddingIndexPath = filepath.Join(dir, "embedding_index.json")
|
||||
engine := testAutoReplyEngine(cfg)
|
||||
|
||||
idx, err := engine.rebuildKnowledgeIndex()
|
||||
if err != nil {
|
||||
t.Fatalf("rebuildKnowledgeIndex failed: %v", err)
|
||||
}
|
||||
if idx.FileCount != 2 {
|
||||
t.Fatalf("expected 2 indexed files from subdirectories, got %d (chunks=%d failed=%v)", idx.FileCount, len(idx.Chunks), idx.FailedFiles)
|
||||
}
|
||||
if len(idx.Chunks) == 0 {
|
||||
t.Fatal("expected chunks from subdirectory files, got none")
|
||||
}
|
||||
// 确认子目录文件的相对路径作为 Source 被正确记录(用 / 分隔)。
|
||||
sources := make(map[string]bool)
|
||||
for _, chunk := range idx.Chunks {
|
||||
sources[chunk.Source] = true
|
||||
}
|
||||
if !sources["01_产品/数控机床/VMC850规格.md"] {
|
||||
t.Fatalf("expected nested source path recorded, got sources=%v", sources)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePDFKnowledgeFileExtractsTextLayer(t *testing.T) {
|
||||
path := filepath.Join(t.TempDir(), "text.pdf")
|
||||
writeMinimalTextPDF(t, path, "AgentBox PDF content 123")
|
||||
|
||||
221
helper/auto_reply_material_caption.go
Normal file
221
helper/auto_reply_material_caption.go
Normal file
@@ -0,0 +1,221 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"qiweimanager/config"
|
||||
)
|
||||
|
||||
// materialCaptionGenerator 返回引擎当前可用的描述生成器。
|
||||
// 未配置 AI(BaseURL/Model 为空)时返回 nil,同步时整体跳过生成,
|
||||
// 素材沿用按类型的默认话术,不影响原有行为。
|
||||
func (e *AutoReplyEngine) materialCaptionGenerator() materialCaptionGenerator {
|
||||
cfg := e.getConfig()
|
||||
if strings.TrimSpace(cfg.AI.BaseURL) == "" || strings.TrimSpace(cfg.AI.Model) == "" {
|
||||
return nil
|
||||
}
|
||||
aiCfg := cfg.AI
|
||||
provider := strings.ToLower(strings.TrimSpace(cfg.AI.Provider))
|
||||
return func(material AutoReplyMaterial, absPath string) (string, bool) {
|
||||
switch strings.ToLower(strings.TrimSpace(material.MaterialType)) {
|
||||
case "image", "gif":
|
||||
// 本地图片喂 vision 模型“看图说话”;ollama 的多模态格式不同,退回按标题生成。
|
||||
if provider == "local" || provider == "ollama" {
|
||||
return generateMaterialCaptionByChat(aiCfg, provider, materialCaptionTitleUserPrompt(material))
|
||||
}
|
||||
return generateMaterialCaptionFromImage(aiCfg, material, absPath)
|
||||
case "video":
|
||||
// 视频不便直接喂模型,用标题让 chat 模型生成。
|
||||
return generateMaterialCaptionByChat(aiCfg, provider, materialCaptionTitleUserPrompt(material))
|
||||
default:
|
||||
// 文档/表格:抽取开头文字喂 chat 模型概括;抽不出就退回按标题生成。
|
||||
excerpt := materialDocumentExcerpt(absPath)
|
||||
return generateMaterialCaptionByChat(aiCfg, provider, materialCaptionDocumentUserPrompt(material, excerpt))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// applyMaterialCaptions 为需要的素材并发生成开场白,原地写回 synced。
|
||||
// 每个 goroutine 只写各自下标,互不重叠,无需加锁。
|
||||
func applyMaterialCaptions(materials []AutoReplyMaterial, root string, generate materialCaptionGenerator) {
|
||||
targets := make([]int, 0, len(materials))
|
||||
for i := range materials {
|
||||
if materialNeedsCaptionGeneration(materials[i]) {
|
||||
targets = append(targets, i)
|
||||
}
|
||||
}
|
||||
if len(targets) == 0 {
|
||||
return
|
||||
}
|
||||
const maxConcurrent = 3
|
||||
sem := make(chan struct{}, maxConcurrent)
|
||||
var wg sync.WaitGroup
|
||||
for _, idx := range targets {
|
||||
idx := idx
|
||||
wg.Add(1)
|
||||
sem <- struct{}{}
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
defer func() { <-sem }()
|
||||
absPath := resolveAutoReplyMaterialPath(root, materials[idx].Path)
|
||||
if caption, ok := generate(materials[idx], absPath); ok {
|
||||
materials[idx].Caption = caption
|
||||
materials[idx].CaptionSource = "ai"
|
||||
}
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// materialNeedsCaptionGeneration 判断某条素材是否需要(重新)生成开场白。
|
||||
// - manual:运营手写,绝不覆盖。
|
||||
// - ai:已生成过,避免每次同步重复花费 token;需要刷新走专门入口。
|
||||
// - 其它:caption 为空或仍是按类型的通用默认话术时才生成;
|
||||
// 运营在 JSON 里手填的非通用 caption(source 为空)视为人工,保留不动。
|
||||
func materialNeedsCaptionGeneration(material AutoReplyMaterial) bool {
|
||||
switch strings.ToLower(strings.TrimSpace(material.CaptionSource)) {
|
||||
case "manual", "ai":
|
||||
return false
|
||||
}
|
||||
caption := strings.TrimSpace(material.Caption)
|
||||
return caption == "" || isGenericMaterialCaption(caption)
|
||||
}
|
||||
|
||||
// isGenericMaterialCaption 判断 caption 是否为系统内置的通用默认话术(含历史版本)。
|
||||
func isGenericMaterialCaption(caption string) bool {
|
||||
if isLegacyGenericMaterialCaption(caption) {
|
||||
return true
|
||||
}
|
||||
norm := normalizeGreetingText(caption)
|
||||
if norm == "" {
|
||||
return true
|
||||
}
|
||||
for _, materialType := range []string{"image", "video", "gif", "file"} {
|
||||
if normalizeGreetingText(defaultMaterialCaption(materialType)) == norm {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func generateMaterialCaptionFromImage(aiCfg config.AIConfig, material AutoReplyMaterial, absPath string) (string, bool) {
|
||||
dataURL, err := imageDataURLFromFile(absPath)
|
||||
if err != nil {
|
||||
return "", false
|
||||
}
|
||||
result, err := callOpenAICompatibleVisionChat(aiCfg, materialCaptionSystemPrompt(), materialCaptionImageUserPrompt(material), dataURL)
|
||||
if err != nil || result == nil {
|
||||
return "", false
|
||||
}
|
||||
return sanitizeMaterialCaption(result.Answer)
|
||||
}
|
||||
|
||||
func generateMaterialCaptionByChat(aiCfg config.AIConfig, provider string, userPrompt string) (string, bool) {
|
||||
var (
|
||||
result *AIResult
|
||||
err error
|
||||
)
|
||||
if provider == "local" || provider == "ollama" {
|
||||
result, err = callOllamaChat(aiCfg, materialCaptionSystemPrompt(), userPrompt)
|
||||
} else {
|
||||
result, err = callOpenAICompatibleChat(aiCfg, materialCaptionSystemPrompt(), userPrompt)
|
||||
}
|
||||
if err != nil || result == nil {
|
||||
return "", false
|
||||
}
|
||||
return sanitizeMaterialCaption(result.Answer)
|
||||
}
|
||||
|
||||
// materialCaptionSystemPrompt returns the system prompt shared by all caption
// generation calls. It asks for a single, casual, chat-style sentence of at
// most 40 characters that says what the material is and why it is useful,
// without invented facts, quotes or explanations.
func materialCaptionSystemPrompt() string {
	return "你是企业微信里的真人客服,现在要把一份资料顺手发给客户。请写一句自然口语的开场白," +
		"要求:①只有一句话,不超过40字;②像微信里随手发东西时说的话,亲切自然,不要书面腔和客服模板腔" +
		"(不要用“您好”“为您提供”“请查收”这类);③结合资料内容点出这是什么、对客户有什么用;" +
		"④不要编造资料里没有的信息;⑤只输出这句话本身,不要加引号、解释或多余标点。"
}
|
||||
|
||||
func materialCaptionImageUserPrompt(material AutoReplyMaterial) string {
|
||||
return fmt.Sprintf("这是一张要发给客户的图片,标题是「%s」。请先看图片实际内容,再写一句发图时的自然开场白。", strings.TrimSpace(material.Title))
|
||||
}
|
||||
|
||||
func materialCaptionDocumentUserPrompt(material AutoReplyMaterial, excerpt string) string {
|
||||
title := strings.TrimSpace(material.Title)
|
||||
label := materialTypeLabel(material.MaterialType)
|
||||
excerpt = strings.TrimSpace(excerpt)
|
||||
if excerpt == "" {
|
||||
return fmt.Sprintf("这是一份要发给客户的%s,标题是「%s」。请根据标题写一句发送时的自然开场白。", label, title)
|
||||
}
|
||||
return fmt.Sprintf("这是一份要发给客户的%s,标题是「%s」。以下是它开头部分的内容节选:\n%s\n请结合内容写一句发送时的自然开场白。", label, title, excerpt)
|
||||
}
|
||||
|
||||
func materialCaptionTitleUserPrompt(material AutoReplyMaterial) string {
|
||||
return fmt.Sprintf("这是一个要发给客户的%s,标题是「%s」。请根据标题写一句发送时的自然开场白。", materialTypeLabel(material.MaterialType), strings.TrimSpace(material.Title))
|
||||
}
|
||||
|
||||
// materialTypeLabel maps a material type to the Chinese noun used in prompts.
// Matching ignores case and surrounding whitespace; any type other than
// image, video or gif is labelled as a generic file.
func materialTypeLabel(materialType string) string {
	switch strings.ToLower(strings.TrimSpace(materialType)) {
	case "image":
		return "图片"
	case "video":
		return "视频"
	case "gif":
		return "动图"
	default:
		return "文件"
	}
}
|
||||
|
||||
// materialDocumentExcerpt 复用知识库解析器抽取文档开头文字,供模型概括。
|
||||
// 不支持的格式(如 .pptx)会解析失败,返回空串,调用方退回按标题生成。
|
||||
func materialDocumentExcerpt(absPath string) string {
|
||||
chunks, err := parseKnowledgeFile(absPath, filepath.Dir(absPath))
|
||||
if err != nil || len(chunks) == 0 {
|
||||
return ""
|
||||
}
|
||||
var builder strings.Builder
|
||||
for _, chunk := range chunks {
|
||||
title := strings.TrimSpace(chunk.Title)
|
||||
content := strings.TrimSpace(chunk.Content)
|
||||
if title != "" {
|
||||
if builder.Len() > 0 {
|
||||
builder.WriteString("\n")
|
||||
}
|
||||
builder.WriteString(title)
|
||||
}
|
||||
if content != "" {
|
||||
if builder.Len() > 0 {
|
||||
builder.WriteString(" ")
|
||||
}
|
||||
builder.WriteString(content)
|
||||
}
|
||||
if len([]rune(builder.String())) >= 600 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return truncateText(strings.TrimSpace(builder.String()), 800)
|
||||
}
|
||||
|
||||
// sanitizeMaterialCaption cleans a raw model answer into a one-line caption.
//
// It strips surrounding whitespace and wrapping quotes, collapses every run of
// whitespace (including newlines) into a single space, rejects answers that
// contain the NO_ANSWER token in any letter case, and cuts the result to at
// most 60 runes. ok is false when nothing usable remains.
func sanitizeMaterialCaption(raw string) (string, bool) {
	const maxCaptionRunes = 60

	text := strings.TrimSpace(raw)
	if text == "" {
		return "", false
	}
	text = strings.Trim(text, "\"'“”‘’ \t\r\n")
	text = strings.Join(strings.Fields(text), " ")
	if text == "" {
		return "", false
	}
	if strings.Contains(strings.ToUpper(text), "NO_ANSWER") {
		return "", false
	}
	if runes := []rune(text); len(runes) > maxCaptionRunes {
		// text starts with a non-space rune here, so the cut is never empty;
		// TrimSpace only drops a space left dangling at the cut point.
		text = strings.TrimSpace(string(runes[:maxCaptionRunes]))
	}
	return text, true
}
|
||||
@@ -21,8 +21,13 @@ type AutoReplyMaterial struct {
|
||||
MaterialType string `json:"materialType"`
|
||||
Path string `json:"path"`
|
||||
Caption string `json:"caption"`
|
||||
Priority int `json:"priority"`
|
||||
Enabled bool `json:"enabled"`
|
||||
// CaptionSource 标记 caption 的来源:
|
||||
// "ai" —— 同步时由模型自动生成;重新同步可被再次刷新。
|
||||
// "manual" —— 运营手工编写;重新同步绝不覆盖。
|
||||
// "" —— 未知/历史数据;按需生成。
|
||||
CaptionSource string `json:"captionSource,omitempty"`
|
||||
Priority int `json:"priority"`
|
||||
Enabled bool `json:"enabled"`
|
||||
}
|
||||
|
||||
type autoReplyMaterialsFile struct {
|
||||
@@ -93,8 +98,10 @@ func (e *AutoReplyEngine) collectMaterialMatches(materials []AutoReplyMaterial,
|
||||
continue
|
||||
}
|
||||
path := resolveAutoReplyMaterialPath(root, material.Path)
|
||||
score := materialMatchScore(searchText, material, hasSendIntent)
|
||||
if score <= 0 {
|
||||
score, strong := materialMatchScoreDetailed(searchText, material, hasSendIntent)
|
||||
// 必须命中过强信号(整词关键词/问句模板,或整串标题/文件名)才算候选;
|
||||
// 仅靠 2-gram 模糊片段凑分的弱命中直接丢弃,避免误发。
|
||||
if score <= 0 || !strong {
|
||||
continue
|
||||
}
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
@@ -211,12 +218,16 @@ func loadAutoReplyMaterials(indexPath string) ([]AutoReplyMaterial, error) {
|
||||
return normalizeAutoReplyMaterials(list), nil
|
||||
}
|
||||
|
||||
// materialCaptionGenerator 根据素材本身(含已解析的绝对路径)生成一句开场白。
|
||||
// 返回 ok=false 表示本条生成失败/跳过,调用方应保留原 caption 不动。
|
||||
type materialCaptionGenerator func(material AutoReplyMaterial, absPath string) (caption string, ok bool)
|
||||
|
||||
func (e *AutoReplyEngine) syncAutoReplyMaterials() (autoReplyMaterialSyncResult, error) {
|
||||
cfg := e.getConfig()
|
||||
return syncAutoReplyMaterials(cfg.Materials.Directory, cfg.Materials.IndexPath)
|
||||
return syncAutoReplyMaterials(cfg.Materials.Directory, cfg.Materials.IndexPath, e.materialCaptionGenerator())
|
||||
}
|
||||
|
||||
func syncAutoReplyMaterials(root string, indexPath string) (autoReplyMaterialSyncResult, error) {
|
||||
func syncAutoReplyMaterials(root string, indexPath string, generateCaption materialCaptionGenerator) (autoReplyMaterialSyncResult, error) {
|
||||
result := autoReplyMaterialSyncResult{
|
||||
Directory: resolveAutoReplyPath(root),
|
||||
IndexPath: resolveAutoReplyPath(indexPath),
|
||||
@@ -270,6 +281,11 @@ func syncAutoReplyMaterials(root string, indexPath string) (autoReplyMaterialSyn
|
||||
return strings.ToLower(synced[i].Title) < strings.ToLower(synced[j].Title)
|
||||
})
|
||||
|
||||
// 在写盘前为需要的素材生成开场白;generateCaption 为 nil(如未配置 AI 或单测)时整体跳过。
|
||||
if generateCaption != nil {
|
||||
applyMaterialCaptions(synced, result.Directory, generateCaption)
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(filepath.Dir(result.IndexPath), 0755); err != nil {
|
||||
return result, err
|
||||
}
|
||||
@@ -435,7 +451,17 @@ func normalizeAutoReplyMaterials(items []AutoReplyMaterial) []AutoReplyMaterial
|
||||
}
|
||||
|
||||
func materialMatchScore(searchText string, material AutoReplyMaterial, hasSendIntent bool) int {
|
||||
score, _ := materialMatchScoreDetailed(searchText, material, hasSendIntent)
|
||||
return score
|
||||
}
|
||||
|
||||
// materialMatchScoreDetailed 在打分之外额外返回 strong:是否命中过“强信号”。
|
||||
// 强信号 = 整词关键词/问句模板命中,或整串标题/文件名命中。
|
||||
// 仅靠 2-gram 模糊片段(fuzzyMaterialTokenScore)凑出的分数不算强信号——
|
||||
// 这类弱命中只用于在多个强匹配之间排序,不能单独触发发送,避免误发。
|
||||
func materialMatchScoreDetailed(searchText string, material AutoReplyMaterial, hasSendIntent bool) (int, bool) {
|
||||
score := 0
|
||||
strong := false
|
||||
for _, keyword := range append(material.Keywords, material.QuestionPatterns...) {
|
||||
keyword = strings.ToLower(strings.TrimSpace(keyword))
|
||||
if keyword == "" || isGenericMaterialIntentToken(keyword) {
|
||||
@@ -443,19 +469,21 @@ func materialMatchScore(searchText string, material AutoReplyMaterial, hasSendIn
|
||||
}
|
||||
if strings.Contains(searchText, keyword) {
|
||||
score += 10
|
||||
strong = true
|
||||
}
|
||||
}
|
||||
for _, field := range []string{material.Title, filepath.Base(material.Path), strings.TrimSuffix(filepath.Base(material.Path), filepath.Ext(material.Path))} {
|
||||
field = strings.ToLower(strings.TrimSpace(field))
|
||||
if field != "" && strings.Contains(searchText, field) {
|
||||
if field != "" && len([]rune(field)) >= 2 && strings.Contains(searchText, field) {
|
||||
score += 4
|
||||
strong = true
|
||||
}
|
||||
score += fuzzyMaterialTokenScore(searchText, field)
|
||||
}
|
||||
if hasSendIntent && score > 0 {
|
||||
score += 3
|
||||
}
|
||||
return score
|
||||
return score, strong
|
||||
}
|
||||
|
||||
func isBroadAllMaterialRequest(query string) bool {
|
||||
|
||||
@@ -473,7 +473,7 @@ func TestSyncAutoReplyMaterialsAddsRemovesAndKeepsExistingConfig(t *testing.T) {
|
||||
t.Fatalf("write existing index: %v", err)
|
||||
}
|
||||
|
||||
result, err := syncAutoReplyMaterials(dir, indexPath)
|
||||
result, err := syncAutoReplyMaterials(dir, indexPath, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("syncAutoReplyMaterials failed: %v", err)
|
||||
}
|
||||
@@ -847,6 +847,214 @@ func TestSpecificMaterialRequestSendsOnlyBestMatch(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestFuzzyOnlyMatchDoesNotSendMaterial 锁住“强信号门槛”:
|
||||
// 客户问句只与素材长标题切出的 2-gram 片段(如“数字”)模糊相交,
|
||||
// 没有整词关键词/整串标题命中时,不应误发素材。
|
||||
func TestFuzzyOnlyMatchDoesNotSendMaterial(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "企业级AI数字员工宣传手册.pptx"), []byte("file"), 0644); err != nil {
|
||||
t.Fatalf("write material: %v", err)
|
||||
}
|
||||
indexPath := filepath.Join(dir, "materials.json")
|
||||
materials := autoReplyMaterialsFile{Materials: []AutoReplyMaterial{{
|
||||
ID: "ai-worker-brochure",
|
||||
Title: "企业级AI数字员工宣传手册",
|
||||
Keywords: []string{"企业级AI数字员工", "AI数字员工", "宣传手册"},
|
||||
MaterialType: "file",
|
||||
Path: "企业级AI数字员工宣传手册.pptx",
|
||||
Enabled: true,
|
||||
}}}
|
||||
data, err := json.Marshal(materials)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal materials: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(indexPath, data, 0644); err != nil {
|
||||
t.Fatalf("write materials index: %v", err)
|
||||
}
|
||||
|
||||
cfg := config.NewDefaultAutoReplyConfig()
|
||||
cfg.Materials.Directory = dir
|
||||
cfg.Materials.IndexPath = indexPath
|
||||
cfg.Materials.MaxPerReply = 2
|
||||
engine := testAutoReplyEngine(cfg)
|
||||
|
||||
// “数字证书”与标题只在“数字”这个 2-gram 上相交,属于弱命中,应被门槛挡掉。
|
||||
if matches := engine.matchMaterials("发我数字证书的资料", "发我数字证书的资料", nil); len(matches) != 0 {
|
||||
t.Fatalf("expected fuzzy-only match to be rejected, got %#v", matches)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMaterialNeedsCaptionGeneration(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
material AutoReplyMaterial
|
||||
want bool
|
||||
}{
|
||||
{"manual never regenerated", AutoReplyMaterial{Caption: "随手写的", CaptionSource: "manual"}, false},
|
||||
{"ai not regenerated", AutoReplyMaterial{Caption: "已生成", CaptionSource: "ai"}, false},
|
||||
{"empty caption needs", AutoReplyMaterial{MaterialType: "file"}, true},
|
||||
{"typed default needs", AutoReplyMaterial{Caption: defaultMaterialCaption("image"), MaterialType: "image"}, true},
|
||||
{"legacy generic needs", AutoReplyMaterial{Caption: "我把相关资料直接发你。"}, true},
|
||||
{"hand-written kept", AutoReplyMaterial{Caption: "这是AgentBox产线实拍图,您看下整体布局~"}, false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if got := materialNeedsCaptionGeneration(tc.material); got != tc.want {
|
||||
t.Fatalf("%s: materialNeedsCaptionGeneration = %v, want %v", tc.name, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeMaterialCaption(t *testing.T) {
|
||||
if got, ok := sanitizeMaterialCaption(" “这是产品图,您看下~” "); !ok || got != "这是产品图,您看下~" {
|
||||
t.Fatalf("expected quotes/space stripped, got %q ok=%v", got, ok)
|
||||
}
|
||||
if got, ok := sanitizeMaterialCaption("第一行\n第二行"); !ok || got != "第一行 第二行" {
|
||||
t.Fatalf("expected newline collapsed to single line, got %q ok=%v", got, ok)
|
||||
}
|
||||
if _, ok := sanitizeMaterialCaption(" "); ok {
|
||||
t.Fatal("expected blank input to be rejected")
|
||||
}
|
||||
if _, ok := sanitizeMaterialCaption("NO_ANSWER"); ok {
|
||||
t.Fatal("expected NO_ANSWER token to be rejected")
|
||||
}
|
||||
long := strings.Repeat("描述", 50)
|
||||
got, ok := sanitizeMaterialCaption(long)
|
||||
if !ok || len([]rune(got)) > 60 {
|
||||
t.Fatalf("expected long caption truncated to <=60 runes, got %d runes ok=%v", len([]rune(got)), ok)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyMaterialCaptionsOnlyFillsTargets(t *testing.T) {
|
||||
materials := []AutoReplyMaterial{
|
||||
{Path: "a.jpg", MaterialType: "image", Caption: defaultMaterialCaption("image")},
|
||||
{Path: "b.jpg", MaterialType: "image", Caption: "运营手写不能动", CaptionSource: "manual"},
|
||||
{Path: "c.jpg", MaterialType: "image", Caption: "上次生成的", CaptionSource: "ai"},
|
||||
}
|
||||
calls := 0
|
||||
generate := func(material AutoReplyMaterial, absPath string) (string, bool) {
|
||||
calls++
|
||||
return "生成给-" + material.Path, true
|
||||
}
|
||||
applyMaterialCaptions(materials, t.TempDir(), generate)
|
||||
|
||||
if calls != 1 {
|
||||
t.Fatalf("expected generator called once (only the default-caption item), got %d", calls)
|
||||
}
|
||||
if materials[0].Caption != "生成给-a.jpg" || materials[0].CaptionSource != "ai" {
|
||||
t.Fatalf("expected a.jpg regenerated and marked ai, got %#v", materials[0])
|
||||
}
|
||||
if materials[1].Caption != "运营手写不能动" || materials[1].CaptionSource != "manual" {
|
||||
t.Fatalf("manual caption must be preserved, got %#v", materials[1])
|
||||
}
|
||||
if materials[2].Caption != "上次生成的" {
|
||||
t.Fatalf("existing ai caption must not be regenerated, got %#v", materials[2])
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyMaterialCaptionsKeepsOriginalOnFailure(t *testing.T) {
|
||||
materials := []AutoReplyMaterial{
|
||||
{Path: "a.jpg", MaterialType: "image", Caption: defaultMaterialCaption("image")},
|
||||
}
|
||||
generate := func(material AutoReplyMaterial, absPath string) (string, bool) {
|
||||
return "", false // 模拟生成失败
|
||||
}
|
||||
applyMaterialCaptions(materials, t.TempDir(), generate)
|
||||
|
||||
if materials[0].Caption != defaultMaterialCaption("image") || materials[0].CaptionSource != "" {
|
||||
t.Fatalf("expected failed generation to leave caption untouched, got %#v", materials[0])
|
||||
}
|
||||
}
|
||||
|
||||
// TestSyncAutoReplyMaterialsGeneratesCaptionsEndToEnd 串起整条同步链路(mock 生成器,不调真实 AI):
|
||||
// 真实落盘文件 → 扫描发现 → 经过生成器 → 写回 materials.json → 重新加载校验。
|
||||
func TestSyncAutoReplyMaterialsGeneratesCaptionsEndToEnd(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// 新图片素材:无既有索引,应被生成器赋予描述。
|
||||
if err := os.WriteFile(filepath.Join(dir, "产线实拍.jpg"), []byte("jpg"), 0644); err != nil {
|
||||
t.Fatalf("write image material: %v", err)
|
||||
}
|
||||
// 运营手写 caption 的素材:必须原样保留,不被生成覆盖。
|
||||
if err := os.WriteFile(filepath.Join(dir, "报价单.pdf"), []byte("pdf"), 0644); err != nil {
|
||||
t.Fatalf("write manual material: %v", err)
|
||||
}
|
||||
indexPath := filepath.Join(dir, "materials.json")
|
||||
existing := autoReplyMaterialsFile{Materials: []AutoReplyMaterial{{
|
||||
ID: "manual-quote",
|
||||
Title: "报价单",
|
||||
Keywords: []string{"报价单", "报价"},
|
||||
MaterialType: "file",
|
||||
Path: "报价单.pdf",
|
||||
Caption: "这是最新报价,您过下目~",
|
||||
CaptionSource: "manual",
|
||||
Priority: 5,
|
||||
Enabled: true,
|
||||
}}}
|
||||
data, err := json.Marshal(existing)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal existing: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(indexPath, data, 0644); err != nil {
|
||||
t.Fatalf("write existing index: %v", err)
|
||||
}
|
||||
|
||||
// mock 生成器:记录被生成的素材路径,返回可识别的描述。
|
||||
var generated []string
|
||||
generate := func(material AutoReplyMaterial, absPath string) (string, bool) {
|
||||
generated = append(generated, material.Path)
|
||||
if _, statErr := os.Stat(absPath); statErr != nil {
|
||||
t.Errorf("generator got unreadable absPath %q: %v", absPath, statErr)
|
||||
}
|
||||
return "看下这张「" + material.Title + "」~", true
|
||||
}
|
||||
|
||||
result, err := syncAutoReplyMaterials(dir, indexPath, generate)
|
||||
if err != nil {
|
||||
t.Fatalf("sync failed: %v", err)
|
||||
}
|
||||
if result.Total != 2 {
|
||||
t.Fatalf("expected 2 materials total, got %#v", result)
|
||||
}
|
||||
|
||||
// 只有新图片应触发生成,手写素材不触发。
|
||||
if len(generated) != 1 || generated[0] != "产线实拍.jpg" {
|
||||
t.Fatalf("expected only the new image to be generated, got %#v", generated)
|
||||
}
|
||||
|
||||
got, err := loadAutoReplyMaterials(indexPath)
|
||||
if err != nil {
|
||||
t.Fatalf("reload synced materials: %v", err)
|
||||
}
|
||||
byPath := make(map[string]AutoReplyMaterial, len(got))
|
||||
for _, item := range got {
|
||||
byPath[item.Path] = item
|
||||
}
|
||||
|
||||
image, ok := byPath["产线实拍.jpg"]
|
||||
if !ok {
|
||||
t.Fatalf("image material missing after sync: %#v", got)
|
||||
}
|
||||
if image.Caption != "看下这张「产线实拍」~" || image.CaptionSource != "ai" {
|
||||
t.Fatalf("expected generated caption marked ai, got %#v", image)
|
||||
}
|
||||
|
||||
manual, ok := byPath["报价单.pdf"]
|
||||
if !ok {
|
||||
t.Fatalf("manual material missing after sync: %#v", got)
|
||||
}
|
||||
if manual.Caption != "这是最新报价,您过下目~" || manual.CaptionSource != "manual" {
|
||||
t.Fatalf("manual caption must survive sync untouched, got %#v", manual)
|
||||
}
|
||||
|
||||
// 再同步一次:ai 描述已存在,不应重复调用生成器。
|
||||
generated = nil
|
||||
if _, err := syncAutoReplyMaterials(dir, indexPath, generate); err != nil {
|
||||
t.Fatalf("second sync failed: %v", err)
|
||||
}
|
||||
if len(generated) != 0 {
|
||||
t.Fatalf("expected no regeneration on second sync, got %#v", generated)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPromptLeakageAnswerIsSanitized(t *testing.T) {
|
||||
answer := "您好,我是企业微信智能客服。\n话语规则:只用第一人称,不要说本系统、本AI。你的目标是让客户感觉自己在和这家公司的人对话。根据知识库回答。"
|
||||
cfg := config.NewDefaultAutoReplyConfig()
|
||||
@@ -1585,6 +1793,64 @@ func TestContextualSearchTextIncludesRecentQuestion(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// 自包含问题(如“今天星期几”)不应把上一个话题的对话拼进检索 query,
|
||||
// 否则会把旧话题的知识高分召回,导致顺着旧话题继续答。
|
||||
func TestContextualSearchTextSkipsContextForSelfContainedQuestion(t *testing.T) {
|
||||
withTestContextCachePath(t)
|
||||
cfg := config.NewDefaultAutoReplyConfig()
|
||||
engine := testAutoReplyEngine(cfg)
|
||||
prev := autoReplyMessage{ClientID: 7, RobotID: "robot-user", ConversationID: "S:robot-user_customer-user", FromWxID: "customer-user", Content: "IRB 1200是什么"}
|
||||
engine.rememberUserMessage(prev)
|
||||
engine.rememberAssistantMessage(prev, "IRB 1200是一款紧凑型6轴工业机器人,重复定位精度±0.02mm。")
|
||||
|
||||
question := "今天星期几"
|
||||
searchText := engine.contextualSearchText(question, autoReplyMessage{ClientID: 7, RobotID: "robot-user", ConversationID: "S:robot-user_customer-user", FromWxID: "customer-user", Content: question})
|
||||
if searchText != question {
|
||||
t.Fatalf("self-contained question should not carry previous topic into search, got %q", searchText)
|
||||
}
|
||||
if strings.Contains(searchText, "IRB") {
|
||||
t.Fatalf("search text leaked previous topic: %q", searchText)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQuestionReferencesContext(t *testing.T) {
|
||||
cases := []struct {
|
||||
question string
|
||||
want bool
|
||||
}{
|
||||
{"它多少钱", true}, // 它多少钱
|
||||
{"这个怎么用", true}, // 这个怎么用
|
||||
{"刚才那个再说说", true}, // 刚才那个再说说
|
||||
{"继续", true}, // 继续
|
||||
{"今天星期几", false}, // 今天星期几
|
||||
{"你们有什么产品", false}, // 你们有什么产品
|
||||
{"换个话题吧", false}, // 换个话题吧
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := questionReferencesContext(c.question); got != c.want {
|
||||
t.Errorf("questionReferencesContext(%q)=%v, want %v", c.question, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsPureTopicSwitchMessage(t *testing.T) {
|
||||
cases := []struct {
|
||||
content string
|
||||
want bool
|
||||
}{
|
||||
{"换个话题吧", true}, // 换个话题吧
|
||||
{"我们聊点别的", true}, // 我们聊点别的
|
||||
{"不聊这个了", true}, // 不聊这个了
|
||||
{"换个话题,你们产品多少钱", false}, // 换个话题,你们产品多少钱(带了新问题)
|
||||
{"今天星期几", false}, // 今天星期几
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := isPureTopicSwitchMessage(c.content); got != c.want {
|
||||
t.Errorf("isPureTopicSwitchMessage(%q)=%v, want %v", c.content, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestImageRecognitionContentEntersNormalReplyFlow(t *testing.T) {
|
||||
withTestContextCachePath(t)
|
||||
restoreClients := setTestIdentifiedClients(t, map[uint32]string{7: "robot-user"})
|
||||
@@ -2363,8 +2629,8 @@ func TestFastAutoReplyDefaults(t *testing.T) {
|
||||
if cfg.AI.MaxTokens != 700 {
|
||||
t.Fatalf("expected 700 max tokens, got %d", cfg.AI.MaxTokens)
|
||||
}
|
||||
if cfg.AI.ReplyDetail != "detailed" {
|
||||
t.Fatalf("expected detailed reply detail, got %s", cfg.AI.ReplyDetail)
|
||||
if cfg.AI.ReplyDetail != "medium" {
|
||||
t.Fatalf("expected medium reply detail, got %s", cfg.AI.ReplyDetail)
|
||||
}
|
||||
if cfg.AI.EnableThinking {
|
||||
t.Fatal("expected thinking to be disabled by default")
|
||||
|
||||
Reference in New Issue
Block a user