feat(auto-reply): 优化自动回复逻辑和知识库功能
- 将默认回复详细程度从"detailed"调整为"medium",前后端保持一致
- 新增话题切换检测逻辑,当用户主动要求换话题时提供引导回复
- 优化上下文处理机制,仅在指代型追问时注入历史对话,避免模型复读旧内容
- 改进知识库检索逻辑,区分自包含问题和指代型问题的上下文需求
- 完善知识库完整性指令,确保回复详细程度与知识展开程度一致
- 重构知识库重建逻辑,支持递归扫描子目录中的文件,修复索引为空的问题
- 增强素材匹配算法,引入强信号检测机制,避免仅凭模糊匹配误发素材
- 新增素材开场白AI生成功能,支持图片、视频、文档等类型智能描述
- 改进知识库重建通知,显示具体的文件数、分片数及失败统计信息
This commit is contained in:
@@ -473,7 +473,7 @@ func TestSyncAutoReplyMaterialsAddsRemovesAndKeepsExistingConfig(t *testing.T) {
|
||||
t.Fatalf("write existing index: %v", err)
|
||||
}
|
||||
|
||||
result, err := syncAutoReplyMaterials(dir, indexPath)
|
||||
result, err := syncAutoReplyMaterials(dir, indexPath, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("syncAutoReplyMaterials failed: %v", err)
|
||||
}
|
||||
@@ -847,6 +847,214 @@ func TestSpecificMaterialRequestSendsOnlyBestMatch(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestFuzzyOnlyMatchDoesNotSendMaterial 锁住“强信号门槛”:
|
||||
// 客户问句只与素材长标题切出的 2-gram 片段(如“数字”)模糊相交,
|
||||
// 没有整词关键词/整串标题命中时,不应误发素材。
|
||||
func TestFuzzyOnlyMatchDoesNotSendMaterial(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "企业级AI数字员工宣传手册.pptx"), []byte("file"), 0644); err != nil {
|
||||
t.Fatalf("write material: %v", err)
|
||||
}
|
||||
indexPath := filepath.Join(dir, "materials.json")
|
||||
materials := autoReplyMaterialsFile{Materials: []AutoReplyMaterial{{
|
||||
ID: "ai-worker-brochure",
|
||||
Title: "企业级AI数字员工宣传手册",
|
||||
Keywords: []string{"企业级AI数字员工", "AI数字员工", "宣传手册"},
|
||||
MaterialType: "file",
|
||||
Path: "企业级AI数字员工宣传手册.pptx",
|
||||
Enabled: true,
|
||||
}}}
|
||||
data, err := json.Marshal(materials)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal materials: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(indexPath, data, 0644); err != nil {
|
||||
t.Fatalf("write materials index: %v", err)
|
||||
}
|
||||
|
||||
cfg := config.NewDefaultAutoReplyConfig()
|
||||
cfg.Materials.Directory = dir
|
||||
cfg.Materials.IndexPath = indexPath
|
||||
cfg.Materials.MaxPerReply = 2
|
||||
engine := testAutoReplyEngine(cfg)
|
||||
|
||||
// “数字证书”与标题只在“数字”这个 2-gram 上相交,属于弱命中,应被门槛挡掉。
|
||||
if matches := engine.matchMaterials("发我数字证书的资料", "发我数字证书的资料", nil); len(matches) != 0 {
|
||||
t.Fatalf("expected fuzzy-only match to be rejected, got %#v", matches)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMaterialNeedsCaptionGeneration(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
material AutoReplyMaterial
|
||||
want bool
|
||||
}{
|
||||
{"manual never regenerated", AutoReplyMaterial{Caption: "随手写的", CaptionSource: "manual"}, false},
|
||||
{"ai not regenerated", AutoReplyMaterial{Caption: "已生成", CaptionSource: "ai"}, false},
|
||||
{"empty caption needs", AutoReplyMaterial{MaterialType: "file"}, true},
|
||||
{"typed default needs", AutoReplyMaterial{Caption: defaultMaterialCaption("image"), MaterialType: "image"}, true},
|
||||
{"legacy generic needs", AutoReplyMaterial{Caption: "我把相关资料直接发你。"}, true},
|
||||
{"hand-written kept", AutoReplyMaterial{Caption: "这是AgentBox产线实拍图,您看下整体布局~"}, false},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
if got := materialNeedsCaptionGeneration(tc.material); got != tc.want {
|
||||
t.Fatalf("%s: materialNeedsCaptionGeneration = %v, want %v", tc.name, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeMaterialCaption(t *testing.T) {
|
||||
if got, ok := sanitizeMaterialCaption(" “这是产品图,您看下~” "); !ok || got != "这是产品图,您看下~" {
|
||||
t.Fatalf("expected quotes/space stripped, got %q ok=%v", got, ok)
|
||||
}
|
||||
if got, ok := sanitizeMaterialCaption("第一行\n第二行"); !ok || got != "第一行 第二行" {
|
||||
t.Fatalf("expected newline collapsed to single line, got %q ok=%v", got, ok)
|
||||
}
|
||||
if _, ok := sanitizeMaterialCaption(" "); ok {
|
||||
t.Fatal("expected blank input to be rejected")
|
||||
}
|
||||
if _, ok := sanitizeMaterialCaption("NO_ANSWER"); ok {
|
||||
t.Fatal("expected NO_ANSWER token to be rejected")
|
||||
}
|
||||
long := strings.Repeat("描述", 50)
|
||||
got, ok := sanitizeMaterialCaption(long)
|
||||
if !ok || len([]rune(got)) > 60 {
|
||||
t.Fatalf("expected long caption truncated to <=60 runes, got %d runes ok=%v", len([]rune(got)), ok)
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyMaterialCaptionsOnlyFillsTargets(t *testing.T) {
|
||||
materials := []AutoReplyMaterial{
|
||||
{Path: "a.jpg", MaterialType: "image", Caption: defaultMaterialCaption("image")},
|
||||
{Path: "b.jpg", MaterialType: "image", Caption: "运营手写不能动", CaptionSource: "manual"},
|
||||
{Path: "c.jpg", MaterialType: "image", Caption: "上次生成的", CaptionSource: "ai"},
|
||||
}
|
||||
calls := 0
|
||||
generate := func(material AutoReplyMaterial, absPath string) (string, bool) {
|
||||
calls++
|
||||
return "生成给-" + material.Path, true
|
||||
}
|
||||
applyMaterialCaptions(materials, t.TempDir(), generate)
|
||||
|
||||
if calls != 1 {
|
||||
t.Fatalf("expected generator called once (only the default-caption item), got %d", calls)
|
||||
}
|
||||
if materials[0].Caption != "生成给-a.jpg" || materials[0].CaptionSource != "ai" {
|
||||
t.Fatalf("expected a.jpg regenerated and marked ai, got %#v", materials[0])
|
||||
}
|
||||
if materials[1].Caption != "运营手写不能动" || materials[1].CaptionSource != "manual" {
|
||||
t.Fatalf("manual caption must be preserved, got %#v", materials[1])
|
||||
}
|
||||
if materials[2].Caption != "上次生成的" {
|
||||
t.Fatalf("existing ai caption must not be regenerated, got %#v", materials[2])
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyMaterialCaptionsKeepsOriginalOnFailure(t *testing.T) {
|
||||
materials := []AutoReplyMaterial{
|
||||
{Path: "a.jpg", MaterialType: "image", Caption: defaultMaterialCaption("image")},
|
||||
}
|
||||
generate := func(material AutoReplyMaterial, absPath string) (string, bool) {
|
||||
return "", false // 模拟生成失败
|
||||
}
|
||||
applyMaterialCaptions(materials, t.TempDir(), generate)
|
||||
|
||||
if materials[0].Caption != defaultMaterialCaption("image") || materials[0].CaptionSource != "" {
|
||||
t.Fatalf("expected failed generation to leave caption untouched, got %#v", materials[0])
|
||||
}
|
||||
}
|
||||
|
||||
// TestSyncAutoReplyMaterialsGeneratesCaptionsEndToEnd 串起整条同步链路(mock 生成器,不调真实 AI):
|
||||
// 真实落盘文件 → 扫描发现 → 经过生成器 → 写回 materials.json → 重新加载校验。
|
||||
func TestSyncAutoReplyMaterialsGeneratesCaptionsEndToEnd(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// 新图片素材:无既有索引,应被生成器赋予描述。
|
||||
if err := os.WriteFile(filepath.Join(dir, "产线实拍.jpg"), []byte("jpg"), 0644); err != nil {
|
||||
t.Fatalf("write image material: %v", err)
|
||||
}
|
||||
// 运营手写 caption 的素材:必须原样保留,不被生成覆盖。
|
||||
if err := os.WriteFile(filepath.Join(dir, "报价单.pdf"), []byte("pdf"), 0644); err != nil {
|
||||
t.Fatalf("write manual material: %v", err)
|
||||
}
|
||||
indexPath := filepath.Join(dir, "materials.json")
|
||||
existing := autoReplyMaterialsFile{Materials: []AutoReplyMaterial{{
|
||||
ID: "manual-quote",
|
||||
Title: "报价单",
|
||||
Keywords: []string{"报价单", "报价"},
|
||||
MaterialType: "file",
|
||||
Path: "报价单.pdf",
|
||||
Caption: "这是最新报价,您过下目~",
|
||||
CaptionSource: "manual",
|
||||
Priority: 5,
|
||||
Enabled: true,
|
||||
}}}
|
||||
data, err := json.Marshal(existing)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal existing: %v", err)
|
||||
}
|
||||
if err := os.WriteFile(indexPath, data, 0644); err != nil {
|
||||
t.Fatalf("write existing index: %v", err)
|
||||
}
|
||||
|
||||
// mock 生成器:记录被生成的素材路径,返回可识别的描述。
|
||||
var generated []string
|
||||
generate := func(material AutoReplyMaterial, absPath string) (string, bool) {
|
||||
generated = append(generated, material.Path)
|
||||
if _, statErr := os.Stat(absPath); statErr != nil {
|
||||
t.Errorf("generator got unreadable absPath %q: %v", absPath, statErr)
|
||||
}
|
||||
return "看下这张「" + material.Title + "」~", true
|
||||
}
|
||||
|
||||
result, err := syncAutoReplyMaterials(dir, indexPath, generate)
|
||||
if err != nil {
|
||||
t.Fatalf("sync failed: %v", err)
|
||||
}
|
||||
if result.Total != 2 {
|
||||
t.Fatalf("expected 2 materials total, got %#v", result)
|
||||
}
|
||||
|
||||
// 只有新图片应触发生成,手写素材不触发。
|
||||
if len(generated) != 1 || generated[0] != "产线实拍.jpg" {
|
||||
t.Fatalf("expected only the new image to be generated, got %#v", generated)
|
||||
}
|
||||
|
||||
got, err := loadAutoReplyMaterials(indexPath)
|
||||
if err != nil {
|
||||
t.Fatalf("reload synced materials: %v", err)
|
||||
}
|
||||
byPath := make(map[string]AutoReplyMaterial, len(got))
|
||||
for _, item := range got {
|
||||
byPath[item.Path] = item
|
||||
}
|
||||
|
||||
image, ok := byPath["产线实拍.jpg"]
|
||||
if !ok {
|
||||
t.Fatalf("image material missing after sync: %#v", got)
|
||||
}
|
||||
if image.Caption != "看下这张「产线实拍」~" || image.CaptionSource != "ai" {
|
||||
t.Fatalf("expected generated caption marked ai, got %#v", image)
|
||||
}
|
||||
|
||||
manual, ok := byPath["报价单.pdf"]
|
||||
if !ok {
|
||||
t.Fatalf("manual material missing after sync: %#v", got)
|
||||
}
|
||||
if manual.Caption != "这是最新报价,您过下目~" || manual.CaptionSource != "manual" {
|
||||
t.Fatalf("manual caption must survive sync untouched, got %#v", manual)
|
||||
}
|
||||
|
||||
// 再同步一次:ai 描述已存在,不应重复调用生成器。
|
||||
generated = nil
|
||||
if _, err := syncAutoReplyMaterials(dir, indexPath, generate); err != nil {
|
||||
t.Fatalf("second sync failed: %v", err)
|
||||
}
|
||||
if len(generated) != 0 {
|
||||
t.Fatalf("expected no regeneration on second sync, got %#v", generated)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPromptLeakageAnswerIsSanitized(t *testing.T) {
|
||||
answer := "您好,我是企业微信智能客服。\n话语规则:只用第一人称,不要说本系统、本AI。你的目标是让客户感觉自己在和这家公司的人对话。根据知识库回答。"
|
||||
cfg := config.NewDefaultAutoReplyConfig()
|
||||
@@ -1585,6 +1793,64 @@ func TestContextualSearchTextIncludesRecentQuestion(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// 自包含问题(如“今天星期几”)不应把上一个话题的对话拼进检索 query,
|
||||
// 否则会把旧话题的知识高分召回,导致顺着旧话题继续答。
|
||||
func TestContextualSearchTextSkipsContextForSelfContainedQuestion(t *testing.T) {
|
||||
withTestContextCachePath(t)
|
||||
cfg := config.NewDefaultAutoReplyConfig()
|
||||
engine := testAutoReplyEngine(cfg)
|
||||
prev := autoReplyMessage{ClientID: 7, RobotID: "robot-user", ConversationID: "S:robot-user_customer-user", FromWxID: "customer-user", Content: "IRB 1200是什么"}
|
||||
engine.rememberUserMessage(prev)
|
||||
engine.rememberAssistantMessage(prev, "IRB 1200是一款紧凑型6轴工业机器人,重复定位精度±0.02mm。")
|
||||
|
||||
question := "今天星期几"
|
||||
searchText := engine.contextualSearchText(question, autoReplyMessage{ClientID: 7, RobotID: "robot-user", ConversationID: "S:robot-user_customer-user", FromWxID: "customer-user", Content: question})
|
||||
if searchText != question {
|
||||
t.Fatalf("self-contained question should not carry previous topic into search, got %q", searchText)
|
||||
}
|
||||
if strings.Contains(searchText, "IRB") {
|
||||
t.Fatalf("search text leaked previous topic: %q", searchText)
|
||||
}
|
||||
}
|
||||
|
||||
func TestQuestionReferencesContext(t *testing.T) {
|
||||
cases := []struct {
|
||||
question string
|
||||
want bool
|
||||
}{
|
||||
{"它多少钱", true}, // 它多少钱
|
||||
{"这个怎么用", true}, // 这个怎么用
|
||||
{"刚才那个再说说", true}, // 刚才那个再说说
|
||||
{"继续", true}, // 继续
|
||||
{"今天星期几", false}, // 今天星期几
|
||||
{"你们有什么产品", false}, // 你们有什么产品
|
||||
{"换个话题吧", false}, // 换个话题吧
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := questionReferencesContext(c.question); got != c.want {
|
||||
t.Errorf("questionReferencesContext(%q)=%v, want %v", c.question, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsPureTopicSwitchMessage(t *testing.T) {
|
||||
cases := []struct {
|
||||
content string
|
||||
want bool
|
||||
}{
|
||||
{"换个话题吧", true}, // 换个话题吧
|
||||
{"我们聊点别的", true}, // 我们聊点别的
|
||||
{"不聊这个了", true}, // 不聊这个了
|
||||
{"换个话题,你们产品多少钱", false}, // 换个话题,你们产品多少钱(带了新问题)
|
||||
{"今天星期几", false}, // 今天星期几
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := isPureTopicSwitchMessage(c.content); got != c.want {
|
||||
t.Errorf("isPureTopicSwitchMessage(%q)=%v, want %v", c.content, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestImageRecognitionContentEntersNormalReplyFlow(t *testing.T) {
|
||||
withTestContextCachePath(t)
|
||||
restoreClients := setTestIdentifiedClients(t, map[uint32]string{7: "robot-user"})
|
||||
@@ -2363,8 +2629,8 @@ func TestFastAutoReplyDefaults(t *testing.T) {
|
||||
if cfg.AI.MaxTokens != 700 {
|
||||
t.Fatalf("expected 700 max tokens, got %d", cfg.AI.MaxTokens)
|
||||
}
|
||||
if cfg.AI.ReplyDetail != "detailed" {
|
||||
t.Fatalf("expected detailed reply detail, got %s", cfg.AI.ReplyDetail)
|
||||
if cfg.AI.ReplyDetail != "medium" {
|
||||
t.Fatalf("expected medium reply detail, got %s", cfg.AI.ReplyDetail)
|
||||
}
|
||||
if cfg.AI.EnableThinking {
|
||||
t.Fatal("expected thinking to be disabled by default")
|
||||
|
||||
Reference in New Issue
Block a user