feat(auto-reply): 优化自动回复逻辑和知识库功能

- 将默认回复详细程度从"detailed"调整为"medium",前后端保持一致
- 新增话题切换检测逻辑,当用户主动要求换话题时提供引导回复
- 优化上下文处理机制,仅在指代型追问时注入历史对话,避免模型复读旧内容
- 改进知识库检索逻辑,区分自包含问题和指代型问题的上下文需求
- 完善知识库完整性指令,确保回复详细程度与知识展开程度一致
- 重构知识库重建逻辑,支持递归扫描子目录中的文件,修复索引为空的问题
- 增强素材匹配算法,引入强信号检测机制,避免仅凭模糊匹配误发素材
- 新增素材开场白AI生成功能,支持图片、视频、文档等类型智能描述
- 改进知识库重建通知,显示具体的文件数、分片数及失败统计信息
This commit is contained in:
2026-06-26 14:25:35 +08:00
parent 1517be2a25
commit 849090a627
12 changed files with 809 additions and 40 deletions

View File

@@ -21,8 +21,13 @@ type AutoReplyMaterial struct {
MaterialType string `json:"materialType"`
Path string `json:"path"`
Caption string `json:"caption"`
Priority int `json:"priority"`
Enabled bool `json:"enabled"`
// CaptionSource 标记 caption 的来源:
// "ai" —— 同步时由模型自动生成;重新同步可被再次刷新。
// "manual" —— 运营手工编写;重新同步绝不覆盖。
// "" —— 未知/历史数据;按需生成。
CaptionSource string `json:"captionSource,omitempty"`
Priority int `json:"priority"`
Enabled bool `json:"enabled"`
}
type autoReplyMaterialsFile struct {
@@ -93,8 +98,10 @@ func (e *AutoReplyEngine) collectMaterialMatches(materials []AutoReplyMaterial,
continue
}
path := resolveAutoReplyMaterialPath(root, material.Path)
score := materialMatchScore(searchText, material, hasSendIntent)
if score <= 0 {
score, strong := materialMatchScoreDetailed(searchText, material, hasSendIntent)
// 必须命中过强信号(整词关键词/问句模板,或整串标题/文件名)才算候选;
// 仅靠 2-gram 模糊片段凑分的弱命中直接丢弃,避免误发。
if score <= 0 || !strong {
continue
}
if _, err := os.Stat(path); err != nil {
@@ -211,12 +218,16 @@ func loadAutoReplyMaterials(indexPath string) ([]AutoReplyMaterial, error) {
return normalizeAutoReplyMaterials(list), nil
}
// materialCaptionGenerator produces a one-sentence opening line for a material,
// given the material itself and its already-resolved absolute path.
// Returning ok=false means generation failed or was skipped for this item;
// the caller should leave the existing caption untouched.
type materialCaptionGenerator func(material AutoReplyMaterial, absPath string) (caption string, ok bool)
func (e *AutoReplyEngine) syncAutoReplyMaterials() (autoReplyMaterialSyncResult, error) {
cfg := e.getConfig()
return syncAutoReplyMaterials(cfg.Materials.Directory, cfg.Materials.IndexPath)
return syncAutoReplyMaterials(cfg.Materials.Directory, cfg.Materials.IndexPath, e.materialCaptionGenerator())
}
func syncAutoReplyMaterials(root string, indexPath string) (autoReplyMaterialSyncResult, error) {
func syncAutoReplyMaterials(root string, indexPath string, generateCaption materialCaptionGenerator) (autoReplyMaterialSyncResult, error) {
result := autoReplyMaterialSyncResult{
Directory: resolveAutoReplyPath(root),
IndexPath: resolveAutoReplyPath(indexPath),
@@ -270,6 +281,11 @@ func syncAutoReplyMaterials(root string, indexPath string) (autoReplyMaterialSyn
return strings.ToLower(synced[i].Title) < strings.ToLower(synced[j].Title)
})
// 在写盘前为需要的素材生成开场白;generateCaption 为 nil(如未配置 AI 或单测)时整体跳过。
if generateCaption != nil {
applyMaterialCaptions(synced, result.Directory, generateCaption)
}
if err := os.MkdirAll(filepath.Dir(result.IndexPath), 0755); err != nil {
return result, err
}
@@ -435,7 +451,17 @@ func normalizeAutoReplyMaterials(items []AutoReplyMaterial) []AutoReplyMaterial
}
// materialMatchScore reports only the numeric match score of material against
// searchText. It delegates to materialMatchScoreDetailed and discards the
// strong-signal flag that function also returns.
func materialMatchScore(searchText string, material AutoReplyMaterial, hasSendIntent bool) int {
	total, _ := materialMatchScoreDetailed(searchText, material, hasSendIntent)
	return total
}
// materialMatchScoreDetailed 在打分之外额外返回 strong:是否命中过“强信号”。
// 强信号 = 整词关键词/问句模板命中,或整串标题/文件名命中。
// 仅靠 2-gram 模糊片段(fuzzyMaterialTokenScore)凑出的分数不算强信号——
// 这类弱命中只用于在多个强匹配之间排序,不能单独触发发送,避免误发。
func materialMatchScoreDetailed(searchText string, material AutoReplyMaterial, hasSendIntent bool) (int, bool) {
score := 0
strong := false
for _, keyword := range append(material.Keywords, material.QuestionPatterns...) {
keyword = strings.ToLower(strings.TrimSpace(keyword))
if keyword == "" || isGenericMaterialIntentToken(keyword) {
@@ -443,19 +469,21 @@ func materialMatchScore(searchText string, material AutoReplyMaterial, hasSendIn
}
if strings.Contains(searchText, keyword) {
score += 10
strong = true
}
}
for _, field := range []string{material.Title, filepath.Base(material.Path), strings.TrimSuffix(filepath.Base(material.Path), filepath.Ext(material.Path))} {
field = strings.ToLower(strings.TrimSpace(field))
if field != "" && strings.Contains(searchText, field) {
if field != "" && len([]rune(field)) >= 2 && strings.Contains(searchText, field) {
score += 4
strong = true
}
score += fuzzyMaterialTokenScore(searchText, field)
}
if hasSendIntent && score > 0 {
score += 3
}
return score
return score, strong
}
func isBroadAllMaterialRequest(query string) bool {