- 将默认回复详细程度从"detailed"调整为"medium",前后端保持一致
- 新增话题切换检测逻辑,当用户主动要求换话题时提供引导回复
- 优化上下文处理机制,仅在指代型追问时注入历史对话,避免模型复读旧内容
- 改进知识库检索逻辑,区分自包含问题和指代型问题的上下文需求
- 完善知识库完整性指令,确保回复详细程度与知识展开程度一致
- 重构知识库重建逻辑,支持递归扫描子目录中的文件,修复索引为空的问题
- 增强素材匹配算法,引入强信号检测机制,避免仅凭模糊匹配误发素材
- 新增素材开场白AI生成功能,支持图片、视频、文档等类型智能描述
- 改进知识库重建通知,显示具体的文件数、分片数及失败统计信息
338 lines
10 KiB
Go
338 lines
10 KiB
Go
package main
|
||
|
||
import (
|
||
"encoding/json"
|
||
"os"
|
||
"path/filepath"
|
||
"strings"
|
||
"time"
|
||
)
|
||
|
||
// Bounds applied to the per-conversation context history.
const (
	// autoReplyContextLimit is the maximum number of entries kept for one
	// conversation; older entries are discarded first.
	autoReplyContextLimit = 20

	// autoReplyContextPromptLimit is the character budget (counted in runes)
	// for the stored history of one conversation and for the context text
	// assembled from it.
	autoReplyContextPromptLimit = 4000
)
|
||
|
||
// autoReplyContextEntry is one remembered turn of a conversation. It is kept
// in memory by the engine and serialized as part of the context cache file.
type autoReplyContextEntry struct {
	// Role is "user" for customer messages and "assistant" for replies.
	Role string `json:"role"`
	// Content is the whitespace-trimmed message text.
	Content string `json:"content"`
	// NormalizedContent is Content after normalizeContextContent.
	NormalizedContent string `json:"normalizedContent"`
	// MessageType is copied from the incoming message; assistant entries
	// are recorded as "text".
	MessageType string `json:"messageType"`
	// ServerID and LocalID are copied from the incoming message and are left
	// empty for assistant entries.
	ServerID string `json:"serverId"`
	LocalID  string `json:"localId"`
	// CreatedAt is the Unix time (seconds) at which the entry was recorded.
	CreatedAt int64 `json:"createdAt"`
	// SenderName is the sender's nickname, or "assistant" for replies.
	SenderName string `json:"senderName"`
}
|
||
|
||
type autoReplyContextStore struct {
|
||
Conversations map[string][]autoReplyContextEntry `json:"conversations"`
|
||
LastSavedAt int64 `json:"lastSavedAt"`
|
||
}
|
||
|
||
// contextCachePathOverride, when non-blank, replaces the default context
// cache location returned by autoReplyContextCachePath.
// NOTE(review): presumably set by tests to redirect the cache file — confirm.
var contextCachePathOverride string
|
||
|
||
func autoReplyContextCachePath() string {
|
||
if strings.TrimSpace(contextCachePathOverride) != "" {
|
||
return contextCachePathOverride
|
||
}
|
||
return resolveAutoReplyPath("config/auto_reply_context_cache.json")
|
||
}
|
||
|
||
func (e *AutoReplyEngine) loadContextCache() error {
|
||
path := autoReplyContextCachePath()
|
||
data, err := os.ReadFile(path)
|
||
if err != nil {
|
||
if os.IsNotExist(err) {
|
||
e.mu.Lock()
|
||
if e.contextEntries == nil {
|
||
e.contextEntries = make(map[string][]autoReplyContextEntry)
|
||
}
|
||
e.mu.Unlock()
|
||
return nil
|
||
}
|
||
return err
|
||
}
|
||
var store autoReplyContextStore
|
||
if err := json.Unmarshal(data, &store); err != nil {
|
||
return err
|
||
}
|
||
e.mu.Lock()
|
||
e.contextEntries = make(map[string][]autoReplyContextEntry, len(store.Conversations))
|
||
for key, entries := range store.Conversations {
|
||
key = strings.TrimSpace(key)
|
||
if key == "" {
|
||
continue
|
||
}
|
||
e.contextEntries[key] = trimAutoReplyContextEntries(entries)
|
||
}
|
||
e.mu.Unlock()
|
||
return nil
|
||
}
|
||
|
||
func (e *AutoReplyEngine) saveContextCache() {
|
||
if err := e.saveContextCacheToDisk(); err != nil {
|
||
e.setLastErrorWithScope(autoReplyErrorScopeRecords, "conversation context save failed: "+err.Error())
|
||
}
|
||
}
|
||
|
||
func (e *AutoReplyEngine) saveContextCacheToDisk() error {
|
||
e.mu.Lock()
|
||
store := autoReplyContextStore{
|
||
Conversations: make(map[string][]autoReplyContextEntry, len(e.contextEntries)),
|
||
LastSavedAt: time.Now().Unix(),
|
||
}
|
||
for key, entries := range e.contextEntries {
|
||
store.Conversations[key] = append([]autoReplyContextEntry(nil), trimAutoReplyContextEntries(entries)...)
|
||
}
|
||
e.mu.Unlock()
|
||
path := autoReplyContextCachePath()
|
||
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
||
return err
|
||
}
|
||
return atomicWriteJSON(path, store)
|
||
}
|
||
|
||
func (e *AutoReplyEngine) rememberUserMessage(msg autoReplyMessage) {
|
||
e.rememberContextEntry(msg, autoReplyContextEntry{
|
||
Role: "user",
|
||
Content: strings.TrimSpace(msg.Content),
|
||
MessageType: msg.MessageType,
|
||
ServerID: msg.ServerID,
|
||
LocalID: msg.LocalID,
|
||
CreatedAt: time.Now().Unix(),
|
||
SenderName: msg.FromNickName,
|
||
})
|
||
}
|
||
|
||
func (e *AutoReplyEngine) rememberAssistantMessage(msg autoReplyMessage, answer string) {
|
||
e.rememberContextEntry(msg, autoReplyContextEntry{
|
||
Role: "assistant",
|
||
Content: strings.TrimSpace(answer),
|
||
MessageType: "text",
|
||
CreatedAt: time.Now().Unix(),
|
||
SenderName: "assistant",
|
||
})
|
||
}
|
||
|
||
func (e *AutoReplyEngine) rememberContextEntry(msg autoReplyMessage, entry autoReplyContextEntry) {
|
||
entry.Content = strings.TrimSpace(entry.Content)
|
||
if entry.Content == "" || strings.TrimSpace(msg.ConversationID) == "" {
|
||
return
|
||
}
|
||
entry.Role = strings.TrimSpace(entry.Role)
|
||
if entry.Role == "" {
|
||
entry.Role = "user"
|
||
}
|
||
if entry.CreatedAt <= 0 {
|
||
entry.CreatedAt = time.Now().Unix()
|
||
}
|
||
entry.NormalizedContent = normalizeContextContent(entry.Content)
|
||
key := e.contextKeyForMessage(msg)
|
||
e.mu.Lock()
|
||
if e.contextEntries == nil {
|
||
e.contextEntries = make(map[string][]autoReplyContextEntry)
|
||
}
|
||
entries := append(e.contextEntries[key], entry)
|
||
e.contextEntries[key] = trimAutoReplyContextEntries(entries)
|
||
e.mu.Unlock()
|
||
e.saveContextCache()
|
||
}
|
||
|
||
func (e *AutoReplyEngine) previousUserQuestion(msg autoReplyMessage) string {
|
||
entries := e.contextEntriesForMessage(msg)
|
||
for i := len(entries) - 1; i >= 0; i-- {
|
||
entry := entries[i]
|
||
if entry.Role == "user" && strings.TrimSpace(entry.Content) != "" {
|
||
return strings.TrimSpace(entry.Content)
|
||
}
|
||
}
|
||
return ""
|
||
}
|
||
|
||
// contextPromptForQuestion 仅在当前问题是指代型追问时,才把历史对话注入 AI prompt。
|
||
// 自包含问题(含“换话题”)不带历史,避免模型顺着旧话题继续答、甚至逐字复读上一条。
|
||
func (e *AutoReplyEngine) contextPromptForQuestion(question string, msg autoReplyMessage) string {
|
||
if !questionReferencesContext(question) {
|
||
return ""
|
||
}
|
||
return e.recentContextPrompt(msg, 6)
|
||
}
|
||
|
||
func (e *AutoReplyEngine) recentContextPrompt(msg autoReplyMessage, maxEntries int) string {
|
||
entries := e.contextEntriesForMessage(msg)
|
||
if len(entries) == 0 {
|
||
return ""
|
||
}
|
||
if maxEntries <= 0 {
|
||
maxEntries = 6
|
||
}
|
||
start := len(entries) - maxEntries
|
||
if start < 0 {
|
||
start = 0
|
||
}
|
||
var b strings.Builder
|
||
for _, entry := range entries[start:] {
|
||
content := strings.TrimSpace(entry.Content)
|
||
if content == "" {
|
||
continue
|
||
}
|
||
role := "客户"
|
||
if entry.Role == "assistant" {
|
||
role = "客服"
|
||
}
|
||
line := role + ":" + content
|
||
if b.Len()+len([]rune(line))+1 > autoReplyContextPromptLimit {
|
||
break
|
||
}
|
||
if b.Len() > 0 {
|
||
b.WriteString("\n")
|
||
}
|
||
b.WriteString(line)
|
||
}
|
||
return b.String()
|
||
}
|
||
|
||
func (e *AutoReplyEngine) contextualSearchText(question string, msg autoReplyMessage) string {
|
||
question = strings.TrimSpace(question)
|
||
// 只有“指代型追问”(如“它多少钱”“刚才那个再说说”)才需要把历史对话拼进检索 query。
|
||
// 自包含问题(如“今天星期几”“换个话题”)一旦带上历史,会把上一个话题的知识高分召回,
|
||
// 导致顺着旧话题继续答,甚至在 temperature=0 时逐字复读上一条回复。
|
||
if !questionReferencesContext(question) {
|
||
return question
|
||
}
|
||
contextText := e.recentContextPrompt(msg, 6)
|
||
if contextText == "" {
|
||
return question
|
||
}
|
||
return contextText + "\n当前问题:" + question
|
||
}
|
||
|
||
// topicSwitchPhrases lists common ways a customer asks to change the topic.
// The phrases themselves do not name a new topic.
var topicSwitchPhrases = []string{
	"换个话题",
	"换一个话题",
	"换个问题",
	"换一个问题",
	"换话题",
	"换个方向",
	"聊点别的",
	"说点别的",
	"聊别的",
	"说别的",
	"不聊这个",
	"不说这个",
	"别聊这个",
	"别说这个",
}
|
||
|
||
// strongAnaphoraTokens are expressions that essentially only occur when a
// customer follows up on earlier turns; a single hit marks the question as an
// anaphoric follow-up.
var strongAnaphoraTokens = []string{
	// Demonstratives and pronouns.
	"它", "它们", "这个", "那个", "这款", "那款", "这种", "那种", "这台", "那台",
	// References to earlier turns and requests to continue.
	"上面", "刚才", "刚刚", "接着", "继续", "还有没有", "还有别的", "展开说",
	// Requests for more detail and explicit back-references.
	"详细说", "多说点", "再说说", "具体点", "上一个", "上一条", "前面说", "之前说", "你说的",
}
|
||
|
||
// weakAnaphoraTokens are weaker referring words. questionReferencesContext
// honours them only in very short questions, so that a longer, complete
// question which merely contains one of them is not mistaken for a follow-up.
var weakAnaphoraTokens = []string{
	"这",
	"那",
	"它",
	"这是",
	"那是",
	"这些",
	"那些",
}
|
||
|
||
// questionReferencesContext 判断当前问题是否依赖上文(指代型追问)。
|
||
func questionReferencesContext(question string) bool {
|
||
text := normalizeGreetingText(question)
|
||
if text == "" {
|
||
return false
|
||
}
|
||
for _, token := range strongAnaphoraTokens {
|
||
if strings.Contains(text, normalizeGreetingText(token)) {
|
||
return true
|
||
}
|
||
}
|
||
if len([]rune(text)) <= 8 {
|
||
for _, token := range weakAnaphoraTokens {
|
||
if strings.Contains(text, normalizeGreetingText(token)) {
|
||
return true
|
||
}
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
// isPureTopicSwitchMessage 判断消息是否“只是要求换话题”而没有带上新的问题。
|
||
// 这类消息应给一句干净的引导,不能把上一个话题的知识或回复拖过来。
|
||
func isPureTopicSwitchMessage(content string) bool {
|
||
text := normalizeGreetingText(content)
|
||
if text == "" {
|
||
return false
|
||
}
|
||
matched := false
|
||
for _, phrase := range topicSwitchPhrases {
|
||
normalized := normalizeGreetingText(phrase)
|
||
if strings.Contains(text, normalized) {
|
||
text = strings.ReplaceAll(text, normalized, "")
|
||
matched = true
|
||
}
|
||
}
|
||
if !matched {
|
||
return false
|
||
}
|
||
// 去掉切换短语和常见语气词后,几乎没有剩余内容,才算“纯换话题”。
|
||
text = strings.Trim(text, "吧把了啊呀呢嘛吗好的我们咱们来")
|
||
return len([]rune(text)) <= 2
|
||
}
|
||
|
||
// topicSwitchGuidanceAnswer returns the fixed reply that acknowledges a
// topic-switch request and invites the customer to ask something new.
func topicSwitchGuidanceAnswer() string {
	const guidance = "好的,那咱们聊点别的。您还想了解些什么,直接发我就行。"
	return guidance
}
|
||
|
||
func (e *AutoReplyEngine) contextEntriesForMessage(msg autoReplyMessage) []autoReplyContextEntry {
|
||
key := e.contextKeyForMessage(msg)
|
||
e.mu.Lock()
|
||
defer e.mu.Unlock()
|
||
return append([]autoReplyContextEntry(nil), e.contextEntries[key]...)
|
||
}
|
||
|
||
func (e *AutoReplyEngine) contextKeyForMessage(msg autoReplyMessage) string {
|
||
scope := strings.TrimSpace(e.identityScopeForClient(msg.ClientID))
|
||
if scope == "" {
|
||
scope = "client:" + stringFromAny(msg.ClientID)
|
||
}
|
||
robotID := strings.TrimSpace(msg.stableRobotID())
|
||
conversationID := strings.TrimSpace(msg.ConversationID)
|
||
return scope + "|" + robotID + "|" + conversationID
|
||
}
|
||
|
||
func trimAutoReplyContextEntries(entries []autoReplyContextEntry) []autoReplyContextEntry {
|
||
if len(entries) > autoReplyContextLimit {
|
||
entries = entries[len(entries)-autoReplyContextLimit:]
|
||
}
|
||
total := 0
|
||
for i := len(entries) - 1; i >= 0; i-- {
|
||
total += len([]rune(entries[i].Content))
|
||
if total > autoReplyContextPromptLimit {
|
||
return append([]autoReplyContextEntry(nil), entries[i+1:]...)
|
||
}
|
||
}
|
||
return append([]autoReplyContextEntry(nil), entries...)
|
||
}
|
||
|
||
func normalizeContextContent(content string) string {
|
||
return normalizeGreetingText(strings.TrimSpace(content))
|
||
}
|
||
|
||
func isPreviousQuestionQuery(content string) bool {
|
||
normalized := normalizeGreetingText(content)
|
||
if normalized == "" {
|
||
return false
|
||
}
|
||
for _, token := range []string{
|
||
"我上一个问题问了什么",
|
||
"我上个问题问了什么",
|
||
"我刚才问了什么",
|
||
"刚才我问了什么",
|
||
"上一句是什么",
|
||
"上一个问题是什么",
|
||
"上个问题是什么",
|
||
} {
|
||
if strings.Contains(normalized, normalizeGreetingText(token)) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
// previousQuestionAnswer formats the reply to "what was my previous
// question?". It quotes the trimmed previous question, or returns a fixed
// apology asking the customer to resend it when previous is blank.
func previousQuestionAnswer(previous string) string {
	if question := strings.TrimSpace(previous); question != "" {
		return "您上一个问题是:“" + question + "”。"
	}
	return "我这边暂时没有查到您上一条具体问题,您可以再发一遍,我继续帮您处理。"
}
|