1007 lines
31 KiB
Go
1007 lines
31 KiB
Go
package main
|
||
|
||
import (
|
||
"crypto/sha1"
|
||
"encoding/hex"
|
||
"encoding/json"
|
||
"fmt"
|
||
"io/fs"
|
||
"os"
|
||
"path/filepath"
|
||
"sort"
|
||
"strings"
|
||
"time"
|
||
"unicode"
|
||
)
|
||
|
||
// autoReplyMaterialRepeatWindow is the look-back span used by the sent-material
// bookkeeping: entries older than this are pruned from the sent map, and a
// material whose fingerprint was recorded within it is filtered out.
const autoReplyMaterialRepeatWindow = 30 * time.Minute
|
||
|
||
// AutoReplyMaterial is one material entry as serialized in the materials
// index JSON.
type AutoReplyMaterial struct {
	ID               string   `json:"id"`
	Title            string   `json:"title"`
	Keywords         []string `json:"keywords"`
	QuestionPatterns []string `json:"questionPatterns"`
	MaterialType     string   `json:"materialType"`
	Path             string   `json:"path"`
	Caption          string   `json:"caption"`
	// CaptionSource records where the caption came from:
	//   "ai"     - generated automatically by the model during sync; a later sync may refresh it.
	//   "manual" - written by hand by an operator; a sync never overwrites it.
	//   ""       - unknown / legacy data; generated on demand.
	CaptionSource string `json:"captionSource,omitempty"`
	Priority      int    `json:"priority"`
	Enabled       bool   `json:"enabled"`
}
|
||
|
||
// autoReplyMaterialsFile is the object-shaped layout of the materials index:
// a JSON object whose "materials" key holds the entry list.
type autoReplyMaterialsFile struct {
	Materials []AutoReplyMaterial `json:"materials"`
}
|
||
|
||
// autoReplyMaterialMatch pairs a material with the resolved path of its file
// and the score it obtained during matching.
type autoReplyMaterialMatch struct {
	Material AutoReplyMaterial
	Path     string
	Score    int
}
|
||
|
||
// autoReplyMaterialSyncResult summarizes one index synchronization: how many
// entries were added or removed, the resulting entry list, and the resolved
// index path and materials directory that were used.
type autoReplyMaterialSyncResult struct {
	Added        int                 `json:"added"`
	Removed      int                 `json:"removed"`
	Total        int                 `json:"total"`
	Materials    []AutoReplyMaterial `json:"materials"`
	IndexPath    string              `json:"indexPath"`
	Directory    string              `json:"directory"`
	AddedPaths   []string            `json:"addedPaths"`
	RemovedPaths []string            `json:"removedPaths"`
}
|
||
|
||
func (e *AutoReplyEngine) matchMaterials(userQuery string, searchContext string, hits []KnowledgeChunk) []autoReplyMaterialMatch {
|
||
cfg := e.getConfig()
|
||
if !cfg.Materials.AutoSendEnabled {
|
||
return nil
|
||
}
|
||
materials, err := loadAutoReplyMaterials(cfg.Materials.IndexPath)
|
||
if err != nil {
|
||
if !os.IsNotExist(err) {
|
||
e.setLastErrorWithScope(autoReplyErrorScopeKnowledge, "load materials failed: "+err.Error())
|
||
}
|
||
}
|
||
if len(materials) == 0 {
|
||
materials = discoverAutoReplyMaterials(cfg.Materials.Directory)
|
||
}
|
||
if len(materials) == 0 {
|
||
return nil
|
||
}
|
||
|
||
requestedTypes := requestedMaterialTypes(userQuery)
|
||
hasSendIntent := hasMaterialSendIntent(userQuery)
|
||
explicitAll := isBroadAllMaterialRequest(userQuery)
|
||
hasSpecificSignal := materialQueryHasSpecificSignal(userQuery, materials)
|
||
if !hasSendIntent && !explicitAll {
|
||
return nil
|
||
}
|
||
if explicitAll {
|
||
if hasSpecificSignal {
|
||
queryText := buildMaterialSearchText(userQuery, "", nil, false)
|
||
return e.collectMaterialMatches(materials, cfg.Materials.Directory, requestedTypesForExplicitAll(userQuery, requestedTypes), queryText, true)
|
||
}
|
||
filteredTypes := requestedTypesForExplicitAll(userQuery, requestedTypes)
|
||
if len(filteredTypes) > 0 {
|
||
return e.collectMaterialMatchesByType(materials, cfg.Materials.Directory, filteredTypes)
|
||
}
|
||
return nil
|
||
}
|
||
if hasSendIntent && isGenericMaterialRequest(userQuery) && !hasSpecificSignal && strings.TrimSpace(searchContext) == strings.TrimSpace(userQuery) {
|
||
return nil
|
||
}
|
||
queryText := buildMaterialSearchText(userQuery, "", nil, false)
|
||
matches := e.collectMaterialMatches(materials, cfg.Materials.Directory, requestedTypes, queryText, hasSendIntent)
|
||
if len(matches) > 0 {
|
||
return limitMaterialMatches(matches, cfg.Materials.MaxPerReply)
|
||
}
|
||
if !hasSendIntent {
|
||
return nil
|
||
}
|
||
|
||
searchText := buildMaterialSearchText(userQuery, "", hits, true)
|
||
matches = e.collectMaterialMatches(materials, cfg.Materials.Directory, requestedTypes, searchText, hasSendIntent)
|
||
return limitMaterialMatches(matches, cfg.Materials.MaxPerReply)
|
||
}
|
||
|
||
func (e *AutoReplyEngine) collectMaterialMatches(materials []AutoReplyMaterial, root string, requestedTypes map[string]bool, searchText string, hasSendIntent bool) []autoReplyMaterialMatch {
|
||
matches := make([]autoReplyMaterialMatch, 0, 4)
|
||
for _, material := range materials {
|
||
if len(requestedTypes) > 0 && !requestedTypes[material.MaterialType] {
|
||
continue
|
||
}
|
||
path := resolveAutoReplyMaterialPath(root, material.Path)
|
||
score, strong := materialMatchScoreDetailed(searchText, material, hasSendIntent)
|
||
// 必须命中过强信号(整词关键词/问句模板,或整串标题/文件名)才算候选;
|
||
// 仅靠 2-gram 模糊片段凑分的弱命中直接丢弃,避免误发。
|
||
if score <= 0 || !strong {
|
||
continue
|
||
}
|
||
if _, err := os.Stat(path); err != nil {
|
||
e.setLastErrorWithScope(autoReplyErrorScopeKnowledge, fmt.Sprintf("material file missing: %s", path))
|
||
continue
|
||
}
|
||
matches = append(matches, autoReplyMaterialMatch{Material: material, Path: path, Score: score})
|
||
}
|
||
sort.Slice(matches, func(i, j int) bool {
|
||
if matches[i].Score != matches[j].Score {
|
||
return matches[i].Score > matches[j].Score
|
||
}
|
||
if matches[i].Material.Priority != matches[j].Material.Priority {
|
||
return matches[i].Material.Priority > matches[j].Material.Priority
|
||
}
|
||
return matches[i].Material.Title < matches[j].Material.Title
|
||
})
|
||
return matches
|
||
}
|
||
|
||
func (e *AutoReplyEngine) collectMaterialMatchesByType(materials []AutoReplyMaterial, root string, requestedTypes map[string]bool) []autoReplyMaterialMatch {
|
||
if len(requestedTypes) == 0 {
|
||
return nil
|
||
}
|
||
matches := make([]autoReplyMaterialMatch, 0, len(materials))
|
||
for _, material := range materials {
|
||
if !requestedTypes[material.MaterialType] {
|
||
continue
|
||
}
|
||
path := resolveAutoReplyMaterialPath(root, material.Path)
|
||
if _, err := os.Stat(path); err != nil {
|
||
e.setLastErrorWithScope(autoReplyErrorScopeKnowledge, fmt.Sprintf("material file missing: %s", path))
|
||
continue
|
||
}
|
||
matches = append(matches, autoReplyMaterialMatch{Material: material, Path: path, Score: 1})
|
||
}
|
||
sort.SliceStable(matches, func(i, j int) bool {
|
||
if matches[i].Material.Priority != matches[j].Material.Priority {
|
||
return matches[i].Material.Priority > matches[j].Material.Priority
|
||
}
|
||
return strings.ToLower(matches[i].Material.Path) < strings.ToLower(matches[j].Material.Path)
|
||
})
|
||
return matches
|
||
}
|
||
|
||
func (e *AutoReplyEngine) filterRecentlySentMaterials(msg autoReplyMessage, matches []autoReplyMaterialMatch) []autoReplyMaterialMatch {
|
||
if len(matches) == 0 {
|
||
return nil
|
||
}
|
||
now := time.Now()
|
||
e.mu.Lock()
|
||
if e.materialSent == nil {
|
||
e.materialSent = make(map[string]time.Time)
|
||
}
|
||
for key, ts := range e.materialSent {
|
||
if now.Sub(ts) > autoReplyMaterialRepeatWindow {
|
||
delete(e.materialSent, key)
|
||
}
|
||
}
|
||
filtered := make([]autoReplyMaterialMatch, 0, len(matches))
|
||
for _, match := range matches {
|
||
key := materialSentFingerprint(msg, match)
|
||
if key == "" {
|
||
filtered = append(filtered, match)
|
||
continue
|
||
}
|
||
if ts, ok := e.materialSent[key]; ok && now.Sub(ts) <= autoReplyMaterialRepeatWindow {
|
||
continue
|
||
}
|
||
filtered = append(filtered, match)
|
||
}
|
||
e.mu.Unlock()
|
||
return filtered
|
||
}
|
||
|
||
func (e *AutoReplyEngine) rememberSentMaterial(msg autoReplyMessage, match autoReplyMaterialMatch) {
|
||
key := materialSentFingerprint(msg, match)
|
||
if key == "" {
|
||
return
|
||
}
|
||
e.mu.Lock()
|
||
if e.materialSent == nil {
|
||
e.materialSent = make(map[string]time.Time)
|
||
}
|
||
e.materialSent[key] = time.Now()
|
||
e.mu.Unlock()
|
||
}
|
||
|
||
func materialSentFingerprint(msg autoReplyMessage, match autoReplyMaterialMatch) string {
|
||
conversationID := strings.TrimSpace(msg.ConversationID)
|
||
path := strings.ToLower(filepath.Clean(strings.TrimSpace(match.Path)))
|
||
if path == "." || path == "" {
|
||
path = strings.ToLower(filepath.Clean(strings.TrimSpace(match.Material.Path)))
|
||
}
|
||
if conversationID == "" || path == "" || path == "." {
|
||
return ""
|
||
}
|
||
return strings.Join([]string{msg.stableRobotID(), conversationID, path}, "|")
|
||
}
|
||
|
||
func limitMaterialMatches(matches []autoReplyMaterialMatch, maxPerReply int) []autoReplyMaterialMatch {
|
||
limit := maxPerReply
|
||
if limit <= 0 {
|
||
limit = 2
|
||
}
|
||
if len(matches) > limit {
|
||
matches = matches[:limit]
|
||
}
|
||
return matches
|
||
}
|
||
|
||
func buildMaterialSearchText(userQuery string, searchContext string, hits []KnowledgeChunk, includeContext bool) string {
|
||
parts := []string{userQuery}
|
||
if includeContext {
|
||
parts = append(parts, searchContext)
|
||
for _, hit := range hits {
|
||
parts = append(parts, hit.Source, hit.Title, hit.Content)
|
||
}
|
||
}
|
||
return strings.ToLower(strings.Join(parts, "\n"))
|
||
}
|
||
|
||
func hasMaterialSendIntent(query string) bool {
|
||
text := normalizeGreetingText(query)
|
||
if text == "" {
|
||
return false
|
||
}
|
||
return containsAnyMaterialIntent(text, []string{
|
||
"发我", "发给我", "发一下", "发下", "发来", "发送", "传给我", "给我发",
|
||
"给我", "我要", "我想要", "需要", "有吗", "有没有", "资料", "素材",
|
||
"手册", "文档", "文件", "附件", "说明书", "宣传册", "ppt", "pdf",
|
||
"视频", "图片", "表格", "清单", "案例", "模板",
|
||
})
|
||
}
|
||
|
||
func requestedMaterialTypes(query string) map[string]bool {
|
||
text := strings.ToLower(strings.TrimSpace(query))
|
||
if text == "" {
|
||
return nil
|
||
}
|
||
result := map[string]bool{}
|
||
if containsAnyMaterialIntent(text, []string{
|
||
"\u56fe\u7247", "\u7167\u7247", "\u76f8\u7247", "\u56fe\u50cf", "\u622a\u56fe", "\u914d\u56fe",
|
||
"image", "photo", "jpg", "jpeg", "png", "webp",
|
||
}) {
|
||
result["image"] = true
|
||
}
|
||
if containsAnyMaterialIntent(text, []string{
|
||
"\u89c6\u9891", "\u5f55\u50cf", "\u5f71\u7247", "\u77ed\u89c6\u9891", "video", "movie", "mp4", "mov",
|
||
}) {
|
||
result["video"] = true
|
||
}
|
||
if containsAnyMaterialIntent(text, []string{
|
||
"\u52a8\u56fe", "\u8868\u60c5\u5305", "gif",
|
||
}) {
|
||
result["gif"] = true
|
||
}
|
||
if containsAnyMaterialIntent(text, []string{
|
||
"\u6587\u4ef6", "\u6587\u6863", "\u6587\u7a3f", "\u9644\u4ef6", "\u8868\u683c",
|
||
"\u624b\u518c", "\u8d44\u6599", "\u65b9\u6848", "\u8bf4\u660e\u4e66",
|
||
"file", "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
|
||
}) {
|
||
result["file"] = true
|
||
}
|
||
if len(result) == 0 {
|
||
return nil
|
||
}
|
||
return result
|
||
}
|
||
|
||
func requestedTypesForExplicitAll(query string, requestedTypes map[string]bool) map[string]bool {
|
||
if len(requestedTypes) == 0 {
|
||
return nil
|
||
}
|
||
if len(requestedTypes) == 1 && requestedTypes["file"] && !explicitlyRequestsOnlyFiles(query) {
|
||
return nil
|
||
}
|
||
return requestedTypes
|
||
}
|
||
|
||
func explicitlyRequestsOnlyFiles(query string) bool {
|
||
text := normalizeGreetingText(query)
|
||
if text == "" {
|
||
return false
|
||
}
|
||
for _, token := range []string{"文档", "表格", "手册", "说明书", "ppt", "pdf", "doc", "docx", "xls", "xlsx"} {
|
||
if strings.Contains(text, normalizeGreetingText(token)) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
// containsAnyMaterialIntent reports whether text contains at least one of
// keywords as a substring. An empty keyword matches any text; an empty
// keyword list matches nothing.
func containsAnyMaterialIntent(text string, keywords []string) bool {
	for i := 0; i < len(keywords); i++ {
		if strings.Index(text, keywords[i]) >= 0 {
			return true
		}
	}
	return false
}
|
||
|
||
func loadAutoReplyMaterials(indexPath string) ([]AutoReplyMaterial, error) {
|
||
path := resolveAutoReplyPath(indexPath)
|
||
data, err := os.ReadFile(path)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
var wrapped autoReplyMaterialsFile
|
||
if err := json.Unmarshal(data, &wrapped); err == nil {
|
||
return normalizeAutoReplyMaterials(wrapped.Materials), nil
|
||
}
|
||
var list []AutoReplyMaterial
|
||
if err := json.Unmarshal(data, &list); err != nil {
|
||
return nil, err
|
||
}
|
||
return normalizeAutoReplyMaterials(list), nil
|
||
}
|
||
|
||
// materialCaptionGenerator produces a one-sentence opening line for a
// material, given the material itself and its already-resolved absolute path.
// Returning ok=false means generation failed or was skipped for this entry;
// the caller should then leave the existing caption untouched.
type materialCaptionGenerator func(material AutoReplyMaterial, absPath string) (caption string, ok bool)
|
||
|
||
func (e *AutoReplyEngine) syncAutoReplyMaterials() (autoReplyMaterialSyncResult, error) {
|
||
cfg := e.getConfig()
|
||
return syncAutoReplyMaterials(cfg.Materials.Directory, cfg.Materials.IndexPath, e.materialCaptionGenerator())
|
||
}
|
||
|
||
func syncAutoReplyMaterials(root string, indexPath string, generateCaption materialCaptionGenerator) (autoReplyMaterialSyncResult, error) {
|
||
result := autoReplyMaterialSyncResult{
|
||
Directory: resolveAutoReplyPath(root),
|
||
IndexPath: resolveAutoReplyPath(indexPath),
|
||
}
|
||
if err := os.MkdirAll(result.Directory, 0755); err != nil {
|
||
return result, err
|
||
}
|
||
|
||
existing, err := loadAutoReplyMaterials(indexPath)
|
||
if err != nil && !os.IsNotExist(err) {
|
||
return result, err
|
||
}
|
||
discovered := discoverAutoReplyMaterials(root)
|
||
discoveredByPath := make(map[string]AutoReplyMaterial, len(discovered))
|
||
for _, item := range discovered {
|
||
discoveredByPath[materialPathKey(item.Path)] = item
|
||
}
|
||
|
||
synced := make([]AutoReplyMaterial, 0, len(discovered))
|
||
seen := make(map[string]bool, len(discovered))
|
||
for _, item := range existing {
|
||
key := materialPathKey(item.Path)
|
||
if key == "" || seen[key] {
|
||
continue
|
||
}
|
||
if _, ok := discoveredByPath[key]; !ok {
|
||
result.Removed++
|
||
result.RemovedPaths = append(result.RemovedPaths, item.Path)
|
||
continue
|
||
}
|
||
synced = append(synced, item)
|
||
seen[key] = true
|
||
}
|
||
for _, item := range discovered {
|
||
key := materialPathKey(item.Path)
|
||
if key == "" || seen[key] {
|
||
continue
|
||
}
|
||
synced = append(synced, item)
|
||
seen[key] = true
|
||
result.Added++
|
||
result.AddedPaths = append(result.AddedPaths, item.Path)
|
||
}
|
||
|
||
sort.SliceStable(synced, func(i, j int) bool {
|
||
li := strings.ToLower(synced[i].Path)
|
||
lj := strings.ToLower(synced[j].Path)
|
||
if li != lj {
|
||
return li < lj
|
||
}
|
||
return strings.ToLower(synced[i].Title) < strings.ToLower(synced[j].Title)
|
||
})
|
||
|
||
// 在写盘前为需要的素材生成开场白;generateCaption 为 nil(如未配置 AI 或单测)时整体跳过。
|
||
if generateCaption != nil {
|
||
applyMaterialCaptions(synced, result.Directory, generateCaption)
|
||
}
|
||
|
||
if err := os.MkdirAll(filepath.Dir(result.IndexPath), 0755); err != nil {
|
||
return result, err
|
||
}
|
||
data, err := json.MarshalIndent(autoReplyMaterialsFile{Materials: synced}, "", " ")
|
||
if err != nil {
|
||
return result, err
|
||
}
|
||
if err := os.WriteFile(result.IndexPath, data, 0644); err != nil {
|
||
return result, err
|
||
}
|
||
result.Materials = synced
|
||
result.Total = len(synced)
|
||
return result, nil
|
||
}
|
||
|
||
func discoverAutoReplyMaterials(root string) []AutoReplyMaterial {
|
||
dir := resolveAutoReplyPath(root)
|
||
items := make([]AutoReplyMaterial, 0, 8)
|
||
// 递归遍历子目录(filepath.WalkDir):支持 config/materials 下任意层级嵌套。
|
||
// Path 存相对 root 的子路径并统一为 / 分隔;顶层文件相对路径即文件名,向后兼容旧索引。
|
||
_ = filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
|
||
if err != nil {
|
||
return nil // 单个条目出错跳过,不中断整体扫描
|
||
}
|
||
if d.IsDir() {
|
||
return nil
|
||
}
|
||
name := d.Name()
|
||
if strings.EqualFold(name, "materials.json") {
|
||
return nil
|
||
}
|
||
materialType := inferMaterialType(name)
|
||
if materialType == "" {
|
||
return nil
|
||
}
|
||
rel, relErr := filepath.Rel(dir, path)
|
||
if relErr != nil {
|
||
rel = name
|
||
}
|
||
rel = filepath.ToSlash(rel)
|
||
title := strings.TrimSuffix(name, filepath.Ext(name))
|
||
keywords := defaultMaterialKeywords(title, materialType)
|
||
// 把子目录名也并入关键词,便于"发我<目录名>的图/文件"命中。
|
||
// 只添加目录名本身,不再分词,避免关键词过度膨胀。
|
||
if dirPart := filepath.ToSlash(filepath.Dir(rel)); dirPart != "." && dirPart != "" {
|
||
for _, seg := range strings.Split(dirPart, "/") {
|
||
if seg = strings.TrimSpace(seg); seg != "" {
|
||
keywords = append(keywords, seg)
|
||
}
|
||
}
|
||
keywords = dedupeNonEmptyStrings(keywords)
|
||
}
|
||
items = append(items, AutoReplyMaterial{
|
||
ID: materialIDFromTitle(strings.TrimSuffix(rel, filepath.Ext(rel))),
|
||
Title: title,
|
||
Keywords: keywords,
|
||
QuestionPatterns: defaultMaterialQuestionPatterns(title),
|
||
MaterialType: materialType,
|
||
Path: rel,
|
||
Caption: defaultMaterialCaption(materialType),
|
||
Priority: 1,
|
||
Enabled: true,
|
||
})
|
||
return nil
|
||
})
|
||
return normalizeAutoReplyMaterials(items)
|
||
}
|
||
|
||
// materialIDFromTitle derives an identifier from title. The title is trimmed
// and lower-cased; letters and digits are kept, '-' and '_' are kept once
// something has been written, and every other run of characters becomes a
// single '-'. Leading and trailing '-' / '_' are trimmed. When nothing
// remains, the ID is "material-" plus the first 12 hex digits of the SHA-1 of
// the normalized title.
func materialIDFromTitle(title string) string {
	normalized := strings.TrimSpace(strings.ToLower(title))

	var sb strings.Builder
	afterSyntheticDash := false // true right after a '-' inserted for a separator run
	for _, ch := range normalized {
		if unicode.IsLetter(ch) || unicode.IsDigit(ch) {
			sb.WriteRune(ch)
			afterSyntheticDash = false
			continue
		}
		if sb.Len() == 0 {
			// Nothing written yet: leading separators are dropped.
			continue
		}
		if ch == '-' || ch == '_' {
			sb.WriteRune(ch)
			afterSyntheticDash = false
			continue
		}
		if !afterSyntheticDash {
			sb.WriteByte('-')
			afterSyntheticDash = true
		}
	}

	if id := strings.Trim(sb.String(), "-_"); id != "" {
		return id
	}
	digest := sha1.Sum([]byte(normalized))
	return "material-" + hex.EncodeToString(digest[:])[:12]
}
|
||
|
||
// defaultMaterialQuestionPatterns returns the stock request phrasings for a
// material title ("I want X", "send me X", "look at X", "do you have X",
// "send X to me", "need X"). A blank title yields nil.
func defaultMaterialQuestionPatterns(title string) []string {
	name := strings.TrimSpace(title)
	if name == "" {
		return nil
	}
	patterns := make([]string, 0, 6)
	patterns = append(patterns, "我要"+name)
	patterns = append(patterns, "发我"+name)
	patterns = append(patterns, "看"+name)
	patterns = append(patterns, "有没有"+name)
	patterns = append(patterns, "把"+name+"发我")
	patterns = append(patterns, "需要"+name)
	return patterns
}
|
||
|
||
func defaultMaterialKeywords(title string, materialType string) []string {
|
||
keywords := []string{strings.TrimSpace(title)}
|
||
keywords = append(keywords, materialSearchTokens(title)...)
|
||
switch materialType {
|
||
case "image":
|
||
keywords = append(keywords, specificMaterialTokensForType(materialType)...)
|
||
case "video":
|
||
keywords = append(keywords, specificMaterialTokensForType(materialType)...)
|
||
case "gif":
|
||
keywords = append(keywords, specificMaterialTokensForType(materialType)...)
|
||
default:
|
||
keywords = append(keywords, specificMaterialTokensForType(materialType)...)
|
||
}
|
||
return dedupeNonEmptyStrings(keywords)
|
||
}
|
||
|
||
// specificMaterialTokensForType returns the extra keywords attached to a
// material of the given type: video, image and gif each have their own list;
// every other type gets nil. The type is compared exactly as given.
func specificMaterialTokensForType(materialType string) []string {
	if materialType == "video" {
		return []string{"安装视频", "演示视频", "教程视频"}
	}
	if materialType == "image" {
		return []string{"示意图", "效果图", "截图"}
	}
	if materialType == "gif" {
		return []string{"动图"}
	}
	return nil
}
|
||
|
||
// materialPathKey returns the comparison key for a material path: trimmed,
// cleaned, converted to "/" separators and lower-cased. A blank path gives "".
func materialPathKey(path string) string {
	trimmed := strings.TrimSpace(path)
	if trimmed == "" {
		return ""
	}
	cleaned := filepath.Clean(trimmed)
	slashed := filepath.ToSlash(cleaned)
	return strings.ToLower(slashed)
}
|
||
|
||
func normalizeAutoReplyMaterials(items []AutoReplyMaterial) []AutoReplyMaterial {
|
||
result := make([]AutoReplyMaterial, 0, len(items))
|
||
for _, item := range items {
|
||
item.ID = strings.TrimSpace(item.ID)
|
||
item.Title = strings.TrimSpace(item.Title)
|
||
item.MaterialType = strings.ToLower(strings.TrimSpace(item.MaterialType))
|
||
item.Path = strings.TrimSpace(item.Path)
|
||
item.Caption = strings.TrimSpace(item.Caption)
|
||
if item.MaterialType == "" {
|
||
item.MaterialType = inferMaterialType(item.Path)
|
||
}
|
||
if item.Path == "" || item.MaterialType == "" {
|
||
continue
|
||
}
|
||
if !item.Enabled && strings.TrimSpace(item.ID+item.Title) == "" {
|
||
continue
|
||
}
|
||
result = append(result, item)
|
||
}
|
||
return result
|
||
}
|
||
|
||
func materialMatchScore(searchText string, material AutoReplyMaterial, hasSendIntent bool) int {
|
||
score, _ := materialMatchScoreDetailed(searchText, material, hasSendIntent)
|
||
return score
|
||
}
|
||
|
||
// materialMatchScoreDetailed 在打分之外额外返回 strong:是否命中过“强信号”。
|
||
// 强信号 = 整词关键词/问句模板命中,或整串标题/文件名命中。
|
||
// 仅靠 2-gram 模糊片段(fuzzyMaterialTokenScore)凑出的分数不算强信号——
|
||
// 这类弱命中只用于在多个强匹配之间排序,不能单独触发发送,避免误发。
|
||
func materialMatchScoreDetailed(searchText string, material AutoReplyMaterial, hasSendIntent bool) (int, bool) {
|
||
score := 0
|
||
strong := false
|
||
for _, keyword := range append(material.Keywords, material.QuestionPatterns...) {
|
||
keyword = strings.ToLower(strings.TrimSpace(keyword))
|
||
if keyword == "" || isGenericMaterialIntentToken(keyword) {
|
||
continue
|
||
}
|
||
if strings.Contains(searchText, keyword) {
|
||
score += 10
|
||
strong = true
|
||
}
|
||
}
|
||
for _, field := range []string{material.Title, filepath.Base(material.Path), strings.TrimSuffix(filepath.Base(material.Path), filepath.Ext(material.Path))} {
|
||
field = strings.ToLower(strings.TrimSpace(field))
|
||
if field != "" && len([]rune(field)) >= 2 && strings.Contains(searchText, field) {
|
||
score += 4
|
||
strong = true
|
||
}
|
||
score += fuzzyMaterialTokenScore(searchText, field)
|
||
}
|
||
if hasSendIntent && score > 0 {
|
||
score += 3
|
||
}
|
||
return score, strong
|
||
}
|
||
|
||
func isBroadAllMaterialRequest(query string) bool {
|
||
text := normalizeGreetingText(query)
|
||
if text == "" {
|
||
return false
|
||
}
|
||
phrases := []string{
|
||
"全部资料", "所有资料", "全部文件", "所有文件", "全部素材", "所有素材", "全部发", "全都发",
|
||
"都发我", "都发给我", "资料都发", "文件都发", "全套资料", "所有手册", "全部手册",
|
||
}
|
||
for _, phrase := range phrases {
|
||
if strings.Contains(text, normalizeGreetingText(phrase)) {
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
func isGenericMaterialRequest(query string) bool {
|
||
text := normalizeGreetingText(query)
|
||
if text == "" || !hasMaterialSendIntent(query) {
|
||
return false
|
||
}
|
||
generic := []string{
|
||
"资料", "文件", "文档", "附件", "素材", "手册", "说明书", "宣传册", "方案",
|
||
"模板", "案例", "清单", "表格", "图片", "照片", "截图", "视频", "ppt", "pdf", "doc", "docx", "xls", "xlsx",
|
||
"发我", "发给我", "发一个", "发下", "发来", "发送", "传给我", "给我发", "给我", "我要", "我想要", "需要", "有吗", "有没有",
|
||
}
|
||
remaining := text
|
||
for _, token := range generic {
|
||
remaining = strings.ReplaceAll(remaining, normalizeGreetingText(token), "")
|
||
}
|
||
remaining = strings.Trim(remaining, " \t\r\n,,。.!!??;;::、()()[]【】")
|
||
return len([]rune(remaining)) == 0
|
||
}
|
||
|
||
func materialQueryHasSpecificSignal(query string, materials []AutoReplyMaterial) bool {
|
||
text := strings.ToLower(normalizeGreetingText(query))
|
||
if text == "" {
|
||
return false
|
||
}
|
||
for _, material := range materials {
|
||
fields := []string{
|
||
material.Title,
|
||
filepath.Base(material.Path),
|
||
strings.TrimSuffix(filepath.Base(material.Path), filepath.Ext(material.Path)),
|
||
}
|
||
for _, keyword := range append(material.Keywords, material.QuestionPatterns...) {
|
||
if !isGenericMaterialIntentToken(keyword) {
|
||
fields = append(fields, keyword)
|
||
}
|
||
}
|
||
for _, field := range fields {
|
||
field = strings.ToLower(normalizeGreetingText(field))
|
||
if len([]rune(field)) >= 3 && strings.Contains(text, field) {
|
||
return true
|
||
}
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
func isGenericMaterialIntentToken(token string) bool {
|
||
token = normalizeGreetingText(token)
|
||
if token == "" {
|
||
return true
|
||
}
|
||
switch token {
|
||
case "资料", "文件", "文档", "附件", "素材", "手册", "说明书", "宣传册",
|
||
"方案", "模板", "案例", "清单", "表格", "图片", "照片", "截图",
|
||
"视频", "录像", "ppt", "pptx", "pdf", "doc", "docx", "xls", "xlsx",
|
||
"发我", "给我", "需要", "有没有", "我要", "发一下":
|
||
return true
|
||
default:
|
||
return false
|
||
}
|
||
}
|
||
|
||
func fuzzyMaterialTokenScore(searchText string, field string) int {
|
||
tokens := materialSearchTokens(field)
|
||
if len(tokens) == 0 {
|
||
return 0
|
||
}
|
||
score := 0
|
||
for _, token := range tokens {
|
||
if len([]rune(token)) < 2 {
|
||
continue
|
||
}
|
||
if isGenericMaterialIntentToken(token) {
|
||
continue
|
||
}
|
||
if strings.Contains(searchText, token) {
|
||
score += 2
|
||
}
|
||
}
|
||
return score
|
||
}
|
||
|
||
func materialSearchTokens(text string) []string {
|
||
text = strings.ToLower(strings.TrimSpace(text))
|
||
if text == "" {
|
||
return nil
|
||
}
|
||
separators := func(r rune) bool {
|
||
return !(unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.Is(unicode.Han, r))
|
||
}
|
||
parts := strings.FieldsFunc(text, separators)
|
||
result := make([]string, 0, len(parts)*2)
|
||
for _, part := range parts {
|
||
part = strings.TrimSpace(part)
|
||
if len([]rune(part)) < 2 {
|
||
continue
|
||
}
|
||
result = append(result, part)
|
||
runes := []rune(part)
|
||
if len(runes) > 4 {
|
||
for i := 0; i+2 <= len(runes); i++ {
|
||
result = append(result, string(runes[i:i+2]))
|
||
}
|
||
}
|
||
}
|
||
return dedupeNonEmptyStrings(result)
|
||
}
|
||
|
||
func resolveAutoReplyMaterialPath(root string, materialPath string) string {
|
||
materialPath = strings.TrimSpace(materialPath)
|
||
if filepath.IsAbs(materialPath) {
|
||
return filepath.Clean(materialPath)
|
||
}
|
||
return filepath.Join(resolveAutoReplyPath(root), filepath.Clean(materialPath))
|
||
}
|
||
|
||
// inferMaterialType maps a path's extension (case-insensitive) to a material
// type: "image", "gif" or "video" for the known media extensions, "" for
// ".json" (not a material), and "file" for everything else, including paths
// without an extension.
func inferMaterialType(path string) string {
	ext := strings.ToLower(filepath.Ext(path))
	if ext == ".json" {
		return ""
	}
	if ext == ".gif" {
		return "gif"
	}
	for _, imageExt := range [...]string{".jpg", ".jpeg", ".png", ".bmp", ".webp"} {
		if ext == imageExt {
			return "image"
		}
	}
	for _, videoExt := range [...]string{".mp4", ".mov", ".avi", ".mkv", ".wmv"} {
		if ext == videoExt {
			return "video"
		}
	}
	return "file"
}
|
||
|
||
func (e *AutoReplyEngine) sendMaterials(msg autoReplyMessage, matches []autoReplyMaterialMatch, reason string, timings autoReplyTimings, tutorialText string) error {
|
||
if len(matches) == 0 {
|
||
return nil
|
||
}
|
||
sent := make([]string, 0, len(matches))
|
||
if text := strings.TrimSpace(tutorialText); text != "" {
|
||
if err := sendAutoReplyText(uint32(msg.ClientID), msg.ConversationID, text); err != nil {
|
||
return err
|
||
}
|
||
e.rememberAutoSentMessage(uint32(msg.ClientID), msg.ConversationID, text)
|
||
}
|
||
for _, match := range matches {
|
||
caption := materialCaptionForSend(match.Material)
|
||
captionAfter := materialCaptionShouldFollowMaterial(match.Material)
|
||
if !captionAfter {
|
||
if err := sendAutoReplyText(uint32(msg.ClientID), msg.ConversationID, caption); err != nil {
|
||
return err
|
||
}
|
||
e.rememberAutoSentMessage(uint32(msg.ClientID), msg.ConversationID, caption)
|
||
}
|
||
if err := sendAutoReplyMaterial(uint32(msg.ClientID), msg.ConversationID, match.Material.MaterialType, match.Path); err != nil {
|
||
return fmt.Errorf("send material %s failed: %w", match.Path, err)
|
||
}
|
||
e.rememberSentMaterial(msg, match)
|
||
sent = append(sent, fmt.Sprintf("%s:%s", match.Material.MaterialType, match.Path))
|
||
if captionAfter {
|
||
if err := sendAutoReplyText(uint32(msg.ClientID), msg.ConversationID, caption); err != nil {
|
||
return err
|
||
}
|
||
e.rememberAutoSentMessage(uint32(msg.ClientID), msg.ConversationID, caption)
|
||
}
|
||
}
|
||
e.markCooldown(msg)
|
||
e.incStatus("replied")
|
||
e.noteReason(reason)
|
||
e.addRecord(AutoReplyRecord{
|
||
RobotID: msg.RobotID,
|
||
ClientID: msg.ClientID,
|
||
UserID: msg.RobotID,
|
||
ConversationID: msg.ConversationID,
|
||
Source: msg.sourceLabel(),
|
||
FromWxID: msg.FromWxID,
|
||
FromNickName: msg.FromNickName,
|
||
Question: msg.Content,
|
||
Action: "replied",
|
||
Reason: reason,
|
||
Answer: strings.Join(sent, "\n"),
|
||
SenderIdentity: msg.SenderIdentity,
|
||
IdentitySource: msg.IdentitySource,
|
||
KeywordScore: timings.KeywordScore,
|
||
VectorScore: timings.VectorScore,
|
||
RerankScore: timings.RerankScore,
|
||
RetrievalMode: timings.RetrievalMode,
|
||
UsedKnowledgeSources: strings.Join(timings.UsedKnowledgeSources, ", "),
|
||
KnowledgeDurationMS: timings.KnowledgeDurationMS,
|
||
KeywordDurationMS: timings.KeywordDurationMS,
|
||
VectorDurationMS: timings.VectorDurationMS,
|
||
RerankDurationMS: timings.RerankDurationMS,
|
||
AIDurationMS: timings.AIDurationMS,
|
||
TotalDurationMS: timings.TotalDurationMS,
|
||
})
|
||
return nil
|
||
}
|
||
|
||
func materialCaptionShouldFollowMaterial(material AutoReplyMaterial) bool {
|
||
switch strings.ToLower(strings.TrimSpace(material.MaterialType)) {
|
||
case "image", "video", "gif":
|
||
return true
|
||
default:
|
||
return false
|
||
}
|
||
}
|
||
|
||
func materialCaptionForSend(material AutoReplyMaterial) string {
|
||
if caption := customMaterialCaptionForSend(material); caption != "" {
|
||
return caption
|
||
}
|
||
return defaultMaterialCaption(material.MaterialType)
|
||
}
|
||
|
||
func materialTutorialTextFromHits(hits []KnowledgeChunk) string {
|
||
items := make([]string, 0, 3)
|
||
for _, hit := range hits {
|
||
content := strings.TrimSpace(hit.Content)
|
||
if content == "" {
|
||
continue
|
||
}
|
||
content = compactMaterialTutorialContent(content)
|
||
if content == "" {
|
||
continue
|
||
}
|
||
items = append(items, content)
|
||
if len(items) >= 3 {
|
||
break
|
||
}
|
||
}
|
||
if len(items) == 0 {
|
||
return ""
|
||
}
|
||
return "我也把相关排查说明整理给您:\n" + strings.Join(items, "\n")
|
||
}
|
||
|
||
func compactMaterialTutorialContent(content string) string {
|
||
lines := strings.Split(strings.ReplaceAll(content, "\r\n", "\n"), "\n")
|
||
result := make([]string, 0, 4)
|
||
for _, line := range lines {
|
||
line = strings.TrimSpace(strings.Trim(line, "#>*- \t"))
|
||
if line == "" {
|
||
continue
|
||
}
|
||
result = append(result, line)
|
||
if len(result) >= 4 {
|
||
break
|
||
}
|
||
}
|
||
text := strings.Join(result, "\n")
|
||
return truncateText(text, 280)
|
||
}
|
||
|
||
func customMaterialCaptionForSend(material AutoReplyMaterial) string {
|
||
caption := strings.TrimSpace(material.Caption)
|
||
if caption != "" && !isLegacyGenericMaterialCaption(caption) {
|
||
return caption
|
||
}
|
||
return ""
|
||
}
|
||
|
||
func isLegacyGenericMaterialCaption(caption string) bool {
|
||
text := normalizeGreetingText(caption)
|
||
switch text {
|
||
case normalizeGreetingText("我把相关资料直接发你。"),
|
||
normalizeGreetingText("我把相关资料发你。"):
|
||
return true
|
||
default:
|
||
return false
|
||
}
|
||
}
|
||
|
||
// defaultMaterialCaption returns the stock caption for a material type
// (compared trimmed and case-insensitively): a dedicated sentence for image,
// video and gif, and the file sentence for anything else.
func defaultMaterialCaption(materialType string) string {
	kind := strings.ToLower(strings.TrimSpace(materialType))
	if kind == "image" {
		return "我把图片发给您。"
	}
	if kind == "video" {
		return "我把视频发给您。"
	}
	if kind == "gif" {
		return "我把动图发给您。"
	}
	return "我把文件发给您。"
}
|
||
|
||
func combinedMaterialCaption(matches []autoReplyMaterialMatch) string {
|
||
if len(matches) == 0 {
|
||
return "我把文件发给您。"
|
||
}
|
||
seen := map[string]bool{}
|
||
labels := make([]string, 0, 4)
|
||
add := func(materialType string, label string) {
|
||
if !seen[materialType] {
|
||
seen[materialType] = true
|
||
labels = append(labels, label)
|
||
}
|
||
}
|
||
for _, match := range matches {
|
||
switch strings.ToLower(strings.TrimSpace(match.Material.MaterialType)) {
|
||
case "image":
|
||
add("image", "图片")
|
||
case "video":
|
||
add("video", "视频")
|
||
case "gif":
|
||
add("gif", "动图")
|
||
default:
|
||
add("file", "文件")
|
||
}
|
||
}
|
||
if len(labels) == 1 {
|
||
return defaultMaterialCaption(matches[0].Material.MaterialType)
|
||
}
|
||
return "我把" + strings.Join(labels, "和") + "发给您。"
|
||
}
|
||
|
||
// uniqueMaterialStrings returns the trimmed, non-empty items in order of first
// appearance, with duplicates (compared after trimming) removed. The result is
// never nil.
func uniqueMaterialStrings(items []string) []string {
	unique := make([]string, 0, len(items))
	known := make(map[string]bool, len(items))
	for i := range items {
		trimmed := strings.TrimSpace(items[i])
		if trimmed != "" && !known[trimmed] {
			known[trimmed] = true
			unique = append(unique, trimmed)
		}
	}
	return unique
}
|
||
|
||
// sendAutoReplyMaterialSender is the function sendAutoReplyMaterial delegates
// to. It defaults to sendAutoReplyMaterialRequest; being a variable, it can be
// replaced with another implementation.
var sendAutoReplyMaterialSender = sendAutoReplyMaterialRequest
|
||
|
||
func sendAutoReplyMaterial(clientID uint32, conversationID string, materialType string, path string) error {
|
||
return sendAutoReplyMaterialSender(clientID, conversationID, materialType, path)
|
||
}
|
||
|
||
func sendAutoReplyMaterialRequest(clientID uint32, conversationID string, materialType string, path string) error {
|
||
if strings.TrimSpace(conversationID) == "" {
|
||
return fmt.Errorf("conversationId is empty")
|
||
}
|
||
if strings.TrimSpace(path) == "" {
|
||
return fmt.Errorf("material path is empty")
|
||
}
|
||
messageType := 11031
|
||
switch strings.ToLower(strings.TrimSpace(materialType)) {
|
||
case "image":
|
||
messageType = 11030
|
||
case "video":
|
||
messageType = 11067
|
||
case "gif":
|
||
messageType = 11070
|
||
case "file":
|
||
messageType = 11031
|
||
default:
|
||
messageType = 11031
|
||
}
|
||
request := map[string]interface{}{
|
||
"type": messageType,
|
||
"data": map[string]interface{}{
|
||
"conversation_id": conversationID,
|
||
"file": path,
|
||
},
|
||
}
|
||
if messageType == 11031 {
|
||
if fileName := strings.TrimSpace(filepath.Base(path)); fileName != "" && fileName != "." {
|
||
data := request["data"].(map[string]interface{})
|
||
data["fileName"] = fileName
|
||
data["file_name"] = fileName
|
||
}
|
||
}
|
||
data, err := json.Marshal(request)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
result, err := handleSendWxWorkData(map[string]interface{}{
|
||
"data": string(data),
|
||
"clientId": clientID,
|
||
})
|
||
if err != nil {
|
||
return err
|
||
}
|
||
if resultMap, ok := result.(map[string]interface{}); ok {
|
||
if success, ok := resultMap["success"].(bool); ok && !success {
|
||
return fmt.Errorf("%v", resultMap["error"])
|
||
}
|
||
}
|
||
return nil
|
||
}
|