Files
2026-06-11 16:28:00 +08:00

52 lines
1.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import re
from dataclasses import dataclass, field
@dataclass
class DialogActEngine:
    """Detect the dialog act of an utterance from configurable vocabularies.

    Attributes:
        patterns: Maps ``act_id`` to a tuple of trigger phrases. An act
            matches when any of its phrases is contained in the normalized
            utterance. Dict insertion order is the priority order: the first
            act with a matching phrase wins.
        numeric_patterns: Maps ``act_id`` to a tuple of regular expressions
            that are searched against the whole normalized utterance. They
            are only consulted when no phrase matched (used for numeric
            ``inform`` utterances by default).

    The original note on this class says both vocabularies are meant to be
    loaded from ``config/dialog_acts.yml`` so that deployments can be adapted
    without code changes; the loader itself is not part of this class.
    """

    patterns: dict[str, tuple[str, ...]] = field(
        default_factory=lambda: {
            "affirm": ("确认", "好的", "继续", "可以", "确定"),
            # NOTE(review): the empty entries below are skipped by detect();
            # confirm whether real phrases were intended here.
            "deny": ("不要", "不行", "", ""),
            "cancel": ("取消", "算了", "不用了", "停止"),
            "modify": ("改成", "换成", "再低一点", "再高一点", "调大", "调小"),
            "chitchat": ("你好", "谢谢", "再见", "天气", "真不错"),
            "request": ("帮我", "打开", "关闭", "导航", "播放", "查询", ""),
            "inform": (),
        }
    )
    # act_id -> tuple of regular expressions; each one is searched against
    # the full normalized text and any hit triggers the act.
    numeric_patterns: dict[str, tuple[str, ...]] = field(
        default_factory=lambda: {
            "inform": (r"\d+",),
        }
    )

    @staticmethod
    def _normalize(text: str) -> str:
        """Return *text* lowercased with all whitespace removed."""
        return re.sub(r"\s+", "", text.strip().lower())

    def detect(self, text: str) -> str:
        """Return the ``act_id`` detected for *text*, or ``"unknown"``.

        The utterance is lowercased and stripped of all whitespace, then
        checked in two stages:

        1. Phrase containment over ``patterns`` in dict order.
        2. Regex search over ``numeric_patterns`` in dict order.

        Args:
            text: Raw user utterance.

        Returns:
            The first matching ``act_id``; ``"unknown"`` when the utterance
            is empty after normalization or nothing matches.
        """
        normalized = self._normalize(text)
        if not normalized:
            return "unknown"

        # 1. Phrase containment, honouring the configured priority order.
        for act_id, phrases in self.patterns.items():
            for phrase in phrases:
                # Phrases go through the same normalization as the utterance;
                # otherwise a configured phrase containing uppercase letters
                # or spaces could never match the normalized text.
                needle = self._normalize(phrase)
                # Empty phrases are skipped: "" is contained in every string.
                if needle and needle in normalized:
                    return act_id

        # 2. Regex search (by default only numeric detection for "inform").
        #    Patterns run against the normalized text, i.e. lowercase and
        #    without whitespace.
        for act_id, regexes in self.numeric_patterns.items():
            if any(re.search(pattern, normalized) for pattern in regexes):
                return act_id

        return "unknown"