from __future__ import annotations

import re
from dataclasses import dataclass, field


@dataclass
class DialogActEngine:
    """Detect the dialog act of an utterance from configurable tables.

    Attributes:
        patterns: Maps ``act_id`` to a tuple of trigger phrases. An act is
            hit when any of its phrases occurs as a substring of the
            normalised utterance. Dict order is the priority order.
        numeric_patterns: Maps ``act_id`` to a tuple of regular expressions
            that are searched in the normalised utterance. They are only
            consulted when no phrase matched (used for numeric ``inform``).

    The defaults below are used when no tables are passed to the
    constructor.
    NOTE(review): the original documentation says both tables are loaded
    from ``config/dialog_acts.yml`` so deployments can change them without
    touching code; that loader is not part of this file -- confirm.
    """

    patterns: dict[str, tuple[str, ...]] = field(
        default_factory=lambda: {
            "affirm": ("确认", "好的", "继续", "可以", "确定"),
            "deny": ("不要", "不行", "否", "不"),
            "cancel": ("取消", "算了", "不用了", "停止"),
            "modify": ("改成", "换成", "再低一点", "再高一点", "调大", "调小"),
            "chitchat": ("你好", "谢谢", "再见", "天气", "真不错"),
            "request": ("帮我", "打开", "关闭", "导航", "播放", "查询", "查"),
            "inform": (),
        }
    )

    # act_id -> regular expressions (full-text search, any hit triggers).
    numeric_patterns: dict[str, tuple[str, ...]] = field(
        default_factory=lambda: {
            "inform": (r"\d+",),
        }
    )

    @staticmethod
    def _normalize(value: str) -> str:
        """Return *value* lowercased with all whitespace removed."""
        return re.sub(r"\s+", "", value).lower()

    def _match_phrases(self, normalized: str) -> str | None:
        """Return the highest-priority act with a live phrase hit, or None.

        Every occurrence of every configured phrase is collected first. An
        occurrence that lies strictly inside a longer phrase occurrence is
        ignored, so a short phrase such as "不" cannot hijack a longer
        configured phrase such as "不用了". Among the remaining hits the act
        that comes first in ``patterns`` wins.
        """
        spans: list[tuple[int, int, str]] = []
        for act_id, phrases in self.patterns.items():
            for phrase in phrases:
                if not phrase:
                    continue  # empty/None entries never match
                # Phrases get the same normalisation as the utterance;
                # otherwise an entry like "OK" could never match.
                needle = self._normalize(phrase)
                if not needle:
                    continue
                start = normalized.find(needle)
                while start != -1:
                    spans.append((start, start + len(needle), act_id))
                    # Advance by one so overlapping occurrences are seen.
                    start = normalized.find(needle, start + 1)

        live_acts = {
            act_id
            for start, end, act_id in spans
            if not any(
                other_start <= start
                and end <= other_end
                and (other_end - other_start) > (end - start)
                for other_start, other_end, _ in spans
            )
        }
        for act_id in self.patterns:
            if act_id in live_acts:
                return act_id
        return None

    def detect(self, text: str) -> str:
        """Classify *text* and return its ``act_id``, or ``"unknown"``.

        Args:
            text: Raw user utterance. ``None`` or an empty/whitespace-only
                string yields ``"unknown"``.

        Returns:
            The first act in ``patterns`` with a live phrase hit; otherwise
            the first act in ``numeric_patterns`` with a matching regex;
            otherwise ``"unknown"``.

        Raises:
            re.error: If a configured regular expression is invalid.
        """
        normalized = self._normalize(text) if text else ""
        if not normalized:
            return "unknown"

        # 1. Phrase containment, in the priority order of ``patterns``.
        # NOTE: negated forms such as "不可以" still resolve to the earlier
        # act ("可以" -> affirm) unless a longer phrase covering the whole
        # form is configured for another act.
        matched = self._match_phrases(normalized)
        if matched is not None:
            return matched

        # 2. Regex fallback. Patterns see the normalised text, i.e.
        # lowercased and with all whitespace already removed.
        for act_id, regexes in self.numeric_patterns.items():
            if any(re.search(pattern, normalized) for pattern in regexes):
                return act_id

        return "unknown"