52 lines
1.9 KiB
Python
52 lines
1.9 KiB
Python
from __future__ import annotations
|
||
|
||
import re
|
||
from dataclasses import dataclass, field
|
||
|
||
|
||
@dataclass
class DialogActEngine:
    """Dialog-act detector driven by configurable vocabularies.

    Attributes:
        patterns: Maps an act id to a tuple of trigger phrases. An act fires
            when any of its phrases occurs as a substring of the normalized
            input. Acts are tried in the mapping's insertion order and the
            first hit wins.
        numeric_patterns: Maps an act id to a tuple of regular expressions
            that are searched against the whole normalized input (used for
            numeric ``inform`` utterances). Only consulted when no phrase
            matched.

    Per the original note, the vocabularies and regexes are meant to be
    loaded from ``config/dialog_acts.yml`` so that a different device
    deployment only needs a config change; the loader itself is not part of
    this class.
    """

    patterns: dict[str, tuple[str, ...]] = field(
        default_factory=lambda: {
            "affirm": ("确认", "好的", "继续", "可以", "确定"),
            "deny": ("不要", "不行", "否", "不"),
            "cancel": ("取消", "算了", "不用了", "停止"),
            "modify": ("改成", "换成", "再低一点", "再高一点", "调大", "调小"),
            "chitchat": ("你好", "谢谢", "再见", "天气", "真不错"),
            "request": ("帮我", "打开", "关闭", "导航", "播放", "查询", "查"),
            "inform": (),
        }
    )
    # act id -> tuple of regexes (full-text search, any hit triggers the act)
    numeric_patterns: dict[str, tuple[str, ...]] = field(
        default_factory=lambda: {
            "inform": (r"\d+",),
        }
    )

    @staticmethod
    def _normalize(value: str) -> str:
        """Return *value* lower-cased with all whitespace removed."""
        return re.sub(r"\s+", "", value.strip().lower())

    def detect(self, text: str) -> str:
        """Return the id of the first dialog act triggered by *text*.

        Args:
            text: Raw user utterance.

        Returns:
            The matching act id, or ``"unknown"`` when the input is empty
            after normalization or nothing matches.
        """
        normalized = self._normalize(text)
        if not normalized:
            return "unknown"

        # 1. Phrase containment, keeping the mapping's priority order.
        # NOTE(review): with the default vocabulary, deny's "不" is a
        # substring of cancel's "不用了", so that cancel phrase is shadowed
        # by the earlier "deny" entry. Left as-is to keep the priority order.
        for act_id, phrases in self.patterns.items():
            for phrase in phrases:
                if not phrase:
                    continue
                # The input was lower-cased and stripped of whitespace, so the
                # phrase must be normalized the same way or entries such as
                # "OK" or "turn on" could never match.
                needle = self._normalize(phrase)
                if needle and needle in normalized:
                    return act_id

        # 2. Regex search (mainly numeric detection for "inform").
        # Patterns see the lower-cased, whitespace-free text.
        for act_id, regexes in self.numeric_patterns.items():
            if any(re.search(pattern, normalized) for pattern in regexes):
                return act_id

        return "unknown"
|