# ai-device/intelligent_cabin/app/services/dialog_act.py

from __future__ import annotations

import re
from dataclasses import dataclass, field


@dataclass
class DialogActEngine:
    """
    Dialog-act detector driven by configurable phrase lists and regexes.

    - patterns          : act_id -> tuple of trigger phrases, matched by
                          substring containment against the normalized text
    - numeric_patterns  : act_id -> tuple of regexes, applied with
                          ``re.search`` to the normalized text (used for
                          numeric "inform" utterances)

    The values below are the built-in defaults. Per the original author's
    notes, deployments are expected to supply both tables from
    config/dialog_acts.yml so devices can be re-tuned without code changes
    (the loading itself is not part of this class).

    Matching rules:
    - Input text and configured phrases are normalized identically
      (stripped, lowercased, all whitespace removed).
    - Acts are prioritized by their insertion order in ``patterns``.
    - A phrase hit that lies entirely inside a strictly longer configured
      phrase hit is ignored, so a short phrase such as "不" cannot hijack
      "不用了" or "真不错" that belong to lower-priority acts.
    """

    patterns: dict[str, tuple[str, ...]] = field(
        default_factory=lambda: {
            "affirm": ("确认", "好的", "继续", "可以", "确定"),
            "deny": ("不要", "不行", "否", "不"),
            "cancel": ("取消", "算了", "不用了", "停止"),
            "modify": ("改成", "换成", "再低一点", "再高一点", "调大", "调小"),
            "chitchat": ("你好", "谢谢", "再见", "天气", "真不错"),
            "request": ("帮我", "打开", "关闭", "导航", "播放", "查询", "查"),
            "inform": (),
        }
    )
    # act_id -> tuple of regular expressions (searched over the whole
    # normalized text; any single match triggers the act)
    numeric_patterns: dict[str, tuple[str, ...]] = field(
        default_factory=lambda: {
            "inform": (r"\d+",),
        }
    )

    @staticmethod
    def _normalize(text: str) -> str:
        """Return *text* stripped, lowercased and with all whitespace removed."""
        return re.sub(r"\s+", "", text.strip().lower())

    @staticmethod
    def _occurrences(haystack: str, needle: str) -> list[tuple[int, int]]:
        """Return ``(start, end)`` spans of every (possibly overlapping) match."""
        spans = []
        start = haystack.find(needle)
        while start != -1:
            spans.append((start, start + len(needle)))
            # Advance by one character so overlapping occurrences are found.
            start = haystack.find(needle, start + 1)
        return spans

    def detect(self, text: str) -> str:
        """
        Classify *text* into a dialog-act id.

        Returns the id of the highest-priority act with a valid phrase hit,
        otherwise the first act in ``numeric_patterns`` whose regex matches,
        otherwise ``"unknown"`` (also returned for empty/whitespace input).
        """
        normalized = self._normalize(text)
        if not normalized:
            return "unknown"

        # 1. Phrase containment matching. Collect every hit as
        #    (act_id, start, end); the list stays in config priority order.
        hits: list[tuple[str, int, int]] = []
        for act_id, phrases in self.patterns.items():
            # ``or ()`` tolerates a None entry (e.g. an empty key in config).
            for phrase in phrases or ():
                # Normalize phrases like the input; otherwise phrases with
                # uppercase letters or spaces could never match.
                needle = self._normalize(phrase)
                if not needle:
                    continue
                hits.extend(
                    (act_id, start, end)
                    for start, end in self._occurrences(normalized, needle)
                )

        for act_id, start, end in hits:
            # Ignore a hit fully covered by a strictly longer phrase hit:
            # the longer phrase is the more specific reading of that span.
            shadowed = any(
                other_start <= start
                and end <= other_end
                and (other_end - other_start) > (end - start)
                for _, other_start, other_end in hits
            )
            if not shadowed:
                return act_id

        # 2. Regex matching (mainly numeric detection for "inform").
        for act_id, regexes in self.numeric_patterns.items():
            if any(re.search(pattern, normalized) for pattern in regexes or ()):
                return act_id

        return "unknown"