Files
get_wechat/chatlog_fastAPI/services/fts.py

26 lines
748 B
Python

import jieba
import re
def tokenize(text: str) -> str:
    """Segment *text* with jieba and return the pieces joined by single spaces."""
    pieces = jieba.cut(text)
    return " ".join(pieces)
def build_match_query(text: str, limit: int = 12) -> str:
    """Build a safe FTS5 MATCH query from user/model text.

    The input is segmented with ``tokenize``; each usable token is wrapped in
    double quotes (embedded double quotes are doubled) and the quoted terms
    are joined with ``OR``.

    A token is skipped when it:
      * contains no word character at all (pure punctuation/symbols),
      * equals ``AND``, ``OR``, ``NOT`` or ``NEAR`` ignoring case, or
      * has already been emitted (exact, case-sensitive match).

    Args:
        text: Free-form input text. A falsy value (``None``, ``""``) is
            treated as empty input.
        limit: Maximum number of terms in the query. A value of zero or
            less yields an empty query.

    Returns:
        The quoted terms joined by ``" OR "``, or ``""`` when no usable
        term was found.
    """
    # Guard first: the in-loop check only runs after a term has been
    # appended, so without this a non-positive limit would still emit one.
    if limit <= 0:
        return ""

    terms: list[str] = []
    seen: set[str] = set()
    # str.split() with no separator never yields empty or whitespace-padded
    # pieces, so tokens need no further stripping.
    for token in tokenize(text or "").split():
        if not re.search(r"\w", token, flags=re.UNICODE):
            continue
        if token.upper() in {"AND", "OR", "NOT", "NEAR"}:
            continue
        if token in seen:
            continue
        seen.add(token)
        # Quote the term and double any embedded quote so it is read as a
        # plain string rather than as query syntax.
        terms.append('"' + token.replace('"', '""') + '"')
        if len(terms) >= limit:
            break
    return " OR ".join(terms)