Initial upload for secondary development
This commit is contained in:
25
chatlog_fastAPI/services/fts.py
Normal file
25
chatlog_fastAPI/services/fts.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import jieba
|
||||
import re
|
||||
|
||||
def tokenize(text: str) -> str:
    """Segment *text* with ``jieba.cut`` and join the pieces with single spaces."""
    pieces = jieba.cut(text)
    return " ".join(pieces)
|
||||
|
||||
|
||||
def build_match_query(text: str, limit: int = 12) -> str:
    """Build a safe FTS5 MATCH query from user/model text.

    The text is segmented with ``tokenize``. Each kept token is wrapped in
    double quotes (embedded double quotes are doubled) and the quoted tokens
    are joined with ``OR``.

    Args:
        text: Raw text to search for. A falsy value (``None`` or ``""``) is
            treated as empty input.
        limit: Maximum number of distinct terms in the query. Zero or a
            negative value yields an empty query.

    Returns:
        The MATCH expression, or an empty string when no usable term remains.
    """
    # Without this guard a non-positive limit would still emit one term,
    # because the cap is only checked after the first append.
    if limit <= 0:
        return ""

    terms: list[str] = []
    seen: set[str] = set()
    for token in tokenize(text or "").split():
        # Skip pieces that contain no word character (pure punctuation/symbols).
        if not re.search(r"\w", token):
            continue
        # Drop the FTS5 operator keywords, matched in any letter case.
        if token.upper() in {"AND", "OR", "NOT", "NEAR"}:
            continue
        # Exact-match de-duplication keeps the first occurrence only.
        if token in seen:
            continue
        seen.add(token)
        # Quote the token and double any embedded double quote.
        terms.append('"' + token.replace('"', '""') + '"')
        if len(terms) >= limit:
            break
    return " OR ".join(terms)
|
||||
Reference in New Issue
Block a user