Merge pull request #55 from manmohan659/fix/classifier-identity-veto

fix(classifier): veto identity/creator/meta queries from web_search
This commit is contained in:
Manmohan 2026-04-22 18:24:15 -04:00 committed by GitHub
commit 9ed58c4813
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -86,6 +86,66 @@ _KEYWORD_GATE = re.compile(
# Case-insensitive compiled form of every entry in _CATEGORY_PATTERNS.
_CATEGORY_REGEXES = [
    re.compile(pattern, re.IGNORECASE)
    for pattern in _CATEGORY_PATTERNS
]
# ---------------------------------------------------------------------------
# Negative veto — queries about the model itself or its creator should NOT
# trigger a web search. The model's SFT training has the correct grounded
# identity answers (samosaChaat, Manmohan Sharma, socials, etc.). Sending
# these to Tavily returns irrelevant results (Tyler the Creator, Waaree CFO, etc.)
#
# Each entry is a regex source string. All of them are compiled with
# re.IGNORECASE below; patterns without ^/$ anchors may hit anywhere in the
# query when used with ``search``.
# ---------------------------------------------------------------------------
_IDENTITY_VETO_PATTERNS = [
    # self-referential questions directed at the model
    r"\bwho\s+are\s+you\b",
    r"\bwhat\s+are\s+you\b",
    # Accept any whitespace run in "what is", consistent with the
    # (?:'|\s+i)s form used by the other patterns in this list.
    r"\bwhat(?:'|\s+i)s\s+your\s+name\b",
    r"\bintroduce\s+yourself\b",
    r"\btell\s+me\s+about\s+yourself\b",
    r"\bdescribe\s+yourself\b",
    # creator / maker / trainer questions
    r"\bwho\s+(?:is\s+)?(?:made|created|built|trained|developed|designed|coded|programmed|engineered|authored|invented|produced|fine[-\s]?tuned)\s+you\b",
    r"\bwho(?:'|\s+i)s\s+your\s+(?:creator|creater|maker|author|developer|designer|engineer|architect|founder|builder|programmer|trainer|daddy|mom|parent|boss|owner)\b",
    r"\bwho\s+(?:brought|gave)\s+you\s+(?:to\s+life|into\s+being|into\s+existence)\b",
    # competitor/provenance questions (identity confusion attacks)
    r"\bare\s+you\s+(?:chatgpt|gpt[-\s]?\d|claude|gemini|bard|llama|mistral|perplexity|copilot|sonnet|opus|haiku)\b",
    r"\bare\s+you\s+(?:made|created|built|owned)\s+by\s+(?:openai|anthropic|google|meta|microsoft|deepmind|x\.ai)\b",
    r"\bwhich\s+(?:company|organization|team)\s+(?:made|created|built|trained|owns)\s+you\b",
    # samosaChaat / creator name references
    r"\bsamosachaat\b",
    # Covers both "who's manmohan" and "who is manmohan [sharma]", so no
    # separate "who is manmohan" entry is needed.
    r"\bwho(?:'|\s+i)s\s+manmohan(?:\s+sharma)?\b",
    r"\bmanmohan\s+sharma\b",
    r"\btell\s+me\s+about\s+manmohan\b",
    # model meta-questions
    # NOTE(review): not tied to "you", so this also matches parameter-count
    # questions about other models — confirm that is intended.
    r"\bhow\s+(?:many|much)\s+parameters?\b",
    r"\bwhat\s+(?:model|version|size|architecture)\s+are\s+you\b",
    r"\bare\s+you\s+(?:open[-\s]?source|open\s+weight)\b",
    r"\bwhere\s+(?:can\s+i\s+)?(?:find|download|get)\s+your\s+(?:weights|code|source)\b",
    r"\bwhat\s+hardware\s+(?:were|are)\s+you\s+(?:trained|running)\b",
    r"\bhow\s+(?:were|are)\s+you\s+trained\b",
    r"\bwhen\s+were\s+you\s+(?:trained|released|built)\b",
    # capability / tooling questions (not factual queries)
    r"\bwhat\s+(?:tools|abilities|capabilities|languages)\s+(?:do\s+)?you\s+(?:have|support|speak)\b",
    # NOTE(review): also matches requests phrased as "can you search for ...";
    # confirm those should be vetoed rather than searched.
    r"\bcan\s+you\s+(?:search|do|use|access)\b",
    # greetings & social
    # NOTE(review): anchored at the start only, so any query that merely
    # begins with a greeting word matches — confirm that is intended.
    r"^(?:hi|hello|hey|yo|sup|greetings|namaste|good\s+(?:morning|afternoon|evening|night))\b",
    r"\bhow\s+are\s+you\b",
    r"\bwhat(?:'|\s+i)s\s+up\b",
    r"\bnice\s+to\s+meet\s+you\b",
    # general small talk / thanks (whole message must be the acknowledgement)
    r"^\s*(?:thanks?|thank\s+you|thx|ty|ok|okay|cool|nice|great|awesome|bye|goodbye)\s*[!.?]*\s*$",
    # writing / reasoning / coding tasks (answered by the model, not the web)
    # Accepts "write a X", "write an X", "write me X" and also the common
    # "write me a X" / "write me an X" phrasing.
    r"\bwrite\s+(?:(?:me\s+)?an?|me)\s+(?:poem|haiku|limerick|story|essay|letter|email|code|function|script|query|sql)\b",
    r"\bexplain\s+(?:what|how|why)\s+(?:is\s+)?(?:recursion|gradient\s+descent|backprop|attention|a\s+transformer|machine\s+learning|neural\s+network)\b",
    r"\bsolve\b.*=",  # math equations
]

# Compiled once, case-insensitively, in the same order as the patterns above.
_IDENTITY_VETO_REGEXES = [
    re.compile(p, re.IGNORECASE) for p in _IDENTITY_VETO_PATTERNS
]
def _is_identity_or_meta(text: str) -> bool:
    """Return True if *text* matches at least one identity/meta veto regex.

    Each compiled pattern in ``_IDENTITY_VETO_REGEXES`` is tried with
    ``search``; evaluation stops at the first pattern that matches.
    """
    return any(
        pattern.search(text) is not None
        for pattern in _IDENTITY_VETO_REGEXES
    )
def needs_web_search(text: str) -> Tuple[bool, str]:
"""Classify whether a user query likely needs a live web search.
@ -93,6 +153,9 @@ def needs_web_search(text: str) -> Tuple[bool, str]:
Returns (needs, rewritten_query). The rewritten_query strips filler and
reformulates for better Tavily results (e.g. "whos the present president" ->
"who is the current president of the United States 2026").
Identity / meta / greeting / writing-task queries are vetoed because the
model's SFT training already has the correct grounded answer.
"""
if not text or not isinstance(text, str):
return False, ""
@ -101,6 +164,10 @@ def needs_web_search(text: str) -> Tuple[bool, str]:
if len(stripped) < 3:
return False, ""
# Veto: identity / self-referential / meta / greeting / writing tasks
if _is_identity_or_meta(stripped):
return False, ""
# Any category pattern hit
for rx in _CATEGORY_REGEXES:
if rx.search(stripped):