From 5e3b17e9908c454642e5805258b537d7be999665 Mon Sep 17 00:00:00 2001
From: Manmohan Sharma <manmohan659@gmail.com>
Date: Wed, 22 Apr 2026 15:24:08 -0700
Subject: [PATCH] fix(classifier): veto identity/meta/greeting/writing queries
 from web_search
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The heuristic classifier was triggering web_search on 'who is your creator', 'who is manmohan sharma', 'who created you', etc. — which returned irrelevant Tavily results (Tyler the Creator, Waaree CFO) even though the model's SFT training already has the correct grounded identity answer. Added _IDENTITY_VETO_PATTERNS covering: self-referential questions, creator/maker/developer queries, competitor/provenance attacks (are you chatgpt/made by openai), samosaChaat/Manmohan name references, meta-questions (parameters/architecture/training/open source), greetings (hi/hello/hey), small talk, and writing/reasoning tasks that the model answers from memory. The veto runs before all positive classification.
---
 modal/_query_classifier.py | 67 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/modal/_query_classifier.py b/modal/_query_classifier.py
index 16caa193..bbedf0f6 100644
--- a/modal/_query_classifier.py
+++ b/modal/_query_classifier.py
@@ -86,6 +86,66 @@ _KEYWORD_GATE = re.compile(
 
 _CATEGORY_REGEXES = [re.compile(p, re.IGNORECASE) for p in _CATEGORY_PATTERNS]
 
+# ---------------------------------------------------------------------------
+# Negative veto — identity/creator, meta, greeting, small-talk and writing-task
+# queries should NOT trigger a web search. The model's SFT training has the
+# correct grounded identity answers (samosaChaat, Manmohan Sharma, socials, etc.);
+# sending these to Tavily returns irrelevant results (Tyler the Creator, Waaree CFO, etc.).
+# ---------------------------------------------------------------------------
+_IDENTITY_VETO_PATTERNS = [
+    # self-referential questions directed at the model
+    r"\bwho\s+are\s+you\b",
+    r"\bwhat\s+are\s+you\b",
+    r"\bwhat(?:'|\s+i)s\s+your\s+name\b",
+    r"\bintroduce\s+yourself\b",
+    r"\btell\s+me\s+about\s+yourself\b",
+    r"\bdescribe\s+yourself\b",
+    # creator / maker / trainer questions
+    r"\bwho\s+(?:is\s+)?(?:made|created|built|trained|developed|designed|coded|programmed|engineered|authored|invented|produced|fine[-\s]?tuned)\s+you\b",
+    r"\bwho(?:'|\s+i)s\s+your\s+(?:creator|creater|maker|author|developer|designer|engineer|architect|founder|builder|programmer|trainer|daddy|mom|parent|boss|owner)\b",
+    r"\bwho\s+(?:brought|gave)\s+you\s+(?:to\s+life|into\s+being|into\s+existence)\b",
+    # competitor/provenance questions (identity confusion attacks)
+    r"\bare\s+you\s+(?:chatgpt|gpt[-\s]?\d|claude|gemini|bard|llama|mistral|perplexity|copilot|sonnet|opus|haiku)\b",
+    r"\bare\s+you\s+(?:made|created|built|owned)\s+by\s+(?:openai|anthropic|google|meta|microsoft|deepmind|x\.ai)\b",
+    r"\bwhich\s+(?:company|organization|team)\s+(?:made|created|built|trained|owns)\s+you\b",
+    # samosaChaat / creator name references ("who is" and "who's" share one pattern)
+    r"\bsamosachaat\b",
+    r"\bwho(?:'|\s+i)s\s+manmohan\b",
+    r"\bmanmohan\s+sharma\b",
+    r"\btell\s+me\s+about\s+manmohan\b",
+    # model meta-questions
+    r"\bhow\s+(?:many|much)\s+parameters?\b",
+    r"\bwhat\s+(?:model|version|size|architecture)\s+are\s+you\b",
+    r"\bare\s+you\s+(?:open[-\s]?source|open\s+weight)\b",
+    r"\bwhere\s+(?:can\s+i\s+)?(?:find|download|get)\s+your\s+(?:weights|code|source)\b",
+    r"\bwhat\s+hardware\s+(?:were|are)\s+you\s+(?:trained|running)\b",
+    r"\bhow\s+(?:were|are)\s+you\s+trained\b",
+    r"\bwhen\s+were\s+you\s+(?:trained|released|built)\b",
+    # capability / tooling questions; search/use/access is end-anchored so "can you search for <topic>" is NOT vetoed
+    r"\bwhat\s+(?:tools|abilities|capabilities|languages)\s+(?:do\s+)?you\s+(?:have|support|speak)\b",
+    r"\bcan\s+you\s+do\b",
+    r"\bcan\s+you\s+(?:search|use|access)(?:\s+(?:the\s+)?(?:web|internet|google|tools?))?\s*[!.?]*\s*$",
+    # greetings & social; the greeting must be the whole message so "hi, what's the weather?" still reaches search
+    r"^\s*(?:hi|hello|hey|yo|sup|greetings|namaste|good\s+(?:morning|afternoon|evening|night))(?:\s+there)?\s*[!.?,]*\s*$",
+    r"\bhow\s+are\s+you\b",
+    r"\bwhat(?:'|\s+i)s\s+up\b",
+    r"\bnice\s+to\s+meet\s+you\b",
+    # general small talk / thanks
+    r"^\s*(?:thanks?|thank\s+you|thx|ty|ok|okay|cool|nice|great|awesome|bye|goodbye)\s*[!.?]*\s*$",
+    # writing / reasoning / coding tasks (answered by the model, not the web)
+    r"\bwrite\s+(?:a|an|me)\s+(?:poem|haiku|limerick|story|essay|letter|email|code|function|script|query|sql)\b",
+    r"\bexplain\s+(?:what|how|why)\s+(?:is\s+)?(?:recursion|gradient\s+descent|backprop|attention|a\s+transformer|machine\s+learning|neural\s+network)\b",
+    r"\bsolve\b.*=",  # math equations
+]
+_IDENTITY_VETO_REGEXES = [re.compile(p, re.IGNORECASE) for p in _IDENTITY_VETO_PATTERNS]
+
+
+def _is_identity_or_meta(text: str) -> bool:
+    """Return True when any regex in _IDENTITY_VETO_REGEXES matches *text*."""
+    return any(
+        pattern.search(text) is not None for pattern in _IDENTITY_VETO_REGEXES
+    )
+
 
 def needs_web_search(text: str) -> Tuple[bool, str]:
     """Classify whether a user query likely needs a live web search.
@@ -93,6 +153,9 @@ def needs_web_search(text: str) -> Tuple[bool, str]:
     Returns (needs, rewritten_query). The rewritten_query strips filler and
     reformulates for better Tavily results (e.g. "whos the present president" ->
     "who is the current president of the United States 2026").
+
+    Identity / meta / greeting / writing-task queries are vetoed — the model's
+    SFT training has the correct grounded answer.
     """
     if not text or not isinstance(text, str):
         return False, ""
@@ -101,6 +164,10 @@ def needs_web_search(text: str) -> Tuple[bool, str]:
     if len(stripped) < 3:
         return False, ""
 
+    # Veto: identity / self-referential / meta / greeting / writing tasks
+    if _is_identity_or_meta(stripped):
+        return False, ""
+
     # Any category pattern hit
     for rx in _CATEGORY_REGEXES:
         if rx.search(stripped):