From fd8e10a820e8e7e49b53809014f9ffe389a0eb96 Mon Sep 17 00:00:00 2001 From: Manmohan Sharma Date: Wed, 22 Apr 2026 15:25:33 -0700 Subject: [PATCH] fix(classifier): expand identity veto to cover all self-introspection queries Added patterns for: tell me about yourself / you / about you, what do/can you do, what are your capabilities / skills, how do you work, what are you good at, what's your purpose / story / mission, where did you come from, how were you built, are you an AI / chatbot / language model, model meta (model/version/context/training cutoff), creator socials (github/linkedin/twitter), and more writing tasks (song, joke). All 27 identity cases now short-circuit without hitting Tavily. --- modal/_query_classifier.py | 45 +++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/modal/_query_classifier.py b/modal/_query_classifier.py index bbedf0f6..37aff4c2 100644 --- a/modal/_query_classifier.py +++ b/modal/_query_classifier.py @@ -96,35 +96,50 @@ _IDENTITY_VETO_PATTERNS = [ # self-referential questions directed at the model r"\bwho\s+are\s+you\b", r"\bwhat\s+are\s+you\b", - r"\bwhat(?:'| i)s\s+your\s+name\b", + r"\bwho\s+are\s+you\s+really\b", + r"\bwhat(?:'|\s+i)s\s+your\s+name\b", r"\bintroduce\s+yourself\b", - r"\btell\s+me\s+about\s+yourself\b", + r"\btell\s+me\s+about\s+(?:yourself|you|you\s+first)\b", + r"\btell\s+me\s+(?:more\s+)?about\s+yourself\b", + r"\babout\s+you(?:rself)?\s*[?!.]*\s*$", r"\bdescribe\s+yourself\b", + r"\bwhat(?:'|\s+i)s\s+your\s+(?:story|backstory|purpose|origin|deal|role|job|gig|mission|goal|objective)\b", + r"\bwhat\s+do\s+you\s+(?:do|know\s+about\s+yourself|stand\s+for)\b", # creator / maker / trainer questions - r"\bwho\s+(?:is\s+)?(?:made|created|built|trained|developed|designed|coded|programmed|engineered|authored|invented|produced|fine[-\s]?tuned)\s+you\b", - r"\bwho(?:'|\s+i)s\s+your\s+(?:creator|creater|maker|author|developer|designer|engineer|architect|founder|builder|programmer|trainer|daddy|mom|parent|boss|owner)\b", + r"\bwho\s+(?:is\s+)?(?:made|created|built|trained|developed|designed|coded|programmed|engineered|authored|invented|produced|fine[-\s]?tuned|wrote)\s+you\b", + r"\bwho(?:'|\s+i)s\s+(?:your|ur)\s+(?:creator|creater|maker|author|developer|designer|engineer|architect|founder|builder|programmer|trainer|daddy|mom|parent|boss|owner|father|mother|papa|mama)\b", r"\bwho\s+(?:brought|gave)\s+you\s+(?:to\s+life|into\s+being|into\s+existence)\b", + r"\bwhere\s+(?:did\s+)?you\s+come\s+from\b", + r"\bhow\s+(?:did|were)\s+you\s+(?:come|born|made|created|built)\b", # competitor/provenance questions (identity confusion attacks) - r"\bare\s+you\s+(?:chatgpt|gpt[-\s]?\d|claude|gemini|bard|llama|mistral|perplexity|copilot|sonnet|opus|haiku)\b", - r"\bare\s+you\s+(?:made|created|built|owned)\s+by\s+(?:openai|anthropic|google|meta|microsoft|deepmind|x\.ai)\b", - r"\bwhich\s+(?:company|organization|team)\s+(?:made|created|built|trained|owns)\s+you\b", + r"\bare\s+you\s+(?:chatgpt|gpt[-\s]?\d|claude|gemini|bard|llama|mistral|perplexity|copilot|sonnet|opus|haiku|grok)\b", + r"\bare\s+you\s+(?:made|created|built|owned|developed|trained)\s+by\s+(?:openai|anthropic|google|meta|microsoft|deepmind|x\.ai|xai)\b", + r"\bwhich\s+(?:company|organization|team|ai|model)\s+(?:made|created|built|trained|owns|are\s+you)\s*(?:you)?\b", + r"\bare\s+you\s+(?:an?\s+)?(?:ai|chat\s*bot|assistant|language\s+model|llm|robot)\b", # samosaChaat / creator name references r"\bsamosachaat\b", r"\bwho(?:'|\s+i)s\s+manmohan(?:\s+sharma)?\b", r"\bwho\s+is\s+manmohan\b", r"\bmanmohan\s+sharma\b", r"\btell\s+me\s+about\s+manmohan\b", + r"\bwhat(?:'|\s+i)s\s+(?:manmohan|your\s+creator)(?:'s)?\s+(?:github|linkedin|twitter|x|website|email|socials?)\b", # model meta-questions r"\bhow\s+(?:many|much)\s+parameters?\b", - r"\bwhat\s+(?:model|version|size|architecture)\s+are\s+you\b", - r"\bare\s+you\s+(?:open[-\s]?source|open\s+weight)\b", - r"\bwhere\s+(?:can\s+i\s+)?(?:find|download|get)\s+your\s+(?:weights|code|source)\b", + r"\bwhat\s+(?:model|version|size|architecture|type|kind)\s+(?:are\s+you|of\s+(?:ai|model)\s+are\s+you)\b", + r"\bwhat(?:'|\s+i)s\s+your\s+(?:model|version|size|architecture|context\s+(?:size|length|window))\b", + r"\bare\s+you\s+(?:open[-\s]?source|open\s+weight|closed\s+source|proprietary)\b", + r"\bwhere\s+(?:can\s+i\s+)?(?:find|download|get)\s+your\s+(?:weights|code|source|github|repo)\b", r"\bwhat\s+hardware\s+(?:were|are)\s+you\s+(?:trained|running)\b", r"\bhow\s+(?:were|are)\s+you\s+trained\b", - r"\bwhen\s+were\s+you\s+(?:trained|released|built)\b", + r"\bwhen\s+were\s+you\s+(?:trained|released|built|made|created)\b", + r"\bwhat(?:'|\s+i)s\s+your\s+(?:training|knowledge)\s+(?:data|cut[-\s]?off|cutoff)\b", + r"\bwhat\s+data\s+(?:were|are)\s+you\s+trained\s+on\b", # capability / tooling questions (not factual queries) - r"\bwhat\s+(?:tools|abilities|capabilities|languages)\s+(?:do\s+)?you\s+(?:have|support|speak)\b", - r"\bcan\s+you\s+(?:search|do|use|access)\b", + r"\bwhat\s+(?:tools|abilities|capabilities|languages|skills|features)\s+(?:do\s+)?you\s+(?:have|support|speak|offer|provide)\b", + r"\bwhat\s+(?:can|could)\s+you\s+(?:do|help\s+(?:me\s+)?with)\b", + r"\bcan\s+you\s+(?:search|do|use|access|help)\b", + r"\bwhat\s+are\s+you\s+(?:good\s+at|capable\s+of|able\s+to\s+do)\b", + r"\bhow\s+do\s+you\s+work\b", # greetings & social r"^(?:hi|hello|hey|yo|sup|greetings|namaste|good\s+(?:morning|afternoon|evening|night))\b", r"\bhow\s+are\s+you\b", @@ -133,8 +148,8 @@ _IDENTITY_VETO_PATTERNS = [ # general small talk / thanks r"^\s*(?:thanks?|thank\s+you|thx|ty|ok|okay|cool|nice|great|awesome|bye|goodbye)\s*[!.?]*\s*$", # writing / reasoning / coding tasks (answered by the model, not the web) - r"\bwrite\s+(?:a|an|me)\s+(?:poem|haiku|limerick|story|essay|letter|email|code|function|script|query|sql)\b", - r"\bexplain\s+(?:what|how|why)\s+(?:is\s+)?(?:recursion|gradient\s+descent|backprop|attention|a\s+transformer|machine\s+learning|neural\s+network)\b", + r"\bwrite\s+(?:a|an|me)\s+(?:poem|haiku|limerick|story|essay|letter|email|code|function|script|query|sql|song|joke)\b", + r"\bexplain\s+(?:what|how|why)\s+(?:is\s+)?(?:recursion|gradient\s+descent|backprop|attention|a\s+transformer|machine\s+learning|neural\s+network|rope|softmax)\b", r"\bsolve\b.*=", # math equations ] _IDENTITY_VETO_REGEXES = [re.compile(p, re.IGNORECASE) for p in _IDENTITY_VETO_PATTERNS]