mirror of
https://github.com/karpathy/nanochat.git
synced 2026-05-08 00:39:50 +00:00
Merge pull request #56 from manmohan659/fix/classifier-identity-veto-expanded
fix(classifier): expand identity veto (tell me about yourself etc.)
This commit is contained in:
commit
93b530c028
|
|
@ -96,35 +96,50 @@ _IDENTITY_VETO_PATTERNS = [
|
|||
# self-referential questions directed at the model
|
||||
r"\bwho\s+are\s+you\b",
|
||||
r"\bwhat\s+are\s+you\b",
|
||||
r"\bwhat(?:'| i)s\s+your\s+name\b",
|
||||
r"\bwho\s+are\s+you\s+really\b",
|
||||
r"\bwhat(?:'|\s+i)s\s+your\s+name\b",
|
||||
r"\bintroduce\s+yourself\b",
|
||||
r"\btell\s+me\s+about\s+yourself\b",
|
||||
r"\btell\s+me\s+about\s+(?:yourself|you|you\s+first)\b",
|
||||
r"\btell\s+me\s+(?:more\s+)?about\s+yourself\b",
|
||||
r"\babout\s+you(?:rself)?\s*[?!.]*\s*$",
|
||||
r"\bdescribe\s+yourself\b",
|
||||
r"\bwhat(?:'|\s+i)s\s+your\s+(?:story|backstory|purpose|origin|deal|role|job|gig|mission|goal|objective)\b",
|
||||
r"\bwhat\s+do\s+you\s+(?:do|know\s+about\s+yourself|stand\s+for)\b",
|
||||
# creator / maker / trainer questions
|
||||
r"\bwho\s+(?:is\s+)?(?:made|created|built|trained|developed|designed|coded|programmed|engineered|authored|invented|produced|fine[-\s]?tuned)\s+you\b",
|
||||
r"\bwho(?:'|\s+i)s\s+your\s+(?:creator|creater|maker|author|developer|designer|engineer|architect|founder|builder|programmer|trainer|daddy|mom|parent|boss|owner)\b",
|
||||
r"\bwho\s+(?:is\s+)?(?:made|created|built|trained|developed|designed|coded|programmed|engineered|authored|invented|produced|fine[-\s]?tuned|wrote)\s+you\b",
|
||||
r"\bwho(?:'|\s+i)s\s+(?:your|ur)\s+(?:creator|creater|maker|author|developer|designer|engineer|architect|founder|builder|programmer|trainer|daddy|mom|parent|boss|owner|father|mother|papa|mama)\b",
|
||||
r"\bwho\s+(?:brought|gave)\s+you\s+(?:to\s+life|into\s+being|into\s+existence)\b",
|
||||
r"\bwhere\s+(?:did\s+)?you\s+come\s+from\b",
|
||||
r"\bhow\s+(?:did|were)\s+you\s+(?:come|born|made|created|built)\b",
|
||||
# competitor/provenance questions (identity confusion attacks)
|
||||
r"\bare\s+you\s+(?:chatgpt|gpt[-\s]?\d|claude|gemini|bard|llama|mistral|perplexity|copilot|sonnet|opus|haiku)\b",
|
||||
r"\bare\s+you\s+(?:made|created|built|owned)\s+by\s+(?:openai|anthropic|google|meta|microsoft|deepmind|x\.ai)\b",
|
||||
r"\bwhich\s+(?:company|organization|team)\s+(?:made|created|built|trained|owns)\s+you\b",
|
||||
r"\bare\s+you\s+(?:chatgpt|gpt[-\s]?\d|claude|gemini|bard|llama|mistral|perplexity|copilot|sonnet|opus|haiku|grok)\b",
|
||||
r"\bare\s+you\s+(?:made|created|built|owned|developed|trained)\s+by\s+(?:openai|anthropic|google|meta|microsoft|deepmind|x\.ai|xai)\b",
|
||||
r"\bwhich\s+(?:company|organization|team|ai|model)\s+(?:made|created|built|trained|owns|are\s+you)\s*(?:you)?\b",
|
||||
r"\bare\s+you\s+(?:an?\s+)?(?:ai|chat\s*bot|assistant|language\s+model|llm|robot)\b",
|
||||
# samosaChaat / creator name references
|
||||
r"\bsamosachaat\b",
|
||||
r"\bwho(?:'|\s+i)s\s+manmohan(?:\s+sharma)?\b",
|
||||
r"\bwho\s+is\s+manmohan\b",
|
||||
r"\bmanmohan\s+sharma\b",
|
||||
r"\btell\s+me\s+about\s+manmohan\b",
|
||||
r"\bwhat(?:'|\s+i)s\s+(?:manmohan|your\s+creator)(?:'s)?\s+(?:github|linkedin|twitter|x|website|email|socials?)\b",
|
||||
# model meta-questions
|
||||
r"\bhow\s+(?:many|much)\s+parameters?\b",
|
||||
r"\bwhat\s+(?:model|version|size|architecture)\s+are\s+you\b",
|
||||
r"\bare\s+you\s+(?:open[-\s]?source|open\s+weight)\b",
|
||||
r"\bwhere\s+(?:can\s+i\s+)?(?:find|download|get)\s+your\s+(?:weights|code|source)\b",
|
||||
r"\bwhat\s+(?:model|version|size|architecture|type|kind)\s+(?:are\s+you|of\s+(?:ai|model)\s+are\s+you)\b",
|
||||
r"\bwhat(?:'|\s+i)s\s+your\s+(?:model|version|size|architecture|context\s+(?:size|length|window))\b",
|
||||
r"\bare\s+you\s+(?:open[-\s]?source|open\s+weight|closed\s+source|proprietary)\b",
|
||||
r"\bwhere\s+(?:can\s+i\s+)?(?:find|download|get)\s+your\s+(?:weights|code|source|github|repo)\b",
|
||||
r"\bwhat\s+hardware\s+(?:were|are)\s+you\s+(?:trained|running)\b",
|
||||
r"\bhow\s+(?:were|are)\s+you\s+trained\b",
|
||||
r"\bwhen\s+were\s+you\s+(?:trained|released|built)\b",
|
||||
r"\bwhen\s+were\s+you\s+(?:trained|released|built|made|created)\b",
|
||||
r"\bwhat(?:'|\s+i)s\s+your\s+(?:training|knowledge)\s+(?:data|cut[-\s]?off|cutoff)\b",
|
||||
r"\bwhat\s+data\s+(?:were|are)\s+you\s+trained\s+on\b",
|
||||
# capability / tooling questions (not factual queries)
|
||||
r"\bwhat\s+(?:tools|abilities|capabilities|languages)\s+(?:do\s+)?you\s+(?:have|support|speak)\b",
|
||||
r"\bcan\s+you\s+(?:search|do|use|access)\b",
|
||||
r"\bwhat\s+(?:tools|abilities|capabilities|languages|skills|features)\s+(?:do\s+)?you\s+(?:have|support|speak|offer|provide)\b",
|
||||
r"\bwhat\s+(?:can|could)\s+you\s+(?:do|help\s+(?:me\s+)?with)\b",
|
||||
r"\bcan\s+you\s+(?:search|do|use|access|help)\b",
|
||||
r"\bwhat\s+are\s+you\s+(?:good\s+at|capable\s+of|able\s+to\s+do)\b",
|
||||
r"\bhow\s+do\s+you\s+work\b",
|
||||
# greetings & social
|
||||
r"^(?:hi|hello|hey|yo|sup|greetings|namaste|good\s+(?:morning|afternoon|evening|night))\b",
|
||||
r"\bhow\s+are\s+you\b",
|
||||
|
|
@ -133,8 +148,8 @@ _IDENTITY_VETO_PATTERNS = [
|
|||
# general small talk / thanks
|
||||
r"^\s*(?:thanks?|thank\s+you|thx|ty|ok|okay|cool|nice|great|awesome|bye|goodbye)\s*[!.?]*\s*$",
|
||||
# writing / reasoning / coding tasks (answered by the model, not the web)
|
||||
r"\bwrite\s+(?:a|an|me)\s+(?:poem|haiku|limerick|story|essay|letter|email|code|function|script|query|sql)\b",
|
||||
r"\bexplain\s+(?:what|how|why)\s+(?:is\s+)?(?:recursion|gradient\s+descent|backprop|attention|a\s+transformer|machine\s+learning|neural\s+network)\b",
|
||||
r"\bwrite\s+(?:a|an|me)\s+(?:poem|haiku|limerick|story|essay|letter|email|code|function|script|query|sql|song|joke)\b",
|
||||
r"\bexplain\s+(?:what|how|why)\s+(?:is\s+)?(?:recursion|gradient\s+descent|backprop|attention|a\s+transformer|machine\s+learning|neural\s+network|rope|softmax)\b",
|
||||
r"\bsolve\b.*=", # math equations
|
||||
]
|
||||
# Compiled form of each veto pattern, in the same order as
# _IDENTITY_VETO_PATTERNS; every pattern matches case-insensitively.
_IDENTITY_VETO_REGEXES = [
    re.compile(pattern, flags=re.IGNORECASE)
    for pattern in _IDENTITY_VETO_PATTERNS
]
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user