From 6069a7329b46b200a84dea92cbad66ebde844fec Mon Sep 17 00:00:00 2001
From: Manmohan Sharma
Date: Wed, 22 Apr 2026 16:08:59 -0700
Subject: [PATCH] fix: search veto for identity+greetings, grounding suffix
 for tool results
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two bugs: (1) force-web-search toggle bypassed identity veto — 'who are u'
with Search on hit Tavily and got personality-quiz garbage. Now we always
check _is_identity_or_meta() which covers identity, creator, samosaChaat
references AND greetings (hi/hello/hey/what's up) before honoring the
force toggle.

(2) Model ignored injected Tavily result and answered from training priors
(e.g. generic VP bio instead of specific Armenia/Iran facts). Added a
grounding suffix after <|output_end|> ('Based on the search results above, '
for web_search, 'The result is ' for calculator) so the model's next tokens
condition on the fresh tool output instead of spinning up memory.
---
 modal/serve.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/modal/serve.py b/modal/serve.py
index e90cfd5f..3bff6f75 100644
--- a/modal/serve.py
+++ b/modal/serve.py
@@ -293,12 +293,20 @@ class Inference:
             )
         except Exception:
             needs_search, rewritten = False, ""
-        # Explicit user toggle wins — always force when force_web_search is True
+        # Explicit user toggle forces — BUT never for identity/meta queries.
+        # Model's SFT training has the correct identity answer; Tavily returns
+        # irrelevant junk (Tyler the Creator, personality quiz results, etc).
         if force_web_search and query_for_classify:
-            needs_search = True
-            if not rewritten:
-                # if classifier didn't rewrite, do a minimal cleanup
-                rewritten = query_for_classify.strip().rstrip("?.!") + " 2026"
+            try:
+                from _query_classifier import _is_identity_or_meta
+                _identity_q = _is_identity_or_meta(query_for_classify)
+            except Exception:
+                _identity_q = False
+            if not _identity_q:
+                needs_search = True
+                if not rewritten:
+                    rewritten = query_for_classify.strip().rstrip("?.!") + " 2026"
+            # if identity, leave needs_search as whatever contextual returned (False)
         if needs_search and rewritten:
             preface = "I'll look that up for you. "
             tool_call_json = json.dumps(
@@ -311,10 +319,14 @@ class Inference:
                 result_text = tool_result.to_payload()[:4096]
             except Exception as exc:
                 result_text = json.dumps({"error": str(exc)[:500]})
+            # Grounding suffix: anchors the model to the fresh tool output
+            # instead of spinning up training-data priors. The model continues
+            # from this phrase and therefore bases its answer on the result.
             forced_prefix_text = (
                 preface
                 + "<|python_start|>" + tool_call_json + "<|python_end|>"
                 + "<|output_start|>" + result_text + "<|output_end|>\n"
+                + "Based on the search results above, "
             )
             tokens.extend(self.tokenizer.encode(forced_prefix_text))
         else:
@@ -339,6 +351,7 @@ class Inference:
                 preface
                 + "<|python_start|>" + calc_call_json + "<|python_end|>"
                 + "<|output_start|>" + calc_result_text + "<|output_end|>\n"
+                + "The result is "
             )
             tokens.extend(self.tokenizer.encode(forced_prefix_text))
 