From 6d3e1f0afdd7dfcbf736b22e4099a1596c22d603 Mon Sep 17 00:00:00 2001 From: Manmohan Sharma Date: Thu, 16 Apr 2026 14:36:36 -0700 Subject: [PATCH] fix(chat-api): support Modal inference URL in inference client The inference client now auto-detects if the URL already ends with /generate (Modal's endpoint URL pattern) and skips appending the path. Co-Authored-By: Claude Opus 4.6 (1M context) --- services/chat-api/src/services/inference_client.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/services/chat-api/src/services/inference_client.py b/services/chat-api/src/services/inference_client.py index 235c41c1..cb612ecf 100644 --- a/services/chat-api/src/services/inference_client.py +++ b/services/chat-api/src/services/inference_client.py @@ -84,9 +84,15 @@ class InferenceClient: } client = self._get_client() + # If base_url already ends with /generate (e.g. a Modal endpoint URL), + # use it as-is. Otherwise append /generate for the local service. + url = self.base_url + if not url.endswith("/generate"): + url = f"{url}/generate" + async with client.stream( "POST", - f"{self.base_url}/generate", + url, headers=self.headers, json=payload, ) as response: