fix(chat-api): support Modal inference URL in inference client

The inference client now detects whether the URL already ends with /generate (Modal's endpoint URL pattern) and, if so, skips appending the path.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-07-06 04:49:14 +00:00 · 2026-04-16 14:36:36 -07:00 · 2026-04-16 14:36:36 -07:00 · 6d3e1f0afd
commit 6d3e1f0afd
parent e5b4db1eee
1 changed files with 7 additions and 1 deletions
--- a/services/chat-api/src/services/inference_client.py
+++ b/services/chat-api/src/services/inference_client.py
@@ -84,9 +84,15 @@ class InferenceClient:
        }

        client = self._get_client()
+        # If base_url already ends with /generate (e.g. a Modal endpoint URL),
+        # use it as-is. Otherwise append /generate for the local service.
+        url = self.base_url
+        if not url.endswith("/generate"):
+            url = f"{url}/generate"
+
        async with client.stream(
            "POST",
-            f"{self.base_url}/generate",
+            url,
            headers=self.headers,
            json=payload,
        ) as response: