fix(chat-api): support Modal inference URL in inference client

The inference client now checks whether the base URL already ends with
/generate (Modal's endpoint URL pattern) and, if so, uses it as-is instead of appending the path again.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Manmohan Sharma 2026-04-16 14:36:36 -07:00
parent e5b4db1eee
commit 6d3e1f0afd
No known key found for this signature in database

View File

@ -84,9 +84,15 @@ class InferenceClient:
}
client = self._get_client()
# If base_url already ends with /generate (e.g. a Modal endpoint URL),
# use it as-is. Otherwise append /generate for the local service.
url = self.base_url
if not url.endswith("/generate"):
url = f"{url}/generate"
async with client.stream(
"POST",
f"{self.base_url}/generate",
url,
headers=self.headers,
json=payload,
) as response: