diff --git a/modal/serve.py b/modal/serve.py index fc0df394..7589798a 100644 --- a/modal/serve.py +++ b/modal/serve.py @@ -229,7 +229,7 @@ class Inference: messages = request.get("messages", []) temperature = min(max(request.get("temperature", 0.8), 0.0), 2.0) - max_tokens = min(max(request.get("max_tokens", 512), 1), 2048) + max_tokens = min(max(request.get("max_tokens", 1024), 1), 4096) top_k = min(max(request.get("top_k", 50), 0), 200) force_web_search = bool(request.get("force_web_search", False)) diff --git a/services/chat-api/src/config.py b/services/chat-api/src/config.py index bc3ae51a..36b05e98 100644 --- a/services/chat-api/src/config.py +++ b/services/chat-api/src/config.py @@ -23,7 +23,7 @@ class Settings(BaseSettings): auth_cache_max_size: int = Field(default=1024) inference_default_temperature: float = Field(default=0.8) - inference_default_max_tokens: int = Field(default=512) + inference_default_max_tokens: int = Field(default=1024) inference_default_top_k: int = Field(default=50) frontend_url: str = Field(default="http://localhost:3000")