mirror of
https://github.com/karpathy/nanochat.git
synced 2026-05-08 16:59:59 +00:00
Merge pull request #64 from manmohan659/feat/double-max-tokens
feat: double default and max generation budget
This commit is contained in:
commit
4776430423
|
|
@@ -229,7 +229,7 @@ class Inference:
|
|||
|
||||
messages = request.get("messages", [])
|
||||
temperature = min(max(request.get("temperature", 0.8), 0.0), 2.0)
|
||||
- max_tokens = min(max(request.get("max_tokens", 512), 1), 2048)
|
||||
+ max_tokens = min(max(request.get("max_tokens", 1024), 1), 4096)
|
||||
top_k = min(max(request.get("top_k", 50), 0), 200)
|
||||
force_web_search = bool(request.get("force_web_search", False))
|
||||
|
||||
|
|
|
|||
|
|
@@ -23,7 +23,7 @@ class Settings(BaseSettings):
|
|||
auth_cache_max_size: int = Field(default=1024)
|
||||
|
||||
inference_default_temperature: float = Field(default=0.8)
|
||||
- inference_default_max_tokens: int = Field(default=512)
|
||||
+ inference_default_max_tokens: int = Field(default=1024)
|
||||
inference_default_top_k: int = Field(default=50)
|
||||
|
||||
frontend_url: str = Field(default="http://localhost:3000")
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user