mirror of
https://github.com/karpathy/nanochat.git
synced 2026-05-07 16:30:11 +00:00
- modal/serve.py: FastAPI endpoint on Modal T4 GPU, streams SSE tokens - modal/_model.py: Standalone GPT model (auto-detects architecture from checkpoint) - modal/_tokenizer.py: Standalone BPE tokenizer (tiktoken-based) - Downloads nanochat-students/base-d20 weights from HuggingFace - Deployed at: https://manmohan659--samosachaat-inference-inference-generate.modal.run Deploy: modal deploy modal/serve.py Dev: modal serve modal/serve.py Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
80 lines
2.6 KiB
Python
80 lines
2.6 KiB
Python
"""
|
|
Minimal standalone tokenizer for Modal inference.
|
|
Uses tiktoken for fast encoding/decoding with nanochat's special tokens.
|
|
"""
|
|
|
|
import os
|
|
import pickle
|
|
import tiktoken
|
|
|
|
|
|
# nanochat special tokens: chat-structure markers mapped to fixed token ids.
SPECIAL_TOKENS = {
    "<|bos|>": 0,
    "<|user_start|>": 1,
    "<|user_end|>": 2,
    "<|assistant_start|>": 3,
    "<|assistant_end|>": 4,
    "<|python_start|>": 5,
    "<|python_end|>": 6,
    "<|output_start|>": 7,
    "<|output_end|>": 8,
}

# GPT-4 style split pattern (nanochat variant: digits grouped 1-2 rather than 1-3).
# FIX: restored the leading apostrophe before the contraction group. The
# mirrored copy had dropped it, which would make the alternation match bare
# "s"/"d"/"m"/"t"/"ll"/"ve"/"re" letter runs instead of contractions like "'ll".
# Uses possessive quantifiers (?+ / ++) and \p{...} classes, which tiktoken's
# regex engine supports (Python's `re` does not support \p{...}).
SPLIT_PATTERN = r"'(?i:[sdmt]|ll|ve|re)|[^\r\n\p{L}\p{N}]?+\p{L}+|\p{N}{1,2}| ?[^\s\p{L}\p{N}]++[\r\n]*|\s*[\r\n]|\s+(?!\S)|\s+"
|
|
|
|
|
|
class NanochatTokenizer:
|
|
def __init__(self, model_dir: str):
|
|
token_bytes_path = os.path.join(model_dir, "token_bytes.pt")
|
|
tokenizer_pkl_path = os.path.join(model_dir, "tokenizer.pkl")
|
|
|
|
if os.path.exists(tokenizer_pkl_path):
|
|
with open(tokenizer_pkl_path, "rb") as f:
|
|
loaded = pickle.load(f)
|
|
# Handle different pickle formats
|
|
if isinstance(loaded, dict):
|
|
mergeable_ranks = loaded
|
|
elif hasattr(loaded, '_mergeable_ranks'):
|
|
# It's a tiktoken Encoding object
|
|
mergeable_ranks = loaded._mergeable_ranks
|
|
else:
|
|
# Try to use it as a pre-built encoder
|
|
self._enc = loaded
|
|
return
|
|
elif os.path.exists(token_bytes_path):
|
|
import torch
|
|
token_bytes = torch.load(token_bytes_path, weights_only=True)
|
|
mergeable_ranks = {bytes(token_bytes[i].tolist()): i for i in range(len(token_bytes))}
|
|
else:
|
|
from huggingface_hub import hf_hub_download
|
|
path = hf_hub_download("karpathy/nanochat-d32", "tokenizer.pkl")
|
|
with open(path, "rb") as f:
|
|
mergeable_ranks = pickle.load(f)
|
|
|
|
self._enc = tiktoken.Encoding(
|
|
name="nanochat",
|
|
pat_str=SPLIT_PATTERN,
|
|
mergeable_ranks=mergeable_ranks,
|
|
special_tokens=SPECIAL_TOKENS,
|
|
)
|
|
|
|
def encode(self, text: str) -> list[int]:
|
|
return self._enc.encode(text, allowed_special=set())
|
|
|
|
def decode(self, tokens: list[int]) -> str:
|
|
return self._enc.decode(tokens)
|
|
|
|
def encode_special(self, token_name: str) -> list[int]:
|
|
return self._enc.encode(token_name, allowed_special="all")
|
|
|
|
def get_vocab_size(self) -> int:
|
|
return self._enc.n_vocab
|
|
|
|
|
|
def get_tokenizer(model_dir: str | None = None) -> NanochatTokenizer:
    """Factory for a NanochatTokenizer.

    When *model_dir* is None, falls back to the Modal volume mount at
    ``/weights/d20``.
    """
    target_dir = "/weights/d20" if model_dir is None else model_dir
    return NanochatTokenizer(target_dir)
|