mirror of
https://github.com/karpathy/nanochat.git
synced 2026-01-22 19:34:17 +00:00
eval module needs to test
This commit is contained in:
parent 77da258ee1
commit bc11cd9e5b

lm_eval.md (new file, 46 lines)
@@ -0,0 +1,46 @@
# Running lm-eval with nanochat checkpoints

This repo ships its own evals (CORE, ARC/GSM8K/MMLU/HumanEval/SpellingBee), but you can also run the HuggingFace-compatible [lm-evaluation-harness](tools/lm-eval). The steps below assume you've already run `bash setup.sh` (it installs uv, the submodules, Python deps, and the Rust tokenizer).

## 1) Activate env
```bash
source .venv/bin/activate
```

## 2) Export a trained checkpoint to HF format
- `nanochat/to_hf.py` loads the latest checkpoint from `~/.cache/nanochat/<source>_checkpoints` and writes an HF folder.
- Choose source: `base` | `mid` | `sft` | `rl`.
```bash
# export latest base checkpoint to hf-export/base
uv run python -m nanochat.to_hf --source base --output hf-export/base

# export latest SFT checkpoint (chat model)
uv run python -m nanochat.to_hf --source sft --output hf-export/sft
```
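
To sanity-check an export before launching the harness, you can load the folder back with the wrapper class that ships in `nanochat/to_hf.py` (a minimal sketch; the `hf-export/sft` path just matches the commands above):

```python
# Quick round-trip check of an exported checkpoint (sketch).
from nanochat.to_hf import NanoChatHFForCausalLM

model = NanoChatHFForCausalLM.from_pretrained("hf-export/sft")
print(model.config)                                            # nanochat dims (n_layer, n_embd, ...)
print(sum(p.numel() for p in model.parameters()), "parameters")
```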

## 3) Run lm-eval benchmarks on the exported model
Use the HF backend (`--model hf`). Pick tasks; nanochat's built-in evals cover these, so they're good starters in lm-eval too:
- `arc_easy`, `arc_challenge`
- `mmlu`
- `gsm8k`
- `humaneval`

Example runs:
```bash
# Single task (MMLU)
uv run lm-eval run --model hf \
    --model_args pretrained=hf-export/sft \
    --tasks mmlu \
    --batch_size 1

# A small suite similar to nanochat chat_eval coverage
uv run lm-eval run --model hf \
    --model_args pretrained=hf-export/sft \
    --tasks arc_easy,arc_challenge,gsm8k,mmlu,humaneval \
    --batch_size 1
```

Notes:
- If you exported to a different folder, change `pretrained=...` accordingly. You can also point to a remote HF repo name.
- `--batch_size auto` can help find the largest batch that fits in GPU RAM. On CPU, keep it small.
- No KV cache is implemented in the HF wrapper; generation is standard `AutoModelForCausalLM` style, but each step re-runs the full prefix (see the sketch below).
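
If you drive decoding yourself (outside lm-eval), the missing KV cache just means every step recomputes the whole prefix. A minimal greedy loop over the wrapper, as a sketch only; the prompt ids and export path below are placeholders, not something the repo defines:

```python
# Toy greedy decoding with the exported wrapper (sketch; no KV cache, so each
# step re-runs the full prefix through the model).
import torch
from nanochat.to_hf import NanoChatHFForCausalLM

model = NanoChatHFForCausalLM.from_pretrained("hf-export/sft").eval()
ids = torch.tensor([[1, 2, 3]])              # placeholder prompt token ids
for _ in range(20):
    with torch.no_grad():
        logits = model(input_ids=ids).logits
    next_id = logits[:, -1].argmax(dim=-1, keepdim=True)
    ids = torch.cat([ids, next_id], dim=1)
print(ids.tolist())
```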

nanochat/to_hf.py (new file, 159 lines)
@@ -0,0 +1,159 @@
"""
|
||||
Convert a nanochat checkpoint into a HuggingFace-style folder.
|
||||
|
||||
Usage (example):
|
||||
python -m nanochat.to_hf --source base --output hf-export/base
|
||||
|
||||
Notes
|
||||
- Assumes checkpoints live under ~/.cache/nanochat/<source>_checkpoints/ (same as training scripts).
|
||||
- The exported model can be loaded with transformers via:
|
||||
AutoModelForCausalLM.from_pretrained(<export_dir>, trust_remote_code=True)
|
||||
- KV cache is not implemented in the HF wrapper; generation works but is not incremental.
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
try:
|
||||
from transformers import PreTrainedModel, PretrainedConfig
|
||||
from transformers.modeling_outputs import CausalLMOutputWithPast
|
||||
except ImportError as exc:
|
||||
raise SystemExit(
|
||||
"transformers is required for HF export. Run `uv sync` (with the hf extra) first."
|
||||
) from exc
|
||||
|
||||
from nanochat.checkpoint_manager import load_model
|
||||
from nanochat.gpt import GPT, GPTConfig
|
||||
from nanochat.common import get_base_dir
|
||||
from nanochat.tokenizer import get_tokenizer
|
||||
|
||||
|
||||
class NanoChatHFConfig(PretrainedConfig):
|
||||
model_type = "nanochat"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
sequence_len: int = 1024,
|
||||
vocab_size: int = 50304,
|
||||
n_layer: int = 12,
|
||||
n_head: int = 6,
|
||||
n_kv_head: int = 6,
|
||||
n_embd: int = 768,
|
||||
**kwargs,
|
||||
):
|
||||
# Don't tie embeddings; nanochat uses untied wte/lm_head
|
||||
kwargs.setdefault("tie_word_embeddings", False)
|
||||
super().__init__(**kwargs)
|
||||
self.sequence_len = sequence_len
|
||||
self.vocab_size = vocab_size
|
||||
self.n_layer = n_layer
|
||||
self.n_head = n_head
|
||||
self.n_kv_head = n_kv_head
|
||||
self.n_embd = n_embd
|
||||
|
||||
|
||||
class NanoChatHFForCausalLM(PreTrainedModel):
|
||||
config_class = NanoChatHFConfig
|
||||
|
||||
def __init__(self, config: NanoChatHFConfig):
|
||||
super().__init__(config)
|
||||
gpt_cfg = GPTConfig(
|
||||
sequence_len=config.sequence_len,
|
||||
vocab_size=config.vocab_size,
|
||||
n_layer=config.n_layer,
|
||||
n_head=config.n_head,
|
||||
n_kv_head=config.n_kv_head,
|
||||
n_embd=config.n_embd,
|
||||
)
|
||||
self.model = GPT(gpt_cfg)
|
||||
|
||||
def get_input_embeddings(self):
|
||||
return self.model.transformer.wte
|
||||
|
||||
def set_input_embeddings(self, value):
|
||||
self.model.transformer.wte = value
|
||||
|
||||
def get_output_embeddings(self):
|
||||
return self.model.lm_head
|
||||
|
||||
def tie_weights(self):
|
||||
# nanochat uses untied embeddings; override to no-op
|
||||
return

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,  # unused
        labels: Optional[torch.LongTensor] = None,
        past_key_values=None,  # not implemented
        **_: dict,
    ) -> CausalLMOutputWithPast:
        if input_ids is None:
            raise ValueError("input_ids must be provided")
        logits = self.model(input_ids)
        loss = None
        if labels is not None:
            # HF causal-LM convention: shift so that position t predicts token t+1
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            loss = F.cross_entropy(
                shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1), ignore_index=-1
            )
        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=None,
            hidden_states=None,
            attentions=None,
        )

    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        return {"input_ids": input_ids, "attention_mask": kwargs.get("attention_mask", None)}


def copy_tokenizer_files(output_dir: str):
    base_dir = get_base_dir()
    tokenizer_dir = os.path.join(base_dir, "tokenizer")
    if not os.path.isdir(tokenizer_dir):
        print(f"[to_hf] tokenizer directory not found at {tokenizer_dir}, skipping tokenizer export")
        return
    for name in os.listdir(tokenizer_dir):
        src = os.path.join(tokenizer_dir, name)
        dst = os.path.join(output_dir, name)
        if os.path.isdir(src):
            shutil.copytree(src, dst, dirs_exist_ok=True)
        else:
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            shutil.copy2(src, dst)
    print(f"[to_hf] Copied tokenizer files from {tokenizer_dir} to {output_dir}")


def export_to_hf(source: str, output_dir: str, model_tag: Optional[str], step: Optional[int]):
    device = torch.device("cpu")
    model, tokenizer, meta = load_model(source, device=device, phase="eval", model_tag=model_tag, step=step)
    cfg_kwargs = meta["model_config"]
    hf_config = NanoChatHFConfig(**cfg_kwargs)
    hf_model = NanoChatHFForCausalLM(hf_config)
    hf_model.model.load_state_dict(model.state_dict(), strict=True)

    os.makedirs(output_dir, exist_ok=True)
    hf_model.save_pretrained(output_dir, safe_serialization=False)
    # Best effort: drop tokenizer files alongside weights
    copy_tokenizer_files(output_dir)
    print(f"[to_hf] Exported {source} checkpoint to {output_dir}")


def main():
    parser = argparse.ArgumentParser(description="Export nanochat checkpoint to HuggingFace format")
    parser.add_argument("--source", choices=["base", "mid", "sft", "rl"], default="base", help="Which checkpoint family to export")
    parser.add_argument("--model-tag", type=str, default=None, help="Model tag (e.g., d20). Defaults to largest available.")
    parser.add_argument("--step", type=int, default=None, help="Checkpoint step. Defaults to latest.")
    parser.add_argument("--output", type=str, default="hf-export", help="Output directory for HF files")
    args = parser.parse_args()

    export_to_hf(args.source, args.output, args.model_tag, args.step)


if __name__ == "__main__":
    main()
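
For loglikelihood-style tasks (ARC, MMLU), lm-eval's HF backend mostly needs the wrapper's forward pass to return logits; it computes the log-probabilities itself. A rough, standalone sketch of that scoring path, where the export directory and the random token ids are placeholders rather than part of the commit:

```python
# Score a token sequence with the exported wrapper (sketch).
import torch
import torch.nn.functional as F
from nanochat.to_hf import NanoChatHFForCausalLM

model = NanoChatHFForCausalLM.from_pretrained("hf-export/base").eval()
ids = torch.randint(0, model.config.vocab_size, (1, 32))   # stand-in token ids
with torch.no_grad():
    logits = model(input_ids=ids).logits                   # (1, T, vocab_size)
# log-probability that each position assigns to the token that actually follows it
logprobs = F.log_softmax(logits[:, :-1], dim=-1)
token_lp = logprobs.gather(-1, ids[:, 1:].unsqueeze(-1)).squeeze(-1)
print("sum log-prob:", token_lp.sum().item())
```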

pyproject.toml (modified)
@@ -16,6 +16,7 @@ dependencies = [
    "torch>=2.8.0",
    "uvicorn>=0.36.0",
    "wandb>=0.21.3",
    "lm_eval[hf]",
]

[build-system]
@@ -49,6 +50,7 @@ torch = [
    { index = "pytorch-cpu", extra = "cpu" },
    { index = "pytorch-cu128", extra = "gpu" },
]
lm_eval = { path = "tools/lm-eval" }

[[tool.uv.index]]
name = "pytorch-cpu"
@@ -74,4 +76,4 @@ conflicts = [
    { extra = "cpu" },
    { extra = "gpu" },
  ],
]

setup.sh (new file, 51 lines)
@@ -0,0 +1,51 @@
#!/usr/bin/env bash
# Setup nanochat after cloning the repo.
# - initializes the tools submodule (lm-evaluation-harness)
# - creates a uv virtualenv
# - installs deps (choose gpu|cpu extra)
# - builds the Rust tokenizer extension

set -euo pipefail

# -----------------------------
# Helpers
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$repo_root"

extra="${1:-gpu}"
if [[ "$extra" != "gpu" && "$extra" != "cpu" ]]; then
  echo "Usage: bash setup.sh [gpu|cpu]" >&2
  exit 1
fi

echo "[setup] Initializing submodules (tools/lm-eval)..."
git submodule update --init --recursive

echo "[setup] Ensuring uv is installed..."
if ! command -v uv >/dev/null 2>&1; then
  curl -LsSf https://astral.sh/uv/install.sh | sh
  # shellcheck source=/dev/null
  command -v uv >/dev/null 2>&1 || export PATH="$HOME/.local/bin:$PATH"
fi

echo "[setup] Ensuring Rust toolchain..."
if ! command -v cargo >/dev/null 2>&1; then
  curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
fi
# shellcheck source=/dev/null
command -v cargo >/dev/null 2>&1 || source "$HOME/.cargo/env"

echo "[setup] Creating virtual environment (.venv)..."
[ -d ".venv" ] || uv venv

echo "[setup] Installing Python deps (extra=$extra)..."
uv sync --extra "$extra"

echo "[setup] Building Rust tokenizer (rustbpe)..."
if [ -n "${CONDA_PREFIX:-}" ]; then
  echo "[setup] CONDA_PREFIX detected; unsetting to avoid conflicts with VIRTUAL_ENV during build..."
  unset CONDA_PREFIX
fi
uv run maturin develop --release --manifest-path rustbpe/Cargo.toml

echo "[setup] Done. Activate with: source .venv/bin/activate"