mirror of
https://github.com/karpathy/nanochat.git
synced 2026-05-01 13:30:23 +00:00
now ready for install
This commit is contained in:
parent
bc11cd9e5b
commit
6095f82fdd
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -4,4 +4,5 @@ __pycache__/
|
|||
rustbpe/target/
|
||||
dev-ignore/
|
||||
report.md
|
||||
eval_bundle/
|
||||
eval_bundle/
|
||||
hf-export/*
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
# Running lm-eval with nanochat checkpoints
|
||||
|
||||
This repo ships its own evals (CORE, ARC/GSM8K/MMLU/HumanEval/SpellingBee), but you can also run the HuggingFace-compatible [lm-evaluation-harness](tools/lm-eval). Steps below assume you've already run `bash setup.sh` (installs uv, submodules, deps, Rust tokenizer).
|
||||
This repo ships its own evals (CORE, ARC/GSM8K/MMLU/HumanEval/SpellingBee), but you can also run the HuggingFace-compatible [lm-evaluation-harness](tools/lm-eval). Steps below assume you've already run `bash setup.sh` (installs uv, submodules, deps, Rust tokenizer). `Please clone and run this repo on a local disk!`
|
||||
|
||||
## 1) Activate env
|
||||
```bash
|
||||
|
|
@ -9,7 +9,7 @@ source .venv/bin/activate
|
|||
|
||||
## 2) Export a trained checkpoint to HF format
|
||||
- `nanochat/to_hf.py` loads the latest checkpoint from `~/.cache/nanochat/<source>_checkpoints` and writes an HF folder.
|
||||
- Choose source: `base` | `mid` | `sft` | `rl`.
|
||||
- Choose source: `base` | `mid` | `chatsft` | `chatrl`.
|
||||
```bash
|
||||
# export latest base checkpoint to hf-export/base
|
||||
uv run python -m nanochat.to_hf --source base --output hf-export/base
|
||||
|
|
|
|||
|
|
@ -101,6 +101,7 @@ def find_largest_model(checkpoint_dir):
|
|||
# 1) normally all model tags are of the form d<number>, try that first:
|
||||
candidates = []
|
||||
for model_tag in model_tags:
|
||||
print(model_tag)
|
||||
match = re.match(r"d(\d+)", model_tag)
|
||||
if match:
|
||||
model_depth = int(match.group(1))
|
||||
|
|
|
|||
57
setup.sh
57
setup.sh
|
|
@ -11,6 +11,21 @@ set -euo pipefail
|
|||
# Helpers
|
||||
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
cd "$repo_root"
|
||||
# -----------------------------
|
||||
# Rust build output: avoid building into repo (may be on NFS/overlay and cause mmap 0-len errors)
|
||||
export PATH="$HOME/.cargo/bin:$PATH"
|
||||
# shellcheck source=/dev/null
|
||||
[ -f "$HOME/.cargo/env" ] && source "$HOME/.cargo/env"
|
||||
|
||||
# Prefer node-local scratch for Cargo artifacts
|
||||
if [ -d /tmp ] && [ -w /tmp ]; then
|
||||
export CARGO_TARGET_DIR="/tmp/cargo-target-${USER:-root}/nanochat"
|
||||
else
|
||||
export CARGO_TARGET_DIR="$repo_root/.cargo-target"
|
||||
fi
|
||||
mkdir -p "$CARGO_TARGET_DIR"
|
||||
|
||||
echo "[setup] Using CARGO_TARGET_DIR=$CARGO_TARGET_DIR"
|
||||
|
||||
extra="${1:-gpu}"
|
||||
if [[ "$extra" != "gpu" && "$extra" != "cpu" ]]; then
|
||||
|
|
@ -19,6 +34,7 @@ if [[ "$extra" != "gpu" && "$extra" != "cpu" ]]; then
|
|||
fi
|
||||
|
||||
echo "[setup] Initializing submodules (tools/lm-eval)..."
|
||||
echo "[setup] This would take some time to download all the benchmarks in lm-eval"
|
||||
git submodule update --init --recursive
|
||||
|
||||
echo "[setup] Ensuring uv is installed..."
|
||||
|
|
@ -29,17 +45,52 @@ if ! command -v uv >/dev/null 2>&1; then
|
|||
fi
|
||||
|
||||
echo "[setup] Ensuring Rust toolchain..."
|
||||
if ! command -v cargo >/dev/null 2>&1; then
|
||||
|
||||
# Always ensure cargo bin is on PATH (important for non-interactive shells / root)
|
||||
export PATH="$HOME/.cargo/bin:$PATH"
|
||||
|
||||
if ! command -v rustup >/dev/null 2>&1; then
|
||||
echo "[setup] rustup not found; installing..."
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
|
||||
fi
|
||||
|
||||
# Load cargo env if present (ignore if missing)
|
||||
# shellcheck source=/dev/null
|
||||
command -v cargo >/dev/null 2>&1 || source "$HOME/.cargo/env"
|
||||
if [ -f "$HOME/.cargo/env" ]; then
|
||||
source "$HOME/.cargo/env"
|
||||
fi
|
||||
|
||||
# After installation/sourcing, ensure cargo exists
|
||||
if ! command -v cargo >/dev/null 2>&1; then
|
||||
echo "[setup] ERROR: cargo not found even after rustup install. PATH=$PATH" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Ensure a default toolchain is configured, otherwise maturin/cargo metadata will fail.
|
||||
# This fixes: "rustup could not choose a version of cargo to run ... no default is configured."
|
||||
if rustup show active-toolchain >/dev/null 2>&1; then
|
||||
echo "[setup] Active Rust toolchain: $(rustup show active-toolchain | head -n 1)"
|
||||
else
|
||||
echo "[setup] No active Rust toolchain; setting default to stable..."
|
||||
rustup default stable
|
||||
fi
|
||||
|
||||
# (Optional but nice) make sure stable is installed even if default points elsewhere
|
||||
rustup toolchain install stable >/dev/null 2>&1 || true
|
||||
|
||||
echo "[setup] Rust: $(rustc --version 2>/dev/null || echo 'rustc not found')"
|
||||
echo "[setup] Cargo: $(cargo --version 2>/dev/null || echo 'cargo not found')"
|
||||
|
||||
|
||||
echo "[setup] Creating virtual environment (.venv)..."
|
||||
[ -d ".venv" ] || uv venv
|
||||
|
||||
echo "[setup] Cleaning rustbpe build artifacts (safe)..."
|
||||
rm -rf rustbpe/target || true
|
||||
|
||||
echo "[setup] Installing Python deps (extra=$extra)..."
|
||||
uv sync --extra "$extra"
|
||||
echo "[setup] This will iterate all benchmarks, may take a long time"
|
||||
UV_LOG_LEVEL=debug uv sync --extra "$extra" -v
|
||||
|
||||
echo "[setup] Building Rust tokenizer (rustbpe)..."
|
||||
if [ -n "${CONDA_PREFIX:-}" ]; then
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user