#!/bin/bash

# Showing an example run for exercising some of the code paths on the CPU (or MPS on Macbooks)
# This script was last updated/tuned on Jan 17, 2026.

# Run as:
# bash dev/cpu_demo_run.sh

# NOTE: Training LLMs requires GPU compute and $$$. You will not get far on your Macbook.
# Think of this run as educational/fun demo, not something you should expect to work well.
# (This is why I hide this script away in dev/)
# You may also want to run this script manually and one by one, copy pasting commands into your terminal.

# all the setup stuff
export OMP_NUM_THREADS=1
export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat"
# Quoted so a $HOME containing whitespace still names a single directory.
mkdir -p "$NANOCHAT_BASE_DIR"
# Run the uv installer only when `uv` is not already on PATH.
command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
# Create the virtualenv only if .venv does not exist yet, then sync with the "cpu" extra.
[ -d ".venv" ] || uv venv
uv sync --extra cpu
source .venv/bin/activate
# Fall back to the run name "dummy" when WANDB_RUN is unset or empty.
WANDB_RUN="${WANDB_RUN:-dummy}"

# train tokenizer on ~2B characters (~34 seconds on my MacBook Pro M3 Max)
python -m nanochat.dataset -n 8
python -m scripts.tok_train --max-chars=2000000000
python -m scripts.tok_eval

# train a small model (the base_train command below passes --depth=6)
# I tuned this run to complete in about 30 minutes on my MacBook Pro M3 Max.
# To get better results, try increasing num_iterations, or get other ideas from your favorite LLM.
# Base training run. WANDB_RUN is quoted so an empty or whitespace-containing
# run name is still passed as a single --run argument.
python -m scripts.base_train \
    --depth=6 \
    --head-dim=64 \
    --window-pattern=L \
    --max-seq-len=512 \
    --device-batch-size=32 \
    --total-batch-size=16384 \
    --eval-every=100 \
    --eval-tokens=524288 \
    --core-metric-every=-1 \
    --sample-every=100 \
    --num-iterations=5000 \
    --run="$WANDB_RUN"
python -m scripts.base_loss --device-batch-size=1 --split-tokens=16384
python -m scripts.base_eval --max-per-task=16

# midtraining (~10 minutes on my MacBook Pro M3 Max)
# -f makes curl fail on an HTTP error instead of saving the server's error page
# as the .jsonl file; the output path is quoted in case the base dir has spaces.
curl -fL -o "$NANOCHAT_BASE_DIR/identity_conversations.jsonl" https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
python -m scripts.mid_train \
    --max-seq-len=512 \
    --device-batch-size=32 \
    --total-batch-size=16384 \
    --eval-every=200 \
    --eval-tokens=524288 \
    --num-iterations=1500 \
    --run="$WANDB_RUN"

# (it's ~ok to skip SFT)

# Chat with the model over CLI
# The model should be able to say that it is Paris.
# It might even know that the color of the sky is blue.
# Sometimes the model likes it if you first say Hi before you ask it questions.
# python -m scripts.chat_cli -i mid -p "What is the capital of France?"

# Chat with the model over a pretty WebUI ChatGPT style
# python -m scripts.chat_web -i mid