#!/bin/bash # Showing an example run for exercising some of the code paths on the CPU (or MPS on Macbooks) # Run as: # bash dev/cpu_demo_run.sh # NOTE: Training LLMs requires GPU compute and $$$. You will not get far on your Macbook. # Think of this run as educational/fun demo, not something you should expect to work well. # This is also why I hide this script away in dev/ # Stage selection (allow running only a subset, e.g. --stage=sft or --from=mid) RUN_BASE=1 RUN_MID=1 RUN_SFT=1 RUN_REPORT=1 STAGE_ONLY="" FROM_STAGE="" while [[ $# -gt 0 ]]; do case "$1" in --stage=*) STAGE_ONLY="${1#*=}" ;; --base|--mid|--sft|--report) STAGE_ONLY="${1#--}" ;; --from=*) FROM_STAGE="${1#*=}" ;; --from-base|--from-mid|--from-sft) FROM_STAGE="${1#--from-}" ;; --help|-h) cat <<'EOF' Usage: bash dev/runmps.sh [options] Options: --stage= Run only the specified stage. --from= Run from the specified stage through the end. --help Show this help message. Environment variables (same as before) control batch sizes, WANDB run names, etc. EOF exit 0 ;; *) echo "Unknown argument: $1" >&2 exit 1 ;; esac shift done if [[ -n "$FROM_STAGE" ]]; then RUN_BASE=0 RUN_MID=0 RUN_SFT=0 RUN_REPORT=0 case "$FROM_STAGE" in base) RUN_BASE=1 RUN_MID=1 RUN_SFT=1 RUN_REPORT=1 ;; mid) RUN_MID=1 RUN_SFT=1 ;; sft) RUN_SFT=1 ;; *) echo "Unknown --from stage: $FROM_STAGE" >&2 exit 1 ;; esac fi if [[ -n "$STAGE_ONLY" ]]; then RUN_BASE=0 RUN_MID=0 RUN_SFT=0 RUN_REPORT=0 case "$STAGE_ONLY" in base) RUN_BASE=1 ;; mid) RUN_MID=1 ;; sft) RUN_SFT=1 ;; report) RUN_REPORT=1 ;; *) echo "Unknown --stage value: $STAGE_ONLY" >&2 exit 1 ;; esac fi if [[ -n "$STAGE_ONLY" || -n "$FROM_STAGE" ]]; then # avoid regenerating reports when running a subset unless specifically requested if [[ "$STAGE_ONLY" != "report" && "$FROM_STAGE" != "base" ]]; then RUN_REPORT=0 fi fi # all the setup stuff export OMP_NUM_THREADS=1 NANOCHAT_BASE_DIR="$HOME/.cache/nanochat" mkdir -p $NANOCHAT_BASE_DIR command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh [ -d ".venv" ] || uv venv uv sync --extra cpu source .venv/bin/activate if [ -z "$WANDB_RUN" ]; then WANDB_RUN=dummy fi if [ "$WANDB_RUN" != "dummy" ] && [ -z "$WANDB_MODE" ]; then export WANDB_MODE=online fi # Batch/sequence configuration BASE_DEPTH=${BASE_DEPTH:-4} SEQ_LEN=${SEQ_LEN:-1024} DEVICE_BATCH=${DEVICE_BATCH:-16} TOTAL_BATCH=${TOTAL_BATCH:-$((DEVICE_BATCH * SEQ_LEN))} # tokens per optimizer step EVAL_SEQUENCES=10000 EVAL_STEPS=$(((EVAL_SEQUENCES + DEVICE_BATCH - 1) / DEVICE_BATCH)) EVAL_BATCH_MULT=4 # evaluate on 4 full batches EVAL_TOKENS=$((TOTAL_BATCH * EVAL_BATCH_MULT)) MID_NUM_STEPS=6144 SFT_NUM_STEPS=${SFT_NUM_STEPS:-3072} CHECKPOINT_EVERY_SEQ=${CHECKPOINT_EVERY_SEQ:-10000} RUN_STAGE_EVALS=${RUN_STAGE_EVALS:-0} WANDB_PROJECT=${WANDB_PROJECT:-nanochat} TARGET_PARAM_DATA_RATIO=${TARGET_PARAM_DATA_RATIO:-20} SFT_DEVICE_BATCH=${SFT_DEVICE_BATCH:-$DEVICE_BATCH} SFT_TARGET_EXAMPLES=${SFT_TARGET_EXAMPLES:-$DEVICE_BATCH} SFT_EVAL_EVERY=${SFT_EVAL_EVERY:-0} SFT_EVAL_STEPS=${SFT_EVAL_STEPS:-0} SFT_EVAL_METRICS_EVERY=${SFT_EVAL_METRICS_EVERY:-0} SFT_EVAL_METRICS_MAX=${SFT_EVAL_METRICS_MAX:-0} STATE_FILE=".runmps_wandb_ids" printf '' > "$STATE_FILE" echo "WANDB_PROJECT=$WANDB_PROJECT" >> "$STATE_FILE" generate_wandb_id() { python - <<'PY' import uuid print(uuid.uuid4().hex[:8]) PY } BASE_SEQS_PER_STEP=$((TOTAL_BATCH / SEQ_LEN)) if [ $BASE_SEQS_PER_STEP -le 0 ]; then BASE_SEQS_PER_STEP=1; fi if [ "$CHECKPOINT_EVERY_SEQ" -le 0 ]; then BASE_CHECKPOINT_STEPS=0 else BASE_CHECKPOINT_STEPS=$(((CHECKPOINT_EVERY_SEQ + BASE_SEQS_PER_STEP - 1) / BASE_SEQS_PER_STEP)) fi MID_SEQS_PER_STEP=$((TOTAL_BATCH / SEQ_LEN)) if [ $MID_SEQS_PER_STEP -le 0 ]; then MID_SEQS_PER_STEP=1; fi if [ "$CHECKPOINT_EVERY_SEQ" -le 0 ]; then MID_CHECKPOINT_STEPS=0 else MID_CHECKPOINT_STEPS=$(((CHECKPOINT_EVERY_SEQ + MID_SEQS_PER_STEP - 1) / MID_SEQS_PER_STEP)) fi SFT_SEQS_PER_STEP=$SFT_DEVICE_BATCH if [ $SFT_SEQS_PER_STEP -le 0 ]; then SFT_SEQS_PER_STEP=1; fi if [ "$CHECKPOINT_EVERY_SEQ" -le 0 ]; then SFT_CHECKPOINT_STEPS=0 else SFT_CHECKPOINT_STEPS=$(((CHECKPOINT_EVERY_SEQ + SFT_SEQS_PER_STEP - 1) / SFT_SEQS_PER_STEP)) fi # Auto-populate WANDB_API_KEY from ~/.netrc when talking to a local W&B server. # Mirrors the helper used in TinyRecursiveModels/pretrain_text.py so we can log # to a self-hosted instance without manual export each time. if [ -z "$WANDB_API_KEY" ] && [ -f "$HOME/.netrc" ]; then # Allow custom WANDB_BASE_URL; default to localhost if user sets WANDB WANDB_BASE_URL_DEFAULT=${WANDB_BASE_URL:-http://localhost:8080} if printf '%s' "$WANDB_BASE_URL_DEFAULT" | grep -q "localhost"; then HOST=$(printf '%s\n' "$WANDB_BASE_URL_DEFAULT" | sed -E 's#https?://##' | cut -d/ -f1) API_KEY=$(python - "$HOST" <<'PY' import sys from netrc import netrc host = sys.argv[1] auth = netrc().authenticators(host) if auth and auth[2]: print(auth[2], end="") PY ) if [ -n "$API_KEY" ]; then export WANDB_BASE_URL="$WANDB_BASE_URL_DEFAULT" export WANDB_API_KEY="$API_KEY" echo "[runmps] Loaded WANDB_API_KEY for $WANDB_BASE_URL from ~/.netrc" fi fi fi curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y source "$HOME/.cargo/env" uv run maturin develop --release --manifest-path rustbpe/Cargo.toml EVAL_BUNDLE_URL=https://karpathy-public.s3.us-west-2.amazonaws.com/eval_bundle.zip if [ ! -d "$NANOCHAT_BASE_DIR/eval_bundle" ]; then curl -L -o eval_bundle.zip $EVAL_BUNDLE_URL unzip -q eval_bundle.zip rm eval_bundle.zip mv eval_bundle $NANOCHAT_BASE_DIR fi if (( RUN_BASE )); then # wipe the report python -m nanochat.report reset # train tokenizer on ~2B characters (download full shard set for extended training) python -m nanochat.dataset -n 240 python -m scripts.tok_train --max_chars=2000000000 python -m scripts.tok_eval # train a very small 4 layer model on the CPU # each optimization step processes a single sequence of 1024 tokens # we only run 50 steps of optimization (bump this to get better results) if [ "$WANDB_RUN" != "dummy" ]; then BASE_WANDB_ID=${BASE_WANDB_ID:-$(generate_wandb_id)} echo "BASE_WANDB_ID=$BASE_WANDB_ID" >> "$STATE_FILE" echo "BASE_WANDB_NAME=$WANDB_RUN" >> "$STATE_FILE" export WANDB_RUN_ID=$BASE_WANDB_ID export WANDB_EVAL_RUN=$WANDB_RUN export WANDB_PROJECT fi python -m scripts.base_train \ --depth=$BASE_DEPTH \ --max_seq_len=$SEQ_LEN \ --device_batch_size=$DEVICE_BATCH \ --total_batch_size=$TOTAL_BATCH \ --target_param_data_ratio=$TARGET_PARAM_DATA_RATIO \ --run="$WANDB_RUN" \ --eval_every=$EVAL_STEPS \ --eval_tokens=$EVAL_TOKENS \ --core_metric_every=-1 \ --sample_every=-1 \ --checkpoint_every_steps=$BASE_CHECKPOINT_STEPS if [ "$WANDB_RUN" != "dummy" ]; then unset WANDB_RUN_ID unset WANDB_EVAL_RUN fi if [ "$RUN_STAGE_EVALS" = "1" ]; then python -m scripts.base_loss --device_batch_size=$DEVICE_BATCH --split_tokens=$EVAL_TOKENS python -m scripts.base_eval --max-per-task=16 fi fi if (( RUN_MID )); then # midtraining if [ "$WANDB_RUN" != "dummy" ]; then MID_WANDB_ID=${MID_WANDB_ID:-$(generate_wandb_id)} echo "MID_WANDB_ID=$MID_WANDB_ID" >> "$STATE_FILE" echo "MID_WANDB_NAME=${WANDB_RUN}-mid" >> "$STATE_FILE" export WANDB_RUN_ID=$MID_WANDB_ID export WANDB_EVAL_RUN="${WANDB_RUN}-mid" export WANDB_PROJECT fi python -m scripts.mid_train \ --max_seq_len=$SEQ_LEN \ --device_batch_size=$DEVICE_BATCH \ --total_batch_size=$TOTAL_BATCH \ --run="${WANDB_RUN}-mid" \ --eval_every=$EVAL_STEPS \ --eval_tokens=$EVAL_TOKENS \ --checkpoint_every_steps=$MID_CHECKPOINT_STEPS \ --num_iterations=$MID_NUM_STEPS if [ "$WANDB_RUN" != "dummy" ]; then unset WANDB_RUN_ID unset WANDB_EVAL_RUN fi if [ "$RUN_STAGE_EVALS" = "1" ]; then # eval results will be terrible, this is just to execute the code paths. # note that we lower the execution memory limit to 1MB to avoid warnings on smaller systems python -m scripts.chat_eval --source=mid --max-new-tokens=128 --max-problems=20 fi fi if (( RUN_SFT )); then # SFT if [ "$WANDB_RUN" != "dummy" ]; then SFT_WANDB_ID=${SFT_WANDB_ID:-$(generate_wandb_id)} echo "SFT_WANDB_ID=$SFT_WANDB_ID" >> "$STATE_FILE" echo "SFT_WANDB_NAME=${WANDB_RUN}-sft" >> "$STATE_FILE" export WANDB_RUN_ID=$SFT_WANDB_ID export WANDB_EVAL_RUN="${WANDB_RUN}-sft" export WANDB_PROJECT fi python -m scripts.chat_sft \ --device_batch_size=$SFT_DEVICE_BATCH \ --target_examples_per_step=$SFT_TARGET_EXAMPLES \ --run="${WANDB_RUN}-sft" \ --num_iterations=$SFT_NUM_STEPS \ --eval_every=$SFT_EVAL_EVERY \ --eval_steps=$SFT_EVAL_STEPS \ --eval_metrics_every=$SFT_EVAL_METRICS_EVERY \ --eval_metrics_max_problems=$SFT_EVAL_METRICS_MAX \ --checkpoint_every_steps=$SFT_CHECKPOINT_STEPS if [ "$WANDB_RUN" != "dummy" ]; then unset WANDB_RUN_ID unset WANDB_EVAL_RUN fi fi # Chat CLI # python -m scripts.chat_cli -p "Why is the sky blue?" # Chat Web # python -m scripts.chat_web if (( RUN_REPORT )); then python -m nanochat.report generate fi if [ "$RUN_STAGE_EVALS" != "1" ] && (( RUN_BASE || RUN_MID || RUN_SFT )); then echo "[runmps] Inline evals disabled. Run 'bash dev/runmps_evals.sh' to compute metrics from saved checkpoints." fi