From 861414d500cfa74ff81d7610aa814fb511124894 Mon Sep 17 00:00:00 2001 From: Matt Van Horn Date: Mon, 9 Mar 2026 07:52:48 -0700 Subject: [PATCH] add explicit --model-tag to run scripts Without --model-tag, chat_sft/chat_eval/chat_cli/chat_web/base_eval can pick the wrong model when multiple models exist in the cache. Add explicit --model-tag=d6 (runcpu) and --model-tag=d24 (speedrun) matching the depth used in each script's base_train call. Co-Authored-By: Claude Opus 4.6 --- runs/runcpu.sh | 7 ++++--- runs/speedrun.sh | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/runs/runcpu.sh b/runs/runcpu.sh index 853fa1f..eb236cd 100755 --- a/runs/runcpu.sh +++ b/runs/runcpu.sh @@ -42,11 +42,12 @@ python -m scripts.base_train \ --sample-every=100 \ --num-iterations=5000 \ --run=$WANDB_RUN -python -m scripts.base_eval --device-batch-size=1 --split-tokens=16384 --max-per-task=16 +python -m scripts.base_eval --model-tag=d6 --device-batch-size=1 --split-tokens=16384 --max-per-task=16 # SFT (~10 minutes on my MacBook Pro M3 Max) curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl python -m scripts.chat_sft \ + --model-tag=d6 \ --max-seq-len=512 \ --device-batch-size=32 \ --total-batch-size=16384 \ @@ -59,7 +60,7 @@ python -m scripts.chat_sft \ # The model should be able to say that it is Paris. # It might even know that the color of the sky is blue. # Sometimes the model likes it if you first say Hi before you ask it questions. -# python -m scripts.chat_cli -p "What is the capital of France?" +# python -m scripts.chat_cli --model-tag=d6 -p "What is the capital of France?" 
# Chat with the model over a pretty WebUI ChatGPT style -# python -m scripts.chat_web +# python -m scripts.chat_web --model-tag=d6 diff --git a/runs/speedrun.sh b/runs/speedrun.sh index fa50694..36ee142 100644 --- a/runs/speedrun.sh +++ b/runs/speedrun.sh @@ -72,7 +72,7 @@ wait $DATASET_DOWNLOAD_PID # d24 model (slightly undertrained to beat GPT-2 => decrease data:params ratio from compute optimal 10.5 (default) to 9.5) torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=24 --target-param-data-ratio=9.5 --device-batch-size=16 --fp8 --run=$WANDB_RUN # evaluate the model: CORE metric, BPB on train/val, and draw samples -torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch-size=16 +torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --model-tag=d24 --device-batch-size=16 # ----------------------------------------------------------------------------- # SFT (teach the model conversation special tokens, tool use, multiple choice) @@ -82,14 +82,14 @@ torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch- curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl # run SFT and eval the model -torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --device-batch-size=16 --run=$WANDB_RUN -torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft +torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --model-tag=d24 --device-batch-size=16 --run=$WANDB_RUN +torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- --model-tag=d24 -i sft # chat with the model over CLI! Leave out the -p to chat interactively -# python -m scripts.chat_cli -p "Why is the sky blue?" +# python -m scripts.chat_cli --model-tag=d24 -p "Why is the sky blue?" 
# even better, chat with your model over a pretty WebUI ChatGPT style -# python -m scripts.chat_web +# python -m scripts.chat_web --model-tag=d24 # ----------------------------------------------------------------------------- # Generate the full report by putting together all the sections