From bb5137860e24efa995b60468e7b867206ae9dd5c Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Wed, 18 Feb 2026 23:26:22 +0000 Subject: [PATCH] fix comment --- runs/speedrun.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runs/speedrun.sh b/runs/speedrun.sh index 62466c7..c757253 100644 --- a/runs/speedrun.sh +++ b/runs/speedrun.sh @@ -69,7 +69,7 @@ python -m scripts.tok_eval echo "Waiting for dataset download to complete..." wait $DATASET_DOWNLOAD_PID -# d24 model (slightly overtrained is enough to beat GPT-2 => increase data:params ratio from compute optimal 10.5 (default) to 12) +# d26 model (slightly undertrained to beat GPT-2 => decrease data:params ratio from compute optimal 10.5 (default) to 8.25) torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=26 --target-param-data-ratio=8.25 --device-batch-size=16 --fp8 --run=$WANDB_RUN # evaluate the model: CORE metric, BPB on train/val, and draw samples torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch-size=16