From 542beb0c8c175af2d52ec7065345dcd8f0162368 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy <andrej.karpathy@gmail.com>
Date: Wed, 4 Feb 2026 02:12:04 +0000
Subject: [PATCH] bump speedrun to the up-to-date leaderboard run

---
 runs/speedrun.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runs/speedrun.sh b/runs/speedrun.sh
index d390c6d..c423ba6 100644
--- a/runs/speedrun.sh
+++ b/runs/speedrun.sh
@@ -70,7 +70,7 @@ echo "Waiting for dataset download to complete..."
 wait $DATASET_DOWNLOAD_PID
 
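-# d24 model (slightly overtrained is enough to beat GPT-2 => increase data:params ratio from compute optimal 10.5 (default) to 12)
+# d26 model trained in fp8 (slightly undertrained => decrease data:params ratio from compute optimal 10.5 (default) to 8.5)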
-torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=24 --target-param-data-ratio=12 --device-batch-size=16 --run=$WANDB_RUN
+torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=26 --target-param-data-ratio=8.5 --device-batch-size=16 --fp8 --run=$WANDB_RUN
 # evaluate the model: CORE metric, BPB on train/val, and draw samples
 torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch-size=16