From 16755495bce2e1b3da7ee27c201cba104481b9fd Mon Sep 17 00:00:00 2001
From: geopti <gpaptsis@gmail.com>
Date: Sat, 28 Feb 2026 20:43:34 +0000
Subject: [PATCH] fix(miniseries): extract tokens_trained from log instead of
 hardcoding batch size

Same bug as scaling_laws.sh: TOKENS_TRAINED was computed as NUM_ITERS * 524288,
hardcoding the default total batch size. When base_train auto-computes a different
batch size, the value is wrong. Fix by reading "Total number of training tokens:"
directly from the training log.
---
 runs/miniseries.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runs/miniseries.sh b/runs/miniseries.sh
index 01c4459..726bee0 100644
--- a/runs/miniseries.sh
+++ b/runs/miniseries.sh
@@ -85,7 +85,7 @@ for d in "${DEPTHS[@]}"; do
     NUM_PARAMS=$(grep "Number of parameters:" "$LOG_FILE" | tail -1 | grep -oP '[\d,]+' | head -1 | tr -d ',')
     NUM_SCALING_PARAMS=$(grep "Number of parameters:" "$LOG_FILE" | tail -1 | grep -oP 'scaling: [\d,]+' | grep -oP '[\d,]+' | tr -d ',')
     NUM_ITERS=$(grep "Calculated number of iterations" "$LOG_FILE" | tail -1 | sed 's/.*: //' | tr -d ',')
-    TOKENS_TRAINED=$((NUM_ITERS * 524288))
+    TOKENS_TRAINED=$(grep "Total number of training tokens:" "$LOG_FILE" | tail -1 | grep -oP '[\d,]+' | tr -d ',')
     PARAM_DATA_RATIO=$(python -c "print(f'{$TOKENS_TRAINED / $NUM_SCALING_PARAMS:.2f}')")
     MODEL_DIM=$((d * 64))
     VAL_BPB=$(grep "Validation bpb:" "$LOG_FILE" | tail -1 | grep -oP '[\d.]+$')