From ba4f40bf588a83ed3ee4d3c02cb7581edfb105ba Mon Sep 17 00:00:00 2001
From: Jing Zhang
Date: Sat, 1 Nov 2025 21:27:00 -0700
Subject: [PATCH] Update run1000.sh to add missing --run=$WANDB_RUN

---
 run1000.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run1000.sh b/run1000.sh
index e0bc4c4..46325d9 100644
--- a/run1000.sh
+++ b/run1000.sh
@@ -70,7 +70,7 @@ python -m scripts.tok_eval
 # which would decrease model performance. Possibly 2, 3 or so epochs is ~ok, but certainly not ideal and at 10+ epochs we'd
 # start to overfit hard.
 # 5) That's it, everything else (e.g. the learning rates) is adjusted automatically by the training script.
-torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=32 --device_batch_size=8
+torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=32 --device_batch_size=8 --run=$WANDB_RUN
 torchrun --standalone --nproc_per_node=8 -m scripts.base_loss
 torchrun --standalone --nproc_per_node=8 -m scripts.base_eval
 