Make NPROC_PER_NODE customizable in run1000.sh and speedrun.sh

vinjn 2025-11-04 22:16:08 -08:00
parent 885a4f25e7
commit a2d61393ee
2 changed files with 2 additions and 2 deletions

run1000.sh

@@ -72,7 +72,7 @@ python -m scripts.tok_eval
 # 5) That's it, everything else (e.g. the learning rates) is adjusted automatically by the training script.
 # Number of processes/GPUs to use
-NPROC_PER_NODE=8
+NPROC_PER_NODE=${NPROC_PER_NODE:-8}
 torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- --depth=32 --device_batch_size=8 --run=$WANDB_RUN
 torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_loss
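
Both hunks use bash default parameter expansion: ${NPROC_PER_NODE:-8} evaluates to the caller's NPROC_PER_NODE if that variable is set and non-empty, and to 8 otherwise, so the scripts behave exactly as before unless the variable is provided. A minimal standalone sketch of the pattern (not part of the diff):

    #!/usr/bin/env bash
    # Fall back to 8 processes per node when the caller has not set NPROC_PER_NODE.
    NPROC_PER_NODE=${NPROC_PER_NODE:-8}
    echo "launching with $NPROC_PER_NODE processes per node"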

speedrun.sh

@@ -83,7 +83,7 @@ echo "Waiting for dataset download to complete..."
 wait $DATASET_DOWNLOAD_PID
 # Number of processes/GPUs to use
-NPROC_PER_NODE=8
+NPROC_PER_NODE=${NPROC_PER_NODE:-8}
 # pretrain the d20 model
 torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- --depth=20 --run=$WANDB_RUN
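
With the default in place, the GPU/process count can be overridden per run without editing either script, for example (hypothetical invocations, assuming the scripts are launched from the repository root):

    NPROC_PER_NODE=4 bash speedrun.sh
    NPROC_PER_NODE=2 bash run1000.sh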