mirror of
https://github.com/karpathy/nanochat.git
synced 2026-04-21 08:18:40 +00:00
Revert miniseries.sh to the original; this should run fine with a G4 instance in Colab.
This commit is contained in:
parent
c48aa05531
commit
676ddfdb46
|
|
@ -28,7 +28,7 @@ fi
|
|||
# Series name: from arg, env var, or default to today's date (e.g., jan11)
|
||||
SERIES_NAME="${1:-${SERIES_NAME:-$(date +%b%d | tr '[:upper:]' '[:lower:]')}}"
|
||||
# Depths to train (the "miniseries")
|
||||
DEPTHS=(12 14)
|
||||
DEPTHS=(12 14 16 18 20 22 24 26)
|
||||
# Hardware
|
||||
NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
|
||||
# Logging
|
||||
|
|
@ -63,7 +63,7 @@ for d in "${DEPTHS[@]}"; do
|
|||
elif [ $d -ge 20 ]; then
|
||||
DEVICE_BATCH_SIZE_ARG="--device-batch-size=16"
|
||||
else
|
||||
DEVICE_BATCH_SIZE_ARG="--device-batch-size=16"
|
||||
DEVICE_BATCH_SIZE_ARG="--device-batch-size=32"
|
||||
fi
|
||||
|
||||
torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- \
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user