From 35174d1725ec79b13bbbbe888e28e18559c57264 Mon Sep 17 00:00:00 2001
From: Chris McCormick
Date: Fri, 30 Jan 2026 21:01:12 -0800
Subject: [PATCH] Ease of use

Edits to get the script running out-of-the-box on a fresh instance.
---
 runs/speedrun.sh | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/runs/speedrun.sh b/runs/speedrun.sh
index ef4fa00..3661df6 100644
--- a/runs/speedrun.sh
+++ b/runs/speedrun.sh
@@ -4,22 +4,32 @@
 # It is designed to run in ~4 hours on 8XH100 node at $3/GPU/hour.
 
 # 1) Example launch (simplest):
-# bash speedrun.sh
+# bash runs/speedrun.sh
 # 2) Example launch in a screen session (because the run takes ~4 hours):
-# screen -L -Logfile speedrun.log -S speedrun bash speedrun.sh
+# screen -L -Logfile speedrun.log -S speedrun bash runs/speedrun.sh
 # 3) Example launch with wandb logging, but see below for setting up wandb first:
-# WANDB_RUN=speedrun screen -L -Logfile speedrun.log -S speedrun bash speedrun.sh
+# WANDB_RUN=speedrun screen -L -Logfile speedrun.log -S speedrun bash runs/speedrun.sh
 
 # Default intermediate artifacts directory is in ~/.cache/nanochat
 export OMP_NUM_THREADS=1
 export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat"
 mkdir -p $NANOCHAT_BASE_DIR
 
+# -----------------------------------------------------------------------------
+# System dependencies (Python dev headers needed for Triton/torch compilation)
+
+if ! dpkg -s python3-dev &> /dev/null; then
+    echo "Installing python3-dev (required for Python.h)..."
+    sudo apt-get update && sudo apt-get install -y python3-dev
+fi
+
 # -----------------------------------------------------------------------------
 # Python venv setup with uv
 
 # install uv (if not already installed)
 command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
+# add uv to PATH (the installer puts it in ~/.local/bin)
+export PATH="$HOME/.local/bin:$PATH"
 # create a .venv local virtual environment (if it doesn't exist)
 [ -d ".venv" ] || uv venv
 # install the repo dependencies
@@ -81,7 +91,7 @@ wait $DATASET_DOWNLOAD_PID
 NPROC_PER_NODE=8
 
 # pretrain the d20 model
-torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- --depth=20 --target-param-data-ratio=20 --run=$WANDB_RUN
+torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- --depth=24 --target-param-data-ratio=12 --device-batch-size=16 --run=$WANDB_RUN
 # evaluate the model on a larger chunk of train/val data and draw some samples
 torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_loss
 # evaluate the model on CORE tasks