From 78611b998335736ae9a89fdec83e1d88e80a0c4c Mon Sep 17 00:00:00 2001
From: Shizhe Diao
Date: Mon, 20 Oct 2025 11:51:34 -0700
Subject: [PATCH] upload midtrain_sft_submit.sh

---
 midtrain_sft_submit.sh | 84 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 midtrain_sft_submit.sh

diff --git a/midtrain_sft_submit.sh b/midtrain_sft_submit.sh
new file mode 100644
index 0000000..2ac21d5
--- /dev/null
+++ b/midtrain_sft_submit.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+#SBATCH --account nvr_lpr_llm
+#SBATCH --partition interactive,batch_short,batch_block1,backfill
+#SBATCH --job-name=nanochat_multinode_d20
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --gpus-per-node=8
+#SBATCH --time=04:00:00
+#SBATCH --output=logs/nanochat_1node_d20-%j.out
+#SBATCH --mem=0
+#SBATCH --exclusive
+
+
+set -x # Enable debug output
+
+export DATA_NAME=nemotron # options: nemotron, smoltalk
+export BASE_NAME=smollm_d20_1node_matrixlr0.02_2298373 # other options: fineweb_d20_1node, climbmix_d20_1node_matrixlr0.02_2298334, nemotron-cc-hq_d20_1node_matrixlr0.02_2298371
+
+# Intermediate artifacts directory (nanochat's default is ~/.cache/nanochat; override it here)
+export OMP_NUM_THREADS=1
+export NANOCHAT_BASE_DIR="$HOME/nanochat_cache"
+mkdir -p "$NANOCHAT_BASE_DIR"
+
+# -----------------------------------------------------------------------------
+# Multi-node defaults from the Slurm environment
+
+export GPUS_PER_NODE=${GPUS_PER_NODE:-${SLURM_GPUS_ON_NODE:-8}}
+export NNODES=${NNODES:-${SLURM_NNODES:-2}}
+export MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
+export MASTER_PORT=${MASTER_PORT:-29500}
+export RDZV_ENDPOINT=$MASTER_ADDR:$MASTER_PORT
+export NCCL_ASYNC_ERROR_HANDLING=1
+
+
+# 1️⃣ Create the venv if it does not exist yet (pass --clear to wipe and rebuild it)
+[ -d "$HOME/nanochat_cache/.venv" ] || uv venv "$HOME/nanochat_cache/.venv" # --clear
+
+# 2️⃣ Activate the virtual environment
+source "$HOME/nanochat_cache/.venv/bin/activate"
+
+# 3️⃣ Install dependencies (uv resolves them from the project's pyproject.toml)
+cd /lustre/fs1/portfolios/nvr/projects/nvr_lpr_llm/users/sdiao/nanochat
+uv sync --active
+
+export WANDB_API_KEY="${WANDB_API_KEY:?set WANDB_API_KEY in the environment}" # never commit API keys
+export WANDB_RUN=data_${DATA_NAME}_base_${BASE_NAME}_${SLURM_JOB_ID}
+
+# python -m nanochat.report reset --exp_name=$WANDB_RUN
+
+# -----------------------------------------------------------------------------
+# Midtraining (teach the model conversation special tokens, tool use, multiple choice)
+
+# run midtraining and eval the model (multi-node)
+# mid_train loads from base_checkpoints/$BASE_NAME and saves to mid_checkpoints/$WANDB_RUN
+srun --ntasks=$NNODES --ntasks-per-node=1 bash --noprofile --norc -lc 'source $HOME/nanochat_cache/.venv/bin/activate; torchrun --nnodes=$NNODES --nproc_per_node=$GPUS_PER_NODE --rdzv_endpoint=$RDZV_ENDPOINT --rdzv_id=$SLURM_JOB_ID --node_rank=$SLURM_NODEID -m scripts.mid_train -- --run=$WANDB_RUN --model_tag=$BASE_NAME --dataset_choice=$DATA_NAME'
+srun --ntasks=$NNODES --ntasks-per-node=1 bash --noprofile --norc -lc 'source $HOME/nanochat_cache/.venv/bin/activate; torchrun --nnodes=$NNODES --nproc_per_node=$GPUS_PER_NODE --rdzv_endpoint=$RDZV_ENDPOINT --rdzv_id=$SLURM_JOB_ID --node_rank=$SLURM_NODEID -m scripts.chat_eval -- -i mid --model-tag=$WANDB_RUN'
+
+# -----------------------------------------------------------------------------
+# Supervised Finetuning (domain adaptation: each conversation is trained on by itself, one sequence per row)
+
+# train SFT and re-eval right away (should see a small bump) (multi-node)
+# chat_sft loads from mid_checkpoints/$WANDB_RUN and saves to chatsft_checkpoints/$WANDB_RUN
+srun --ntasks=$NNODES --ntasks-per-node=1 bash --noprofile --norc -lc 'source $HOME/nanochat_cache/.venv/bin/activate; torchrun --nnodes=$NNODES --nproc_per_node=$GPUS_PER_NODE --rdzv_endpoint=$RDZV_ENDPOINT --rdzv_id=$SLURM_JOB_ID --node_rank=$SLURM_NODEID -m scripts.chat_sft -- --run=$WANDB_RUN --model_tag=$WANDB_RUN --dataset_choice=$DATA_NAME'
+srun --ntasks=$NNODES --ntasks-per-node=1 bash --noprofile --norc -lc 'source $HOME/nanochat_cache/.venv/bin/activate; torchrun --nnodes=$NNODES --nproc_per_node=$GPUS_PER_NODE --rdzv_endpoint=$RDZV_ENDPOINT --rdzv_id=$SLURM_JOB_ID --node_rank=$SLURM_NODEID -m scripts.chat_eval -- -i sft --model-tag=$WANDB_RUN'
+
+# chat with the model over the CLI! Leave out the -p flag to chat interactively
+# python -m scripts.chat_cli -p "Why is the sky blue?"
+
+# even better, chat with your model over a pretty ChatGPT-style web UI
+# python -m scripts.chat_web
+
+# -----------------------------------------------------------------------------
+# Reinforcement Learning, currently only on GSM8K
+# (optional)
+
+# run reinforcement learning
+# torchrun --standalone --nproc_per_node=8 -m scripts.chat_rl -- --run=$WANDB_RUN
+# eval the RL model only on GSM8K
+# torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i rl -a GSM8K
+
+# -----------------------------------------------------------------------------
+# Generate the full report by putting together all the sections
+# report.md is the output and will be copied to the current directory for convenience
+python -m nanochat.report generate --exp_name=$WANDB_RUN
\ No newline at end of file
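
The job is submitted with sbatch; the #SBATCH directives above request one exclusive 8-GPU node for four hours. A minimal submission sketch (assumptions: the patch has been applied so midtrain_sft_submit.sh sits at the nanochat repository root, and the cluster provides Slurm and uv; note that Slurm does not create the directory named in --output):

    mkdir -p logs                      # Slurm will not create logs/ for --output=logs/..., so create it first
    sbatch midtrain_sft_submit.sh      # single-node run, per the #SBATCH directives
    # NNODES and GPUS_PER_NODE fall back to the values Slurm sets at runtime,
    # so a larger allocation can be requested at submission time, e.g.:
    # sbatch --nodes=2 midtrain_sft_submit.sh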