diff --git a/miniseries.sh b/miniseries.sh index 077418a..0a6947e 100644 --- a/miniseries.sh +++ b/miniseries.sh @@ -1,29 +1,39 @@ #!/bin/bash # See speedrun.sh for more comments +# Usage: ./miniseries.sh [series_name] +# Example: ./miniseries.sh jan11 +# Default series name is today's date (e.g., jan11) export OMP_NUM_THREADS=1 export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat" mkdir -p $NANOCHAT_BASE_DIR -# uv -command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh -[ -d ".venv" ] || uv venv -uv sync --extra gpu -source .venv/bin/activate +# Setup (skip with SKIP_SETUP=1) +if [ -z "$SKIP_SETUP" ]; then + # uv + command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh + [ -d ".venv" ] || uv venv + uv sync --extra gpu + source .venv/bin/activate -# Tokenizer -python -m nanochat.dataset -n 240 -python -m scripts.tok_train --max_chars=2000000000 --vocab_size=32768 + # Tokenizer + python -m nanochat.dataset -n 240 + python -m scripts.tok_train --max_chars=2000000000 --vocab_size=32768 +else + source .venv/bin/activate +fi +# Series name: from arg, env var, or default to today's date (e.g., jan11) +SERIES_NAME="${1:-${SERIES_NAME:-$(date +%b%d | tr '[:upper:]' '[:lower:]')}}" # Depths to train (the "miniseries") DEPTHS=(10 11 12 13 14 15 16 17 18 19 20) # Hardware NPROC_PER_NODE="${NPROC_PER_NODE:-8}" # Logging -WANDB_RUN="${WANDB_RUN:-jan7_miniseries}" +WANDB_RUN="${WANDB_RUN:-${SERIES_NAME}_miniseries}" -RESULTS_DIR="$NANOCHAT_BASE_DIR/jan7_miniseries_results" +RESULTS_DIR="$NANOCHAT_BASE_DIR/${SERIES_NAME}_miniseries_results" mkdir -p "$RESULTS_DIR" RESULTS_FILE="$RESULTS_DIR/results.csv" @@ -37,13 +47,13 @@ log() { } log "==============================================" -log "Jan 7 Miniseries Training" +log "${SERIES_NAME} Miniseries Training" log "==============================================" for d in "${DEPTHS[@]}"; do log "Training d=$d..." - TAG="jan7_miniseries_d${d}" + TAG="${SERIES_NAME}_miniseries_d${d}" START_TIME=$(date +%s) # Train the model with natural horizon (target_param_data_ratio default) @@ -84,7 +94,7 @@ for d in "${DEPTHS[@]}"; do done log "==============================================" -log "Jan 7 Miniseries Complete!" +log "${SERIES_NAME} Miniseries Complete!" log "==============================================" log "Results saved to: $RESULTS_FILE" echo ""