From 00a562cfe9ce9b4066dd89d1037ef747247485da Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 26 Feb 2026 22:44:50 +0100 Subject: [PATCH] remove inherited parameters from commandline and replace by model-tag --- runs/runcpu.sh | 4 +--- runs/speedrun.sh | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/runs/runcpu.sh b/runs/runcpu.sh index 853fa1f..4c8db86 100755 --- a/runs/runcpu.sh +++ b/runs/runcpu.sh @@ -47,9 +47,7 @@ python -m scripts.base_eval --device-batch-size=1 --split-tokens=16384 --max-per # SFT (~10 minutes on my MacBook Pro M3 Max) curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl python -m scripts.chat_sft \ - --max-seq-len=512 \ - --device-batch-size=32 \ - --total-batch-size=16384 \ + --model-tag=6 \ --eval-every=200 \ --eval-tokens=524288 \ --num-iterations=1500 \ diff --git a/runs/speedrun.sh b/runs/speedrun.sh index c757253..98aaa8c 100644 --- a/runs/speedrun.sh +++ b/runs/speedrun.sh @@ -82,7 +82,7 @@ torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch- curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl # run SFT and eval the model -torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --device-batch-size=16 --run=$WANDB_RUN +torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --model-tag=26 --run=$WANDB_RUN torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft # chat with the model over CLI! Leave out the -p to chat interactively