From 00a562cfe9ce9b4066dd89d1037ef747247485da Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 26 Feb 2026 22:44:50 +0100 Subject: [PATCH 1/3] remove inherited parameters from commandline and replace by model-tag --- runs/runcpu.sh | 4 +--- runs/speedrun.sh | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/runs/runcpu.sh b/runs/runcpu.sh index 853fa1f..4c8db86 100755 --- a/runs/runcpu.sh +++ b/runs/runcpu.sh @@ -47,9 +47,7 @@ python -m scripts.base_eval --device-batch-size=1 --split-tokens=16384 --max-per # SFT (~10 minutes on my MacBook Pro M3 Max) curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl python -m scripts.chat_sft \ - --max-seq-len=512 \ - --device-batch-size=32 \ - --total-batch-size=16384 \ + --model-tag=6 \ --eval-every=200 \ --eval-tokens=524288 \ --num-iterations=1500 \ diff --git a/runs/speedrun.sh b/runs/speedrun.sh index c757253..98aaa8c 100644 --- a/runs/speedrun.sh +++ b/runs/speedrun.sh @@ -82,7 +82,7 @@ torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch- curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl # run SFT and eval the model -torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --device-batch-size=16 --run=$WANDB_RUN +torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --model-tag=26 --run=$WANDB_RUN torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft # chat with the model over CLI! Leave out the -p to chat interactively From 0fe9acb8c0f76175a9ae13e7719a0c2695af3914 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 27 Feb 2026 00:17:09 +0100 Subject: [PATCH 2/3] use default tag --- runs/runcpu.sh | 2 +- runs/speedrun.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/runs/runcpu.sh b/runs/runcpu.sh index 4c8db86..443e68e 100755 --- a/runs/runcpu.sh +++ b/runs/runcpu.sh @@ -47,7 +47,7 @@ python -m scripts.base_eval --device-batch-size=1 --split-tokens=16384 --max-per # SFT (~10 minutes on my MacBook Pro M3 Max) curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl python -m scripts.chat_sft \ - --model-tag=6 \ + --model-tag=d6 \ --eval-every=200 \ --eval-tokens=524288 \ --num-iterations=1500 \ diff --git a/runs/speedrun.sh b/runs/speedrun.sh index 98aaa8c..9122319 100644 --- a/runs/speedrun.sh +++ b/runs/speedrun.sh @@ -82,7 +82,7 @@ torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch- curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl # run SFT and eval the model -torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --model-tag=26 --run=$WANDB_RUN +torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --model-tag=d26 --run=$WANDB_RUN torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft # chat with the model over CLI! Leave out the -p to chat interactively From 212bdae12036918e6961aa3c3afe819602f277f2 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 27 Feb 2026 00:20:19 +0100 Subject: [PATCH 3/3] remove model tag --- runs/runcpu.sh | 1 - runs/speedrun.sh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/runs/runcpu.sh b/runs/runcpu.sh index 443e68e..de3d3f2 100755 --- a/runs/runcpu.sh +++ b/runs/runcpu.sh @@ -47,7 +47,6 @@ python -m scripts.base_eval --device-batch-size=1 --split-tokens=16384 --max-per # SFT (~10 minutes on my MacBook Pro M3 Max) curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl python -m scripts.chat_sft \ - --model-tag=d6 \ --eval-every=200 \ --eval-tokens=524288 \ --num-iterations=1500 \ diff --git a/runs/speedrun.sh b/runs/speedrun.sh index 9122319..6e6d800 100644 --- a/runs/speedrun.sh +++ b/runs/speedrun.sh @@ -82,7 +82,7 @@ torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch- curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl # run SFT and eval the model -torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --model-tag=d26 --run=$WANDB_RUN +torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --run=$WANDB_RUN torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft # chat with the model over CLI! Leave out the -p to chat interactively