From 212bdae12036918e6961aa3c3afe819602f277f2 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 27 Feb 2026 00:20:19 +0100 Subject: [PATCH] remove model tag --- runs/runcpu.sh | 1 - runs/speedrun.sh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/runs/runcpu.sh b/runs/runcpu.sh index 443e68e..de3d3f2 100755 --- a/runs/runcpu.sh +++ b/runs/runcpu.sh @@ -47,7 +47,6 @@ python -m scripts.base_eval --device-batch-size=1 --split-tokens=16384 --max-per # SFT (~10 minutes on my MacBook Pro M3 Max) curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl python -m scripts.chat_sft \ - --model-tag=d6 \ --eval-every=200 \ --eval-tokens=524288 \ --num-iterations=1500 \ diff --git a/runs/speedrun.sh b/runs/speedrun.sh index 9122319..6e6d800 100644 --- a/runs/speedrun.sh +++ b/runs/speedrun.sh @@ -82,7 +82,7 @@ torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch- curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl # run SFT and eval the model -torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --model-tag=d26 --run=$WANDB_RUN +torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --run=$WANDB_RUN torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft # chat with the model over CLI! Leave out the -p to chat interactively