diff --git a/runs/runcpu.sh b/runs/runcpu.sh
index 443e68e..de3d3f2 100755
--- a/runs/runcpu.sh
+++ b/runs/runcpu.sh
@@ -47,7 +47,6 @@ python -m scripts.base_eval --device-batch-size=1 --split-tokens=16384 --max-per
 # SFT (~10 minutes on my MacBook Pro M3 Max)
 curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
 python -m scripts.chat_sft \
-    --model-tag=d6 \
     --eval-every=200 \
     --eval-tokens=524288 \
     --num-iterations=1500 \
diff --git a/runs/speedrun.sh b/runs/speedrun.sh
index 9122319..6e6d800 100644
--- a/runs/speedrun.sh
+++ b/runs/speedrun.sh
@@ -82,7 +82,7 @@ torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch-
 curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
 
 # run SFT and eval the model
-torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --model-tag=d26 --run=$WANDB_RUN
+torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --run=$WANDB_RUN
 torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft
 
 # chat with the model over CLI! Leave out the -p to chat interactively