From cc40ccc51502a24531e7dac1c8b37e5e45a04d29 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Wed, 14 Jan 2026 15:08:50 +0100 Subject: [PATCH 1/3] fix commands in readme, using new arg format --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index acb9111..9de2884 100644 --- a/README.md +++ b/README.md @@ -82,10 +82,10 @@ That said, to give a sense, the example changes needed for the [speedrun.sh](spe python -m nanochat.dataset -n 450 & ... # use --depth to increase model size. to not oom, halve device batch size 32 -> 16: -torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=26 --device_batch_size=16 +torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=26 --device-batch-size=16 ... # make sure to use the same later during midtraining: -torchrun --standalone --nproc_per_node=8 -m scripts.mid_train -- --device_batch_size=16 +torchrun --standalone --nproc_per_node=8 -m scripts.mid_train -- --device-batch-size=16 ``` That's it! The biggest thing to pay attention to is making sure you have enough data shards to train on (the code will loop and do more epochs over the same training set otherwise, decreasing learning speed a bit), and managing your memory/VRAM, primarily by decreasing the `device_batch_size` until things fit (the scripts automatically compensate by increasing the number of gradient accumulation loops, simply turning parallel compute to sequential compute). From b88cef60534e0619b7972b6f11fb2bc483767331 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Wed, 14 Jan 2026 15:11:55 +0100 Subject: [PATCH 2/3] fix typo --- tasks/customjson.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/customjson.py b/tasks/customjson.py index e1b5f0b..aeb1a3f 100644 --- a/tasks/customjson.py +++ b/tasks/customjson.py @@ -25,7 +25,7 @@ class CustomJSON(Task): print("-" * 80) print(f"Warning: File {filepath} does not exist") print("HINT (Oct 21 2025)") - print("If you recently did a git pull and suddely see this, it might be due to the new addition of identity conversations") + print("If you recently did a git pull and suddenly see this, it might be due to the new addition of identity conversations") print("See this discussion for more details: https://github.com/karpathy/nanochat/discussions/139") print("Quick fix: simply run the following command to download the file and you're done:") print(f"curl -L -o {filepath} https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl") From 785b214b841593a5e84e8edd83f1b31c13c8d8cb Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 15 Jan 2026 21:35:05 +0100 Subject: [PATCH 3/3] add required -i flag to chat_eval example runs --- scripts/chat_eval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/chat_eval.py b/scripts/chat_eval.py index cae2f0f..a558303 100644 --- a/scripts/chat_eval.py +++ b/scripts/chat_eval.py @@ -4,8 +4,8 @@ All the generic code lives here, and all the evaluation-specific code lives in nanochat directory and is imported from here. Example runs: -python -m scripts.chat_eval -a ARC-Easy -torchrun --nproc_per_node=8 -m scripts.chat_eval -- -a ARC-Easy +python -m scripts.chat_eval -i mid -a ARC-Easy +torchrun --nproc_per_node=8 -m scripts.chat_eval -- -i mid -a ARC-Easy """ import argparse