From 70319851fc960bc472ac7cfe9518c9478ada402e Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Wed, 29 Oct 2025 19:48:34 +0100
Subject: [PATCH 1/3] fix typo

---
 scripts/base_eval.py | 2 +-
 scripts/chat_sft.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/base_eval.py b/scripts/base_eval.py
index 8efde4f..3d403cc 100644
--- a/scripts/base_eval.py
+++ b/scripts/base_eval.py
@@ -1,5 +1,5 @@
 """
-Evlauate the CORE metric for a given model.
+Evaluate the CORE metric for a given model.

 Run on a single GPU:
 python base_eval.py
diff --git a/scripts/chat_sft.py b/scripts/chat_sft.py
index e6e4565..bbeb1f9 100644
--- a/scripts/chat_sft.py
+++ b/scripts/chat_sft.py
@@ -192,7 +192,7 @@ for step in range(num_iterations):
             })
         model.train()

-    # evlauate accuracy of the multiple choice tasks (which are quick to run)
+    # evaluate accuracy of the multiple choice tasks (which are quick to run)
     if last_step or (step > 0 and step % eval_metrics_every == 0):
         model.eval()
         metrics = {}

From e5efb4b471cd708a5aa816462e8fce78cb2b4431 Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Fri, 14 Nov 2025 11:13:42 +0100
Subject: [PATCH 2/3] add test_engine.py to file structure

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 18ea5ce..4b50d69 100644
--- a/README.md
+++ b/README.md
@@ -184,6 +184,7 @@ python -m pytest tests/test_rustbpe.py -v -s
 │   ├── smoltalk.py       # Conglomerate dataset of SmolTalk from HF
 │   └── spellingbee.py    # Task teaching model to spell/count letters
 ├── tests
+│   └── test_engine.py
 │   └── test_rustbpe.py
 └── uv.lock
 ```

From a2fb3c83a66dd4199e7aa0fcaddda28e3fe85bbf Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Fri, 14 Nov 2025 11:20:25 +0100
Subject: [PATCH 3/3] fix typos

---
 nanochat/loss_eval.py | 4 ++--
 scripts/chat_eval.py  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/nanochat/loss_eval.py b/nanochat/loss_eval.py
index 6fcbea3..5a556e6 100644
--- a/nanochat/loss_eval.py
+++ b/nanochat/loss_eval.py
@@ -9,9 +9,9 @@ import torch.distributed as dist
 def evaluate_bpb(model, batches, steps, token_bytes):
     """
     Instead of the naive 'mean loss', this function returns the bits per byte (bpb),
-    which is a tokenization vocab size-indepedent metric, meaning you are still comparing
+    which is a tokenization vocab size-independent metric, meaning you are still comparing
     apples:apples if you change the vocab size. The way this works is that instead of just
-    calculating the average loss as usual, you calculate the sum loss, and indepependently
+    calculating the average loss as usual, you calculate the sum loss, and independently
     also the sum bytes (of all the target tokens), and divide. This normalizes the loss
     by the number of bytes that the target tokens represent.

diff --git a/scripts/chat_eval.py b/scripts/chat_eval.py
index 616411d..cae2f0f 100644
--- a/scripts/chat_eval.py
+++ b/scripts/chat_eval.py
@@ -1,6 +1,6 @@
 """
 Evaluate the Chat model.
-All the generic code lives here, and all the evlauation-specific
+All the generic code lives here, and all the evaluation-specific
 code lives in nanochat directory and is imported from here.

 Example runs:
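
For readers skimming the loss_eval.py hunk above: the docstring it touches describes a sum-then-normalize computation. The sketch below illustrates that idea only; it is not nanochat's actual evaluate_bpb. The model call returning raw logits, the -1 ignore index, and the token_bytes lookup tensor are assumptions made for the example.

# Minimal sketch (assumptions noted above): accumulate the summed cross-entropy
# loss in nats and the total byte length of the target tokens, then divide.
import math
import torch
import torch.nn.functional as F

@torch.no_grad()
def bpb_sketch(model, batches, token_bytes):
    # token_bytes: assumed 1-D tensor mapping token id -> UTF-8 byte length
    total_nats, total_bytes = 0.0, 0
    for x, y in batches:                          # y: target ids, -1 marks ignored positions
        logits = model(x)                         # assumption: model returns (B, T, vocab) logits
        loss_sum = F.cross_entropy(
            logits.view(-1, logits.size(-1)), y.view(-1),
            ignore_index=-1, reduction="sum",     # sum the loss instead of averaging it
        )
        valid = y.view(-1) != -1
        total_nats += loss_sum.item()
        total_bytes += token_bytes[y.view(-1)[valid]].sum().item()
    # nats -> bits (divide by ln 2), then normalize per byte
    return total_nats / (math.log(2) * total_bytes)

Summing rather than averaging before dividing by the byte count is what makes the result comparable across tokenizers with different vocab sizes, as the patched docstring explains.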