diff --git a/README.md b/README.md index c96ac23..5e1de1e 100644 --- a/README.md +++ b/README.md @@ -184,6 +184,7 @@ python -m pytest tests/test_rustbpe.py -v -s │ ├── smoltalk.py # Conglomerate dataset of SmolTalk from HF │ └── spellingbee.py # Task teaching model to spell/count letters ├── tests +│ ├── test_engine.py │ └── test_rustbpe.py └── uv.lock ``` diff --git a/nanochat/loss_eval.py b/nanochat/loss_eval.py index 6fcbea3..5a556e6 100644 --- a/nanochat/loss_eval.py +++ b/nanochat/loss_eval.py @@ -9,9 +9,9 @@ import torch.distributed as dist def evaluate_bpb(model, batches, steps, token_bytes): """ Instead of the naive 'mean loss', this function returns the bits per byte (bpb), - which is a tokenization vocab size-indepedent metric, meaning you are still comparing + which is a tokenization vocab size-independent metric, meaning you are still comparing apples:apples if you change the vocab size. The way this works is that instead of just - calculating the average loss as usual, you calculate the sum loss, and indepependently + calculating the average loss as usual, you calculate the sum loss, and independently also the sum bytes (of all the target tokens), and divide. This normalizes the loss by the number of bytes that the target tokens represent. diff --git a/scripts/chat_eval.py b/scripts/chat_eval.py index 616411d..cae2f0f 100644 --- a/scripts/chat_eval.py +++ b/scripts/chat_eval.py @@ -1,6 +1,6 @@ """ Evaluate the Chat model. -All the generic code lives here, and all the evlauation-specific +All the generic code lives here, and all the evaluation-specific code lives in nanochat directory and is imported from here. 
Example runs: diff --git a/scripts/chat_sft.py b/scripts/chat_sft.py index e6e4565..bbeb1f9 100644 --- a/scripts/chat_sft.py +++ b/scripts/chat_sft.py @@ -192,7 +192,7 @@ for step in range(num_iterations): }) model.train() - # evlauate accuracy of the multiple choice tasks (which are quick to run) + # evaluate accuracy of the multiple choice tasks (which are quick to run) if last_step or (step > 0 and step % eval_metrics_every == 0): model.eval() metrics = {}