typo fixes in scripts

2025-12-06 04:12:13 +00:00 · 2025-10-28 20:17:31 +01:00 · 2025-10-28 20:17:31 +01:00 · 8c9b004c99
commit 8c9b004c99
parent 0a3ce7b0ff
5 changed files with 5 additions and 5 deletions
--- a/dev/gen_synthetic_data.py
+++ b/dev/gen_synthetic_data.py
@@ -17,7 +17,7 @@ prompt:
 2. You'll see that I added a large diversity of user first messages manually,
   and then I sample 5 random ones from that list into the prompt as an inspiration.
   This is really important to do because DIVERSITY CONTROL is key. If you don't
-   manually inject diversity, the LLM might generate extrremely similar and repeptitive
+   manually inject diversity, the LLM might generate extremely similar and repetitive
   conversations and things won't work well. Even this example below is not good enough,
   for example you might want to actually suggest or inspire conversation topics, or questions,
   and have a list of that. Basically, this is the KEY creative part to get right. Make sure you
--- a/scripts/base_eval.py
+++ b/scripts/base_eval.py
@@ -65,7 +65,7 @@ def evaluate_model(model, tokenizer, device, max_per_task=-1):
            data = [json.loads(line.strip()) for line in f]
        # shuffle the data because in many cases it appears ordered but we want
-        # the abillity to only run a subset of the data for debugging purposes etc.
+        # the ability to only run a subset of the data for debugging purposes etc.
        shuffle_rng = random.Random(1337)
        shuffle_rng.shuffle(data)
        if max_per_task > 0:
--- a/scripts/base_train.py
+++ b/scripts/base_train.py
@@ -271,7 +271,7 @@ for step in range(num_iterations + 1):
        loss = loss / grad_accum_steps # each .backward() is a grad sum => normalize loss here
        loss.backward()
        x, y = next(train_loader) # prefetch the next batch while the GPU is busy with forward/backward
-    # gradient clipping (TODO possibly expertiment with)
+    # gradient clipping (TODO possibly experiment with)
    if grad_clip > 0.0:
        torch.nn.utils.clip_grad_norm_(orig_model.parameters(), grad_clip)
    # step the optimizers
--- a/scripts/chat_eval.py
+++ b/scripts/chat_eval.py
@@ -117,7 +117,7 @@ def run_categorical_eval(task_object, tokenizer, model, batch_size, max_problems
            logits = model(prompt_ids) # (B, T, V)
        # Focus on the available answer on just the letters corresponding to choices
-        # Note that this helps the evaluation a lot because it specifically narrows the focus to only the avilable letters
+        # Note that this helps the evaluation a lot because it specifically narrows the focus to only the available letters
        # The much harder alternative would be to just generate from the Assistant and check if it responded with the correct
        # letter (e.g. A, B, C, D), but evaluations typically make the task easier in this way.
        for idx, conversation in enumerate(conversations):
--- a/scripts/chat_rl.py
+++ b/scripts/chat_rl.py
@@ -206,7 +206,7 @@ def get_lr_multiplier(it):
    lrm = 1.0 - it / num_steps
    return lrm
-# Calculate the number of examples each rank handles to achive the desired examples_per_step
+# Calculate the number of examples each rank handles to achieve the desired examples_per_step
 print0(f"Total sequences per step: {examples_per_step * num_samples}") # total batch size in sequences/step
 assert examples_per_step % ddp_world_size == 0, "Desired examples per step must be divisible by the number of ranks"
 examples_per_rank = examples_per_step // ddp_world_size # per GPU