typo fixes in scripts

This commit is contained in:
svlandeg 2025-10-28 20:17:31 +01:00
parent 0a3ce7b0ff
commit 8c9b004c99
5 changed files with 5 additions and 5 deletions

View File

@ -17,7 +17,7 @@ prompt:
2. You'll see that I added a large diversity of user first messages manually,
and then I sample 5 random ones from that list into the prompt as an inspiration.
This is really important to do because DIVERSITY CONTROL is key. If you don't
manually inject diversity, the LLM might generate extrremely similar and repeptitive
manually inject diversity, the LLM might generate extremely similar and repetitive
conversations and things won't work well. Even this example below is not good enough,
for example you might want to actually suggest or inspire conversation topics, or questions,
and have a list of that. Basically, this is the KEY creative part to get right. Make sure you

View File

@ -65,7 +65,7 @@ def evaluate_model(model, tokenizer, device, max_per_task=-1):
data = [json.loads(line.strip()) for line in f]
# shuffle the data because in many cases it appears ordered but we want
# the abillity to only run a subset of the data for debugging purposes etc.
# the ability to only run a subset of the data for debugging purposes etc.
shuffle_rng = random.Random(1337)
shuffle_rng.shuffle(data)
if max_per_task > 0:

View File

@ -271,7 +271,7 @@ for step in range(num_iterations + 1):
loss = loss / grad_accum_steps # each .backward() is a grad sum => normalize loss here
loss.backward()
x, y = next(train_loader) # prefetch the next batch while the GPU is busy with forward/backward
# gradient clipping (TODO possibly expertiment with)
# gradient clipping (TODO possibly experiment with)
if grad_clip > 0.0:
torch.nn.utils.clip_grad_norm_(orig_model.parameters(), grad_clip)
# step the optimizers

View File

@ -117,7 +117,7 @@ def run_categorical_eval(task_object, tokenizer, model, batch_size, max_problems
logits = model(prompt_ids) # (B, T, V)
# Focus on the available answer on just the letters corresponding to choices
# Note that this helps the evaluation a lot because it specifically narrows the focus to only the avilable letters
# Note that this helps the evaluation a lot because it specifically narrows the focus to only the available letters
# The much harder alternative would be to just generate from the Assistant and check if it responded with the correct
# letter (e.g. A, B, C, D), but evaluations typically make the task easier in this way.
for idx, conversation in enumerate(conversations):

View File

@ -206,7 +206,7 @@ def get_lr_multiplier(it):
lrm = 1.0 - it / num_steps
return lrm
# Calculate the number of examples each rank handles to achive the desired examples_per_step
# Calculate the number of examples each rank handles to achieve the desired examples_per_step
print0(f"Total sequences per step: {examples_per_step * num_samples}") # total batch size in sequences/step
assert examples_per_step % ddp_world_size == 0, "Desired examples per step must be divisible by the number of ranks"
examples_per_rank = examples_per_step // ddp_world_size # per GPU