From dddb95caaca0d1ea873339207b0b37a1f371e263 Mon Sep 17 00:00:00 2001 From: Eric Silberstein Date: Wed, 19 Nov 2025 15:52:20 -0500 Subject: [PATCH 1/3] make mid_train script work even with a tiny number of iterations --- scripts/mid_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mid_train.py b/scripts/mid_train.py index 6c2b82f..ebb6c42 100644 --- a/scripts/mid_train.py +++ b/scripts/mid_train.py @@ -139,7 +139,7 @@ def mid_data_generator(split): last_step = True # toggle last_step to True, which will terminate the training loop # Stopping condition to respect num_iterations, if given it += 1 - if num_iterations > 0 and it >= num_iterations: + if num_iterations > 0 and it >= num_iterations and split == "train": last_step = True # toggle last_step to True, which will terminate the training loop # Build up inputs/targets and yield for i in range(needed_tokens): From 4bcc3bb698b802766852c3ef1003f91d589f7b66 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Fri, 21 Nov 2025 13:19:45 +0100 Subject: [PATCH 2/3] clarify comment --- scripts/mid_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mid_train.py b/scripts/mid_train.py index ebb6c42..60c7bbc 100644 --- a/scripts/mid_train.py +++ b/scripts/mid_train.py @@ -112,7 +112,7 @@ val_dataset = TaskMixture([ # DataLoader is defined here, it emits inputs, targets : 2D tensors of shape (device_batch_size, max_seq_len) # A big problem is that we don't know the final num_iterations in advance. So we create # these two global variables and update them from within the data generator. -last_step = False # we will toggle this to True when we reach the end of the dataset +last_step = False # we will toggle this to True when we reach the end of the training dataset approx_progress = 0.0 # will go from 0 to 1 over the course of the epoch def mid_data_generator(split): global last_step, approx_progress From 8b1cecaa95d196fdca349705186db1f1e2cf0ee2 Mon Sep 17 00:00:00 2001 From: Andrej Date: Mon, 8 Dec 2025 18:27:06 -0800 Subject: [PATCH 3/3] Apply suggestion from @svlandeg for nicer looking comparison Co-authored-by: Sofie Van Landeghem --- scripts/mid_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mid_train.py b/scripts/mid_train.py index 60c7bbc..dd0768c 100644 --- a/scripts/mid_train.py +++ b/scripts/mid_train.py @@ -139,7 +139,7 @@ def mid_data_generator(split): last_step = True # toggle last_step to True, which will terminate the training loop # Stopping condition to respect num_iterations, if given it += 1 - if num_iterations > 0 and it >= num_iterations and split == "train": + if 0 < num_iterations <= it and split == "train": last_step = True # toggle last_step to True, which will terminate the training loop # Build up inputs/targets and yield for i in range(needed_tokens):