From cd782a1977d3e849c9c8d83d69d2dbe521eefc58 Mon Sep 17 00:00:00 2001 From: Pyry Takala Date: Thu, 20 Nov 2025 04:18:42 +0000 Subject: [PATCH] Fix: Validate stop parameter against dataset size Add validation in Task.__len__() to ensure the stop parameter does not exceed the actual dataset size. This prevents IndexError crashes during training when invalid stop values are provided. The validation is centralized in the base Task class and stays lazy: it runs inside __len__(), where num_examples() is now called on every invocation (to validate stop when it is provided, or to supply the default value when stop is None). Fixes an issue where training would crash with IndexError when iterating over Task instances with stop > dataset_size. --- tasks/common.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tasks/common.py b/tasks/common.py index dcd2e91..afa47cc 100644 --- a/tasks/common.py +++ b/tasks/common.py @@ -34,7 +34,16 @@ class Task: def __len__(self): start = self.start - stop = self.num_examples() if self.stop is None else self.stop + if self.stop is not None: + num_ex = self.num_examples() + if self.stop > num_ex: + raise ValueError( + f"Stop parameter ({self.stop}) exceeds dataset size ({num_ex}). " + f"Please use stop <= {num_ex} or remove the stop parameter to use the full dataset." + ) + stop = self.stop + else: + stop = self.num_examples() step = self.step span = stop - start num = (span + step - 1) // step # ceil_div(span, step)