From cd782a1977d3e849c9c8d83d69d2dbe521eefc58 Mon Sep 17 00:00:00 2001
From: Pyry Takala <pyry@detail.dev>
Date: Thu, 20 Nov 2025 04:18:42 +0000
Subject: [PATCH] Fix: Validate stop parameter against dataset size

Add validation in Task.__len__() to ensure the stop parameter does not
exceed the actual dataset size. This prevents IndexError crashes during
training when invalid stop values are provided.

The validation is centralized in the base Task class, and num_examples()
is still evaluated lazily inside __len__(). Note that it is now called
on every __len__() invocation: to validate stop when stop is provided,
or to supply the default value when stop is None. Previously it was
called only when stop was None.

Fixes an issue where training would crash with an IndexError when
iterating over Task instances with stop > dataset_size.
---
 tasks/common.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tasks/common.py b/tasks/common.py
index dcd2e91..afa47cc 100644
--- a/tasks/common.py
+++ b/tasks/common.py
@@ -34,7 +34,16 @@ class Task:
 
     def __len__(self):
         start = self.start
-        stop = self.num_examples() if self.stop is None else self.stop
+        if self.stop is not None:
+            num_ex = self.num_examples()
+            if self.stop > num_ex:
+                raise ValueError(
+                    f"Stop parameter ({self.stop}) exceeds dataset size ({num_ex}). "
+                    f"Please use stop <= {num_ex} or remove the stop parameter to use the full dataset."
+                )
+            stop = self.stop
+        else:
+            stop = self.num_examples()
         step = self.step
         span = stop - start
         num = (span + step - 1) // step # ceil_div(span, step)