mirror of
https://github.com/karpathy/nanochat.git
synced 2025-12-06 12:22:18 +00:00
Fix: Validate stop parameter against dataset size
Add validation in Task.__len__() to ensure the stop parameter does not exceed the actual dataset size. This prevents IndexError crashes during training when invalid stop values are provided. The validation is centralized in the base Task class and preserves the original lazy-evaluation behavior: num_examples() is called only when needed (for validation when stop is provided, or to supply the default value when stop is None). Fixes an issue where training would crash with an IndexError when iterating over Task instances with stop > dataset_size.
This commit is contained in:
parent
4a87a0d19f
commit
cd782a1977
|
|
@ -34,7 +34,16 @@ class Task:
|
|||
|
||||
def __len__(self):
|
||||
start = self.start
|
||||
stop = self.num_examples() if self.stop is None else self.stop
|
||||
if self.stop is not None:
|
||||
num_ex = self.num_examples()
|
||||
if self.stop > num_ex:
|
||||
raise ValueError(
|
||||
f"Stop parameter ({self.stop}) exceeds dataset size ({num_ex}). "
|
||||
f"Please use stop <= {num_ex} or remove the stop parameter to use the full dataset."
|
||||
)
|
||||
stop = self.stop
|
||||
else:
|
||||
stop = self.num_examples()
|
||||
step = self.step
|
||||
span = stop - start
|
||||
num = (span + step - 1) // step # ceil_div(span, step)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user