This commit is contained in:
Pyry Takala 2025-11-21 12:51:50 -08:00 committed by GitHub
commit 99278a6e41
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -6,6 +6,9 @@ Example tasks: MMLU, ARC-Easy, ARC-Challenge, GSM8K, HumanEval, SmolTalk.
""" """
import random import random
import logging
logger = logging.getLogger(__name__)
class Task: class Task:
""" """
@ -34,7 +37,18 @@ class Task:
def __len__(self): def __len__(self):
start = self.start start = self.start
stop = self.num_examples() if self.stop is None else self.stop if self.stop is not None:
num_ex = self.num_examples()
if self.stop > num_ex:
# Warn once, then cap stop
logger.warning(
f"Stop parameter ({self.stop}) exceeds dataset size ({num_ex}). "
f"Using {num_ex} examples instead."
)
self.stop = num_ex
stop = self.stop
else:
stop = self.num_examples()
step = self.step step = self.step
span = stop - start span = stop - start
num = (span + step - 1) // step # ceil_div(span, step) num = (span + step - 1) // step # ceil_div(span, step)