From a33d04dca175304b9fb1614b7b3d028eca0f4780 Mon Sep 17 00:00:00 2001
From: Pyry Takala
Date: Fri, 21 Nov 2025 20:51:46 +0000
Subject: [PATCH] Cap stop parameter and warn once when it exceeds dataset size

---
 tasks/common.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tasks/common.py b/tasks/common.py
index a63cb7a..540ff6b 100644
--- a/tasks/common.py
+++ b/tasks/common.py
@@ -6,6 +6,9 @@ Example tasks: MMLU, ARC-Easy, ARC-Challenge, GSM8K, HumanEval, SmolTalk.
 """
 
 import random
+import logging
+
+logger = logging.getLogger(__name__)
 
 class Task:
     """
@@ -36,14 +39,14 @@ class Task:
         start = self.start
         if self.stop is not None:
             num_ex = self.num_examples()
-            stop = min(self.stop, num_ex) # Gracefully cap at dataset size
             if self.stop > num_ex:
-                import warnings
-                warnings.warn(
+                # Warn once, then cap stop
+                logger.warning(
                     f"Stop parameter ({self.stop}) exceeds dataset size ({num_ex}). "
-                    f"Using {num_ex} examples instead.",
-                    UserWarning
+                    f"Using {num_ex} examples instead."
                 )
+                self.stop = num_ex
+            stop = self.stop
         else:
             stop = self.num_examples()
         step = self.step