From 97770700f208abb5adea9758017f32780d00e7ac Mon Sep 17 00:00:00 2001
From: Eric Silberstein
Date: Wed, 19 Nov 2025 14:51:02 -0500
Subject: [PATCH] change test/train split approach because random.seed(1) and random.seed(-1) do the same thing

---
 tasks/spellingbee.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tasks/spellingbee.py b/tasks/spellingbee.py
index 3b45305..24954c0 100644
--- a/tasks/spellingbee.py
+++ b/tasks/spellingbee.py
@@ -35,6 +35,8 @@ from nanochat.common import download_file_with_lock
 LETTERS = "abcdefghijklmnopqrstuvwxyz"
 # A list of 370K English words of large variety
 WORD_LIST_URL = "https://raw.githubusercontent.com/dwyl/english-words/refs/heads/master/words_alpha.txt"
+# A number bigger than 370K to separate train and test random seeds
+TEST_RANDOM_SEED_OFFSET = 10_000_000
 # Identical to gsm8k's answer extraction
 ANSWER_RE = re.compile(r"#### (\-?[0-9\.\,]+)")
 
@@ -131,7 +133,7 @@ class SpellingBee(Task):
         return self.size
 
     def get_example(self, index):
-        seed = index if self.split == "train" else -(index + 1) # avoid collision at 0
+        seed = index if self.split == 'train' else TEST_RANDOM_SEED_OFFSET + index
         rng = random.Random(seed)
 
         # pick a random word
@@ -252,7 +254,7 @@ class SimpleSpelling(Task):
         return self.size
 
     def get_example(self, index):
-        seed = index if self.split == "train" else -(index + 1) # avoid collision at 0
+        seed = index if self.split == 'train' else TEST_RANDOM_SEED_OFFSET + index
         rng = random.Random(seed)
         # pick a random word
         word = rng.choice(self.words)