From 747e53babc7e0273a8ca7e4a58862c3a4c4b9291 Mon Sep 17 00:00:00 2001 From: Marcel Kowalik Date: Sat, 6 Dec 2025 23:49:47 +0100 Subject: [PATCH] reset file counter --- nanochat/dataloader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nanochat/dataloader.py b/nanochat/dataloader.py index 3271298..da33b6d 100644 --- a/nanochat/dataloader.py +++ b/nanochat/dataloader.py @@ -51,6 +51,7 @@ def tokenizing_distributed_data_loader_with_state(B, T, split, tokenizer_threads yield batch[i:i+tokenizer_batch_size], (pq_idx, rg_idx) rg_idx += ddp_world_size # advance to the next row group (in DDP) pq_idx += 1 # advance to the next parquet file + pq_idx = 0 # reset batches = document_batches() # Now emit batches of tokens.