This commit is contained in:
Dylan 2026-02-02 08:34:20 -08:00 committed by GitHub
commit 97944340a1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -152,7 +152,7 @@ def sft_data_generator_bos_bestfit(split, buffer_size=100):
nonlocal cursor, epoch
while len(conv_buffer) < buffer_size:
conversation = dataset[cursor]
-ids, _ = tokenizer.render_conversation(conversation)
+ids, _ = tokenizer.render_conversation(conversation, max_tokens=row_capacity)
conv_buffer.append(ids)
cursor += ddp_world_size
if cursor >= dataset_size: