This commit is contained in:
Dylan 2026-02-04 01:11:05 +01:00 committed by GitHub
commit 2b5bdfc25f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -152,7 +152,7 @@ def sft_data_generator_bos_bestfit(split, buffer_size=100):
nonlocal cursor, epoch
while len(conv_buffer) < buffer_size:
conversation = dataset[cursor]
- ids, _ = tokenizer.render_conversation(conversation)
+ ids, _ = tokenizer.render_conversation(conversation, max_tokens=row_capacity)
conv_buffer.append(ids)
cursor += ddp_world_size
if cursor >= dataset_size: