This commit is contained in:
Dylan 2026-02-02 11:26:41 +08:00 committed by GitHub
commit fc54c5d9ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -152,7 +152,7 @@ def sft_data_generator_bos_bestfit(split, buffer_size=100):
nonlocal cursor, epoch
while len(conv_buffer) < buffer_size:
conversation = dataset[cursor]
-ids, _ = tokenizer.render_conversation(conversation)
+ids, _ = tokenizer.render_conversation(conversation, max_tokens=row_capacity)
conv_buffer.append(ids)
cursor += ddp_world_size
if cursor >= dataset_size: