mirror of
https://github.com/karpathy/nanochat.git
synced 2026-02-11 13:09:52 +00:00
fix: cap SFT rendered conversation length
This commit is contained in:
parent
31b61d2d17
commit
d4db003661
|
|
@@ -152,7 +152,7 @@ def sft_data_generator_bos_bestfit(split, buffer_size=100):
|
|||
nonlocal cursor, epoch
|
||||
while len(conv_buffer) < buffer_size:
|
||||
conversation = dataset[cursor]
|
||||
- ids, _ = tokenizer.render_conversation(conversation)
|
||||
+ ids, _ = tokenizer.render_conversation(conversation, max_tokens=row_capacity)
|
||||
conv_buffer.append(ids)
|
||||
cursor += ddp_world_size
|
||||
if cursor >= dataset_size:
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user