Merge da507c5835 into 83dccc20ae

2026-06-18 12:09:09 +00:00 · 2026-03-03 11:25:39 -05:00 · 2026-03-03 11:25:39 -05:00 · 3a998fccf5
commit 3a998fccf5
parent 83dccc20ae da507c5835
1 changed file with 2 additions and 2 deletions
--- a/scripts/chat_sft.py
+++ b/scripts/chat_sft.py
@@ -282,8 +282,8 @@ def sft_data_generator_bos_bestfit(split, buffer_size=100):
        # Build tensors
        use_cuda = device_type == "cuda"
        batch_tensor = torch.tensor(rows, dtype=torch.long, pin_memory=use_cuda)
-        inputs = batch_tensor[:, :-1].to(device=device, dtype=torch.int32, non_blocking=use_cuda)
+        inputs = batch_tensor[:, :-1].to(device=device, dtype=torch.int32, non_blocking=use_cuda).contiguous()
-        targets = batch_tensor[:, 1:].to(device=device, dtype=torch.int64, non_blocking=use_cuda)
+        targets = batch_tensor[:, 1:].to(device=device, dtype=torch.int64, non_blocking=use_cuda).contiguous()
        # Apply the loss mask from render_conversation (mask=1 for assistant completions,
        # mask=0 for user prompts, BOS, special tokens, tool outputs). mask[1:] aligns