call reshape instead of view in case the tensors are not contiguous

This commit is contained in:
svlandeg 2026-02-27 01:50:37 +01:00
parent c7ba252142
commit 83de1b18b1
2 changed files with 2 additions and 2 deletions

View File

@@ -416,7 +416,7 @@ class GPT(nn.Module):
if targets is not None:
# training: given the targets, compute and return the loss
# TODO experiment with chunked cross-entropy?
-loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1, reduction=loss_reduction)
+loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.reshape(-1), ignore_index=-1, reduction=loss_reduction)
return loss
else:
# inference: just return the logits directly

View File

@@ -32,7 +32,7 @@ def evaluate_bpb(model, batches, steps, token_bytes):
x, y = next(batch_iter)
loss2d = model(x, y, loss_reduction='none') # (B, T)
loss2d = loss2d.view(-1) # flatten
-y = y.view(-1) # flatten
+y = y.reshape(-1) # flatten
if (y.int() < 0).any(): # mps does not currently have kernel for < 0 for int64, only int32
# slightly more complex code path if some target tokens are ignore_index (e.g. -1)
# any target token < 0 is to be ignored: do NOT index token_bytes with negatives