From e1770a3061df8064b95422febb8deff2b75c419a Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Sat, 27 Dec 2025 23:07:48 +0000
Subject: [PATCH] remove spurious cast, gets compiled away anyway but it's
 confusing people

---
 nanochat/gpt.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/nanochat/gpt.py b/nanochat/gpt.py
index 9a80c7c..69899ee 100644
--- a/nanochat/gpt.py
+++ b/nanochat/gpt.py
@@ -41,12 +41,10 @@ def norm(x):
 def apply_rotary_emb(x, cos, sin):
     assert x.ndim == 4 # multihead attention
     d = x.shape[3] // 2
-    x1, x2 = x[..., :d], x[..., d:] # split up last time into two halves
+    x1, x2 = x[..., :d], x[..., d:] # split up last dim into two halves
     y1 = x1 * cos + x2 * sin # rotate pairs of dims
     y2 = x1 * (-sin) + x2 * cos
-    out = torch.cat([y1, y2], 3) # re-assemble
-    out = out.to(x.dtype) # ensure input/output dtypes match
-    return out
+    return torch.cat([y1, y2], 3)
 
 class CausalSelfAttention(nn.Module):
     def __init__(self, config, layer_idx):
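
Editor's note (not part of the patch): a minimal standalone sketch of why the
removed cast is a no-op, under the assumption that cos/sin are precomputed in
the same dtype as x, so elementwise type promotion never upcasts and the
output dtype already matches the input. The shapes and the inv_freq
construction below are illustrative, not nanochat's exact precompute code.

  # assumption: cos/sin share x's dtype, so no explicit output cast is needed
  import torch

  def apply_rotary_emb(x, cos, sin):
      assert x.ndim == 4  # (batch, seq, heads, head_dim)
      d = x.shape[3] // 2
      x1, x2 = x[..., :d], x[..., d:]  # split up last dim into two halves
      y1 = x1 * cos + x2 * sin         # rotate pairs of dims
      y2 = x1 * (-sin) + x2 * cos
      return torch.cat([y1, y2], 3)

  B, T, H, D = 2, 8, 4, 16                       # illustrative sizes
  x = torch.randn(B, T, H, D, dtype=torch.bfloat16)
  # hypothetical rotary frequencies, broadcast over batch and heads
  inv_freq = 1.0 / (10000.0 ** (torch.arange(0, D // 2).float() / (D // 2)))
  angles = torch.arange(T).float()[:, None] * inv_freq[None, :]  # (T, D/2)
  cos = angles.cos()[None, :, None, :].to(x.dtype)               # (1, T, 1, D/2)
  sin = angles.sin()[None, :, None, :].to(x.dtype)
  out = apply_rotary_emb(x, cos, sin)
  assert out.dtype == x.dtype  # holds without the removed out.to(x.dtype)

If cos/sin were instead kept in float32, the elementwise products would
promote to float32 and the cast would be load-bearing in eager mode; the
commit message notes that under torch.compile it gets compiled away either
way, and the removal is mainly to avoid confusing readers.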