From 79b7b04ca0880869f5af4bd124e25246b1624380 Mon Sep 17 00:00:00 2001 From: fpvsim Date: Sun, 1 Feb 2026 15:56:21 -0800 Subject: [PATCH] Fix rotation calculations in apply_rotary_emb function After reading some blog posts on RoPE, the current implementation looks slightly off: it rotates each (x1, x2) pair by -theta (y1 = x1*cos + x2*sin, y2 = -x1*sin + x2*cos) rather than by +theta (y1 = x1*cos - x2*sin, y2 = x1*sin + x2*cos), which is the form those write-ups use. This patch flips the sign of the sin terms accordingly. Or am I missing something? --- nanochat/gpt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nanochat/gpt.py b/nanochat/gpt.py index 208acd1..0575292 100644 --- a/nanochat/gpt.py +++ b/nanochat/gpt.py @@ -52,8 +52,8 @@ def apply_rotary_emb(x, cos, sin): assert x.ndim == 4 # multihead attention d = x.shape[3] // 2 x1, x2 = x[..., :d], x[..., d:] # split up last dim into two halves - y1 = x1 * cos + x2 * sin # rotate pairs of dims - y2 = x1 * (-sin) + x2 * cos + y1 = x1 * cos - x2 * sin # rotate pairs of dims + y2 = x1 * sin + x2 * cos return torch.cat([y1, y2], 3) class CausalSelfAttention(nn.Module):