mirror of
https://github.com/karpathy/nanochat.git
synced 2026-06-18 20:19:08 +00:00
Merge 79b7b04ca0 into 2dffdc8cf6
This commit is contained in:
commit
0e6bbbd167
|
|
@@ -52,8 +52,8 @@ def apply_rotary_emb(x, cos, sin):
|
||||||
assert x.ndim == 4 # multihead attention
|
assert x.ndim == 4 # multihead attention
|
||||||
d = x.shape[3] // 2
|
d = x.shape[3] // 2
|
||||||
x1, x2 = x[..., :d], x[..., d:] # split up last dim into two halves
|
x1, x2 = x[..., :d], x[..., d:] # split up last dim into two halves
|
||||||
y1 = x1 * cos + x2 * sin # rotate pairs of dims
|
y1 = x1 * cos - x2 * sin # rotate pairs of dims
|
||||||
y2 = x1 * (-sin) + x2 * cos
|
y2 = x1 * sin + x2 * cos
|
||||||
return torch.cat([y1, y2], 3)
|
return torch.cat([y1, y2], 3)
|
||||||
|
|
||||||
class CausalSelfAttention(nn.Module):
|
class CausalSelfAttention(nn.Module):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user