Merge branch 'karpathy:master' into Exclusive-Self-Attention

This commit is contained in:
Rohan Khan 2026-04-14 07:58:19 +06:00 committed by GitHub
commit 6eedfa188c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -238,6 +238,11 @@ class GPT(nn.Module):
for i in range(n_layer):
self.x0_lambdas.data[i] = 0.20 - (0.15 * i / max(n_layer - 1, 1))
# Smear/backout scalars and smear gate must be explicitly initialized
torch.nn.init.zeros_(self.smear_lambda)
torch.nn.init.constant_(self.backout_lambda, 0.2)
torch.nn.init.uniform_(self.smear_gate.weight, 0.0, 0.02)
# Value embeddings (init like c_v: uniform with same std)
for ve in self.value_embeds.values():
torch.nn.init.uniform_(ve.weight, -s, s)