This commit is contained in:
Sofie Van Landeghem 2025-10-29 09:57:35 +01:00 committed by GitHub
parent 964d459d9b
commit c93f90d161
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -23,7 +23,6 @@ from nanochat.common import get_dist_info
from nanochat.muon import Muon, DistMuon
from nanochat.adamw import DistAdamW
@dataclass
class GPTConfig:
sequence_len: int = 1024
@@ -49,7 +48,6 @@ def apply_rotary_emb(x, cos, sin):
out = out.to(x.dtype) # ensure input/output dtypes match
return out
class CausalSelfAttention(nn.Module):
def __init__(self, config, layer_idx):
super().__init__()