This commit is contained in:
Sofie Van Landeghem 2025-10-29 09:57:35 +01:00 committed by GitHub
parent 964d459d9b
commit c93f90d161
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -23,7 +23,6 @@ from nanochat.common import get_dist_info
from nanochat.muon import Muon, DistMuon from nanochat.muon import Muon, DistMuon
from nanochat.adamw import DistAdamW from nanochat.adamw import DistAdamW
@dataclass @dataclass
class GPTConfig: class GPTConfig:
sequence_len: int = 1024 sequence_len: int = 1024
@ -49,7 +48,6 @@ def apply_rotary_emb(x, cos, sin):
out = out.to(x.dtype) # ensure input/output dtypes match out = out.to(x.dtype) # ensure input/output dtypes match
return out return out
class CausalSelfAttention(nn.Module): class CausalSelfAttention(nn.Module):
def __init__(self, config, layer_idx): def __init__(self, config, layer_idx):
super().__init__() super().__init__()