mirror of
https://github.com/karpathy/nanochat.git
synced 2025-12-06 12:22:18 +00:00
clean up
This commit is contained in:
parent
964d459d9b
commit
c93f90d161
|
|
@ -23,7 +23,6 @@ from nanochat.common import get_dist_info
|
||||||
from nanochat.muon import Muon, DistMuon
|
from nanochat.muon import Muon, DistMuon
|
||||||
from nanochat.adamw import DistAdamW
|
from nanochat.adamw import DistAdamW
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class GPTConfig:
|
class GPTConfig:
|
||||||
sequence_len: int = 1024
|
sequence_len: int = 1024
|
||||||
|
|
@ -49,7 +48,6 @@ def apply_rotary_emb(x, cos, sin):
|
||||||
out = out.to(x.dtype) # ensure input/output dtypes match
|
out = out.to(x.dtype) # ensure input/output dtypes match
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
class CausalSelfAttention(nn.Module):
|
class CausalSelfAttention(nn.Module):
|
||||||
def __init__(self, config, layer_idx):
|
def __init__(self, config, layer_idx):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user