mirror of
https://github.com/karpathy/nanochat.git
synced 2026-02-23 12:00:23 +00:00
Pass p as tensor to fused adam
We can avoid a couple recompiles by passing the underlying tensor for a parameter instead of the parameter object.
This commit is contained in:
parent
3c3a3d7042
commit
9b9ef3ef38
|
|
@ -217,7 +217,7 @@ class MuonAdamW(torch.optim.Optimizer):
|
|||
|
||||
# Fused update: weight_decay -> momentum -> bias_correction -> param_update
|
||||
adamw_step_fused(
|
||||
p, grad, exp_avg, exp_avg_sq,
|
||||
p.data, grad, exp_avg, exp_avg_sq,
|
||||
self._adamw_step_t, self._adamw_lr_t, self._adamw_beta1_t,
|
||||
self._adamw_beta2_t, self._adamw_eps_t, self._adamw_wd_t,
|
||||
)
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user