Mirror of https://github.com/karpathy/nanochat.git (synced 2025-12-06 12:22:18 +00:00)
Commit 2e938530ce: delete spurious torch.empty allocation in adamw

fix: remove unnecessary tensor allocation in DistAdamW optimizer
@@ -26,7 +26,6 @@ class DistAdamW(torch.optim.Optimizer):
         grad_slices = []
         for group in self.param_groups:
             params: list[Tensor] = group["params"]
-            grad = torch.empty_like(params[-1]) # TODO is this bug? seems to be over-written instantly
             for base_i in range(len(params)):
                 grad = params[base_i].grad
                 rank_size = grad.shape[0] // world_size