Mirror of https://github.com/karpathy/nanochat.git (synced 2025-12-06 12:22:18 +00:00)
Commit 2e938530ce: delete spurious torch.empty allocation in adamw

fix: remove unnecessary tensor allocation in DistAdamW optimizer
@@ -26,7 +26,6 @@ class DistAdamW(torch.optim.Optimizer):
         grad_slices = []
         for group in self.param_groups:
             params: list[Tensor] = group["params"]
-            grad = torch.empty_like(params[-1]) # TODO is this bug? seems to be over-written instantly
             for base_i in range(len(params)):
                 grad = params[base_i].grad
                 rank_size = grad.shape[0] // world_size