Allow torchrun with 1 device

2026-04-16 13:58:38 +00:00 · 2026-01-26 12:03:22 -08:00 · 2026-01-26 12:03:22 -08:00 · d1595fb2d1
commit d1595fb2d1
parent 2e58d05782
1 changed file with 2 additions and 2 deletions
--- a/nanochat/gpt.py
+++ b/nanochat/gpt.py
@@ -350,8 +350,8 @@ class GPT(nn.Module):
            dict(params=x0_params, lr=scalar_lr),
        ]
        
-        # MuonAdamW for single-GPU, DistMuonAdamW for multi-GPU (with communication overlap)
-        OptimizerClass = DistMuonAdamW if ddp else MuonAdamW
+        # MuonAdamW for single-GPU, DistMuonAdamW for multi-GPU
+        OptimizerClass = DistMuonAdamW if (ddp and world_size > 1) else MuonAdamW
        optimizer = OptimizerClass(
            adamw_groups=adam_groups,
            muon_params=matrix_params,