Merge 2ae28292aa into 2f09686724

2026-06-19 04:29:09 +00:00 · 2026-02-11 09:38:29 -04:00 · 2026-02-11 09:38:29 -04:00 · d6ee33eda5
commit d6ee33eda5
parent 2f09686724 2ae28292aa
1 changed file with 3 additions and 1 deletion
--- a/scripts/base_train.py
+++ b/scripts/base_train.py
@@ -487,13 +487,15 @@ while True:
    # evaluate the gradient
    synchronize()
    t0 = time.time()
+    train_loss = 0.0
    for micro_step in range(grad_accum_steps):
        with autocast_ctx:
            loss = model(x, y)
-        train_loss = loss.detach() # for logging
+        train_loss += loss.detach() # accumulate for logging
        loss = loss / grad_accum_steps # each .backward() is a grad sum => normalize loss here
        loss.backward()
        x, y, dataloader_state_dict = next(train_loader) # prefetch the next batch while the GPU is busy with forward/backward
+    train_loss = train_loss / grad_accum_steps # average across micro steps
    # step the optimizer
    lrm = get_lr_multiplier(step)
    muon_momentum = get_muon_momentum(step)