From 255f8b9af6d308a199601463b0062d618090a346 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Thu, 15 Jan 2026 23:30:11 +0000 Subject: [PATCH] cleanly separate cpu and gpu sections --- scripts/base_train.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/base_train.py b/scripts/base_train.py index bf4b8cf..a9ee1c3 100644 --- a/scripts/base_train.py +++ b/scripts/base_train.py @@ -370,14 +370,15 @@ while True: for opt in optimizers: opt.step() model.zero_grad(set_to_none=True) + train_loss_f = train_loss.item() # .item() is a CPU-GPU sync point synchronize() t1 = time.time() dt = t1 - t0 # ------------------------------------------------------------------------- - # logging + # logging (CPU action only) ema_beta = 0.9 # EMA decay factor for some smoothing just for nicer logging - smooth_train_loss = ema_beta * smooth_train_loss + (1 - ema_beta) * train_loss.item() # EMA the training loss + smooth_train_loss = ema_beta * smooth_train_loss + (1 - ema_beta) * train_loss_f # EMA the training loss debiased_smooth_loss = smooth_train_loss / (1 - ema_beta**(step + 1)) # debias the EMA pct_done = 100 * step / num_iterations tok_per_sec = int(args.total_batch_size / dt)