From 9ebb031f6490182e0da811d635d963641c556e69 Mon Sep 17 00:00:00 2001
From: marked23 <@marked23>
Date: Thu, 20 Nov 2025 11:21:24 -0800
Subject: [PATCH 1/2] Allow any rank to create the checkpoint_dir

---
 nanochat/checkpoint_manager.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nanochat/checkpoint_manager.py b/nanochat/checkpoint_manager.py
index 63f257f..96588ec 100644
--- a/nanochat/checkpoint_manager.py
+++ b/nanochat/checkpoint_manager.py
@@ -21,8 +21,9 @@ def log0(message):
         logger.info(message)
 
 def save_checkpoint(checkpoint_dir, step, model_data, optimizer_data, meta_data, rank=0):
+    os.makedirs(checkpoint_dir, exist_ok=True)
+
     if rank == 0:
-        os.makedirs(checkpoint_dir, exist_ok=True)
         # Save the model state parameters
         model_path = os.path.join(checkpoint_dir, f"model_{step:06d}.pt")
         torch.save(model_data, model_path)

From b06bedac08718ad17a98e836cad921223a514893 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Wed, 26 Nov 2025 18:09:23 +0100
Subject: [PATCH 2/2] remove empty line

---
 nanochat/checkpoint_manager.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/nanochat/checkpoint_manager.py b/nanochat/checkpoint_manager.py
index 96588ec..e788b73 100644
--- a/nanochat/checkpoint_manager.py
+++ b/nanochat/checkpoint_manager.py
@@ -22,7 +22,6 @@ def log0(message):
 
 def save_checkpoint(checkpoint_dir, step, model_data, optimizer_data, meta_data, rank=0):
     os.makedirs(checkpoint_dir, exist_ok=True)
-
     if rank == 0:
         # Save the model state parameters
         model_path = os.path.join(checkpoint_dir, f"model_{step:06d}.pt")