From 2c6a007e3ce0b47dbe5a3277ead17f6daa3d1d65 Mon Sep 17 00:00:00 2001
From: zzF <149790942+zZzZ9zZ9@users.noreply.github.com>
Date: Wed, 26 Nov 2025 14:05:11 +0800
Subject: [PATCH] Fix race condition in save_checkpoint for non-zero ranks

---
 nanochat/checkpoint_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nanochat/checkpoint_manager.py b/nanochat/checkpoint_manager.py
index 63f257f..e788b73 100644
--- a/nanochat/checkpoint_manager.py
+++ b/nanochat/checkpoint_manager.py
@@ -21,8 +21,8 @@ def log0(message):
         logger.info(message)
 
 def save_checkpoint(checkpoint_dir, step, model_data, optimizer_data, meta_data, rank=0):
+    os.makedirs(checkpoint_dir, exist_ok=True)
     if rank == 0:
-        os.makedirs(checkpoint_dir, exist_ok=True)
         # Save the model state parameters
         model_path = os.path.join(checkpoint_dir, f"model_{step:06d}.pt")
         torch.save(model_data, model_path)