mirror of
https://github.com/karpathy/nanochat.git
synced 2026-03-22 12:53:26 +00:00
fix bug: ensure every rank can create checkpoint_dir when saving optimizer state
This commit is contained in:
parent
2fd0440355
commit
90442de35f
|
|
@ -34,6 +34,7 @@ def save_checkpoint(checkpoint_dir, step, model_data, optimizer_data, meta_data,
|
|||
logger.info(f"Saved metadata to: {meta_path}")
|
||||
# Note that optimizer state is sharded across ranks, so each rank must save its own.
|
||||
if optimizer_data is not None:
|
||||
os.makedirs(checkpoint_dir, exist_ok=True)
|
||||
optimizer_path = os.path.join(checkpoint_dir, f"optim_{step:06d}_rank{rank:d}.pt")
|
||||
torch.save(optimizer_data, optimizer_path)
|
||||
logger.info(f"Saved optimizer state to: {optimizer_path}")
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user