chore: clarify lr warmdown semantics and remove redundant int()

This commit is contained in:
Dipesh Babu 2026-02-20 09:36:47 -05:00
parent c902a8d6bc
commit 0fde31156c

View File

@@ -349,8 +349,8 @@ print0(f"Total training FLOPs estimate: {num_flops_per_token * total_tokens:e}")
# Learning rate schedule (linear warmup, constant, linear warmdown)
def get_lr_multiplier(it):
# Note: optimizer steps run for it in [0, num_iterations-1]
warmup_iters = int(round(args.warmup_ratio * num_iterations))
warmdown_iters = int(round(args.warmdown_ratio * num_iterations))
warmup_iters = round(args.warmup_ratio * num_iterations)
warmdown_iters = round(args.warmdown_ratio * num_iterations)
# Warmup (avoid division by zero when warmup_iters == 0)
if warmup_iters > 0 and it < warmup_iters: