From 3c3a3d70420e6a63575ebadb8bd466bbedd7156c Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Sat, 31 Jan 2026 01:08:44 +0000
Subject: [PATCH] warmdown of 0.5 is slightly better:

---
 scripts/base_train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/base_train.py b/scripts/base_train.py
index 4bce6cd..7ed6330 100644
--- a/scripts/base_train.py
+++ b/scripts/base_train.py
@@ -59,7 +59,7 @@ parser.add_argument("--scalar-lr", type=float, default=0.5, help="learning rate
 parser.add_argument("--adam-beta1", type=float, default=0.8, help="Adam beta1 for embedding/unembedding")
 parser.add_argument("--adam-beta2", type=float, default=0.95, help="Adam beta2 for embedding/unembedding")
 parser.add_argument("--warmup-ratio", type=float, default=0.0, help="ratio of iterations for LR warmup")
-parser.add_argument("--warmdown-ratio", type=float, default=0.4, help="ratio of iterations for LR warmdown")
+parser.add_argument("--warmdown-ratio", type=float, default=0.5, help="ratio of iterations for LR warmdown")
 parser.add_argument("--final-lr-frac", type=float, default=0.0, help="final LR as fraction of initial LR")
 parser.add_argument("--resume-from-step", type=int, default=-1, help="resume training from this step (-1 = disable)")
 # Evaluation