mirror of
https://github.com/karpathy/nanochat.git
synced 2026-02-19 18:10:24 +00:00
warmdown of 0.5 is slightly better:
This commit is contained in:
parent
4d8dbaf6e0
commit
3c3a3d7042
|
|
@@ -59,7 +59,7 @@ parser.add_argument("--scalar-lr", type=float, default=0.5, help="learning rate
|
|||
parser.add_argument("--adam-beta1", type=float, default=0.8, help="Adam beta1 for embedding/unembedding")
|
||||
parser.add_argument("--adam-beta2", type=float, default=0.95, help="Adam beta2 for embedding/unembedding")
|
||||
parser.add_argument("--warmup-ratio", type=float, default=0.0, help="ratio of iterations for LR warmup")
|
||||
- parser.add_argument("--warmdown-ratio", type=float, default=0.4, help="ratio of iterations for LR warmdown")
|
||||
+ parser.add_argument("--warmdown-ratio", type=float, default=0.5, help="ratio of iterations for LR warmdown")
|
||||
parser.add_argument("--final-lr-frac", type=float, default=0.0, help="final LR as fraction of initial LR")
|
||||
parser.add_argument("--resume-from-step", type=int, default=-1, help="resume training from this step (-1 = disable)")
|
||||
# Evaluation
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user