Use speedrun compile mode

2026-07-06 12:59:14 +00:00 · 2026-05-07 12:26:57 +00:00 · 2026-05-07 12:26:57 +00:00 · 412e9a1cbc
commit 412e9a1cbc
parent 4680e799fb
1 changed files with 1 additions and 1 deletions
--- a/scripts/base_train.py
+++ b/scripts/base_train.py
@@ -249,7 +249,7 @@ def disable_fp8(model):
 # Compile the model

 orig_model = model # original, uncompiled model, for saving raw model state_dict and for inference/evaluation (because the shapes may change shape)
-model = torch.compile(model, dynamic=False) # the inputs to model will never change shape so dynamic=False is safe
+model = torch.compile(model, dynamic=False, mode="max-autotune-no-cudagraphs") # the inputs to model will never change shape so dynamic=False is safe

 # -----------------------------------------------------------------------------
 # Scaling laws and muP extrapolations to determine the optimal training horizon, batch size, learning rates, weight decay.