From 412e9a1cbc0195ae62bda6ae8969fce8e58577b2 Mon Sep 17 00:00:00 2001 From: Codex Date: Thu, 7 May 2026 12:26:57 +0000 Subject: [PATCH] Use speedrun compile mode (max-autotune-no-cudagraphs) --- scripts/base_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/base_train.py b/scripts/base_train.py index b415005c..adf28d35 100644 --- a/scripts/base_train.py +++ b/scripts/base_train.py @@ -249,7 +249,7 @@ def disable_fp8(model): # Compile the model orig_model = model # original, uncompiled model, for saving raw model state_dict and for inference/evaluation (because the shapes may change shape) -model = torch.compile(model, dynamic=False) # the inputs to model will never change shape so dynamic=False is safe +model = torch.compile(model, dynamic=False, mode="max-autotune-no-cudagraphs") # the inputs to model will never change shape so dynamic=False is safe # ----------------------------------------------------------------------------- # Scaling laws and muP extrapolations to determine the optimal training horizon, batch size, learning rates, weight decay.