Use speedrun compile mode

This commit is contained in:
Codex 2026-05-07 12:26:57 +00:00
parent 4680e799fb
commit 412e9a1cbc

View File

@ -249,7 +249,7 @@ def disable_fp8(model):
# Compile the model
orig_model = model # original, uncompiled model, for saving raw model state_dict and for inference/evaluation (because the shapes may change shape)
model = torch.compile(model, dynamic=False) # the inputs to model will never change shape so dynamic=False is safe
model = torch.compile(model, dynamic=False, mode="max-autotune-no-cudagraphs") # the inputs to model will never change shape so dynamic=False is safe
# -----------------------------------------------------------------------------
# Scaling laws and muP extrapolations to determine the optimal training horizon, batch size, learning rates, weight decay.