mirror of
https://github.com/karpathy/nanochat.git
synced 2026-05-14 03:38:02 +00:00
Use speedrun compile mode
This commit is contained in:
parent
4680e799fb
commit
412e9a1cbc
|
|
@@ -249,7 +249,7 @@ def disable_fp8(model):
|
|||
# Compile the model
|
||||
|
||||
orig_model = model # original, uncompiled model, for saving raw model state_dict and for inference/evaluation (because the input shapes may change there)
|
||||
model = torch.compile(model, dynamic=False) # the inputs to model will never change shape so dynamic=False is safe
|
||||
model = torch.compile(model, dynamic=False, mode="max-autotune-no-cudagraphs") # the inputs to model will never change shape so dynamic=False is safe
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Scaling laws and muP extrapolations to determine the optimal training horizon, batch size, learning rates, weight decay.
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user