Mirror of https://github.com/karpathy/nanochat.git

commit 5cf2bca56a
parent fe9885d20a

    cleanup

@@ -175,7 +175,7 @@ def main():
     model_name = f"base_model (step {meta['step']})" # just for logging
     model_slug = f"base_model_{meta['step']:06d}" # for the output csv file
 
-    model = torch.compile(model)
+    #model = torch.compile(model)
     # Evaluate the model
     with autocast_ctx:
         out = evaluate_model(model, tokenizer, device, max_per_task=args.max_per_task)
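
The evaluation entry point above now runs the model uncompiled. The training-loop comment that this commit rewrites further down ("use the original uncompiled model because the inputs keep changing shape") points at the likely reason: by default torch.compile specializes on the input shapes it observes, so an evaluation pass that feeds many different sequence lengths can spend its time recompiling instead of evaluating. A minimal sketch of that behavior, using a toy module and made-up shapes rather than anything from nanochat:

import torch
import torch.nn as nn

# Toy stand-in module; none of this is nanochat code.
net = nn.Linear(32, 32)
compiled = torch.compile(net)

# Each previously unseen input shape can trigger another specialization,
# which is pure overhead for a one-off evaluation pass.
for seq_len in (8, 16, 24):
    x = torch.randn(seq_len, 32)
    compiled(x)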

@@ -198,6 +198,7 @@ def main():
     # Print the content of the csv file to console too
     print0("="*80)
     print0(f"Model: {model_name}")
+    print0(f"Eval time: {out['dt']:.4f}s")
     print0("="*80)
     with open(output_csv_path, 'r') as f:
         print0(f.read())

@@ -202,8 +202,9 @@ for step in range(num_iterations + 1):
             })
         model.train()
 
-    # once in a while: estimate the CORE metric (all ranks participate)
-    # use the original uncompiled model because the inputs keep changing shape
+    # once in a while: estimate the CORE metric (all ranks participate)
+    # use the eval-compiled model which, like the training-compiled model,
+    # has been specialized for a fixed input shape
     results = {}
     if core_metric_every > 0 and (last_step or (step > 0 and step % core_metric_every == 0)):
         model.eval()

@@ -220,7 +221,8 @@ for step in range(num_iterations + 1):
         model.train()
 
     # once in a while: sample from the model (only on master process)
-    # use the original uncompiled model because the inputs keep changing shape
+    # use the eval-compiled model which, like the training-compiled model,
+    # has been specialized for a fixed input shape
     if master_process and (last_step or (step > 0 and step % sample_every == 0)):
         model.eval()
         prompts = [
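
The rewritten comments in the two training-loop hunks above say the periodic CORE evaluation and sampling now go through an eval-compiled model that, like the training-compiled one, has been specialized for a fixed input shape. A rough sketch of the general idea, with a toy module and invented shapes standing in for the real model and batch geometry (not the repo's actual setup): as long as each compiled entry point is only ever fed one input shape, torch.compile specializes it once and the shape guards keep hitting, so alternating between training and evaluation causes no recompilation churn.

import torch
import torch.nn as nn

net = nn.Linear(64, 64)                          # toy stand-in for the model
train_model = torch.compile(net, dynamic=False)  # fed only training-shaped batches
eval_model = torch.compile(net, dynamic=False)   # fed only eval-shaped batches

for _ in range(3):
    train_model(torch.randn(16, 64))      # fixed training shape: compiled once, then reused
    with torch.no_grad():
        eval_model(torch.randn(4, 64))    # fixed eval shape: its own one-time specialization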