This commit is contained in:
Salman Mohammadi 2025-11-03 12:07:23 +00:00
parent fe9885d20a
commit 5cf2bca56a
2 changed files with 7 additions and 4 deletions

View File

@ -175,7 +175,7 @@ def main():
model_name = f"base_model (step {meta['step']})" # just for logging
model_slug = f"base_model_{meta['step']:06d}" # for the output csv file
model = torch.compile(model)
#model = torch.compile(model)
# Evaluate the model
with autocast_ctx:
out = evaluate_model(model, tokenizer, device, max_per_task=args.max_per_task)
@ -198,6 +198,7 @@ def main():
# Print the content of the csv file to console too
print0("="*80)
print0(f"Model: {model_name}")
print0(f"Eval time: {out['dt']:.4f}s")
print0("="*80)
with open(output_csv_path, 'r') as f:
print0(f.read())

View File

@ -202,8 +202,9 @@ for step in range(num_iterations + 1):
})
model.train()
# once in a while: estimate the CORE metric (all ranks participate)
# use the original uncompiled model because the inputs keep changing shape
# once in a while: estimate the CORE metric (all ranks participate)
# use the eval-compiled model, because the training-compiled model
# has been specialized for a fixed input shape
results = {}
if core_metric_every > 0 and (last_step or (step > 0 and step % core_metric_every == 0)):
model.eval()
@ -220,7 +221,8 @@ for step in range(num_iterations + 1):
model.train()
# once in a while: sample from the model (only on master process)
# use the original uncompiled model because the inputs keep changing shape
# use the eval-compiled model, because the training-compiled model
# has been specialized for a fixed input shape
if master_process and (last_step or (step > 0 and step % sample_every == 0)):
model.eval()
prompts = [