Mirror of https://github.com/karpathy/nanochat.git

commit 5cf2bca56a
parent fe9885d20a

    cleanup

@@ -175,7 +175,7 @@ def main():
     model_name = f"base_model (step {meta['step']})" # just for logging
     model_slug = f"base_model_{meta['step']:06d}" # for the output csv file
 
-    model = torch.compile(model)
+    #model = torch.compile(model)
     # Evaluate the model
     with autocast_ctx:
         out = evaluate_model(model, tokenizer, device, max_per_task=args.max_per_task)
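
The evaluation entry point above now runs the model uncompiled. The training-loop comment that this commit rewrites further down ("use the original uncompiled model because the inputs keep changing shape") points at the likely reason: by default torch.compile specializes on the input shapes it observes, so an evaluation pass that feeds many different sequence lengths can spend its time recompiling instead of evaluating. A minimal sketch of that behavior, using a toy module and made-up shapes rather than anything from nanochat:

import torch
import torch.nn as nn

# Toy stand-in module; none of this is nanochat code.
net = nn.Linear(32, 32)
compiled = torch.compile(net)

# Each previously unseen input shape can trigger another specialization,
# which is pure overhead for a one-off evaluation pass.
for seq_len in (8, 16, 24):
    x = torch.randn(seq_len, 32)
    compiled(x)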

@@ -198,6 +198,7 @@ def main():
     # Print the content of the csv file to console too
     print0("="*80)
     print0(f"Model: {model_name}")
+    print0(f"Eval time: {out['dt']:.4f}s")
     print0("="*80)
     with open(output_csv_path, 'r') as f:
         print0(f.read())

@@ -202,8 +202,9 @@ for step in range(num_iterations + 1):
             })
         model.train()
 
-    # once in a while: estimate the CORE metric (all ranks participate)
-    # use the original uncompiled model because the inputs keep changing shape
+    # once in a while: estimate the CORE metric (all ranks participate)
+    # use the eval-compiled model which, like the training-compiled model,
+    # has been specialized for a fixed input shape
     results = {}
     if core_metric_every > 0 and (last_step or (step > 0 and step % core_metric_every == 0)):
         model.eval()

@@ -220,7 +221,8 @@ for step in range(num_iterations + 1):
         model.train()
 
     # once in a while: sample from the model (only on master process)
-    # use the original uncompiled model because the inputs keep changing shape
+    # use the eval-compiled model which, like the training-compiled model,
+    # has been specialized for a fixed input shape
     if master_process and (last_step or (step > 0 and step % sample_every == 0)):
         model.eval()
         prompts = [
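
The rewritten comments in the two training-loop hunks above say the periodic CORE evaluation and sampling now go through an eval-compiled model that, like the training-compiled one, has been specialized for a fixed input shape. A rough sketch of the general idea, with a toy module and invented shapes standing in for the real model and batch geometry (not the repo's actual setup): as long as each compiled entry point is only ever fed one input shape, torch.compile specializes it once and the shape guards keep hitting, so alternating between training and evaluation causes no recompilation churn.

import torch
import torch.nn as nn

net = nn.Linear(64, 64)                          # toy stand-in for the model
train_model = torch.compile(net, dynamic=False)  # fed only training-shaped batches
eval_model = torch.compile(net, dynamic=False)   # fed only eval-shaped batches

for _ in range(3):
    train_model(torch.randn(16, 64))      # fixed training shape: compiled once, then reused
    with torch.no_grad():
        eval_model(torch.randn(4, 64))    # fixed eval shape: its own one-time specialization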