Use orig_model for sampling.

This commit is contained in:
Ningjun Dou 2026-05-15 10:40:34 +02:00
parent 3d55b1df96
commit 3dbf6adf4f

View File

@@ -408,7 +408,7 @@ while True:
# use the original uncompiled model because the inputs keep changing shape
if args.sample_every > 0 and master_process and (last_step or (step > 0 and step % args.sample_every == 0)):
model.eval()
- evaluate_sample(model, tokenizer, lambda x:print0(x), True)
+ evaluate_sample(orig_model, tokenizer, lambda x:print0(x), True)
model.train()
# save checkpoint: at the end of the run, or every save_every steps, except at the first step or the resume step