From c0618a6b7e72e1e66017b2dca46877010872234d Mon Sep 17 00:00:00 2001
From: Pablo Fernandez
Date: Sun, 11 Jan 2026 11:22:12 +0000
Subject: [PATCH] Print the prompt before inference in engine.py's main.

---
 nanochat/engine.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nanochat/engine.py b/nanochat/engine.py
index d4367fb..d3e189a 100644
--- a/nanochat/engine.py
+++ b/nanochat/engine.py
@@ -339,6 +339,7 @@ if __name__ == "__main__":
     generated_tokens = []
     torch.cuda.synchronize()
     t0 = time.time()
+    print(tokenizer.decode(prompt_tokens))
     stream = model.generate(prompt_tokens, **kwargs)
     with autocast_ctx:
         for token in stream:
@@ -353,6 +354,7 @@ if __name__ == "__main__":
     # generate tokens with Engine
     generated_tokens = []
     engine = Engine(model, tokenizer)
+    print(tokenizer.decode(prompt_tokens))
     stream = engine.generate(prompt_tokens, num_samples=1, **kwargs) # note: runs in fp32
     torch.cuda.synchronize()
     t0 = time.time()