Print the prompt before inference in engine.py's main.

Pablo Fernandez 2026-01-11 11:22:12 +00:00
parent f5a0ea4d3f
commit c0618a6b7e


@@ -339,6 +339,7 @@ if __name__ == "__main__":
     generated_tokens = []
     torch.cuda.synchronize()
     t0 = time.time()
+    print(tokenizer.decode(prompt_tokens))
     stream = model.generate(prompt_tokens, **kwargs)
     with autocast_ctx:
         for token in stream:
@@ -353,6 +354,7 @@ if __name__ == "__main__":
     # generate tokens with Engine
     generated_tokens = []
     engine = Engine(model, tokenizer)
+    print(tokenizer.decode(prompt_tokens))
     stream = engine.generate(prompt_tokens, num_samples=1, **kwargs)  # note: runs in fp32
     torch.cuda.synchronize()
     t0 = time.time()
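
For context, the added line simply round-trips the already-encoded prompt_tokens back to text, so the exact model input is echoed before timing and generation begin. A minimal, self-contained sketch of that round-trip, using a hypothetical toy tokenizer rather than the repo's real class:

    # Toy stand-in for the repo's tokenizer (hypothetical, not the actual
    # tokenizer in this codebase), illustrating the decode round-trip:
    class ToyTokenizer:
        def __init__(self, vocab):
            self.itos = list(vocab)
            self.stoi = {w: i for i, w in enumerate(self.itos)}
        def encode(self, text):
            return [self.stoi[w] for w in text.split()]
        def decode(self, tokens):
            return " ".join(self.itos[t] for t in tokens)

    tokenizer = ToyTokenizer(["The", "capital", "of", "France", "is"])
    prompt_tokens = tokenizer.encode("The capital of France is")
    # The commit adds this print before each generate() call, so the exact
    # prompt text the model sees appears in the output:
    print(tokenizer.decode(prompt_tokens))  # -> The capital of France is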