This commit is contained in:
Pablo Fernandez 2026-01-11 17:55:47 -08:00 committed by GitHub
commit 1892bae753
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -312,6 +312,7 @@ if __name__ == "__main__":
# Fresh list for this run's tokens.
# NOTE(review): presumably filled by the streaming loop that follows; its body is outside this view.
generated_tokens = []
# Block until previously queued CUDA work has completed before reading the clock
# (presumably so earlier GPU work does not leak into the measurement).
torch.cuda.synchronize()
# Wall-clock start. The timer begins before the prompt is decoded/printed and
# before the stream is created, so those steps fall inside the timed region.
t0 = time.time()
# Echo the decoded prompt text to stdout.
print(tokenizer.decode(prompt_tokens))
# Create the token stream from the model; it is consumed by the loop under autocast_ctx below.
stream = model.generate(prompt_tokens, **kwargs)
with autocast_ctx:
for token in stream:
@ -326,6 +327,7 @@ if __name__ == "__main__":
# generate tokens with Engine
# Fresh list for this run's tokens.
# NOTE(review): presumably filled by the consuming loop, which is outside this view.
generated_tokens = []
# Wrap the model and tokenizer in an Engine instance used for generation.
engine = Engine(model, tokenizer)
# Echo the decoded prompt text to stdout.
print(tokenizer.decode(prompt_tokens))
# Request a single sample; remaining generation options are forwarded via kwargs.
stream = engine.generate(prompt_tokens, num_samples=1, **kwargs) # note: runs in fp32
# Block until previously queued CUDA work has completed, then start the wall-clock timer.
# Here the timer starts after the prompt has been printed and the stream created,
# so those steps are not part of the timed region.
torch.cuda.synchronize()
t0 = time.time()