diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index f4a9442d..06cb35d1 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -4,8 +4,7 @@ on: push: branches: [master] paths: - - 'nanochat/ui.html' - - 'nanochat/logo.svg' + - 'nanochat/**' - 'scripts/chat_web.py' - 'scripts/chat_cli.py' diff --git a/nanochat/engine.py b/nanochat/engine.py index aa2e6a98..4bdfd654 100644 --- a/nanochat/engine.py +++ b/nanochat/engine.py @@ -17,7 +17,7 @@ import signal import warnings from contextlib import contextmanager from collections import deque -from nanochat.common import compute_init, autodetect_device_type +from nanochat.common import compute_init, autodetect_device_type, COMPUTE_DTYPE from nanochat.checkpoint_manager import load_model # ----------------------------------------------------------------------------- @@ -183,7 +183,7 @@ class Engine: # As a quick hack, we're making generate() function inherit and know about this repo-wise assumption. # I think there has to be a bigger refactor to deal with device/dtype tracking across the codebase. # In particular, the KVCache should allocate its tensors lazily - dtype = torch.bfloat16 if device.type == "cuda" else torch.float32 + dtype = COMPUTE_DTYPE rng = torch.Generator(device=device) rng.manual_seed(seed)