diff --git a/nanochat/gpt.py b/nanochat/gpt.py index 5a066b2..82f13b6 100644 --- a/nanochat/gpt.py +++ b/nanochat/gpt.py @@ -22,6 +22,7 @@ import torch.nn.functional as F from nanochat.common import get_dist_info, print0 from nanochat.muon import Muon, DistMuon from nanochat.adamw import DistAdamW +from nanochat.engine import KVCache @dataclass class GPTConfig: