mirror of
https://github.com/karpathy/nanochat.git
synced 2026-04-01 21:25:21 +00:00
refactor: Pre-allocate larger KVCache to improve performance
The `KVCache.insert_kv` function dynamically resizes the cache tensor whenever it runs out of space. This can be inefficient and lead to performance degradation, especially during long generation sequences. This commit improves performance by pre-allocating a larger cache upfront: when the cache is lazily initialized, the sequence-length dimension of `kv_shape` is doubled for the allocation and then restored for later shape checks. This reduces the need for frequent resizing and minimizes memory reallocation overhead.
This commit is contained in:
parent
6c6c1c2e67
commit
b70da6d907
|
|
@@ -106,7 +106,12 @@ class KVCache:
|
|||
def insert_kv(self, layer_idx, k, v):
|
||||
# Lazy initialize the cache here because we need to know the dtype/device
|
||||
if self.kv_cache is None:
|
||||
# Pre-allocate a larger cache to avoid frequent resizing
|
||||
self.kv_shape = list(self.kv_shape)
|
||||
self.kv_shape[4] *= 2 # Double the sequence length for pre-allocation
|
||||
self.kv_cache = torch.empty(self.kv_shape, dtype=k.dtype, device=k.device)
|
||||
self.kv_shape[4] //= 2 # a bit of a hack to restore the original shape for future checks
|
||||
|
||||
# Insert new keys/values to the cache and return the full cache so far
|
||||
B, H, T_add, D = k.size()
|
||||
t0, t1 = self.pos, self.pos + T_add
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user