From 4cfa58829e96adbcc3e4ec3f2471232df9d3e8da Mon Sep 17 00:00:00 2001
From: sandog
Date: Sat, 7 Mar 2026 11:38:17 +0800
Subject: [PATCH 1/2] perf: optimize prepend operation in tokenizer encode method

Replace list.insert(0, ...) with list concatenation. Both are O(n) in the
row length, but concatenation avoids shifting elements in place and runs
with a lower constant factor. This improves performance when encoding
large batches of text.

Before:
- Single string: ids.insert(0, prepend_id) - O(n)
- Batch: for ids_row in ids: ids_row.insert(0, prepend_id) - O(n*m)

After:
- Single string: ids = [prepend_id] + ids - O(n) but faster constant
- Batch: ids = [[prepend_id] + row for row in ids] - O(n*m) but faster

The code comments already noted this inefficiency (TODO: slightly
inefficient here?), but it was never addressed until now.
---
 nanochat/tokenizer.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/nanochat/tokenizer.py b/nanochat/tokenizer.py
index a2146c2..b06d756 100644
--- a/nanochat/tokenizer.py
+++ b/nanochat/tokenizer.py
@@ -232,15 +232,16 @@ class RustBPETokenizer:
         if isinstance(text, str):
             ids = self.enc.encode_ordinary(text)
+            # Use list concatenation instead of insert(0, ...) for a faster prepend
             if prepend is not None:
-                ids.insert(0, prepend_id) # TODO: slightly inefficient here? :( hmm
+                ids = [prepend_id] + ids
             if append is not None:
                 ids.append(append_id)
         elif isinstance(text, list):
             ids = self.enc.encode_ordinary_batch(text, num_threads=num_threads)
+            # Use list concatenation instead of insert(0, ...) for a faster prepend per row
             if prepend is not None:
-                for ids_row in ids:
-                    ids_row.insert(0, prepend_id) # TODO: same
+                ids = [[prepend_id] + row for row in ids]
             if append is not None:
                 for ids_row in ids:
                     ids_row.append(append_id)

From ed565be892e5800e1ab8b055ea32fdfc7350e946 Mon Sep 17 00:00:00 2001
From: Sandog
Date: Wed, 11 Mar 2026 04:09:27 +0800
Subject: [PATCH 2/2] Add bounds checking to KVCache.advance() method

- Added validation to prevent cache overflow beyond max_seq_len
- Raises ValueError if cache position would exceed maximum sequence length
- This helps catch potential bugs early during inference
---
 nanochat/engine.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/nanochat/engine.py b/nanochat/engine.py
index 4724c8f..e5e33d0 100644
--- a/nanochat/engine.py
+++ b/nanochat/engine.py
@@ -115,7 +115,11 @@ class KVCache:
 
     def advance(self, num_tokens):
         """Advance the cache position by num_tokens."""
-        self.cache_seqlens += num_tokens
+        # Validate that we don't exceed max sequence length
+        new_seqlens = self.cache_seqlens + num_tokens
+        if torch.any(new_seqlens > self.max_seq_len):
+            raise ValueError(f"Cache overflow: attempted to advance beyond max_seq_len={self.max_seq_len}")
+        self.cache_seqlens.copy_(new_seqlens)
 
     def prefill(self, other):
         """