Merge 59487556ce into bc1fca39f3

mqa -> gqa to reduce confusion
Add pyproject for rustbpe standalone
2025-12-06 04:12:13 +00:00 · 2025-11-15 23:51:48 +08:00 · 2025-11-15 15:43:37 +00:00 · 2025-11-05 13:58:12 +02:00
2 changed files with 20 additions and 2 deletions
--- a/nanochat/gpt.py
+++ b/nanochat/gpt.py
@@ -8,7 +8,7 @@ Notable features:
 - norm after token embedding
 - no learnable params in rmsnorm
 - no bias in linear layers
-- Multi-Query Attention (MQA) support for more efficient inference
+- Group-Query Attention (GQA) support for more efficient inference
 """

 import math
@@ -29,7 +29,7 @@ class GPTConfig:
    vocab_size: int = 50304
    n_layer: int = 12
    n_head: int = 6 # number of query heads
-    n_kv_head: int = 6 # number of key/value heads (MQA)
+    n_kv_head: int = 6 # number of key/value heads (GQA)
    n_embd: int = 768


--- a/rustbpe/pyproject.toml
+++ b/rustbpe/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "rustbpe"
+version = "0.1.0"
+description = "Rust BPE tokenizer from nanochat as standalone package"
+readme = "README.md"
+requires-python = ">=3.10"
+license = {text = "MIT"}
+dependencies = []
+
+[build-system]
+requires = ["maturin>=1.0"]
+build-backend = "maturin"
+
+[tool.maturin]
+module-name = "rustbpe"
+bindings = "pyo3"
+python-source = "."
+features = ["pyo3/extension-module"]
Author	SHA1	Message	Date
Tensor Templar	7a762543d5	Merge `59487556ce` into `bc1fca39f3`	2025-11-15 23:51:48 +08:00
Andrej Karpathy	bc1fca39f3	mqa -> gqa to reduce confusion	2025-11-15 15:43:37 +00:00
TensorTemplar	59487556ce	Add pyproject for rustbpe standalone	2025-11-05 13:58:12 +02:00