Merge 69b7cc9ac5 into bc1fca39f3

mqa -> gqa to reduce confusion
remove .DS_Store
2025-12-06 04:12:13 +00:00 · 2025-11-16 17:51:00 -08:00 · 2025-11-15 15:43:37 +00:00 · 2025-10-29 17:20:46 -06:00 · 2025-10-29 17:18:28 -06:00 · 2025-10-18 07:08:03 -06:00
3 changed files with 3 additions and 6 deletions
--- a/nanochat/gpt.py
+++ b/nanochat/gpt.py
@ -8,7 +8,7 @@ Notable features:
 - norm after token embedding
 - no learnable params in rmsnorm
 - no bias in linear layers
- Multi-Query Attention (MQA) support for more efficient inference
+- Grouped-Query Attention (GQA) support for more efficient inference
 """

 import math
@ -29,7 +29,7 @@ class GPTConfig:
    vocab_size: int = 50304
    n_layer: int = 12
    n_head: int = 6 # number of query heads
-    n_kv_head: int = 6 # number of key/value heads (MQA)
+    n_kv_head: int = 6 # number of key/value heads (GQA)
    n_embd: int = 768


--- a/run1000.sh
+++ b/run1000.sh
@ -9,7 +9,6 @@ export OMP_NUM_THREADS=1
 export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat"
 mkdir -p $NANOCHAT_BASE_DIR
 command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
-[ -d ".venv" ] || uv venv
 uv sync --extra gpu
 source .venv/bin/activate
 if [ -z "$WANDB_RUN" ]; then
--- a/speedrun.sh
+++ b/speedrun.sh
@ -20,9 +20,7 @@ mkdir -p $NANOCHAT_BASE_DIR

 # install uv (if not already installed)
 command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
-# create a .venv local virtual environment (if it doesn't exist)
-[ -d ".venv" ] || uv venv
-# install the repo dependencies
+# create a .venv local virtual environment (if it doesn't exist) and install the repo dependencies
 uv sync --extra gpu
 # activate venv so that `python` uses the project's venv instead of system python
 source .venv/bin/activate
Author	SHA1	Message	Date
kiankyars	f9ed8bde83	Merge `69b7cc9ac5` into `bc1fca39f3`	2025-11-16 17:51:00 -08:00
Andrej Karpathy	bc1fca39f3	mqa -> gqa to reduce confusion	2025-11-15 15:43:37 +00:00
Kian Kyars	69b7cc9ac5	remove .DS_Store	2025-10-29 17:20:46 -06:00
Kian Kyars	4a1104ed1c	merge master	2025-10-29 17:18:28 -06:00
Kian Kyars	18aa43744d	QOL(shell scripts) Remove redundant uv venv from shell scripts.	2025-10-18 07:08:03 -06:00