Compare commits

...

5 Commits

Author SHA1 Message Date
kiankyars
f9ed8bde83
Merge 69b7cc9ac5 into bc1fca39f3 2025-11-16 17:51:00 -08:00
Andrej Karpathy
bc1fca39f3 mqa -> gqa to reduce confusion 2025-11-15 15:43:37 +00:00
Kian Kyars
69b7cc9ac5 remove .DS_Store 2025-10-29 17:20:46 -06:00
Kian Kyars
4a1104ed1c merge master 2025-10-29 17:18:28 -06:00
Kian Kyars
18aa43744d QOL(shell scripts)
Remove redundant uv venv from shell scripts.
2025-10-18 07:08:03 -06:00
3 changed files with 3 additions and 6 deletions

View File

@@ -8,7 +8,7 @@ Notable features:
- norm after token embedding
- no learnable params in rmsnorm
- no bias in linear layers
- Multi-Query Attention (MQA) support for more efficient inference
- Group-Query Attention (GQA) support for more efficient inference
"""
import math
@@ -29,7 +29,7 @@ class GPTConfig:
vocab_size: int = 50304
n_layer: int = 12
n_head: int = 6 # number of query heads
n_kv_head: int = 6 # number of key/value heads (MQA)
n_kv_head: int = 6 # number of key/value heads (GQA)
n_embd: int = 768

View File

@@ -9,7 +9,6 @@ export OMP_NUM_THREADS=1
export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat"
mkdir -p $NANOCHAT_BASE_DIR
command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
[ -d ".venv" ] || uv venv
uv sync --extra gpu
source .venv/bin/activate
if [ -z "$WANDB_RUN" ]; then

4
speedrun.sh Normal file → Executable file
View File

@@ -20,9 +20,7 @@ mkdir -p $NANOCHAT_BASE_DIR
# install uv (if not already installed)
command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
# create a .venv local virtual environment (if it doesn't exist)
[ -d ".venv" ] || uv venv
# install the repo dependencies
# create a .venv local virtual environment (if it doesn't exist) and install the repo dependencies
uv sync --extra gpu
# activate venv so that `python` uses the project's venv instead of system python
source .venv/bin/activate