turn tokenizer train back on

2026-06-16 02:59:10 +00:00 · 2026-03-13 00:07:06 +01:00 · 2026-03-13 00:07:06 +01:00 · bf19cb325c
commit bf19cb325c
parent fcc4de7b96
1 changed file with 2 additions and 3 deletions
--- a/runs/runcpu.sh
+++ b/runs/runcpu.sh
@@ -31,10 +31,9 @@ if [ -z "$WANDB_RUN" ]; then
 fi

 # train tokenizer on ~2B characters (~34 seconds on my MacBook Pro M3 Max)
-# python -m nanochat.dataset -n 8
-# python -m scripts.tok_train --max-chars=2000000000
+python -m nanochat.dataset -n 8
+python -m scripts.tok_train --max-chars=2000000000
 python -m scripts.tok_eval
-# Target directory: /Users/sushrutkarnik_1/.cache/nanochat/base_data_climbmix

 # train a small 4 layer model
 # I tuned this run to complete in about 30 minutes on my MacBook Pro M3 Max.