diff --git a/runs/runcpu.sh b/runs/runcpu.sh
index 853fa1f3..bf6bab33 100755
--- a/runs/runcpu.sh
+++ b/runs/runcpu.sh
@@ -26,7 +26,7 @@ python -m nanochat.dataset -n 8
 python -m scripts.tok_train --max-chars=2000000000
 python -m scripts.tok_eval
 
-# train a small 4 layer model
+# train a small 6 layer model
 # I tuned this run to complete in about 30 minutes on my MacBook Pro M3 Max.
 # To get better results, try increasing num_iterations, or get other ideas from your favorite LLM.
 python -m scripts.base_train \