mirror of
https://github.com/karpathy/nanochat.git
synced 2025-12-15 16:52:14 +00:00
cleaning up speedrun.sh
This commit is contained in:
parent
cf5e213613
commit
4163c648c6
|
|
@ -76,14 +76,6 @@ python -m scripts.tok_eval
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
# Base model (pretraining)
|
# Base model (pretraining)
|
||||||
|
|
||||||
# Download the eval_bundle from s3 to evaluate CORE metric during training (~162MB)
|
|
||||||
EVAL_BUNDLE_URL=https://karpathy-public.s3.us-west-2.amazonaws.com/eval_bundle.zip
|
|
||||||
if [ ! -d "$NANOCHAT_BASE_DIR/eval_bundle" ]; then
|
|
||||||
curl -L -o eval_bundle.zip $EVAL_BUNDLE_URL
|
|
||||||
unzip -q eval_bundle.zip
|
|
||||||
rm eval_bundle.zip
|
|
||||||
mv eval_bundle $NANOCHAT_BASE_DIR
|
|
||||||
fi
|
|
||||||
|
|
||||||
# The d20 model is 561M parameters.
|
# The d20 model is 561M parameters.
|
||||||
# Chinchilla says #tokens = 20X #params, so we need 561e6 * 20 = 11.2B tokens.
|
# Chinchilla says #tokens = 20X #params, so we need 561e6 * 20 = 11.2B tokens.
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user