diff --git a/speedrun.sh b/speedrun.sh index bd8ba4f..9c71886 100644 --- a/speedrun.sh +++ b/speedrun.sh @@ -76,14 +76,6 @@ python -m scripts.tok_eval # ----------------------------------------------------------------------------- # Base model (pretraining) -# Download the eval_bundle from s3 to evaluate CORE metric during training (~162MB) -EVAL_BUNDLE_URL=https://karpathy-public.s3.us-west-2.amazonaws.com/eval_bundle.zip -if [ ! -d "$NANOCHAT_BASE_DIR/eval_bundle" ]; then - curl -L -o eval_bundle.zip $EVAL_BUNDLE_URL - unzip -q eval_bundle.zip - rm eval_bundle.zip - mv eval_bundle $NANOCHAT_BASE_DIR -fi # The d20 model is 561M parameters. # Chinchilla says #tokens = 20X #params, so we need 561e6 * 20 = 11.2B tokens.