mirror of
https://github.com/karpathy/nanochat.git
synced 2026-04-01 13:15:21 +00:00
Merge 861414d500 into 4e1694cc95
This commit is contained in:
commit
3d9856061d
|
|
@ -42,11 +42,12 @@ python -m scripts.base_train \
|
|||
--sample-every=100 \
|
||||
--num-iterations=5000 \
|
||||
--run=$WANDB_RUN
|
||||
python -m scripts.base_eval --device-batch-size=1 --split-tokens=16384 --max-per-task=16
|
||||
python -m scripts.base_eval --model-tag=d6 --device-batch-size=1 --split-tokens=16384 --max-per-task=16
|
||||
|
||||
# SFT (~10 minutes on my MacBook Pro M3 Max)
|
||||
curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
|
||||
python -m scripts.chat_sft \
|
||||
--model-tag=d6 \
|
||||
--max-seq-len=512 \
|
||||
--device-batch-size=32 \
|
||||
--total-batch-size=16384 \
|
||||
|
|
@ -59,7 +60,7 @@ python -m scripts.chat_sft \
|
|||
# The model should be able to say that it is Paris.
|
||||
# It might even know that the color of the sky is blue.
|
||||
# Sometimes the model likes it if you first say Hi before you ask it questions.
|
||||
# python -m scripts.chat_cli -p "What is the capital of France?"
|
||||
# python -m scripts.chat_cli --model-tag=d6 -p "What is the capital of France?"
|
||||
|
||||
# Chat with the model over a pretty WebUI ChatGPT style
|
||||
# python -m scripts.chat_web
|
||||
# python -m scripts.chat_web --model-tag=d6
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ wait $DATASET_DOWNLOAD_PID
|
|||
# d24 model (slightly undertrained to beat GPT-2 => decrease data:params ratio from compute optimal 10.5 (default) to 8)
|
||||
torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=24 --target-param-data-ratio=8 --device-batch-size=16 --fp8 --run=$WANDB_RUN
|
||||
# evaluate the model: CORE metric, BPB on train/val, and draw samples
|
||||
torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch-size=16
|
||||
torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --model-tag=d24 --device-batch-size=16
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# SFT (teach the model conversation special tokens, tool use, multiple choice)
|
||||
|
|
@ -82,14 +82,14 @@ torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch-
|
|||
curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
|
||||
|
||||
# run SFT and eval the model
|
||||
torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --device-batch-size=16 --run=$WANDB_RUN
|
||||
torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft
|
||||
torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --model-tag=d24 --device-batch-size=16 --run=$WANDB_RUN
|
||||
torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- --model-tag=d24 -i sft
|
||||
|
||||
# chat with the model over CLI! Leave out the -p to chat interactively
|
||||
# python -m scripts.chat_cli -p "Why is the sky blue?"
|
||||
# python -m scripts.chat_cli --model-tag=d24 -p "Why is the sky blue?"
|
||||
|
||||
# even better, chat with your model over a pretty WebUI ChatGPT style
|
||||
# python -m scripts.chat_web
|
||||
# python -m scripts.chat_web --model-tag=d24
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Generate the full report by putting together all the sections
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user