mirror of
https://github.com/karpathy/nanochat.git
synced 2026-03-07 09:50:28 +00:00
remove inherited parameters from commandline and replace by model-tag
This commit is contained in:
parent
c7ba252142
commit
00a562cfe9
|
|
@ -47,9 +47,7 @@ python -m scripts.base_eval --device-batch-size=1 --split-tokens=16384 --max-per
|
|||
# SFT (~10 minutes on my MacBook Pro M3 Max)
|
||||
curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
|
||||
python -m scripts.chat_sft \
|
||||
--max-seq-len=512 \
|
||||
--device-batch-size=32 \
|
||||
--total-batch-size=16384 \
|
||||
--model-tag=6 \
|
||||
--eval-every=200 \
|
||||
--eval-tokens=524288 \
|
||||
--num-iterations=1500 \
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ torchrun --standalone --nproc_per_node=8 -m scripts.base_eval -- --device-batch-
|
|||
curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
|
||||
|
||||
# run SFT and eval the model
|
||||
torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --device-batch-size=16 --run=$WANDB_RUN
|
||||
torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --model-tag=26 --run=$WANDB_RUN
|
||||
torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft
|
||||
|
||||
# chat with the model over CLI! Leave out the -p to chat interactively
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user