mirror of
https://github.com/karpathy/nanochat.git
synced 2025-12-06 04:12:13 +00:00
Refactor run_t4_quick_test.sh to check for existing installations of uv and rustc before installation, and improve data download logic to skip existing files. Update checkpoint directory naming conventions in training scripts for consistency.
This commit is contained in:
parent
7456de29b9
commit
2020eb9973
|
|
@ -13,7 +13,11 @@ export NANOCHAT_BASE_DIR=".cache/nanochat"
|
|||
# Ensure the local cache/working directory exists.
# NOTE(review): NANOCHAT_BASE_DIR is exported out of view (hunk context shows
# ".cache/nanochat"); unquoted expansion assumes it never contains spaces — confirm.
mkdir -p $NANOCHAT_BASE_DIR
|
||||
|
||||
# Install uv (Python package / venv manager) only when it is not already on
# PATH, so repeated runs of the script skip the network download entirely.
# (The scraped diff kept the superseded unconditional `curl | sh` line next to
# this guarded version; only the guarded form is correct.)
if ! command -v uv &> /dev/null; then
    curl -LsSf https://astral.sh/uv/install.sh | sh
fi
|
||||
|
||||
|
||||
# Set up the project virtual environment; create it only on the first run.
if [ ! -d ".venv" ]; then
    uv venv
fi
|
||||
|
|
@ -30,22 +34,36 @@ echo "📊 Wandb运行名称: $WANDB_RUN"
|
|||
python -m nanochat.report reset
|
||||
|
||||
# Install the Rust toolchain (needed to build the rustbpe tokenizer) only if
# rustc is not already available; `-y` makes rustup run non-interactively.
# (The scraped diff kept the superseded unconditional install line next to
# this guarded version; only the guarded form is correct.)
if ! command -v rustc &> /dev/null; then
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
fi
# Put cargo/rustc on PATH for this shell, then build the tokenizer extension
# in release mode into the uv-managed environment.
source "$HOME/.cargo/env"
uv run maturin develop --release --manifest-path rustbpe/Cargo.toml
|
||||
|
||||
# Download and unpack the evaluation bundle, skipping any step whose output
# already exists: if the unpacked directory is present, do nothing; if only
# the zip is present, reuse it instead of re-downloading.
# (The scraped diff kept a superseded unconditional echo+curl pair ahead of
# the guarded inner check, which would re-download even when the zip exists;
# only the guarded form is correct. Expansions are quoted defensively.)
EVAL_BUNDLE_URL="https://karpathy-public.s3.us-west-2.amazonaws.com/eval_bundle.zip"
if [ ! -d "$NANOCHAT_BASE_DIR/eval_bundle" ]; then
    if [ ! -f "eval_bundle.zip" ]; then
        echo "📥 下载评估数据包..."
        curl -L -o eval_bundle.zip "$EVAL_BUNDLE_URL"
    else
        echo "🗂️ 已存在 eval_bundle.zip,跳过下载。"
    fi
    unzip -q eval_bundle.zip
    rm eval_bundle.zip
    mv eval_bundle "$NANOCHAT_BASE_DIR"
else
    echo "✅ 评估数据包已经存在,跳过下载。"
fi
|
||||
|
||||
# Fetch the identity-conversation dataset only when it is missing; otherwise
# reuse the cached copy.
# (The scraped diff kept a superseded unconditional curl line ahead of this
# guarded version, which would defeat the skip-if-exists check; only the
# guarded form is correct. The target path expansion is quoted defensively.)
if [ ! -f "$NANOCHAT_BASE_DIR/identity_conversations.jsonl" ]; then
    curl -L -o "$NANOCHAT_BASE_DIR/identity_conversations.jsonl" https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
else
    echo "✅ identity_conversations.jsonl 已存在,跳过下载。"
fi

echo "📊 开始数据准备..."
|
||||
|
||||
|
|
|
|||
|
|
@ -256,8 +256,8 @@ for step in range(num_iterations):
|
|||
if master_process:
|
||||
base_dir = get_base_dir()
|
||||
depth = model.config.n_layer
|
||||
model_tag = f"t4_d{depth}" # base the model tag on the depth of the base model
|
||||
checkpoint_dir = os.path.join(base_dir, "t4_chatsft_checkpoints", model_tag)
|
||||
model_tag = f"d{depth}" # base the model tag on the depth of the base model
|
||||
checkpoint_dir = os.path.join(base_dir, "chatsft_checkpoints", model_tag)
|
||||
model_config_kwargs = model.config.__dict__ # slightly naughty, abusing the simplicity of GPTConfig, TODO nicer
|
||||
save_checkpoint(
|
||||
checkpoint_dir,
|
||||
|
|
|
|||
|
|
@ -209,8 +209,8 @@ while True:
|
|||
|
||||
# save checkpoint at the end of the run (only on master process)
|
||||
if master_process and last_step and not dry_run:
|
||||
output_dirname = f"t4_d{depth}"
|
||||
checkpoint_dir = os.path.join(base_dir, "t4_mid_checkpoints", output_dirname)
|
||||
output_dirname = f"d{depth}"
|
||||
checkpoint_dir = os.path.join(base_dir, "mid_checkpoints", output_dirname)
|
||||
save_checkpoint(
|
||||
checkpoint_dir,
|
||||
step,
|
||||
|
|
|
|||
|
|
@ -238,8 +238,8 @@ for step in range(num_iterations + 1):
|
|||
|
||||
# save checkpoint at the end of the run (only on master process)
|
||||
if master_process and last_step:
|
||||
output_dirname = model_tag if model_tag else f"t4_d{depth}"
|
||||
checkpoint_dir = os.path.join(base_dir, "t4_checkpoints", output_dirname)
|
||||
output_dirname = model_tag if model_tag else f"d{depth}"
|
||||
checkpoint_dir = os.path.join(base_dir, "base_checkpoints", output_dirname)
|
||||
save_checkpoint(
|
||||
checkpoint_dir,
|
||||
step,
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user