# nanochat/cloud/serve.sky.yaml
# 2025-10-13 21:16:11 -04:00
#
# 36 lines
# 893 B
# YAML

# Serve a trained nanochat model with the web UI
#
# Launch:
# sky launch -c nanochat-serve cloud/serve.sky.yaml --infra <aws|gcp|nebius|lambda|etc>
#
# Access the web UI:
# sky status --endpoint 8000 nanochat-serve
#
# Then open the URL in your browser to chat with your model!
# Task name used for display; the cluster name is chosen with `-c` at launch.
name: nanochat-serve

# Hardware and networking requested for the serving VM.
resources:
  accelerators: H100:1  # Single GPU sufficient for inference
  ports: 8000  # Expose port 8000 for the web UI
  disk_size: 100  # Disk size in GB
file_mounts:
  # Make the contents of the bucket available at /tmp/nanochat on the cluster.
  # The run step sets NANOCHAT_BASE_DIR to this same path, so the two must
  # stay in sync. Presumably the bucket holds the trained model artifacts --
  # confirm against whatever wrote to it.
  /tmp/nanochat:
    source: s3://nanochat-data

# Upload the current directory to the cluster as the working directory.
# The setup/run steps reference repo-relative paths (.venv, rustbpe/,
# scripts.chat_web), so launch from the repository root.
workdir: .
# One-time environment preparation: Python dependencies, Rust toolchain, and
# the rustbpe extension build.
setup: |
  # Stop at the first failing step (including a failed download inside a
  # pipe) so the error is reported where it happens.
  set -eo pipefail

  # Install uv only when the image does not already provide it. The uv
  # installer places the binary in ~/.local/bin by default.
  if ! command -v uv > /dev/null 2>&1; then
    curl -LsSf https://astral.sh/uv/install.sh | sh
    export PATH="$HOME/.local/bin:$PATH"
  fi

  # Create .venv and install the project's Python dependencies.
  uv sync

  # Install the Rust toolchain non-interactively (-y) and load cargo into
  # this shell's environment.
  curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
  source "$HOME/.cargo/env"

  source .venv/bin/activate
  # Clear CONDA_PREFIX before invoking maturin -- presumably because maturin
  # refuses to run when both a virtualenv and a conda env are set (confirm).
  unset CONDA_PREFIX
  # Build the rustbpe crate in release mode and install it into the venv.
  uv run maturin develop --release --manifest-path rustbpe/Cargo.toml
# Start the chat web server on the port exposed under `resources.ports`.
run: |
  # Abort if the virtualenv cannot be activated, rather than silently
  # falling back to whatever `python` is on PATH.
  set -e

  # Same path as the file_mounts target, so nanochat reads from the mounted
  # bucket.
  export NANOCHAT_BASE_DIR=/tmp/nanochat
  source .venv/bin/activate
  # Bind to all interfaces so the server is reachable from outside the VM.
  python -m scripts.chat_web --host 0.0.0.0 --port 8000