diff --git a/README.md b/README.md
index 483f3e38..7c7ae9fc 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,14 @@ uv sync --extra cpu    # (or) Use for CPU-only / MPS
 source .venv/bin/activate
 ```
 
+If you plan to run `scripts.chat_web` to chat with your model via a web UI, also install the optional "web" extra (it provides FastAPI and uvicorn):
+
+```bash
+uv sync --extra gpu --extra web    # Use for CUDA (A100/H100/etc.)
+uv sync --extra cpu --extra web    # (or) Use for CPU-only / MPS
+source .venv/bin/activate
+```
+
 For development (adds pytest, matplotlib, ipykernel, transformers, etc.):
 
 ```bash
@@ -51,7 +59,7 @@ The most fun you can have is to train your own GPT-2 and talk to it. The entire
 bash runs/speedrun.sh
 ```
 
-You may wish to do so in a screen session as this will take ~3 hours to run. Once it's done, you can talk to it via the ChatGPT-like web UI. Make sure again that your local uv virtual environment is active (run `source .venv/bin/activate`), and serve it:
+You may wish to do so in a screen session as this will take ~3 hours to run. Once it's done, you can talk to it via the ChatGPT-like web UI. Once again, make sure your local uv virtual environment is active (run `source .venv/bin/activate`) and has the "web" extra installed (`uv sync --extra gpu --extra web`, or `--extra cpu` instead of `--extra gpu` for CPU-only / MPS), then serve it:
 
 ```bash
 python -m scripts.chat_web
diff --git a/pyproject.toml b/pyproject.toml
index 0527369f..5a6715c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,14 +6,12 @@ readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "datasets>=4.0.0",
-    "fastapi>=0.117.1",
     "kernels>=0.11.7",
     "psutil>=7.1.0",
     "rustbpe>=0.1.0",
     "tiktoken>=0.11.0",
     "tokenizers>=0.22.0",
     "torch==2.9.1",
-    "uvicorn>=0.36.0",
     "wandb>=0.21.3",
 ]
 
@@ -60,6 +58,10 @@ cpu = [
 gpu = [
     "torch==2.9.1",
 ]
+web = [
+    "fastapi>=0.117.1",
+    "uvicorn>=0.36.0",
+]
 
 [tool.uv]
 default-groups = []
diff --git a/runs/runcpu.sh b/runs/runcpu.sh
index 853fa1f3..3e936cea 100755
--- a/runs/runcpu.sh
+++ b/runs/runcpu.sh
@@ -62,4 +62,5 @@ python -m scripts.chat_sft \
 # python -m scripts.chat_cli -p "What is the capital of France?"
 
 # Chat with the model over a pretty WebUI ChatGPT style
+# uv sync --extra cpu --extra web
 # python -m scripts.chat_web
diff --git a/runs/speedrun.sh b/runs/speedrun.sh
index 48fcc68a..9b279b4b 100644
--- a/runs/speedrun.sh
+++ b/runs/speedrun.sh
@@ -89,6 +89,7 @@ torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft
 # python -m scripts.chat_cli -p "Why is the sky blue?"
 
 # even better, chat with your model over a pretty WebUI ChatGPT style
+# uv sync --extra gpu --extra web
 # python -m scripts.chat_web
 
 # -----------------------------------------------------------------------------
diff --git a/scripts/chat_web.py b/scripts/chat_web.py
index ffaf7dab..0d017448 100644
--- a/scripts/chat_web.py
+++ b/scripts/chat_web.py
@@ -38,9 +38,12 @@ import asyncio
 import logging
 import random
 from contextlib import asynccontextmanager
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import StreamingResponse, HTMLResponse, FileResponse
+try:
+    from fastapi import FastAPI, HTTPException
+    from fastapi.middleware.cors import CORSMiddleware
+    from fastapi.responses import StreamingResponse, HTMLResponse, FileResponse
+except ImportError as exc:
+    raise SystemExit("Missing web dependencies, install the extra 'web'") from exc
 from pydantic import BaseModel
 from typing import List, Optional, AsyncGenerator
 from dataclasses import dataclass