From c6381cabc2d15ad25e39bd9a27fca10dc6de1233 Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Fri, 27 Mar 2026 17:46:01 +0100
Subject: [PATCH] keep original cpu/gpu extra

---
 README.md           | 17 +++++++++--------
 runs/runcpu.sh      |  2 +-
 runs/speedrun.sh    |  2 +-
 scripts/chat_web.py |  2 +-
 4 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 72331c9d..7c7ae9fc 100644
--- a/README.md
+++ b/README.md
@@ -37,18 +37,20 @@ uv sync --extra cpu # (or) Use for CPU-only / MPS
 source .venv/bin/activate
 ```
 
+If you plan on running `scripts.chat_web` to chat with your model via a web UI, add the extra "web":
+
+```bash
+uv sync --extra gpu --extra web # Use for CUDA (A100/H100/etc.)
+uv sync --extra cpu --extra web # (or) Use for CPU-only / MPS
+source .venv/bin/activate
+```
+
 For development (adds pytest, matplotlib, ipykernel, transformers, etc.):
 
 ```bash
 uv sync --extra gpu --group dev
 ```
 
-If you plan on running `scripts.chat_web`:
-
-```bash
-uv sync --extra web
-```
-
 ### Reproduce and talk to GPT-2
 
 The most fun you can have is to train your own GPT-2 and talk to it. The entire pipeline to do so is contained in the single file [runs/speedrun.sh](runs/speedrun.sh), which is designed to be run on an 8XH100 GPU node. Boot up a new 8XH100 GPU box from your favorite provider (e.g. I use and like [Lambda](https://lambda.ai/service/gpu-cloud)), and kick off the training script:
@@ -57,10 +59,9 @@ The most fun you can have is to train your own GPT-2 and talk to it. The entire
 ```bash
 bash runs/speedrun.sh
 ```
 
-You may wish to do so in a screen session as this will take ~3 hours to run. Once it's done, you can talk to it via the ChatGPT-like web UI. Make sure again that your local uv virtual environment is active (run `source .venv/bin/activate`) and has the `web` extra installed, and then serve it:
+You may wish to do so in a screen session as this will take ~3 hours to run. Once it's done, you can talk to it via the ChatGPT-like web UI. Make sure again that your local uv virtual environment (with the "web" extra) is active (run `source .venv/bin/activate`), and serve it:
 
 ```bash
-uv sync --extra web
 python -m scripts.chat_web
 ```
diff --git a/runs/runcpu.sh b/runs/runcpu.sh
index 94e029a7..3e936cea 100755
--- a/runs/runcpu.sh
+++ b/runs/runcpu.sh
@@ -62,5 +62,5 @@ python -m scripts.chat_sft \
 # python -m scripts.chat_cli -p "What is the capital of France?"
 
 # Chat with the model over a pretty WebUI ChatGPT style
-# uv sync --extra web
+# uv sync --extra cpu --extra web
 # python -m scripts.chat_web
diff --git a/runs/speedrun.sh b/runs/speedrun.sh
index 3870adeb..9b279b4b 100644
--- a/runs/speedrun.sh
+++ b/runs/speedrun.sh
@@ -89,7 +89,7 @@ torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft
 # python -m scripts.chat_cli -p "Why is the sky blue?"
 
 # even better, chat with your model over a pretty WebUI ChatGPT style
-# uv sync --extra web
+# uv sync --extra gpu --extra web
 # python -m scripts.chat_web
 
 # -----------------------------------------------------------------------------
diff --git a/scripts/chat_web.py b/scripts/chat_web.py
index ee254dc9..0d017448 100644
--- a/scripts/chat_web.py
+++ b/scripts/chat_web.py
@@ -43,7 +43,7 @@ try:
     from fastapi.middleware.cors import CORSMiddleware
     from fastapi.responses import StreamingResponse, HTMLResponse, FileResponse
 except ImportError as exc:
-    raise SystemExit("Missing web dependencies, install with: uv sync --extra web") from exc
+    raise SystemExit("Missing web dependencies, install the extra 'web'") from exc
 from pydantic import BaseModel
 from typing import List, Optional, AsyncGenerator
 from dataclasses import dataclass