diff --git a/README.md b/README.md
index 681a5df..483f3e3 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,22 @@ See [dev/LEADERBOARD.md](dev/LEADERBOARD.md) for more docs on how to interpret a
 
 ## Getting started
 
+### Setup
+
+nanochat uses [uv](https://docs.astral.sh/uv/) for dependency management. To install:
+
+```bash
+uv sync --extra gpu # Use for CUDA (A100/H100/etc.)
+uv sync --extra cpu # (or) Use for CPU-only / MPS
+source .venv/bin/activate
+```
+
+For development (adds pytest, matplotlib, ipykernel, transformers, etc.):
+
+```bash
+uv sync --extra gpu --group dev
+```
+
 ### Reproduce and talk to GPT-2
 
 The most fun you can have is to train your own GPT-2 and talk to it. The entire pipeline to do so is contained in the single file [runs/speedrun.sh](runs/speedrun.sh), which is designed to be run on an 8XH100 GPU node. Boot up a new 8XH100 GPU box from your favorite provider (e.g. I use and like [Lambda](https://lambda.ai/service/gpu-cloud)), and kick off the training script:
diff --git a/pyproject.toml b/pyproject.toml
index f662fbf..a6e2cca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,23 +7,23 @@ requires-python = ">=3.10"
 dependencies = [
     "datasets>=4.0.0",
     "fastapi>=0.117.1",
-    "ipykernel>=7.1.0",
     "kernels>=0.11.7",
-    "matplotlib>=3.10.8",
     "psutil>=7.1.0",
-    "python-dotenv>=1.2.1",
     "rustbpe>=0.1.0",
     "tiktoken>=0.11.0",
     "tokenizers>=0.22.0",
     "torch==2.9.1",
-    "transformers>=4.57.3",
     "uvicorn>=0.36.0",
     "wandb>=0.21.3",
 ]
 
 [dependency-groups]
 dev = [
+    "ipykernel>=7.1.0",
+    "matplotlib>=3.10.8",
     "pytest>=8.0.0",
+    "python-dotenv>=1.2.1",
+    "transformers>=4.57.3",
 ]
 
 [tool.pytest.ini_options]
@@ -61,6 +61,7 @@ gpu = [
 ]
 
 [tool.uv]
+default-groups = []
 conflicts = [
     [
         { extra = "cpu" },
diff --git a/uv.lock b/uv.lock
index 85dd9bd..9455814 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1492,11 +1492,8 @@ source = { virtual = "." }
 dependencies = [
     { name = "datasets" },
     { name = "fastapi" },
-    { name = "ipykernel" },
     { name = "kernels" },
-    { name = "matplotlib" },
     { name = "psutil" },
-    { name = "python-dotenv" },
     { name = "rustbpe" },
     { name = "tiktoken" },
     { name = "tokenizers" },
@@ -1504,7 +1501,6 @@ dependencies = [
     { name = "torch", version = "2.9.1", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu')" },
     { name = "torch", version = "2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" },
     { name = "torch", version = "2.9.1+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "extra == 'extra-8-nanochat-gpu'" },
-    { name = "transformers" },
     { name = "uvicorn" },
     { name = "wandb" },
 ]
@@ -1520,32 +1516,38 @@ gpu = [
 ]
 [package.dev-dependencies]
 dev = [
+    { name = "ipykernel" },
+    { name = "matplotlib" },
     { name = "pytest" },
+    { name = "python-dotenv" },
+    { name = "transformers" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "datasets", specifier = ">=4.0.0" },
     { name = "fastapi", specifier = ">=0.117.1" },
-    { name = "ipykernel", specifier = ">=7.1.0" },
     { name = "kernels", specifier = ">=0.11.7" },
-    { name = "matplotlib", specifier = ">=3.10.8" },
     { name = "psutil", specifier = ">=7.1.0" },
-    { name = "python-dotenv", specifier = ">=1.2.1" },
     { name = "rustbpe", specifier = ">=0.1.0" },
     { name = "tiktoken", specifier = ">=0.11.0" },
     { name = "tokenizers", specifier = ">=0.22.0" },
     { name = "torch", specifier = "==2.9.1" },
     { name = "torch", marker = "extra == 'cpu'", specifier = "==2.9.1", index = "https://download.pytorch.org/whl/cpu", conflict = { package = "nanochat", extra = "cpu" } },
     { name = "torch", marker = "extra == 'gpu'", specifier = "==2.9.1", index = "https://download.pytorch.org/whl/cu128", conflict = { package = "nanochat", extra = "gpu" } },
-    { name = "transformers", specifier = ">=4.57.3" },
     { name = "uvicorn", specifier = ">=0.36.0" },
     { name = "wandb", specifier = ">=0.21.3" },
 ]
 provides-extras = ["cpu", "gpu"]
 
 [package.metadata.requires-dev]
-dev = [{ name = "pytest", specifier = ">=8.0.0" }]
+dev = [
+    { name = "ipykernel", specifier = ">=7.1.0" },
+    { name = "matplotlib", specifier = ">=3.10.8" },
+    { name = "pytest", specifier = ">=8.0.0" },
+    { name = "python-dotenv", specifier = ">=1.2.1" },
+    { name = "transformers", specifier = ">=4.57.3" },
+]
 
 [[package]]
 name = "nest-asyncio"