diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml
index 4701bab..dcbd614 100644
--- a/.github/workflows/base.yml
+++ b/.github/workflows/base.yml
@@ -12,7 +12,12 @@ on:
 
 jobs:
   test:
-    runs-on: ubuntu-latest
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+        python-version: ['3.10', '3.11']
+      fail-fast: false
 
     steps:
     - name: Checkout code
@@ -21,40 +26,26 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@v4
       with:
-        python-version: '3.10'
+        python-version: ${{ matrix.python-version }}
 
     - name: Install uv
       run: |
        python -m pip install uv
 
-    - name: Create virtual environment with uv
-      run: |
-        uv venv .venv
-
-    - name: Activate virtual environment
-      run: |
-        source .venv/bin/activate
-
     - name: Install dependencies with uv
       run: |
         uv pip install . --system
 
-    - name: Add nanochat to PYTHONPATH
+    - name: Set PYTHONPATH (Unix)
+      if: runner.os != 'Windows'
       run: |
         echo "PYTHONPATH=$(pwd):$PYTHONPATH" >> $GITHUB_ENV
 
-    - name: Install pytest
+    - name: Set PYTHONPATH (Windows)
+      if: runner.os == 'Windows'
       run: |
-        python -m pip install pytest
+        echo "PYTHONPATH=$PWD;$env:PYTHONPATH" >> $env:GITHUB_ENV
 
     - name: Run pytest
       run: |
-        python -m pytest tests/ --maxfail=5 --disable-warnings
-
-    - name: Cache pip dependencies
-      uses: actions/cache@v3
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-pip-${{ hashFiles('**/pyproject.toml') }}
-        restore-keys: |
-          ${{ runner.os }}-pip-
+        uv run pytest tests/ --maxfail=5 --disable-warnings
diff --git a/README.md b/README.md
index 05a214b..a2fde43 100644
--- a/README.md
+++ b/README.md
@@ -109,6 +109,29 @@ This includes all py, rs, html, toml, sh files, excludes the `rustbpe/target` fo
 
 Alternatively, I recommend using [DeepWiki](https://deepwiki.com/) from Devin/Cognition to ask questions of this repo. In the URL of this repo, simply change github.com to deepwiki.com, and you're off. 
 
+## Installation
+
+To install nanochat for development or experimentation:
+
+```bash
+# Clone the repository
+git clone https://github.com/karpathy/nanochat.git
+cd nanochat
+
+# Install dependencies (requires uv)
+uv pip install .
+```
+
+**For GPU users:** The default installation pulls the standard PyTorch wheels from PyPI (CUDA-enabled on Linux x86-64, CPU/MPS on macOS). If you need a specific CUDA or ROCm build instead, install it after installation:
+
+```bash
+# For CUDA (Linux/Windows)
+uv pip install torch --index-url https://download.pytorch.org/whl/cu121
+
+# For ROCm (AMD GPUs)
+uv pip install torch --index-url https://download.pytorch.org/whl/rocm6.0
+```
+
 ## Tests
 
 I haven't invested too much here but some tests exist, especially for the tokenizer. Run e.g. as:
diff --git a/nanochat/common.py b/nanochat/common.py
index 3cbd6b0..49a8b52 100644
--- a/nanochat/common.py
+++ b/nanochat/common.py
@@ -89,28 +89,50 @@ def get_dist_info():
     else:
         return False, 0, 0, 1
 
-def compute_init():
-    """Basic initialization that we keep doing over and over, so make common."""
-    # Check if CUDA is available, otherwise fall back to CPU
+def autodetect_device_type():
+    # prefer to use CUDA if available, otherwise use MPS, otherwise fallback on CPU
     if torch.cuda.is_available():
-        device = torch.device("cuda")
-        torch.manual_seed(42)
-        torch.cuda.manual_seed(42)
+        device_type = "cuda"
+    elif torch.backends.mps.is_available():
+        device_type = "mps"
     else:
-        device = torch.device("cpu")
-        torch.manual_seed(42)
-        logger.warning("CUDA is not available. Falling back to CPU.")
+        device_type = "cpu"
+    print0(f"Autodetected device type: {device_type}")
+    return device_type
+
+def compute_init(device_type="cuda"): # cuda|cpu|mps
+    """Basic initialization that we keep doing over and over, so make common."""
+
+    assert device_type in ["cuda", "mps", "cpu"], "Invalid device type atm"
+    if device_type == "cuda":
+        assert torch.cuda.is_available(), "Your PyTorch installation is not configured for CUDA but device_type is 'cuda'"
+    if device_type == "mps":
+        assert torch.backends.mps.is_available(), "Your PyTorch installation is not configured for MPS but device_type is 'mps'"
+
+    # Reproducibility
+    torch.manual_seed(42)
+    if device_type == "cuda":
+        torch.cuda.manual_seed(42)
+    # skipping full reproducibility for now, possibly investigate slowdown later
+    # torch.use_deterministic_algorithms(True)
+
     # Precision
-    torch.set_float32_matmul_precision("high") # uses tf32 instead of fp32 for matmuls
-    # Distributed setup: Distributed Data Parallel (DDP), optional
+    if device_type == "cuda":
+        torch.set_float32_matmul_precision("high") # uses tf32 instead of fp32 for matmuls
+
+    # Distributed setup: Distributed Data Parallel (DDP), optional, and requires CUDA
     ddp, ddp_rank, ddp_local_rank, ddp_world_size = get_dist_info()
-    if ddp and torch.cuda.is_available():
+    if ddp and device_type == "cuda":
         device = torch.device("cuda", ddp_local_rank)
         torch.cuda.set_device(device) # make "cuda" default to this device
         dist.init_process_group(backend="nccl", device_id=device)
         dist.barrier()
+    else:
+        device = torch.device(device_type) # mps|cpu
+
     if ddp_rank == 0:
         logger.info(f"Distributed world size: {ddp_world_size}")
+
     return ddp, ddp_rank, ddp_local_rank, ddp_world_size, device
 
 def compute_cleanup():
@@ -125,4 +147,4 @@ class DummyWandb:
     def log(self, *args, **kwargs):
         pass
     def finish(self):
-        pass
+        pass
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index b66f0f8..46deffa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,6 @@ dependencies = [
     "tiktoken>=0.11.0",
     "tokenizers>=0.22.0",
     "torch>=2.0.0",
-    "transformers>=4.0.0",
     "uvicorn>=0.36.0",
     "wandb>=0.21.3",
 ]
@@ -23,17 +22,6 @@ dependencies = [
 requires = ["maturin>=1.7,<2.0"]
 build-backend = "maturin"
 
-# target torch to cpu/mps for macOS
-[tool.uv.sources]
-torch = [
-    { index = "pytorch-cpu" },
-]
-
-[[tool.uv.index]]
-name = "pytorch-cpu"
-url = "https://download.pytorch.org/whl/cpu"
-explicit = true
-
 [tool.maturin]
 module-name = "rustbpe"
 bindings = "pyo3"