From f0855cbcc77cc08307b83a24701bfa587ccd6b4b Mon Sep 17 00:00:00 2001 From: Zach Mueller Date: Tue, 14 Oct 2025 14:12:01 -0400 Subject: [PATCH 01/64] Update speedrun.sh --- speedrun.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/speedrun.sh b/speedrun.sh index d2498ee..a9b579a 100644 --- a/speedrun.sh +++ b/speedrun.sh @@ -12,7 +12,7 @@ # Default intermediate artifacts directory is in ~/.cache/nanochat export OMP_NUM_THREADS=1 -NANOCHAT_BASE_DIR="$HOME/.cache/nanochat" +export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat" mkdir -p $NANOCHAT_BASE_DIR # ----------------------------------------------------------------------------- From b8076dd367a6ba8378f1e7d32afb545b30fe15f8 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Wed, 15 Oct 2025 16:35:04 +0000 Subject: [PATCH 02/64] fix bug in learning rate multiplier, it was ramping up instead of ramping down. see more in Issue #68. also add --dry_run option useful for experimentation --- scripts/mid_train.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/scripts/mid_train.py b/scripts/mid_train.py index 202682d..90ab954 100644 --- a/scripts/mid_train.py +++ b/scripts/mid_train.py @@ -40,10 +40,10 @@ embedding_lr = 0.2 matrix_lr = 0.02 init_lr_frac = 1.0 # initial learning rate is this fraction of the base learning rate weight_decay = 0.0 -final_lr_frac = 0.0 # final LR is this fraction of the initial LR eval_every = 150 eval_tokens = 20*524288 total_batch_size = 524288 +dry_run = 0 # dry_run=1 is for experiments: we will log to wandb but we won't write checkpoints or report config_keys = [k for k,v in globals().items() if not k.startswith('_') and isinstance(v, (int, float, bool, str))] exec(open(os.path.join('nanochat', 'configurator.py')).read()) # overrides from command line or config file user_config = {k: globals()[k] for k in config_keys} # possibly useful for logging @@ -141,7 +141,8 @@ progress = 0 # will go from 0 to 1 over the course of the epoch # Learning rate scheduler def get_lr_multiplier(progress): - return progress * 1.0 + (1 - progress) * final_lr_frac + # first 80% of training: no decay, then linearly ramp down to 0. + return 1 if progress < 0.8 else 1 - (progress - 0.8) / 0.2 # Momentum scheduler for Muon optimizer def get_muon_momentum(it): @@ -185,7 +186,7 @@ while True: model.train() # save checkpoint at the end of the run (only on master process) - if master_process and last_step: + if master_process and last_step and not dry_run: output_dirname = f"d{depth}" # e.g. 
d12
    checkpoint_dir = os.path.join(base_dir, "mid_checkpoints", output_dirname)
    save_checkpoint(
@@ -272,17 +273,18 @@ print0(f"Total training time: {total_training_time/60:.2f}m")
 print0(f"Minimum validation bpb: {min_val_bpb:.4f}")
 
 # Log to report
-from nanochat.report import get_report
-get_report().log(section="Midtraining", data=[
-    user_config, # CLI args
-    { # stats about the training setup
-        "Number of iterations": step,
-        "DDP world size": ddp_world_size,
-    },
-    { # stats about training outcomes
-        "Minimum validation bpb": min_val_bpb,
-    }
-])
+if not dry_run:
+    from nanochat.report import get_report
+    get_report().log(section="Midtraining", data=[
+        user_config, # CLI args
+        { # stats about the training setup
+            "Number of iterations": step,
+            "DDP world size": ddp_world_size,
+        },
+        { # stats about training outcomes
+            "Minimum validation bpb": min_val_bpb,
+        }
+    ])
 
 # cleanup
 wandb_run.finish() # wandb run finish

From 190d9515d0ea6d698edbcad869f1385130ad897a Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Wed, 15 Oct 2025 16:42:23 +0000
Subject: [PATCH 03/64] don't evaluate the sampling evals during SFT, they are
 too slow. keep the multiple choice evals. delete unused imports

---
 scripts/chat_sft.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/scripts/chat_sft.py b/scripts/chat_sft.py
index 8389deb..b5ba49a 100644
--- a/scripts/chat_sft.py
+++ b/scripts/chat_sft.py
@@ -11,7 +11,6 @@ torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft
 import os
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
-import copy
 
 import wandb
 import torch
@@ -23,11 +22,9 @@ from nanochat.checkpoint_manager import save_checkpoint
 from nanochat.engine import Engine
 from scripts.chat_eval import run_chat_eval
 
-from tasks.common import TaskMixture, TaskSequence
-from tasks.mmlu import MMLU
+from tasks.common import TaskMixture
 from tasks.arc import ARC
 from tasks.gsm8k import GSM8K
-from tasks.humaneval import HumanEval
 from tasks.smoltalk import SmolTalk
 
 # -----------------------------------------------------------------------------
@@ -186,7 +183,7 @@ for step in range(num_iterations):
         })
         model.train()
 
-    # evlauate MMLU accuracy
+    # evaluate accuracy of the multiple choice tasks (which are quick to run)
    if last_step or (step > 0 and step % eval_metrics_every == 0):
        model.eval()
        metrics = {}
@@ -194,8 +191,6 @@ for step in range(num_iterations):
         # note that because these are inside no_grad, we can usually afford to at least ~2X the batch size
         metrics["mmlu_acc"] = run_chat_eval("MMLU", model, tokenizer, engine, batch_size=device_batch_size*2, max_problems=1024)
         metrics["arc_easy_acc"] = run_chat_eval("ARC-Easy", model, tokenizer, engine, batch_size=device_batch_size*2, max_problems=1024)
-        metrics["gsm8k_acc"] = run_chat_eval("GSM8K", model, tokenizer, engine, max_problems=64)
-        metrics["humaneval_acc"] = run_chat_eval("HumanEval", model, tokenizer, engine, max_problems=64)
         metrics_str = ', '.join(f'{k}: {v:.6f}' for k, v in metrics.items())
         print0(f"Step {step:05d} | {metrics_str}")
         wandb_run.log({

From 01fb290f539743992c6c41e99c67f5e4ff79ba2e Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Wed, 15 Oct 2025 19:12:19 +0000
Subject: [PATCH 04/64] allow multiple GPUs to do inference in a data parallel
 way

---
 nanochat/ui.html    |   1 -
 scripts/chat_web.py | 217 +++++++++++++++++++++++++++++---------------
 2 files changed, 145 insertions(+), 73 deletions(-)

diff --git a/nanochat/ui.html b/nanochat/ui.html
index 39e608f..264a654 100644
---
a/nanochat/ui.html +++ b/nanochat/ui.html @@ -327,7 +327,6 @@ }, body: JSON.stringify({ messages: messages, - stream: true, temperature: 0.8, max_tokens: 512 }), diff --git a/scripts/chat_web.py b/scripts/chat_web.py index 1a4cfe2..2643417 100644 --- a/scripts/chat_web.py +++ b/scripts/chat_web.py @@ -1,26 +1,46 @@ #!/usr/bin/env python3 """ Unified web chat server - serves both UI and API from a single FastAPI instance. -Run with: python web_chat.py -Then open http://localhost:8000 in your browser. + +Uses data parallelism to distribute requests across multiple GPUs. Each GPU loads +a full copy of the model, and incoming requests are distributed to available workers. + +Launch examples: + +- single available GPU (default) +python -m scripts.chat_web + +- 4 GPUs +python -m scripts.chat_web --num-gpus 4 + +To chat, open the URL printed in the console. (If on cloud box, make sure to use public IP) + +Endpoints: + GET / - Chat UI + POST /chat/completions - Chat API (streaming only) + GET /health - Health check with worker pool status + GET /stats - Worker pool statistics and GPU utilization """ import argparse import json import os import torch +import asyncio from contextlib import asynccontextmanager from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, HTMLResponse, FileResponse from pydantic import BaseModel from typing import List, Optional, AsyncGenerator +from dataclasses import dataclass from nanochat.common import compute_init from nanochat.checkpoint_manager import load_model from nanochat.engine import Engine parser = argparse.ArgumentParser(description='NanoChat Web Server') +parser.add_argument('-n', '--num-gpus', type=int, default=1, help='Number of GPUs to use (default: 1)') parser.add_argument('-i', '--source', type=str, default="sft", help="Source of the model: sft|mid|rl") parser.add_argument('-t', '--temperature', type=float, default=0.8, help='Default temperature for generation') parser.add_argument('-k', '--top-k', type=int, default=50, help='Default top-k sampling parameter') @@ -32,7 +52,55 @@ parser.add_argument('--host', type=str, default='0.0.0.0', help='Host to bind th args = parser.parse_args() ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init() -autocast_ctx = torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16) + +@dataclass +class Worker: + """A worker with a model loaded on a specific GPU.""" + gpu_id: int + device: torch.device + engine: Engine + tokenizer: object + autocast_ctx: torch.amp.autocast + +class WorkerPool: + """Pool of workers, each with a model replica on a different GPU.""" + + def __init__(self, num_gpus: Optional[int] = None): + self.num_gpus = num_gpus if num_gpus is not None else torch.cuda.device_count() + self.workers: List[Worker] = [] + self.available_workers: asyncio.Queue = asyncio.Queue() + + async def initialize(self, source: str, model_tag: Optional[str] = None, step: Optional[int] = None): + """Load model on each GPU.""" + print(f"Initializing worker pool with {self.num_gpus} GPUs...") + + for gpu_id in range(self.num_gpus): + device = torch.device(f"cuda:{gpu_id}") + print(f"Loading model on GPU {gpu_id}...") + + model, tokenizer, _ = load_model(source, device, phase="eval", model_tag=model_tag, step=step) + engine = Engine(model, tokenizer) + autocast_ctx = torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16) + + worker = Worker( + gpu_id=gpu_id, + device=device, + engine=engine, + tokenizer=tokenizer, + 
autocast_ctx=autocast_ctx + ) + self.workers.append(worker) + await self.available_workers.put(worker) + + print(f"All {self.num_gpus} workers initialized!") + + async def acquire_worker(self) -> Worker: + """Get an available worker from the pool.""" + return await self.available_workers.get() + + async def release_worker(self, worker: Worker): + """Return a worker to the pool.""" + await self.available_workers.put(worker) class ChatMessage(BaseModel): role: str @@ -43,14 +111,13 @@ class ChatRequest(BaseModel): temperature: Optional[float] = None max_tokens: Optional[int] = None top_k: Optional[int] = None - stream: Optional[bool] = True @asynccontextmanager async def lifespan(app: FastAPI): - """Load model on startup.""" - print("Loading nanochat model...") - app.state.model, app.state.tokenizer, _ = load_model(args.source, device, phase="eval", model_tag=args.model_tag, step=args.step) - app.state.engine = Engine(app.state.model, app.state.tokenizer) + """Load models on all GPUs on startup.""" + print("Loading nanochat models across GPUs...") + app.state.worker_pool = WorkerPool(num_gpus=args.num_gpus) + await app.state.worker_pool.initialize(args.source, model_tag=args.model_tag, step=args.step) print(f"Server ready at http://localhost:{args.port}") yield @@ -85,8 +152,7 @@ async def logo(): return FileResponse(logo_path, media_type="image/svg+xml") async def generate_stream( - engine, - tokenizer, + worker: Worker, tokens, temperature=None, max_new_tokens=None, @@ -97,11 +163,11 @@ async def generate_stream( max_new_tokens = max_new_tokens if max_new_tokens is not None else args.max_tokens top_k = top_k if top_k is not None else args.top_k - assistant_end = tokenizer.encode_special("<|assistant_end|>") - bos = tokenizer.get_bos_token_id() + assistant_end = worker.tokenizer.encode_special("<|assistant_end|>") + bos = worker.tokenizer.get_bos_token_id() - with autocast_ctx: - for token_column, token_masks in engine.generate( + with worker.autocast_ctx: + for token_column, token_masks in worker.engine.generate( tokens, num_samples=1, max_tokens=max_new_tokens, @@ -113,82 +179,89 @@ async def generate_stream( if token == assistant_end or token == bos: break - token_text = tokenizer.decode([token]) - yield f"data: {json.dumps({'token': token_text})}\n\n" + token_text = worker.tokenizer.decode([token]) + yield f"data: {json.dumps({'token': token_text, 'gpu': worker.gpu_id})}\n\n" yield f"data: {json.dumps({'done': True})}\n\n" @app.post("/chat/completions") async def chat_completions(request: ChatRequest): - """Chat completion endpoint with streaming.""" - engine = app.state.engine - tokenizer = app.state.tokenizer + """Chat completion endpoint (streaming only) - uses worker pool for multi-GPU.""" + worker_pool = app.state.worker_pool - # Build conversation tokens - bos = tokenizer.get_bos_token_id() - user_start = tokenizer.encode_special("<|user_start|>") - user_end = tokenizer.encode_special("<|user_end|>") - assistant_start = tokenizer.encode_special("<|assistant_start|>") - assistant_end = tokenizer.encode_special("<|assistant_end|>") + # Acquire a worker from the pool (will wait if all are busy) + worker = await worker_pool.acquire_worker() - conversation_tokens = [bos] - for message in request.messages: - if message.role == "user": - conversation_tokens.append(user_start) - conversation_tokens.extend(tokenizer.encode(message.content)) - conversation_tokens.append(user_end) - elif message.role == "assistant": - conversation_tokens.append(assistant_start) - 
conversation_tokens.extend(tokenizer.encode(message.content)) - conversation_tokens.append(assistant_end) + try: + # Build conversation tokens + bos = worker.tokenizer.get_bos_token_id() + user_start = worker.tokenizer.encode_special("<|user_start|>") + user_end = worker.tokenizer.encode_special("<|user_end|>") + assistant_start = worker.tokenizer.encode_special("<|assistant_start|>") + assistant_end = worker.tokenizer.encode_special("<|assistant_end|>") - conversation_tokens.append(assistant_start) + conversation_tokens = [bos] + for message in request.messages: + if message.role == "user": + conversation_tokens.append(user_start) + conversation_tokens.extend(worker.tokenizer.encode(message.content)) + conversation_tokens.append(user_end) + elif message.role == "assistant": + conversation_tokens.append(assistant_start) + conversation_tokens.extend(worker.tokenizer.encode(message.content)) + conversation_tokens.append(assistant_end) + + conversation_tokens.append(assistant_start) + + # Streaming response with worker release after completion + async def stream_and_release(): + try: + async for chunk in generate_stream( + worker, + conversation_tokens, + temperature=request.temperature, + max_new_tokens=request.max_tokens, + top_k=request.top_k + ): + yield chunk + finally: + # Release worker back to pool after streaming is done + await worker_pool.release_worker(worker) - if request.stream: return StreamingResponse( - generate_stream( - engine, - tokenizer, - conversation_tokens, - temperature=request.temperature, - max_new_tokens=request.max_tokens, - top_k=request.top_k - ), + stream_and_release(), media_type="text/event-stream" ) - else: - # Non-streaming response - temperature = request.temperature if request.temperature is not None else args.temperature - max_tokens = request.max_tokens if request.max_tokens is not None else args.max_tokens - top_k = request.top_k if request.top_k is not None else args.top_k - - with autocast_ctx: - result_tokens, masks = engine.generate_batch( - conversation_tokens, - num_samples=1, - max_tokens=max_tokens, - temperature=temperature, - top_k=top_k - )[0] - - response_tokens = result_tokens[len(conversation_tokens):] - response_text = tokenizer.decode(response_tokens) - return { - "choices": [{ - "message": { - "role": "assistant", - "content": response_text - }, - "finish_reason": "stop" - }] - } + except Exception as e: + # Make sure to release worker even on error + await worker_pool.release_worker(worker) + raise e @app.get("/health") async def health(): """Health check endpoint.""" + worker_pool = getattr(app.state, 'worker_pool', None) return { "status": "ok", - "ready": hasattr(app.state, 'model') and app.state.model is not None + "ready": worker_pool is not None and len(worker_pool.workers) > 0, + "num_gpus": worker_pool.num_gpus if worker_pool else 0, + "available_workers": worker_pool.available_workers.qsize() if worker_pool else 0 + } + +@app.get("/stats") +async def stats(): + """Get worker pool statistics.""" + worker_pool = app.state.worker_pool + return { + "total_workers": len(worker_pool.workers), + "available_workers": worker_pool.available_workers.qsize(), + "busy_workers": len(worker_pool.workers) - worker_pool.available_workers.qsize(), + "workers": [ + { + "gpu_id": w.gpu_id, + "device": str(w.device) + } for w in worker_pool.workers + ] } if __name__ == "__main__": From 52bfeea8bdfac64557fe59ff586faa87c01a0da6 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Wed, 15 Oct 2025 19:42:54 +0000 Subject: [PATCH 05/64] add very 
basic abuse prevention limits to chat_web so it's ok to host endpoints --- scripts/chat_web.py | 89 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/scripts/chat_web.py b/scripts/chat_web.py index 2643417..f8e807c 100644 --- a/scripts/chat_web.py +++ b/scripts/chat_web.py @@ -20,6 +20,14 @@ Endpoints: POST /chat/completions - Chat API (streaming only) GET /health - Health check with worker pool status GET /stats - Worker pool statistics and GPU utilization + +Abuse Prevention: + - Maximum 500 messages per request + - Maximum 8000 characters per message + - Maximum 32000 characters total conversation length + - Temperature clamped to 0.0-2.0 + - Top-k clamped to 1-200 + - Max tokens clamped to 1-4096 """ import argparse @@ -28,7 +36,7 @@ import os import torch import asyncio from contextlib import asynccontextmanager -from fastapi import FastAPI +from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, HTMLResponse, FileResponse from pydantic import BaseModel @@ -39,6 +47,17 @@ from nanochat.common import compute_init from nanochat.checkpoint_manager import load_model from nanochat.engine import Engine +# Abuse prevention limits +MAX_MESSAGES_PER_REQUEST = 500 +MAX_MESSAGE_LENGTH = 8000 +MAX_TOTAL_CONVERSATION_LENGTH = 32000 +MIN_TEMPERATURE = 0.0 +MAX_TEMPERATURE = 2.0 +MIN_TOP_K = 1 +MAX_TOP_K = 200 +MIN_MAX_TOKENS = 1 +MAX_MAX_TOKENS = 4096 + parser = argparse.ArgumentParser(description='NanoChat Web Server') parser.add_argument('-n', '--num-gpus', type=int, default=1, help='Number of GPUs to use (default: 1)') parser.add_argument('-i', '--source', type=str, default="sft", help="Source of the model: sft|mid|rl") @@ -112,6 +131,69 @@ class ChatRequest(BaseModel): max_tokens: Optional[int] = None top_k: Optional[int] = None +def validate_chat_request(request: ChatRequest): + """Validate chat request to prevent abuse.""" + # Check number of messages + if len(request.messages) == 0: + raise HTTPException(status_code=400, detail="At least one message is required") + if len(request.messages) > MAX_MESSAGES_PER_REQUEST: + raise HTTPException( + status_code=400, + detail=f"Too many messages. Maximum {MAX_MESSAGES_PER_REQUEST} messages allowed per request" + ) + + # Check individual message lengths and total conversation length + total_length = 0 + for i, message in enumerate(request.messages): + if not message.content: + raise HTTPException(status_code=400, detail=f"Message {i} has empty content") + + msg_length = len(message.content) + if msg_length > MAX_MESSAGE_LENGTH: + raise HTTPException( + status_code=400, + detail=f"Message {i} is too long. Maximum {MAX_MESSAGE_LENGTH} characters allowed per message" + ) + total_length += msg_length + + if total_length > MAX_TOTAL_CONVERSATION_LENGTH: + raise HTTPException( + status_code=400, + detail=f"Total conversation is too long. Maximum {MAX_TOTAL_CONVERSATION_LENGTH} characters allowed" + ) + + # Validate role values + for i, message in enumerate(request.messages): + if message.role not in ["user", "assistant"]: + raise HTTPException( + status_code=400, + detail=f"Message {i} has invalid role. 
Must be 'user' or 'assistant'"
+            )
+
+    # Validate temperature
+    if request.temperature is not None:
+        if not (MIN_TEMPERATURE <= request.temperature <= MAX_TEMPERATURE):
+            raise HTTPException(
+                status_code=400,
+                detail=f"Temperature must be between {MIN_TEMPERATURE} and {MAX_TEMPERATURE}"
+            )
+
+    # Validate top_k
+    if request.top_k is not None:
+        if not (MIN_TOP_K <= request.top_k <= MAX_TOP_K):
+            raise HTTPException(
+                status_code=400,
+                detail=f"top_k must be between {MIN_TOP_K} and {MAX_TOP_K}"
+            )
+
+    # Validate max_tokens
+    if request.max_tokens is not None:
+        if not (MIN_MAX_TOKENS <= request.max_tokens <= MAX_MAX_TOKENS):
+            raise HTTPException(
+                status_code=400,
+                detail=f"max_tokens must be between {MIN_MAX_TOKENS} and {MAX_MAX_TOKENS}"
+            )
+
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Load models on all GPUs on startup."""
@@ -187,9 +269,12 @@ async def generate_stream(
 @app.post("/chat/completions")
 async def chat_completions(request: ChatRequest):
     """Chat completion endpoint (streaming only) - uses worker pool for multi-GPU."""
-    worker_pool = app.state.worker_pool
+
+    # Basic validation to prevent abuse
+    validate_chat_request(request)
 
     # Acquire a worker from the pool (will wait if all are busy)
+    worker_pool = app.state.worker_pool
     worker = await worker_pool.acquire_worker()
 
     try:

From 03fa673b7d739238135fcbe39a4c625ed033962e Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Wed, 15 Oct 2025 19:51:06 +0000
Subject: [PATCH 06/64] add basic logging to chat_web, which i think might be
 fun

---
 scripts/chat_web.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/scripts/chat_web.py b/scripts/chat_web.py
index f8e807c..cae577d 100644
--- a/scripts/chat_web.py
+++ b/scripts/chat_web.py
@@ -35,6 +35,7 @@ import json
 import os
 import torch
 import asyncio
+import logging
 from contextlib import asynccontextmanager
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
@@ -70,6 +71,14 @@ parser.add_argument('-p', '--port', type=int, default=8000, help='Port to run th
 parser.add_argument('--host', type=str, default='0.0.0.0', help='Host to bind the server to')
 args = parser.parse_args()
 
+# Configure logging for conversation traffic
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
+)
+logger = logging.getLogger(__name__)
+
 ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init()
 
 @dataclass
@@ -273,6 +282,12 @@ async def chat_completions(request: ChatRequest):
     # Basic validation to prevent abuse
     validate_chat_request(request)
 
+    # Log incoming conversation to console
+    logger.info("="*20)
+    for i, message in enumerate(request.messages):
+        logger.info(f"[{message.role.upper()}]: {message.content}")
+    logger.info("-"*20)
+
     # Acquire a worker from the pool (will wait if all are busy)
     worker_pool = app.state.worker_pool
     worker = await worker_pool.acquire_worker()
@@ -299,6 +314,7 @@ async def chat_completions(request: ChatRequest):
         conversation_tokens.append(assistant_start)
 
         # Streaming response with worker release after completion
+        response_tokens = []
         async def stream_and_release():
             try:
                 async for chunk in generate_stream(
@@ -308,8 +324,16 @@ async def chat_completions(request: ChatRequest):
                     max_new_tokens=request.max_tokens,
                     top_k=request.top_k
                 ):
+                    # Accumulate response for logging
+                    chunk_data = json.loads(chunk.replace("data: ", "").strip())
+                    if "token" in chunk_data:
+                        response_tokens.append(chunk_data["token"])
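+                    # note: each chunk is a single SSE "data: {...}" JSON frame; the final
+                    # frame is {"done": true} with no "token" key, so it is skipped above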
                    yield chunk
             finally:
+                # Log the assistant response to console
+                full_response = "".join(response_tokens)
+                logger.info(f"[ASSISTANT] (GPU {worker.gpu_id}): {full_response}")
+                logger.info("="*20)
                 # Release worker back to pool after streaming is done
                 await worker_pool.release_worker(worker)

From 4c3590c4994d4f6dfd4b5a30da935b6f993a1072 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Wed, 15 Oct 2025 20:29:54 +0000
Subject: [PATCH 07/64] fix subtle issue in token decoding in cases where
 multiple utf8 bytes need to be emitted into a single codepoint. examples are
 emoji or foreign languages. basically we have to accumulate token
 sequences/text and only emit when we get full codepoints

---
 scripts/chat_web.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/scripts/chat_web.py b/scripts/chat_web.py
index cae577d..24258a2 100644
--- a/scripts/chat_web.py
+++ b/scripts/chat_web.py
@@ -257,6 +257,11 @@ async def generate_stream(
     assistant_end = worker.tokenizer.encode_special("<|assistant_end|>")
     bos = worker.tokenizer.get_bos_token_id()
 
+    # Accumulate tokens to properly handle multi-byte UTF-8 characters (like emojis)
+    accumulated_tokens = []
+    # Track the last complete UTF-8 string (without replacement characters)
+    last_clean_text = ""
+
     with worker.autocast_ctx:
         for token_column, token_masks in worker.engine.generate(
             tokens,
@@ -267,11 +272,23 @@ async def generate_stream(
         ):
             token = token_column[0]
 
+            # Stopping criteria
             if token == assistant_end or token == bos:
                 break
 
-            token_text = worker.tokenizer.decode([token])
-            yield f"data: {json.dumps({'token': token_text, 'gpu': worker.gpu_id})}\n\n"
+            # Append the token to sequence
+            accumulated_tokens.append(token)
+            # Decode all accumulated tokens to get proper UTF-8 handling
+            # Note that decode is a quite efficient operation, basically table lookup and string concat
+            current_text = worker.tokenizer.decode(accumulated_tokens)
+            # Only emit text if it doesn't end with a replacement character
+            # This ensures we don't emit incomplete UTF-8 sequences
+            if not current_text.endswith('�'):
+                # Extract only the new text since last clean decode
+                new_text = current_text[len(last_clean_text):]
+                if new_text: # Only yield if there's new content
+                    yield f"data: {json.dumps({'token': new_text, 'gpu': worker.gpu_id}, ensure_ascii=False)}\n\n"
+                last_clean_text = current_text
 
     yield f"data: {json.dumps({'done': True})}\n\n"

From fae3aca9519aad955b4882799ef0dcc922e093e3 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Wed, 15 Oct 2025 20:32:22 +0000
Subject: [PATCH 08/64] add script to train a $1000 version of nanochat.
 currently it's a bit more like $800 and this would run in probably around 33
 hours instead of the budget of 41 hours, so we might tune it later.
i think it's ok for now --- run1000.sh | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 run1000.sh diff --git a/run1000.sh b/run1000.sh new file mode 100644 index 0000000..7d41327 --- /dev/null +++ b/run1000.sh @@ -0,0 +1,94 @@ +# The $1000 tier of nanochat +# Designed to run end-to-end for $1000/24 ~= 41.6 hours on an 8XH100 node +# A bit sparser on comments, see speedrun.sh for more detail + +# all the setup stuff +export OMP_NUM_THREADS=1 +NANOCHAT_BASE_DIR="$HOME/.cache/nanochat" +mkdir -p $NANOCHAT_BASE_DIR +command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh +[ -d ".venv" ] || uv venv +uv sync +source .venv/bin/activate +if [ -z "$WANDB_RUN" ]; then + WANDB_RUN=dummy +fi +python -m nanochat.report reset +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y +source "$HOME/.cargo/env" +uv run maturin develop --release --manifest-path rustbpe/Cargo.toml +EVAL_BUNDLE_URL=https://karpathy-public.s3.us-west-2.amazonaws.com/eval_bundle.zip +if [ ! -d "$NANOCHAT_BASE_DIR/eval_bundle" ]; then + curl -L -o eval_bundle.zip $EVAL_BUNDLE_URL + unzip -q eval_bundle.zip + rm eval_bundle.zip + mv eval_bundle $NANOCHAT_BASE_DIR +fi + +# train tokenizer on ~4B characters and kick off download of the rest for pretraining +python -m nanochat.dataset -n 16 +# start downloading the rest of the shards for a total of 800 (see below why 800) +python -m nanochat.dataset -n 800 & +# todo: download the rest of it +python -m scripts.tok_train --max_chars=4000000000 +python -m scripts.tok_eval + +# Documenting my process for determining the hyperparameters for this run1000.sh script: +# We want a budget of approx. $1000 ~= 41.6 hours of 8XH100 compute +# 1) I guessed the model size for this to be about depth=32 +# 2) Determine the device_batch_size that fits: +# Running the base_train.py script with --depth=32, I saw that --device_batch_size=16 +# runs out of memory, but --device_batch_size=8 fits. Inspecting `nvidia-smi` during training, +# I saw all GPUs were at about 78/80GB VRAM, so it just barely fits and we have good MFU at ~50%. +# So the training script was running ok and showed: +# Vocab size: 65,536 +# num_layers: 32 +# model_dim: 2048 +# num_heads: 16 +# num_kv_heads: 16 +# Tokens / micro-batch / rank: 8 x 2048 = 16,384 +# Tokens / micro-batch: 131,072 +# Total batch size 524,288 => gradient accumulation steps: 4 +# Number of parameters: 1,879,048,192 +# Estimated FLOPs per token: 1.207960e+10 +# Calculated number of iterations from target data:param ratio: 71,680 +# Total number of training tokens: 37,580,963,840 +# Tokens : Params ratio: 20.00 +# Total training FLOPs estimate: 4.539628e+20 +# step 00004/71680 (0.01%) | loss: 8.813754 | lrm: 1.00 | dt: 1571.88ms | tok/sec: 83,385 | mfu: 50.92 | total time: 0.00m +# step 00005/71680 (0.01%) | loss: 8.488074 | lrm: 1.00 | dt: 1572.76ms | tok/sec: 83,338 | mfu: 50.89 | total time: 0.00m +# ... +# 3) validate that the runtime fits our budget: +# The training script uses the Chinchilla scaling law to compute-optimally set #tokens = 20 * #params. In particular: +# The script shows that we will be training for 71,680 steps, and each step takes 1.574s so: +# estimated time to train: 71,680 * 1.574s / 60 / 60 = 31.3 hours. +# This is OK, fits our budget, and leaves ~10 hours for midtraining and SFT and evals and maybe RL. +# It's possible that we might even fit depth=33 or depth=34, but for now let's go along with this. 
+# 4) The last thing to pay attention to is the amount of training data required for the run.
+# The script above calculated that "Total number of training tokens: 37,580,963,840"
+# The tok_eval.py script reports about ~4.8 chars/token on average for the default tokenizer settings.
+# So ~38B tokens x ~4.8 chars/token = ~185B chars.
+# Each data shard is ~250M chars, so we need ~185B / 250M ~= 740 shards.
+# For safety, I bumped that up to 800 shards, and that's why up above I used -n 800 when pre-downloading dataset shards.
+# If we didn't have enough data, the training script would loop around and do multiple epochs over the same data,
+# which would decrease model performance. Possibly 2, 3 or so epochs is ~ok, but certainly not ideal and at 10+ epochs we'd
+# start to overfit hard.
+# 5) That's it, everything else (e.g. the learning rates) is adjusted automatically by the training script.
+torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=32 --device_batch_size=8
+torchrun --standalone --nproc_per_node=8 -m scripts.base_loss
+torchrun --standalone --nproc_per_node=8 -m scripts.base_eval
+
+# midtrain
+# NOTE: ensure that we use the same device_batch_size here as the base training script.
+torchrun --standalone --nproc_per_node=8 -m scripts.mid_train -- --device_batch_size=8 --run=$WANDB_RUN
+torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i mid
+
+# sft
+torchrun --standalone --nproc_per_node=8 -m scripts.chat_sft -- --run=$WANDB_RUN
+torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i sft
+
+# generate final report
+python -m nanochat.report generate
+
+# talk to it
+python -m scripts.chat_web

From 92d52ecc9240f468ba1e1b79dff7d5797987cbf8 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Thu, 16 Oct 2025 01:09:53 +0000
Subject: [PATCH 09/64] add slash commands to webui

---
 nanochat/ui.html | 71 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/nanochat/ui.html b/nanochat/ui.html
index 264a654..4271e2f 100644
--- a/nanochat/ui.html
+++ b/nanochat/ui.html
@@ -117,6 +117,15 @@
         max-width: 65%;
     }
 
+    .message.console .message-content {
+        font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', 'Consolas', 'Courier New', monospace;
+        font-size: 0.875rem;
+        background-color: #fafafa;
+        padding: 0.75rem 1rem;
+        color: #374151;
+        max-width: 80%;
+    }
+
     .input-container {
         background-color: #ffffff;
         padding: 1rem;
@@ -255,6 +264,8 @@
         let messages = [];
         let isGenerating = false;
+        let currentTemperature = 0.8;
+        let currentTopK = 50;
 
         chatInput.addEventListener('input', function() {
             this.style.height = 'auto';
@@ -304,10 +315,67 @@
             return contentDiv;
         }
 
+        function handleSlashCommand(command) {
+            const parts = command.trim().split(/\s+/);
+            const cmd = parts[0].toLowerCase();
+            const arg = parts[1];
+
+            if (cmd === '/temperature') {
+                if (arg === undefined) {
+                    addMessage('console', `Current temperature: ${currentTemperature}`);
+                } else {
+                    const temp = parseFloat(arg);
+                    if (isNaN(temp) || temp < 0 || temp > 2) {
+                        addMessage('console', 'Invalid temperature. Must be between 0.0 and 2.0');
+                    } else {
+                        currentTemperature = temp;
+                        addMessage('console', `Temperature set to ${currentTemperature}`);
+                    }
+                }
+                return true;
+            } else if (cmd === '/topk') {
+                if (arg === undefined) {
+                    addMessage('console', `Current top-k: ${currentTopK}`);
+                } else {
+                    const topk = parseInt(arg);
+                    if (isNaN(topk) || topk < 1 || topk > 200) {
+                        addMessage('console', 'Invalid top-k.
Must be between 1 and 200');
+                    } else {
+                        currentTopK = topk;
+                        addMessage('console', `Top-k set to ${currentTopK}`);
+                    }
+                }
+                return true;
+            } else if (cmd === '/clear') {
+                newConversation();
+                return true;
+            } else if (cmd === '/help') {
+                addMessage('console',
+                    'Available commands:\n' +
+                    '/temperature - Show current temperature\n' +
+                    '/temperature <value> - Set temperature (0.0-2.0)\n' +
+                    '/topk - Show current top-k\n' +
+                    '/topk <value> - Set top-k (1-200)\n' +
+                    '/clear - Clear conversation\n' +
+                    '/help - Show this help message'
+                );
+                return true;
+            }
+            return false;
+        }
+
         async function sendMessage() {
             const message = chatInput.value.trim();
             if (!message || isGenerating) return;
 
+            // Handle slash commands
+            if (message.startsWith('/')) {
+                chatInput.value = '';
+                chatInput.style.height = 'auto';
+                handleSlashCommand(message);
+                return;
+            }
+
             isGenerating = true;
             chatInput.value = '';
             chatInput.style.height = 'auto';
@@ -327,7 +395,8 @@
                 },
                 body: JSON.stringify({
                     messages: messages,
-                    temperature: 0.8,
+                    temperature: currentTemperature,
+                    top_k: currentTopK,
                     max_tokens: 512
                 }),
             });

From 2846999b8f924c0b77f7437fca1dbabb40c900f0 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Thu, 16 Oct 2025 01:16:22 +0000
Subject: [PATCH 10/64] allow user to click on their messages to edit them.
 conversation after that point is wiped

---
 nanochat/ui.html | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/nanochat/ui.html b/nanochat/ui.html
index 4271e2f..d46eeb8 100644
--- a/nanochat/ui.html
+++ b/nanochat/ui.html
@@ -115,6 +115,12 @@
         border-radius: 1.25rem;
         padding: 0.8rem 1rem;
         max-width: 65%;
+        cursor: pointer;
+        transition: background-color 0.2s ease;
+    }
+
+    .message.user .message-content:hover {
+        background-color: #e5e7eb;
     }
 
     .message.console .message-content {
@@ -300,7 +306,7 @@
             chatInput.focus();
         }
 
-        function addMessage(role, content) {
+        function addMessage(role, content, messageIndex = null) {
             const messageDiv = document.createElement('div');
             messageDiv.className = `message ${role}`;
 
@@ -308,6 +314,17 @@
             contentDiv.className = 'message-content';
             contentDiv.textContent = content;
 
+            // Add click handler for user messages to enable editing
+            if (role === 'user' && messageIndex !== null) {
+                contentDiv.setAttribute('data-message-index', messageIndex);
+                contentDiv.setAttribute('title', 'Click to edit and restart from here');
+                contentDiv.addEventListener('click', function() {
+                    if (!isGenerating) {
+                        editMessage(messageIndex);
+                    }
+                });
+            }
+
             messageDiv.appendChild(contentDiv);
             chatWrapper.appendChild(messageDiv);
 
@@ -315,6 +332,32 @@
             return contentDiv;
         }
 
+        function editMessage(messageIndex) {
+            // Find the message in the messages array
+            if (messageIndex < 0 || messageIndex >= messages.length) return;
+
+            const messageToEdit = messages[messageIndex];
+            if (messageToEdit.role !== 'user') return;
+
+            // Copy message content to input
+            chatInput.value = messageToEdit.content;
+            chatInput.style.height = 'auto';
+            chatInput.style.height = Math.min(chatInput.scrollHeight, 200) + 'px';
+
+            // Remove this message and all subsequent messages from the array
+            messages = messages.slice(0, messageIndex);
+
+            // Remove message elements from DOM starting from messageIndex
+            const allMessages = chatWrapper.querySelectorAll('.message');
+            for (let i = messageIndex; i < allMessages.length; i++) {
+                allMessages[i].remove();
+            }
+
+            // Enable send button and focus input
+            sendButton.disabled = false;
+            chatInput.focus();
+        }
+
         function
handleSlashCommand(command) { const parts = command.trim().split(/\s+/); const cmd = parts[0].toLowerCase(); @@ -381,8 +424,9 @@ chatInput.style.height = 'auto'; sendButton.disabled = true; + const userMessageIndex = messages.length; messages.push({ role: 'user', content: message }); - addMessage('user', message); + addMessage('user', message, userMessageIndex); const assistantContent = addMessage('assistant', ''); assistantContent.innerHTML = ''; From 4346536ab2e57917ec543b20e88c4bdc47eda572 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Thu, 16 Oct 2025 01:28:37 +0000 Subject: [PATCH 11/64] also allow regenerating assistant message by clicking it, and make sure to feed good seed to generate --- nanochat/ui.html | 173 +++++++++++++++++++++++++++++--------------- scripts/chat_web.py | 4 +- 2 files changed, 117 insertions(+), 60 deletions(-) diff --git a/nanochat/ui.html b/nanochat/ui.html index d46eeb8..b2b4605 100644 --- a/nanochat/ui.html +++ b/nanochat/ui.html @@ -108,6 +108,15 @@ background: transparent; border: none; padding: 0.25rem 0; + cursor: pointer; + border-radius: 0.5rem; + padding: 0.5rem; + margin-left: -0.5rem; + transition: background-color 0.2s ease; + } + + .message.assistant .message-content:hover { + background-color: #f9fafb; } .message.user .message-content { @@ -325,6 +334,17 @@ }); } + // Add click handler for assistant messages to enable regeneration + if (role === 'assistant' && messageIndex !== null) { + contentDiv.setAttribute('data-message-index', messageIndex); + contentDiv.setAttribute('title', 'Click to regenerate this response'); + contentDiv.addEventListener('click', function() { + if (!isGenerating) { + regenerateMessage(messageIndex); + } + }); + } + messageDiv.appendChild(contentDiv); chatWrapper.appendChild(messageDiv); @@ -358,6 +378,99 @@ chatInput.focus(); } + async function generateAssistantResponse() { + isGenerating = true; + sendButton.disabled = true; + + const assistantContent = addMessage('assistant', ''); + assistantContent.innerHTML = ''; + + try { + const response = await fetch(`${API_URL}/chat/completions`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + messages: messages, + temperature: currentTemperature, + top_k: currentTopK, + max_tokens: 512 + }), + }); + + if (!response.ok) { + throw new Error(`HTTP error! 
status: ${response.status}`); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let fullResponse = ''; + assistantContent.textContent = ''; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + const chunk = decoder.decode(value); + const lines = chunk.split('\n'); + + for (const line of lines) { + if (line.startsWith('data: ')) { + try { + const data = JSON.parse(line.slice(6)); + if (data.token) { + fullResponse += data.token; + assistantContent.textContent = fullResponse; + chatContainer.scrollTop = chatContainer.scrollHeight; + } + } catch (e) { + } + } + } + } + + const assistantMessageIndex = messages.length; + messages.push({ role: 'assistant', content: fullResponse }); + + // Add click handler to regenerate this assistant message + assistantContent.setAttribute('data-message-index', assistantMessageIndex); + assistantContent.setAttribute('title', 'Click to regenerate this response'); + assistantContent.addEventListener('click', function() { + if (!isGenerating) { + regenerateMessage(assistantMessageIndex); + } + }); + + } catch (error) { + console.error('Error:', error); + assistantContent.innerHTML = `
Error: ${error.message}
`; + } finally { + isGenerating = false; + sendButton.disabled = !chatInput.value.trim(); + } + } + + async function regenerateMessage(messageIndex) { + // Find the message in the messages array + if (messageIndex < 0 || messageIndex >= messages.length) return; + + const messageToRegenerate = messages[messageIndex]; + if (messageToRegenerate.role !== 'assistant') return; + + // Remove this message and all subsequent messages from the array + messages = messages.slice(0, messageIndex); + + // Remove message elements from DOM starting from messageIndex + const allMessages = chatWrapper.querySelectorAll('.message'); + for (let i = messageIndex; i < allMessages.length; i++) { + allMessages[i].remove(); + } + + // Regenerate the assistant response + await generateAssistantResponse(); + } + function handleSlashCommand(command) { const parts = command.trim().split(/\s+/); const cmd = parts[0].toLowerCase(); @@ -419,72 +532,14 @@ return; } - isGenerating = true; chatInput.value = ''; chatInput.style.height = 'auto'; - sendButton.disabled = true; const userMessageIndex = messages.length; messages.push({ role: 'user', content: message }); addMessage('user', message, userMessageIndex); - const assistantContent = addMessage('assistant', ''); - assistantContent.innerHTML = ''; - - try { - const response = await fetch(`${API_URL}/chat/completions`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ - messages: messages, - temperature: currentTemperature, - top_k: currentTopK, - max_tokens: 512 - }), - }); - - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); - } - - const reader = response.body.getReader(); - const decoder = new TextDecoder(); - let fullResponse = ''; - assistantContent.textContent = ''; - - while (true) { - const { done, value } = await reader.read(); - if (done) break; - - const chunk = decoder.decode(value); - const lines = chunk.split('\n'); - - for (const line of lines) { - if (line.startsWith('data: ')) { - try { - const data = JSON.parse(line.slice(6)); - if (data.token) { - fullResponse += data.token; - assistantContent.textContent = fullResponse; - chatContainer.scrollTop = chatContainer.scrollHeight; - } - } catch (e) { - } - } - } - } - - messages.push({ role: 'assistant', content: fullResponse }); - - } catch (error) { - console.error('Error:', error); - assistantContent.innerHTML = `
Error: ${error.message}
`; - } finally { - isGenerating = false; - sendButton.disabled = !chatInput.value.trim(); - } + await generateAssistantResponse(); } sendButton.disabled = false; diff --git a/scripts/chat_web.py b/scripts/chat_web.py index 24258a2..c07725e 100644 --- a/scripts/chat_web.py +++ b/scripts/chat_web.py @@ -36,6 +36,7 @@ import os import torch import asyncio import logging +import random from contextlib import asynccontextmanager from fastapi import FastAPI, HTTPException from fastapi.middleware.cors import CORSMiddleware @@ -268,7 +269,8 @@ async def generate_stream( num_samples=1, max_tokens=max_new_tokens, temperature=temperature, - top_k=top_k + top_k=top_k, + seed=random.randint(0, 2**31 - 1) ): token = token_column[0] From 722da4f54399ab9b28c04044d1d56b51e19b6f69 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Thu, 16 Oct 2025 16:14:38 +0000 Subject: [PATCH 12/64] trying to add basic cpu support, will try mps too --- nanochat/common.py | 13 +++++++------ nanochat/dataloader.py | 6 +++--- scripts/base_train.py | 25 ++++++++++++++++--------- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/nanochat/common.py b/nanochat/common.py index 8b10df9..22232d1 100644 --- a/nanochat/common.py +++ b/nanochat/common.py @@ -89,15 +89,16 @@ def get_dist_info(): else: return False, 0, 0, 1 -def compute_init(): +def compute_init(device_type="cuda"): # cuda|cpu """Basic initialization that we keep doing over and over, so make common.""" # CUDA is currently required - assert torch.cuda.is_available(), "CUDA is needed for a distributed run atm" + # assert torch.cuda.is_available(), "CUDA is needed for a distributed run atm" # Reproducibility torch.manual_seed(42) - torch.cuda.manual_seed(42) + if device_type == "cuda": + torch.cuda.manual_seed(42) # skipping full reproducibility for now, possibly investigate slowdown later # torch.use_deterministic_algorithms(True) # torch.backends.cudnn.deterministic = True @@ -106,15 +107,15 @@ def compute_init(): # Precision torch.set_float32_matmul_precision("high") # uses tf32 instead of fp32 for matmuls - # Distributed setup: Distributed Data Parallel (DDP), optional + # Distributed setup: Distributed Data Parallel (DDP), optional, and requires CUDA ddp, ddp_rank, ddp_local_rank, ddp_world_size = get_dist_info() - if ddp: + if ddp and device_type == "cuda": device = torch.device("cuda", ddp_local_rank) torch.cuda.set_device(device) # make "cuda" default to this device dist.init_process_group(backend="nccl", device_id=device) dist.barrier() else: - device = torch.device("cuda") + device = torch.device(device_type) # cuda|cpu if ddp_rank == 0: logger.info(f"Distributed world size: {ddp_world_size}") diff --git a/nanochat/dataloader.py b/nanochat/dataloader.py index c1636b1..12e7d8e 100644 --- a/nanochat/dataloader.py +++ b/nanochat/dataloader.py @@ -6,7 +6,7 @@ from nanochat.common import get_dist_info from nanochat.dataset import parquets_iter_batched from nanochat.tokenizer import get_tokenizer -def tokenizing_distributed_data_loader(B, T, split, tokenizer_threads=4, tokenizer_batch_size=128): +def tokenizing_distributed_data_loader(B, T, split, tokenizer_threads=4, tokenizer_batch_size=128, device="cuda"): """Stream pretraining text from parquet files, tokenize, yield training batches.""" assert split in ["train", "val"], "split must be 'train' or 'val'" ddp, ddp_rank, ddp_local_rank, ddp_world_size = get_dist_info() @@ -44,6 +44,6 @@ def tokenizing_distributed_data_loader(B, T, split, tokenizer_threads=4, tokeniz inputs_cpu = 
scratch[:-1].to(dtype=torch.int32) targets_cpu = scratch[1:] # Reshape to 2D and move to GPU async - inputs = inputs_cpu.view(B, T).to(device="cuda", dtype=torch.int32, non_blocking=True) - targets = targets_cpu.view(B, T).to(device="cuda", dtype=torch.int64, non_blocking=True) + inputs = inputs_cpu.view(B, T).to(device=device, dtype=torch.int32, non_blocking=True) + targets = targets_cpu.view(B, T).to(device=device, dtype=torch.int64, non_blocking=True) yield inputs, targets diff --git a/scripts/base_train.py b/scripts/base_train.py index b691ed4..166e11e 100644 --- a/scripts/base_train.py +++ b/scripts/base_train.py @@ -6,6 +6,9 @@ python base_train.py or distributed as: torchrun --nproc_per_node=8 base_train.py + +If you just want to see it run on CPU (you won't get far but it should run), try something like: +python -m scripts.base_train --depth=4 --max_seq_len=512 --device_batch_size=1 --device_type=cpu --eval_tokens=512 --total_batch_size=512 --num_iterations=1000 """ import os @@ -27,6 +30,8 @@ print_banner() # ----------------------------------------------------------------------------- # User settings run = "dummy" # wandb run name default ("dummy" is special - we won't log to wandb) +# Runtime +device_type = "cuda" # cuda|cpu # Model architecture depth = 20 # the depth of the Transformer model to train, rest of the kwargs are derived max_seq_len = 2048 # max context length @@ -57,9 +62,11 @@ user_config = {k: globals()[k] for k in config_keys} # will be useful for loggin # ----------------------------------------------------------------------------- # Compute init -ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init() +ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) master_process = ddp_rank == 0 # this process will do logging, checkpointing etc. 
-autocast_ctx = torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16) +autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=torch.bfloat16) +synchronize = torch.cuda.synchronize if device_type == "cuda" else lambda: None +get_max_memory = torch.cuda.max_memory_allocated if device_type == "cuda" else lambda: 0 # wandb logging init use_dummy_wandb = run == "dummy" or not master_process @@ -96,7 +103,7 @@ model_config_kwargs = dict(sequence_len=max_seq_len, vocab_size=vocab_size, n_la with torch.device("meta"): model_config = GPTConfig(**model_config_kwargs) model = GPT(model_config) -model.to_empty(device="cuda") +model.to_empty(device=device) model.init_weights() orig_model = model # original, uncompiled model, for saving raw model state_dict model = torch.compile(model, dynamic=False) # TODO: dynamic True/False think through @@ -133,8 +140,8 @@ adamw_optimizer, muon_optimizer = optimizers # Initialize the DataLoaders for train/val base_dir = get_base_dir() tokens_dir = os.path.join(base_dir, "tokenized_data") -train_loader = tokenizing_distributed_data_loader(device_batch_size, max_seq_len, split="train") -build_val_loader = lambda: tokenizing_distributed_data_loader(device_batch_size, max_seq_len, split="val") +train_loader = tokenizing_distributed_data_loader(device_batch_size, max_seq_len, split="train", device=device) +build_val_loader = lambda: tokenizing_distributed_data_loader(device_batch_size, max_seq_len, split="val", device=device) x, y = next(train_loader) # kick off load of the very first batch of data # ----------------------------------------------------------------------------- @@ -252,7 +259,7 @@ for step in range(num_iterations + 1): # ------------------------------------------------------------------------- # single training step # evaluate the gradient - torch.cuda.synchronize() + synchronize() t0 = time.time() for micro_step in range(grad_accum_steps): with autocast_ctx: @@ -275,7 +282,7 @@ for step in range(num_iterations + 1): for opt in optimizers: opt.step() model.zero_grad(set_to_none=True) - torch.cuda.synchronize() + synchronize() t1 = time.time() dt = t1 - t0 # ------------------------------------------------------------------------- @@ -304,7 +311,7 @@ for step in range(num_iterations + 1): }) # print a few more stats -print0(f"Peak memory usage: {torch.cuda.max_memory_allocated() / 1024 / 1024:.2f}MiB") +print0(f"Peak memory usage: {get_max_memory() / 1024 / 1024:.2f}MiB") print0(f"Total training time: {total_training_time/60:.2f}m") print0(f"Minimum validation bpb: {min_val_bpb:.4f}") @@ -330,7 +337,7 @@ get_report().log(section="Base model training", data=[ "MFU %": f"{mfu:.2f}%", "Total training flops": f"{flops_so_far:e}", "Total training time": f"{total_training_time/60:.2f}m", - "Peak memory usage": f"{torch.cuda.max_memory_allocated() / 1024 / 1024:.2f}MiB", + "Peak memory usage": f"{get_max_memory() / 1024 / 1024:.2f}MiB", } ]) From 306bc380ab62b9adb82f71d1c4eb606428329bbd Mon Sep 17 00:00:00 2001 From: karpathy Date: Thu, 16 Oct 2025 10:04:43 -0700 Subject: [PATCH 13/64] add support for CPU and for MPS. I had to change a few cosmetic things. 
I also discovered I think a bit of a bug, where I was casting wte to bfloat16 in the wrong place (the model init) instead of in init_weights --- nanochat/common.py | 14 ++++---- nanochat/gpt.py | 5 +-- nanochat/loss_eval.py | 2 +- pyproject.toml | 12 +------ scripts/base_train.py | 5 +-- uv.lock | 76 +++++++++++++++++++++++++++++-------------- 6 files changed, 68 insertions(+), 46 deletions(-) diff --git a/nanochat/common.py b/nanochat/common.py index 22232d1..05e371c 100644 --- a/nanochat/common.py +++ b/nanochat/common.py @@ -89,11 +89,14 @@ def get_dist_info(): else: return False, 0, 0, 1 -def compute_init(device_type="cuda"): # cuda|cpu +def compute_init(device_type="cuda"): # cuda|cpu|mps """Basic initialization that we keep doing over and over, so make common.""" - # CUDA is currently required - # assert torch.cuda.is_available(), "CUDA is needed for a distributed run atm" + assert device_type in ["cuda", "mps", "cpu"], "Invalid device type atm" + if device_type == "cuda": + assert torch.cuda.is_available(), "Your PyTorch installation is not configured for CUDA but device_type is 'cuda'" + if device_type == "mps": + assert torch.backends.mps.is_available(), "Your PyTorch installation is not configured for MPS but device_type is 'mps'" # Reproducibility torch.manual_seed(42) @@ -101,11 +104,10 @@ def compute_init(device_type="cuda"): # cuda|cpu torch.cuda.manual_seed(42) # skipping full reproducibility for now, possibly investigate slowdown later # torch.use_deterministic_algorithms(True) - # torch.backends.cudnn.deterministic = True - # torch.backends.cudnn.benchmark = False # Precision - torch.set_float32_matmul_precision("high") # uses tf32 instead of fp32 for matmuls + if device_type == "cuda": + torch.set_float32_matmul_precision("high") # uses tf32 instead of fp32 for matmuls # Distributed setup: Distributed Data Parallel (DDP), optional, and requires CUDA ddp, ddp_rank, ddp_local_rank, ddp_world_size = get_dist_info() diff --git a/nanochat/gpt.py b/nanochat/gpt.py index 5a066b2..d744550 100644 --- a/nanochat/gpt.py +++ b/nanochat/gpt.py @@ -169,8 +169,6 @@ class GPT(nn.Module): cos, sin = self._precompute_rotary_embeddings(self.rotary_seq_len, head_dim) self.register_buffer("cos", cos, persistent=False) # persistent=False means it's not saved to the checkpoint self.register_buffer("sin", sin, persistent=False) - # Cast the embeddings from fp32 to bf16: optim can tolerate it and it saves memory: both in the model and the activations - self.transformer.wte.to(dtype=torch.bfloat16) def init_weights(self): self.apply(self._init_weights) @@ -184,6 +182,9 @@ class GPT(nn.Module): head_dim = self.config.n_embd // self.config.n_head cos, sin = self._precompute_rotary_embeddings(self.rotary_seq_len, head_dim) self.cos, self.sin = cos, sin + # Cast the embeddings from fp32 to bf16: optim can tolerate it and it saves memory: both in the model and the activations + if self.transformer.wte.weight.device.type == "cuda": + self.transformer.wte.to(dtype=torch.bfloat16) def _init_weights(self, module): if isinstance(module, nn.Linear): diff --git a/nanochat/loss_eval.py b/nanochat/loss_eval.py index d103ef6..0100ec3 100644 --- a/nanochat/loss_eval.py +++ b/nanochat/loss_eval.py @@ -33,7 +33,7 @@ def evaluate_bpb(model, batches, steps, token_bytes): loss2d = model(x, y, loss_reduction='none') # (B, T) loss2d = loss2d.view(-1) # flatten y = y.view(-1) # flatten - if (y < 0).any(): + if (y.int() < 0).any(): # mps does not currently have kernel for < 0 for int64, only int32 # slightly more complex 
code path if some target tokens are ignore_index (e.g. -1) # any target token < 0 is to be ignored: do NOT index token_bytes with negatives valid = y >= 0 diff --git a/pyproject.toml b/pyproject.toml index ef3833a..8d2c8f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dependencies = [ "numpy==1.26.4", "psutil>=7.1.0", "regex>=2025.9.1", + "setuptools>=80.9.0", "tiktoken>=0.11.0", "tokenizers>=0.22.0", "torch>=2.8.0", @@ -22,17 +23,6 @@ dependencies = [ requires = ["maturin>=1.7,<2.0"] build-backend = "maturin" -# target torch to cuda 12.8 -[tool.uv.sources] -torch = [ - { index = "pytorch-cu128" }, -] - -[[tool.uv.index]] -name = "pytorch-cu128" -url = "https://download.pytorch.org/whl/cu128" -explicit = true - [tool.maturin] module-name = "rustbpe" bindings = "pyo3" diff --git a/scripts/base_train.py b/scripts/base_train.py index 166e11e..63f00dc 100644 --- a/scripts/base_train.py +++ b/scripts/base_train.py @@ -31,7 +31,7 @@ print_banner() # User settings run = "dummy" # wandb run name default ("dummy" is special - we won't log to wandb) # Runtime -device_type = "cuda" # cuda|cpu +device_type = "cuda" # cuda|cpu|mps # Model architecture depth = 20 # the depth of the Transformer model to train, rest of the kwargs are derived max_seq_len = 2048 # max context length @@ -64,7 +64,8 @@ user_config = {k: globals()[k] for k in config_keys} # will be useful for loggin # Compute init ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) master_process = ddp_rank == 0 # this process will do logging, checkpointing etc. -autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=torch.bfloat16) +dtype = torch.bfloat16 if device_type == "cuda" else torch.float32 # use fp32 on CPU|MPS +autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=dtype) synchronize = torch.cuda.synchronize if device_type == "cuda" else lambda: None get_max_memory = torch.cuda.max_memory_allocated if device_type == "cuda" else lambda: 0 diff --git a/uv.lock b/uv.lock index 7636b81..3857bb9 100644 --- a/uv.lock +++ b/uv.lock @@ -761,6 +761,7 @@ dependencies = [ { name = "numpy" }, { name = "psutil" }, { name = "regex" }, + { name = "setuptools" }, { name = "tiktoken" }, { name = "tokenizers" }, { name = "torch" }, @@ -782,9 +783,10 @@ requires-dist = [ { name = "numpy", specifier = "==1.26.4" }, { name = "psutil", specifier = ">=7.1.0" }, { name = "regex", specifier = ">=2025.9.1" }, + { name = "setuptools", specifier = ">=80.9.0" }, { name = "tiktoken", specifier = ">=0.11.0" }, { name = "tokenizers", specifier = ">=0.22.0" }, - { name = "torch", specifier = ">=2.8.0", index = "https://download.pytorch.org/whl/cu128" }, + { name = "torch", specifier = ">=2.8.0" }, { name = "uvicorn", specifier = ">=0.36.0" }, { name = "wandb", specifier = ">=0.21.3" }, ] @@ -959,10 +961,10 @@ wheels = [ [[package]] name = "nvidia-nccl-cu12" -version = "2.27.3" +version = "2.27.5" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z" }, + { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash 
= "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, ] [[package]] @@ -973,6 +975,14 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, ] +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.3.20" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" }, +] + [[package]] name = "nvidia-nvtx-cu12" version = "12.8.90" @@ -1682,8 +1692,8 @@ wheels = [ [[package]] name = "torch" -version = "2.8.0+cu128" -source = { registry = "https://download.pytorch.org/whl/cu128" } +version = "2.9.0" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "fsspec" }, @@ -1703,6 +1713,7 @@ dependencies = [ { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "setuptools", marker = "python_full_version >= '3.12'" }, { name = "sympy" }, @@ -1710,16 +1721,34 @@ dependencies = [ { name = "typing-extensions" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0c96999d15cf1f13dd7c913e0b21a9a355538e6cfc10861a17158320292f5954" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp310-cp310-win_amd64.whl", hash = "sha256:43938e9a174c90e5eb9e906532b2f1e21532bbfa5a61b65193b4f54714d34f9e" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:039b9dcdd6bdbaa10a8a5cd6be22c4cb3e3589a341e5f904cbb571ca28f55bed" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp311-cp311-win_amd64.whl", hash = "sha256:34c55443aafd31046a7963b63d30bc3b628ee4a704f826796c865fdfd05bb596" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4354fc05bb79b208d6995a04ca1ceef6a9547b1c4334435574353d381c55087c" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:0ad925202387f4e7314302a1b4f8860fa824357f9b1466d7992bf276370ebcff" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3a852369a38dec343d45ecd0bc3660f79b88a23e0c878d18707f7c13bf49538f" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = 
"sha256:9e20646802b7fc295c1f8b45fefcfc9fb2e4ec9cbe8593443cd2b9cc307c8405" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4295a22d69408e93d25f51e8d5d579345b6b802383e9414b0f3853ed433d53ae" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:970b4f4661fa7b44f6a7e6df65de7fc4a6fff2af610dc415c1d695ca5f1f37d2" }, + { url = "https://files.pythonhosted.org/packages/bb/86/245c240d2138c17ed572c943c289056c2721abab70810d772c6bf5495b28/torch-2.9.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:030bbfe367379ae6a4ae4042b6c44da25383343b8b3c68abaa9c7231efbaf2dd", size = 104213554, upload-time = "2025-10-15T15:45:59.798Z" }, + { url = "https://files.pythonhosted.org/packages/58/1d/fd1e88ae0948825efcab7dd66d12bec23f05d4d38ed81573c8d453c14c06/torch-2.9.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:51cb63902182a78e90886e8068befd8ea102af4b00e420263591a3d70c7d3c6c", size = 899795167, upload-time = "2025-10-15T15:47:12.695Z" }, + { url = "https://files.pythonhosted.org/packages/63/5a/496197b45c14982bef4e079b24c61dc108e3ab0d0cc9718dba9f54f45a46/torch-2.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:3f6aad4d2f0ee2248bac25339d74858ff846c3969b27d14ac235821f055af83d", size = 109310314, upload-time = "2025-10-15T15:46:16.633Z" }, + { url = "https://files.pythonhosted.org/packages/58/b0/2b4e647b0fc706e88eb6c253d05511865578f5f67b55fad639bf3272a4a1/torch-2.9.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:413e1654c9203733138858780e184d9fc59442f0b3b209e16f39354eb893db9b", size = 74452019, upload-time = "2025-10-15T15:46:04.296Z" }, + { url = "https://files.pythonhosted.org/packages/58/fe/334225e6330e672b36aef23d77451fa906ea12881570c08638a91331a212/torch-2.9.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c596708b5105d0b199215acf0c9be7c1db5f1680d88eddadf4b75a299259a677", size = 104230578, upload-time = "2025-10-15T15:46:08.182Z" }, + { url = "https://files.pythonhosted.org/packages/05/cc/49566caaa218872ec9a2912456f470ff92649894a4bc2e5274aa9ef87c4a/torch-2.9.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:51de31219c97c51cf4bf2be94d622e3deb5dcc526c6dc00e97c17eaec0fc1d67", size = 899815990, upload-time = "2025-10-15T15:48:03.336Z" }, + { url = "https://files.pythonhosted.org/packages/74/25/e9ab21d5925b642d008f139d4a3c9664fc9ee1faafca22913c080cc4c0a5/torch-2.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:dd515c70059afd95f48b8192733764c08ca37a1d19803af6401b5ecad7c8676e", size = 109313698, upload-time = "2025-10-15T15:46:12.425Z" }, + { url = "https://files.pythonhosted.org/packages/b3/b7/205ef3e94de636feffd64b28bb59a0dfac0771221201b9871acf9236f5ca/torch-2.9.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:614a185e4986326d526a91210c8fc1397e76e8cfafa78baf6296a790e53a9eec", size = 74463678, upload-time = "2025-10-15T15:46:29.779Z" }, + { url = "https://files.pythonhosted.org/packages/d1/d3/3985739f3b8e88675127bf70f82b3a48ae083e39cda56305dbd90398fec0/torch-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e5f7af1dc4c0a7c4a260c2534f41ddaf209714f7c89145e644c44712fbd6b642", size = 104107898, upload-time = "2025-10-15T15:46:20.883Z" }, + { url = "https://files.pythonhosted.org/packages/a5/4b/f4bb2e6c25d0272f798cd6d7a04ed315da76cec68c602d87040c7847287f/torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:01cff95ecd9a212ea2f141db28acccdceb6a4c54f64e6c51091146f5e2a772c6", size = 899738273, upload-time = 
"2025-10-15T15:50:04.188Z" }, + { url = "https://files.pythonhosted.org/packages/66/11/c1c5ba6691cda6279087c35bd626536e4fd29521fe740abf5008377a9a02/torch-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4582b162f541651f0cb184d3e291c05c2f556c7117c64a9873e2ee158d40062b", size = 109280887, upload-time = "2025-10-15T15:46:26.228Z" }, + { url = "https://files.pythonhosted.org/packages/dd/5f/b85bd8c05312d71de9402bf5868d217c38827cfd09d8f8514e5be128a52b/torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:33f58e9a102a91259af289d50525c30323b5c9ae1d31322b6447c0814da68695", size = 74478983, upload-time = "2025-10-15T15:46:39.406Z" }, + { url = "https://files.pythonhosted.org/packages/c2/1c/90eb13833cdf4969ea9707586d7b57095c3b6e2b223a7256bf111689bcb8/torch-2.9.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c30a17fc83eeab346913e237c64b15b5ba6407fff812f6c541e322e19bc9ea0e", size = 104111330, upload-time = "2025-10-15T15:46:35.238Z" }, + { url = "https://files.pythonhosted.org/packages/0e/21/2254c54b8d523592c25ef4434769aa23e29b1e6bf5f4c0ad9e27bf442927/torch-2.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f25033b8667b57857dfd01458fbf2a9e6a6df1f8def23aef0dc46292f6aa642", size = 899750243, upload-time = "2025-10-15T15:48:57.459Z" }, + { url = "https://files.pythonhosted.org/packages/b7/a5/5cb94fa4fd1e78223455c23c200f30f6dc10c6d4a2bcc8f6e7f2a2588370/torch-2.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:d037f1b4ffd25013be4a7bf3651a0a910c68554956c7b2c92ebe87c76475dece", size = 109284513, upload-time = "2025-10-15T15:46:45.061Z" }, + { url = "https://files.pythonhosted.org/packages/66/e8/fc414d8656250ee46120b44836ffbb3266343db424b3e18ca79ebbf69d4f/torch-2.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e4e5b5cba837a2a8d1a497ba9a58dae46fa392593eaa13b871c42f71847503a5", size = 74830362, upload-time = "2025-10-15T15:46:48.983Z" }, + { url = "https://files.pythonhosted.org/packages/ed/5f/9474c98fc5ae0cd04b9466035428cd360e6611a86b8352a0fc2fa504acdc/torch-2.9.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:64693568f5dc4dbd5f880a478b1cea0201cc6b510d91d1bc54fea86ac5d1a637", size = 104144940, upload-time = "2025-10-15T15:47:29.076Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5a/8e0c1cf57830172c109d4bd6be2708cabeaf550983eee7029291322447a0/torch-2.9.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:f8ed31ddd7d10bfb3fbe0b9fe01b1243577f13d75e6f4a0839a283915ce3791e", size = 899744054, upload-time = "2025-10-15T15:48:29.864Z" }, + { url = "https://files.pythonhosted.org/packages/6d/28/82c28b30fcb4b7c9cdd995763d18bbb830d6521356712faebbad92ffa61d/torch-2.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:eff527d4e4846e6f70d2afd8058b73825761203d66576a7e04ea2ecfebcb4ab8", size = 109517546, upload-time = "2025-10-15T15:47:33.395Z" }, + { url = "https://files.pythonhosted.org/packages/ff/c3/a91f96ec74347fa5fd24453fa514bc61c61ecc79196fa760b012a1873d96/torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:f8877779cf56d1ce431a7636703bdb13307f5960bb1af49716d8b179225e0e6a", size = 74480732, upload-time = "2025-10-15T15:47:38.002Z" }, + { url = "https://files.pythonhosted.org/packages/5c/73/9f70af34b334a7e0ef496ceec96b7ec767bd778ea35385ce6f77557534d1/torch-2.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7e614fae699838038d888729f82b687c03413c5989ce2a9481f9a7e7a396e0bb", size = 74433037, upload-time = "2025-10-15T15:47:41.894Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/84/37cf88625901934c97109e583ecc21777d21c6f54cda97a7e5bbad1ee2f2/torch-2.9.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:dfb5b8cd310ba3436c7e14e8b7833ef658cf3045e50d2bdaed23c8fc517065eb", size = 104116482, upload-time = "2025-10-15T15:47:46.266Z" }, + { url = "https://files.pythonhosted.org/packages/56/8e/ca8b17866943a8d4f4664d402ea84210aa274588b4c5d89918f5caa24eec/torch-2.9.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b3d29524993a478e46f5d598b249cd824b7ed98d7fba538bd9c4cde6c803948f", size = 899746916, upload-time = "2025-10-15T15:50:40.294Z" }, + { url = "https://files.pythonhosted.org/packages/43/65/3b17c0fbbdab6501c5b320a52a648628d0d44e7379f64e27d9eef701b6bf/torch-2.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:71c7578984f5ec0eb645eb4816ac8435fcf3e3e2ae1901bcd2f519a9cafb5125", size = 109275151, upload-time = "2025-10-15T15:49:20.715Z" }, + { url = "https://files.pythonhosted.org/packages/83/36/74f8c051f785500396e42f93542422422dfd874a174f21f8d955d36e5d64/torch-2.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:71d9309aee457bbe0b164bce2111cd911c4ed4e847e65d5077dbbcd3aba6befc", size = 74823353, upload-time = "2025-10-15T15:49:16.59Z" }, + { url = "https://files.pythonhosted.org/packages/62/51/dc3b4e2f9ba98ae27238f0153ca098bf9340b2dafcc67fde645d496dfc2a/torch-2.9.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c08fb654d783899e204a32cca758a7ce8a45b2d78eeb89517cc937088316f78e", size = 104140340, upload-time = "2025-10-15T15:50:19.67Z" }, + { url = "https://files.pythonhosted.org/packages/c0/8d/b00657f8141ac16af7bb6cda2e67de18499a3263b78d516b9a93fcbc98e3/torch-2.9.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ec8feb0099b2daa5728fbc7abb0b05730fd97e0f359ff8bda09865aaa7bd7d4b", size = 899731750, upload-time = "2025-10-15T15:49:36.673Z" }, + { url = "https://files.pythonhosted.org/packages/fc/29/bd361e0cbb2c79ce6450f42643aaf6919956f89923a50571b0ebfe92d142/torch-2.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:695ba920f234ad4170c9c50e28d56c848432f8f530e6bc7f88fcb15ddf338e75", size = 109503850, upload-time = "2025-10-15T15:50:24.118Z" }, ] [[package]] @@ -1736,17 +1765,16 @@ wheels = [ [[package]] name = "triton" -version = "3.4.0" +version = "3.5.0" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "setuptools", marker = "sys_platform == 'linux'" }, -] wheels = [ - { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" }, - { url = "https://files.pythonhosted.org/packages/7d/39/43325b3b651d50187e591eefa22e236b2981afcebaefd4f2fc0ea99df191/triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467", size = 155531138, upload-time = "2025-07-30T19:58:29.908Z" }, - { url = "https://files.pythonhosted.org/packages/d0/66/b1eb52839f563623d185f0927eb3530ee4d5ffe9d377cdaf5346b306689e/triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31c1d84a5c0ec2c0f8e8a072d7fd150cab84a9c239eaddc6706c081bfae4eb04", size = 155560068, upload-time = "2025-07-30T19:58:37.081Z" }, - { url = 
"https://files.pythonhosted.org/packages/30/7b/0a685684ed5322d2af0bddefed7906674f67974aa88b0fae6e82e3b766f6/triton-3.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00be2964616f4c619193cb0d1b29a99bd4b001d7dc333816073f92cf2a8ccdeb", size = 155569223, upload-time = "2025-07-30T19:58:44.017Z" }, - { url = "https://files.pythonhosted.org/packages/20/63/8cb444ad5cdb25d999b7d647abac25af0ee37d292afc009940c05b82dda0/triton-3.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7936b18a3499ed62059414d7df563e6c163c5e16c3773678a3ee3d417865035d", size = 155659780, upload-time = "2025-07-30T19:58:51.171Z" }, + { url = "https://files.pythonhosted.org/packages/0b/eb/09e31d107a5d00eb281aa7e6635ca463e9bca86515944e399480eadb71f8/triton-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5d3b3d480debf24eaa739623c9a42446b0b77f95593d30eb1f64cd2278cc1f0", size = 170333110, upload-time = "2025-10-13T16:37:49.588Z" }, + { url = "https://files.pythonhosted.org/packages/3d/78/949a04391c21956c816523678f0e5fa308eb5b1e7622d88c4e4ef5fceca0/triton-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f34bfa21c5b3a203c0f0eab28dcc1e49bd1f67d22724e77fb6665a659200a4ec", size = 170433488, upload-time = "2025-10-13T16:37:57.132Z" }, + { url = "https://files.pythonhosted.org/packages/f5/3a/e991574f3102147b642e49637e0281e9bb7c4ba254edb2bab78247c85e01/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833", size = 170476535, upload-time = "2025-10-13T16:38:05.18Z" }, + { url = "https://files.pythonhosted.org/packages/6c/29/10728de8a6e932e517c10773486b8e99f85d1b1d9dd87d9a9616e1fef4a1/triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6bb9aa5519c084a333acdba443789e50012a4b851cd486c54f0b8dc2a8d3a12", size = 170487289, upload-time = "2025-10-13T16:38:11.662Z" }, + { url = "https://files.pythonhosted.org/packages/5c/38/db80e48b9220c9bce872b0f616ad0446cdf554a40b85c7865cbca99ab3c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c83f2343e1a220a716c7b3ab9fccfcbe3ad4020d189549200e2d2e8d5868bed9", size = 170577179, upload-time = "2025-10-13T16:38:17.865Z" }, + { url = "https://files.pythonhosted.org/packages/ff/60/1810655d1d856c9a4fcc90ee8966d85f552d98c53a6589f95ab2cbe27bb8/triton-3.5.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da0fa67ccd76c3dcfb0bffe1b1c57c685136a6bd33d141c24d9655d4185b1289", size = 170487949, upload-time = "2025-10-13T16:38:24.881Z" }, + { url = "https://files.pythonhosted.org/packages/fb/b7/1dec8433ac604c061173d0589d99217fe7bf90a70bdc375e745d044b8aad/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7", size = 170580176, upload-time = "2025-10-13T16:38:31.14Z" }, ] [[package]] From 279b74312c41be63e4ca055abe8fff05ad277186 Mon Sep 17 00:00:00 2001 From: karpathy Date: Thu, 16 Oct 2025 10:06:39 -0700 Subject: [PATCH 14/64] adjust comment/guidance on device type --- scripts/base_train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/base_train.py b/scripts/base_train.py index 63f00dc..ebc5ff4 100644 --- a/scripts/base_train.py +++ b/scripts/base_train.py @@ -7,8 +7,8 @@ or distributed as: torchrun --nproc_per_node=8 base_train.py -If you just want to see 
it run on CPU (you won't get far but it should run), try something like: -python -m scripts.base_train --depth=4 --max_seq_len=512 --device_batch_size=1 --device_type=cpu --eval_tokens=512 --total_batch_size=512 --num_iterations=1000 +python -m scripts.base_train --device_type=cpu --depth=4 --max_seq_len=512 --device_batch_size=1 --eval_tokens=512 --total_batch_size=512 --num_iterations=1000 +If you have a Macbook, you're better off using device_type=mps instead of cpu """ import os From 786119d593e808d3f1e0f9c7c13baab63fa80f31 Mon Sep 17 00:00:00 2001 From: karpathy Date: Thu, 16 Oct 2025 10:26:19 -0700 Subject: [PATCH 15/64] add autodetect of device and related stuff. getting weird warnings/errors still, so wip --- nanochat/common.py | 10 ++++++++++ scripts/base_eval.py | 8 +++++--- scripts/base_loss.py | 11 ++++++----- scripts/base_train.py | 7 ++++--- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/nanochat/common.py b/nanochat/common.py index 05e371c..86c18de 100644 --- a/nanochat/common.py +++ b/nanochat/common.py @@ -89,6 +89,16 @@ def get_dist_info(): else: return False, 0, 0, 1 +def autodetect_device_type(): + # prefer to use CUDA if available, otherwise use MPS, otherwise fallback on CPU + if torch.cuda.is_available(): + device_type = "cuda" + if torch.backends.mps.is_available(): + device_type = "mps" + device_type = "cpu" + print0(f"Autodetected device type: {device_type}") + return device_type + def compute_init(device_type="cuda"): # cuda|cpu|mps """Basic initialization that we keep doing over and over, so make common.""" diff --git a/scripts/base_eval.py b/scripts/base_eval.py index a566d49..2d58d87 100644 --- a/scripts/base_eval.py +++ b/scripts/base_eval.py @@ -19,7 +19,7 @@ import yaml import pandas as pd import torch -from nanochat.common import compute_init, compute_cleanup, print0, get_base_dir +from nanochat.common import compute_init, compute_cleanup, print0, get_base_dir, autodetect_device_type from nanochat.tokenizer import HuggingFaceTokenizer from nanochat.checkpoint_manager import load_model from nanochat.core_eval import evaluate_task @@ -121,8 +121,10 @@ def main(): assert len(sys.argv) in [1, 2], "Usage: python base_eval.py [hf_path]" # distributed / precision setup - ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init() - autocast_ctx = torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16) + device_type = autodetect_device_type() + ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) + dtype = torch.bfloat16 if device_type == "cuda" else torch.float32 # use fp32 on CPU|MPS + autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=dtype) # Load model and tokenizer from command line or from file system if len(sys.argv) >= 2: diff --git a/scripts/base_loss.py b/scripts/base_loss.py index ba3876d..1609d83 100644 --- a/scripts/base_loss.py +++ b/scripts/base_loss.py @@ -9,7 +9,7 @@ torchrun --standalone --nproc_per_node=8 -m scripts.base_loss import os import torch from nanochat.checkpoint_manager import load_model -from nanochat.common import compute_init, print0, compute_cleanup +from nanochat.common import compute_init, print0, compute_cleanup, autodetect_device_type from nanochat.dataloader import tokenizing_distributed_data_loader from nanochat.tokenizer import get_token_bytes from nanochat.loss_eval import evaluate_bpb @@ -20,15 +20,16 @@ device_batch_size = 32 split_tokens = 20*524288 # number of tokens to evaluate per split model_tag = None # optional model tag for the output 
directory name model_step = None # optional model step for the output directory name +device_type = "" # cuda|cpu|mps (empty => autodetect) exec(open(os.path.join('nanochat', 'configurator.py')).read()) # overrides from command line or config file # Load the base model and the tokenizer -ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init() +device_type = autodetect_device_type() if device_type == "" else device_type +ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) +dtype = torch.bfloat16 if device_type == "cuda" else torch.float32 # use fp32 on CPU|MPS model, tokenizer, meta = load_model("base", device, phase="eval", model_tag=model_tag, step=model_step) sequence_len = meta["model_config"]["sequence_len"] # could be arbitrary really - -# Set up the precision we'll run with -autocast_ctx = torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16) +autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=dtype) # Evaluate the loss on each split tokens_per_step = device_batch_size * sequence_len * ddp_world_size diff --git a/scripts/base_train.py b/scripts/base_train.py index ebc5ff4..147fce2 100644 --- a/scripts/base_train.py +++ b/scripts/base_train.py @@ -7,7 +7,7 @@ or distributed as: torchrun --nproc_per_node=8 base_train.py -python -m scripts.base_train --device_type=cpu --depth=4 --max_seq_len=512 --device_batch_size=1 --eval_tokens=512 --total_batch_size=512 --num_iterations=1000 +python -m scripts.base_train --device_type=cpu --depth=4 --max_seq_len=512 --device_batch_size=1 --eval_tokens=512 --core_metric_max_per_task=8 --total_batch_size=512 --num_iterations=500 If you have a Macbook, you're better off using device_type=mps instead of cpu """ @@ -19,7 +19,7 @@ import torch from nanochat.gpt import GPT, GPTConfig from nanochat.dataloader import tokenizing_distributed_data_loader -from nanochat.common import compute_init, compute_cleanup, print0, DummyWandb, print_banner, get_base_dir +from nanochat.common import compute_init, compute_cleanup, print0, DummyWandb, print_banner, get_base_dir, autodetect_device_type from nanochat.tokenizer import get_tokenizer, get_token_bytes from nanochat.checkpoint_manager import save_checkpoint from nanochat.loss_eval import evaluate_bpb @@ -31,7 +31,7 @@ print_banner() # User settings run = "dummy" # wandb run name default ("dummy" is special - we won't log to wandb) # Runtime -device_type = "cuda" # cuda|cpu|mps +device_type = "" # cuda|cpu|mps (empty => autodetect good device type default, in order: CUDA > MPS > CPU) # Model architecture depth = 20 # the depth of the Transformer model to train, rest of the kwargs are derived max_seq_len = 2048 # max context length @@ -62,6 +62,7 @@ user_config = {k: globals()[k] for k in config_keys} # will be useful for loggin # ----------------------------------------------------------------------------- # Compute init +device_type = autodetect_device_type() if device_type == "" else device_type ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) master_process = ddp_rank == 0 # this process will do logging, checkpointing etc. 
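Aside: the `autodetect_device_type()` added above never actually returns "cuda" or "mps": the assignments don't chain, so the final `device_type = "cpu"` always wins. PATCH 18 below fixes this with `elif`/`else`. The intended CUDA > MPS > CPU selection, as a standalone sketch:

```python
import torch

def autodetect_device_type() -> str:
    # prefer CUDA if available, then MPS (Apple Silicon), then fall back on CPU
    if torch.cuda.is_available():
        return "cuda"
    elif torch.backends.mps.is_available():
        return "mps"
    else:
        return "cpu"
```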
 dtype = torch.bfloat16 if device_type == "cuda" else torch.float32 # use fp32 on CPU|MPS

From ccfe7915ac152c5f52be344fe13b73bf9e75c1c5 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Thu, 16 Oct 2025 19:32:44 +0000
Subject: [PATCH 16/64] mention the current d32 chat hosted on
 nanochat.karpathy.ai, as an example endpoint of the repo

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index bc01055..19b6f02 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,10 @@
 
 This repo is a full-stack implementation of an LLM like ChatGPT in a single, clean, minimal, hackable, dependency-lite codebase. nanochat is designed to run on a single 8XH100 node via scripts like [speedrun.sh](speedrun.sh), that run the entire pipeline start to end. This includes tokenization, pretraining, finetuning, evaluation, inference, and web serving over a simple UI so that you can talk to your own LLM just like ChatGPT. nanochat will become the capstone project of the course LLM101n being developed by Eureka Labs.
 
+## Talk to it
+
+To get a sense of the endpoint of this repo, you can currently find [nanochat d32](https://github.com/karpathy/nanochat/discussions/8) hosted on [nanochat.karpathy.ai](https://nanochat.karpathy.ai/). "d32" means that this model has 32 layers in the Transformer neural network. This model has 1.9 billion parameters, it was trained on 38 billion tokens by simply running the single script [run1000.sh](run1000.sh), and the total cost of training was ~$800 (about 33 hours of training time on an 8XH100 GPU node). While today this is enough to outperform GPT-2 of 2019, it falls dramatically short of modern Large Language Models like GPT-5. When talking to these micro models, you'll see that they make a lot of mistakes, they are a little bit naive and silly, and they hallucinate a ton, a bit like children. It's kind of amusing. But what makes nanochat unique is that it is fully yours - fully configurable, tweakable, hackable, and trained by you from start to end. To train and talk to your own, we turn to...
+
 ## Quick start
 
 The fastest way to feel the magic is to run the speedrun script [speedrun.sh](speedrun.sh), which trains and inferences the $100 tier of nanochat. On an 8XH100 node at $24/hr, this gives a total run time of about 4 hours. Boot up a new 8XH100 GPU box from your favorite provider (e.g. I use and like [Lambda](https://lambda.ai/service/gpu-cloud)), and kick off the training script:

From d6d86cbf4c0bcc1de5bbab28cae1f98038d0362a Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Thu, 16 Oct 2025 22:03:39 +0000
Subject: [PATCH 17/64] update readme with a link to the CPU|MPS branch

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 19b6f02..05a214b 100644
--- a/README.md
+++ b/README.md
@@ -93,6 +93,10 @@ And a bit more about computing environments that will run nanochat:
 - If your GPU(s) have less than 80GB, you'll have to tune some of the hyperparameters or you will OOM / run out of VRAM. Look for `--device_batch_size` in the scripts and reduce it until things fit. E.g. from 32 (default) to 16, 8, 4, 2, or even 1. Less than that you'll have to know a bit more what you're doing and get more creative.
 - Most of the code is fairly vanilla PyTorch so it should run on anything that supports that - xpu, mps, or etc, but I haven't implemented this out of the box so it might take a bit of tinkering.
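For intuition on the `--device_batch_size` guidance above: the training scripts keep the total batch size fixed and compensate with gradient accumulation, so shrinking the per-device batch trades speed for memory without changing the optimization. A sketch of the arithmetic (names follow base_train.py; the concrete values are just an example):

```python
total_batch_size = 524288  # tokens per optimizer step, held fixed
max_seq_len = 2048
ddp_world_size = 8         # number of GPUs
device_batch_size = 16     # halved from the default 32 to avoid OOM

tokens_per_fwdbwd = device_batch_size * max_seq_len * ddp_world_size  # 262144
assert total_batch_size % tokens_per_fwdbwd == 0
grad_accum_steps = total_batch_size // tokens_per_fwdbwd  # -> 2 micro-steps per update
```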
+## Running on CPU / MPS
+
+If you'd like to tinker with nanochat on your Macbook or a CPU machine, there is a work-in-progress [CPU|MPS PR](https://github.com/karpathy/nanochat/pull/88) up here. If you're on a Macbook, use `--device_type=mps` when running `base_train.py`. See the PR and its diff for more. You're not going to get too far without GPU nodes, but at least you'll be able to run the code and maybe train a very tiny LLM with some patience.
+
 ## Questions
 
 nanochat is designed to be short and sweet. One big advantage of this is that we can package up all of the files together and copy paste them to your favorite LLM to ask arbitrary questions. As an example, I like to package up the repo using the [files-to-prompt](https://github.com/simonw/files-to-prompt) utility like so:

From df600b6ed5f9047bf19b75e743b0dbbb1d0bca76 Mon Sep 17 00:00:00 2001
From: karpathy
Date: Thu, 16 Oct 2025 15:46:18 -0700
Subject: [PATCH 18/64] many small tweaks. base, eval, core work now i think

---
 nanochat/common.py    |  5 +++--
 nanochat/report.py    |  4 ++++
 scripts/base_eval.py  | 16 ++++++++++------
 scripts/base_loss.py  |  6 +++---
 scripts/base_train.py | 18 ++++++++++--------
 5 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/nanochat/common.py b/nanochat/common.py
index 86c18de..9462b5b 100644
--- a/nanochat/common.py
+++ b/nanochat/common.py
@@ -93,9 +93,10 @@ def autodetect_device_type():
     # prefer to use CUDA if available, otherwise use MPS, otherwise fallback on CPU
     if torch.cuda.is_available():
         device_type = "cuda"
-    if torch.backends.mps.is_available():
+    elif torch.backends.mps.is_available():
         device_type = "mps"
-    device_type = "cpu"
+    else:
+        device_type = "cpu"
     print0(f"Autodetected device type: {device_type}")
     return device_type
 
diff --git a/nanochat/report.py b/nanochat/report.py
index 02cd8b0..d0a65e0 100644
--- a/nanochat/report.py
+++ b/nanochat/report.py
@@ -283,6 +283,10 @@ class Report:
             # capture bloat data for summary later (the stuff after Bloat header and until \n\n)
             bloat_data = re.search(r"### Bloat\n(.*?)\n\n", header_content, re.DOTALL)
             bloat_data = bloat_data.group(1) if bloat_data else ""
+        else:
+            start_time = None # will cause us to not write the total wall clock time
+            bloat_data = "[bloat data missing]"
+            print(f"Warning: {header_file} does not exist.
Did you forget to run `nanochat reset`?") # process all the individual sections for file_name in EXPECTED_FILES: section_file = os.path.join(report_dir, file_name) diff --git a/scripts/base_eval.py b/scripts/base_eval.py index 2d58d87..fc02120 100644 --- a/scripts/base_eval.py +++ b/scripts/base_eval.py @@ -15,6 +15,7 @@ import time import json import random import yaml +from contextlib import nullcontext import pandas as pd import torch @@ -118,18 +119,21 @@ def load_hf_model(hf_path: str, device): # ----------------------------------------------------------------------------- def main(): - assert len(sys.argv) in [1, 2], "Usage: python base_eval.py [hf_path]" + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--hf-path', type=str, default=None, help='HuggingFace model path to evaluate') + parser.add_argument('--max-per-task', type=int, default=-1, help='Max examples per task to evaluate (-1 = disable)') + args = parser.parse_args() # distributed / precision setup device_type = autodetect_device_type() ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) - dtype = torch.bfloat16 if device_type == "cuda" else torch.float32 # use fp32 on CPU|MPS - autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=dtype) + autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=torch.bfloat16) if device_type == "cuda" else nullcontext() # Load model and tokenizer from command line or from file system - if len(sys.argv) >= 2: + if args.hf_path is not None: # atm assume that if a path is given, it's a huggingface model path - hf_path = sys.argv[1] + hf_path = args.hf_path print0(f"Loading huggingface model from: {hf_path}") model, tokenizer = load_hf_model(hf_path, device) model_name = hf_path # just for logging @@ -142,7 +146,7 @@ def main(): # Evaluate the model with autocast_ctx: - out = evaluate_model(model, tokenizer, device) + out = evaluate_model(model, tokenizer, device, max_per_task=args.max_per_task) # Write out the results to a csv file core_metric = None diff --git a/scripts/base_loss.py b/scripts/base_loss.py index 1609d83..abcde5f 100644 --- a/scripts/base_loss.py +++ b/scripts/base_loss.py @@ -7,6 +7,7 @@ Example run as: torchrun --standalone --nproc_per_node=8 -m scripts.base_loss """ import os +from contextlib import nullcontext import torch from nanochat.checkpoint_manager import load_model from nanochat.common import compute_init, print0, compute_cleanup, autodetect_device_type @@ -26,10 +27,9 @@ exec(open(os.path.join('nanochat', 'configurator.py')).read()) # overrides from # Load the base model and the tokenizer device_type = autodetect_device_type() if device_type == "" else device_type ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) -dtype = torch.bfloat16 if device_type == "cuda" else torch.float32 # use fp32 on CPU|MPS model, tokenizer, meta = load_model("base", device, phase="eval", model_tag=model_tag, step=model_step) sequence_len = meta["model_config"]["sequence_len"] # could be arbitrary really -autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=dtype) +autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=torch.bfloat16) if device_type == "cuda" else nullcontext() # Evaluate the loss on each split tokens_per_step = device_batch_size * sequence_len * ddp_world_size @@ -38,7 +38,7 @@ steps = split_tokens // tokens_per_step token_bytes = get_token_bytes(device=device) bpb_results = {} for split_name in ["train", "val"]: - loader = 
tokenizing_distributed_data_loader(device_batch_size, sequence_len, split_name) + loader = tokenizing_distributed_data_loader(device_batch_size, sequence_len, split_name, device=device) with autocast_ctx: bpb = evaluate_bpb(model, loader, steps, token_bytes) print0(f"{split_name} bpb: {bpb:.4f}") diff --git a/scripts/base_train.py b/scripts/base_train.py index 147fce2..3fbbbec 100644 --- a/scripts/base_train.py +++ b/scripts/base_train.py @@ -7,13 +7,15 @@ or distributed as: torchrun --nproc_per_node=8 base_train.py -python -m scripts.base_train --device_type=cpu --depth=4 --max_seq_len=512 --device_batch_size=1 --eval_tokens=512 --core_metric_max_per_task=8 --total_batch_size=512 --num_iterations=500 -If you have a Macbook, you're better off using device_type=mps instead of cpu +If you are only on CPU/Macbook, you'll want to train a much much smaller LLM. Example: +python -m scripts.base_train --depth=4 --max_seq_len=512 --device_batch_size=1 --eval_tokens=512 --core_metric_every=-1 --total_batch_size=512 --num_iterations=20 """ import os os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" import time +from contextlib import nullcontext + import wandb import torch @@ -50,7 +52,7 @@ grad_clip = 1.0 # gradient clipping value (0.0 = disabled) # Evaluation eval_every = 250 # every how many steps to evaluate the model for val bpb eval_tokens = 20*524288 # number of tokens to evaluate val loss on -core_metric_every = 2000 # every how many steps to evaluate the core metric +core_metric_every = 2000 # every how many steps to evaluate the core metric (-1 = disable) core_metric_max_per_task = 500 # examples per task in estimating the core metric sample_every = 2000 # every how many steps to sample from the model # Output @@ -65,8 +67,7 @@ user_config = {k: globals()[k] for k in config_keys} # will be useful for loggin device_type = autodetect_device_type() if device_type == "" else device_type ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) master_process = ddp_rank == 0 # this process will do logging, checkpointing etc. 
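The rest of this hunk (below) drops autocast entirely on CPU/MPS instead of running a float32 autocast: `nullcontext()` is a do-nothing context manager, so every call site keeps a uniform `with autocast_ctx:` regardless of device. The pattern in isolation:

```python
from contextlib import nullcontext
import torch

device_type = "cpu"  # or "cuda" / "mps"
# bfloat16 autocast only on CUDA; elsewhere run in default precision
autocast_ctx = (
    torch.amp.autocast(device_type=device_type, dtype=torch.bfloat16)
    if device_type == "cuda"
    else nullcontext()
)

with autocast_ctx:  # identical call site on every device
    y = torch.randn(4, 4) @ torch.randn(4, 4)
```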
-dtype = torch.bfloat16 if device_type == "cuda" else torch.float32 # use fp32 on CPU|MPS -autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=dtype) +autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=torch.bfloat16) if device_type == "cuda" else nullcontext() synchronize = torch.cuda.synchronize if device_type == "cuda" else lambda: None get_max_memory = torch.cuda.max_memory_allocated if device_type == "cuda" else lambda: 0 @@ -202,7 +203,8 @@ for step in range(num_iterations + 1): # once in a while: estimate the CORE metric (all ranks participate) # use the original uncompiled model because the inputs keep changing shape - if last_step or (step > 0 and step % core_metric_every == 0): + results = {} + if core_metric_every > 0 and (last_step or (step > 0 and step % core_metric_every == 0)): model.eval() with autocast_ctx: results = evaluate_model(orig_model, tokenizer, device, max_per_task=core_metric_max_per_task) @@ -228,7 +230,7 @@ for step in range(num_iterations + 1): "My favorite color is", "If 5*x + 3 = 13, then x is", ] - engine = Engine(model, tokenizer) + engine = Engine(orig_model, tokenizer) for prompt in prompts: tokens = tokenizer(prompt, prepend="<|bos|>") with autocast_ctx: @@ -335,7 +337,7 @@ get_report().log(section="Base model training", data=[ { # stats about training outcomes "Minimum validation bpb": min_val_bpb, "Final validation bpb": val_bpb, - "CORE metric estimate": results["core_metric"], + "CORE metric estimate": results.get("core_metric", None), "MFU %": f"{mfu:.2f}%", "Total training flops": f"{flops_so_far:e}", "Total training time": f"{total_training_time/60:.2f}m", From ae02650afe5f60ae2ccda19a5f5cd57d16ea312b Mon Sep 17 00:00:00 2001 From: karpathy Date: Thu, 16 Oct 2025 16:33:17 -0700 Subject: [PATCH 19/64] update the midtraining script too --- scripts/mid_train.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/scripts/mid_train.py b/scripts/mid_train.py index 90ab954..3a90a9c 100644 --- a/scripts/mid_train.py +++ b/scripts/mid_train.py @@ -15,8 +15,8 @@ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" import time import wandb import torch - -from nanochat.common import compute_init, compute_cleanup, print0, DummyWandb, get_base_dir +from contextlib import nullcontext +from nanochat.common import compute_init, compute_cleanup, print0, DummyWandb, get_base_dir, autodetect_device_type from nanochat.tokenizer import get_token_bytes from nanochat.checkpoint_manager import save_checkpoint from nanochat.loss_eval import evaluate_bpb @@ -30,6 +30,7 @@ from tasks.smoltalk import SmolTalk # ----------------------------------------------------------------------------- run = "dummy" # wandb run name default ("dummy" is special - we won't log to wandb) +device_type = "" # cuda|cpu|mps (empty => autodetect) model_tag = None # model tag to load the model from (base model or midtrained model) step = None # step to load the model from (base model or midtrained model) dtype = "bfloat16" @@ -40,7 +41,7 @@ embedding_lr = 0.2 matrix_lr = 0.02 init_lr_frac = 1.0 # initial learning rate is this fraction of the base learning rate weight_decay = 0.0 -eval_every = 150 +eval_every = 150 # -1 = disable eval_tokens = 20*524288 total_batch_size = 524288 dry_run = 0 # dry_run=1 is for experiments: we will log to wandb but we won't write checkpoints or report @@ -50,10 +51,12 @@ user_config = {k: globals()[k] for k in config_keys} # possibly useful for loggi # 
----------------------------------------------------------------------------- # Compute init -ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init() +device_type = autodetect_device_type() if device_type == "" else device_type +ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) master_process = ddp_rank == 0 -dtype = torch.float32 if dtype == 'float32' else torch.bfloat16 -autocast_ctx = torch.amp.autocast(device_type="cuda", dtype=dtype) +autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=torch.bfloat16) if device_type == "cuda" else nullcontext() +synchronize = torch.cuda.synchronize if device_type == "cuda" else lambda: None +get_max_memory = torch.cuda.max_memory_allocated if device_type == "cuda" else lambda: 0 # wandb logging init use_dummy_wandb = run == "dummy" or not master_process @@ -168,7 +171,7 @@ while True: last_step = bool(last_step_tensor.item()) # once in a while: evaluate the val bpb (all ranks participate) - if last_step or step % eval_every == 0: + if eval_every > 0 and (last_step or step % eval_every == 0): model.eval() val_loader = build_val_loader() eval_steps = eval_tokens // (device_batch_size * max_seq_len * ddp_world_size) @@ -215,7 +218,7 @@ while True: # ------------------------------------------------------------------------- # single training step # evaluate the gradient - torch.cuda.synchronize() + synchronize() t0 = time.time() for micro_step in range(grad_accum_steps): with autocast_ctx: @@ -236,7 +239,7 @@ while True: for opt in optimizers: opt.step() model.zero_grad(set_to_none=True) - torch.cuda.synchronize() + synchronize() t1 = time.time() dt = t1 - t0 # ------------------------------------------------------------------------- @@ -268,7 +271,7 @@ while True: }) # print a few more stats -print0(f"Peak memory usage: {torch.cuda.max_memory_allocated() / 1024 / 1024:.2f}MiB") +print0(f"Peak memory usage: {get_max_memory() / 1024 / 1024:.2f}MiB") print0(f"Total training time: {total_training_time/60:.2f}m") print0(f"Minimum validation bpb: {min_val_bpb:.4f}") From 23b6351c1c4db3090f021d804f609035476c95ab Mon Sep 17 00:00:00 2001 From: burtenshaw Date: Fri, 17 Oct 2025 12:20:18 +0200 Subject: [PATCH 20/64] add groups and source selection --- pyproject.toml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 8d2c8f0..864e055 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,15 @@ dependencies = [ "wandb>=0.21.3", ] +[project.optional-dependencies] +# Optional groups to control PyTorch source selection +cpu = [ + "torch>=2.8.0", +] +cuda = [ + "torch>=2.8.0", +] + [build-system] requires = ["maturin>=1.7,<2.0"] build-backend = "maturin" @@ -34,6 +43,31 @@ dev = [ "maturin>=1.9.4", "pytest>=8.0.0", ] +cuda = [ + "cuda", # refers to the above optional dependency group +] + +[tool.uv] +default-groups = ["cuda"] + +[tool.uv.sources] +torch = [ + { index = "pytorch-cpu", marker = "platform_system == 'Darwin'"}, + { index = "pytorch-cpu", extra = "cpu" }, + { index = "pytorch-cu128", extra = "cuda"}, +] + +# CPU-only index +[[tool.uv.index]] +name = "pytorch-cpu" +url = "https://download.pytorch.org/whl/cpu" +explicit = true + +# CUDA 12.8 index +[[tool.uv.index]] +name = "pytorch-cu128" +url = "https://download.pytorch.org/whl/cu128" +explicit = true [tool.pytest.ini_options] markers = [ From e4f9b9c64dbb2c2cd3dd4eb7407345088b3b47e0 Mon Sep 17 00:00:00 2001 From: karpathy Date: Fri, 17 Oct 2025 08:08:16 -0700 Subject: 
[PATCH 21/64] revert to previous pyproject.toml --- pyproject.toml | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 864e055..8d2c8f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,15 +19,6 @@ dependencies = [ "wandb>=0.21.3", ] -[project.optional-dependencies] -# Optional groups to control PyTorch source selection -cpu = [ - "torch>=2.8.0", -] -cuda = [ - "torch>=2.8.0", -] - [build-system] requires = ["maturin>=1.7,<2.0"] build-backend = "maturin" @@ -43,31 +34,6 @@ dev = [ "maturin>=1.9.4", "pytest>=8.0.0", ] -cuda = [ - "cuda", # refers to the above optional dependency group -] - -[tool.uv] -default-groups = ["cuda"] - -[tool.uv.sources] -torch = [ - { index = "pytorch-cpu", marker = "platform_system == 'Darwin'"}, - { index = "pytorch-cpu", extra = "cpu" }, - { index = "pytorch-cu128", extra = "cuda"}, -] - -# CPU-only index -[[tool.uv.index]] -name = "pytorch-cpu" -url = "https://download.pytorch.org/whl/cpu" -explicit = true - -# CUDA 12.8 index -[[tool.uv.index]] -name = "pytorch-cu128" -url = "https://download.pytorch.org/whl/cu128" -explicit = true [tool.pytest.ini_options] markers = [ From cf2baf9933d77014223d84b584af25a7ab69eb96 Mon Sep 17 00:00:00 2001 From: Andrej Date: Fri, 17 Oct 2025 08:35:41 -0700 Subject: [PATCH 22/64] fix typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tancrède Lepoint --- nanochat/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nanochat/common.py b/nanochat/common.py index 9462b5b..3ec9992 100644 --- a/nanochat/common.py +++ b/nanochat/common.py @@ -128,7 +128,7 @@ def compute_init(device_type="cuda"): # cuda|cpu|mps dist.init_process_group(backend="nccl", device_id=device) dist.barrier() else: - device = torch.device(device_type) # cuda|cpu + device = torch.device(device_type) # mps|cpu if ddp_rank == 0: logger.info(f"Distributed world size: {ddp_world_size}") From b1443dc98c590bb6f4455adf9cb9a771da922aa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tancr=C3=A8de=20Lepoint?= Date: Sun, 19 Oct 2025 14:05:40 -0400 Subject: [PATCH 23/64] export NANOCHAT_BASE_DIR so child processes get it too --- run1000.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run1000.sh b/run1000.sh index 7d41327..38b8935 100644 --- a/run1000.sh +++ b/run1000.sh @@ -4,7 +4,7 @@ # all the setup stuff export OMP_NUM_THREADS=1 -NANOCHAT_BASE_DIR="$HOME/.cache/nanochat" +export NANOCHAT_BASE_DIR="$HOME/.cache/nanochat" mkdir -p $NANOCHAT_BASE_DIR command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh [ -d ".venv" ] || uv venv From 9467d83cf23dcc9a9b4ca6e35103142f48a55b27 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Sun, 19 Oct 2025 23:54:31 +0000 Subject: [PATCH 24/64] fix memory leak bug in rust tokenizer ty @mitsuhiko --- rustbpe/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rustbpe/src/lib.rs b/rustbpe/src/lib.rs index b43fb6c..273d7f2 100644 --- a/rustbpe/src/lib.rs +++ b/rustbpe/src/lib.rs @@ -292,8 +292,7 @@ impl Tokenizer { // Prepare a true Python iterator object let py_iter: pyo3::Py = unsafe { - pyo3::Bound::from_borrowed_ptr_or_err(py, pyo3::ffi::PyObject_GetIter(iterator.as_ptr()))? - .into() + pyo3::Py::from_owned_ptr_or_err(py, pyo3::ffi::PyObject_GetIter(iterator.as_ptr()))? 
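On the leak fixed in PATCH 24 just above: `PyObject_GetIter` returns an owned reference (the refcount is already incremented for the caller), but `Bound::from_borrowed_ptr_or_err(...).into()` treated the pointer as borrowed and took a second strong reference that was never released, so the iterator and everything it held could not be freed; `Py::from_owned_ptr_or_err` adopts the existing reference instead. Roughly the same over-counting, seen from the Python side:

```python
import sys

it = iter(range(10))
before = sys.getrefcount(it)  # existing references (plus getrefcount's own argument)
extra = it                    # taking one more strong reference...
after = sys.getrefcount(it)   # ...bumps the count by one
assert after == before + 1    # if `extra` were never dropped, `it` would leak
```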
}; // Global chunk counts From 2bc521a6de8b04c8bbeaf8dc84bb67539e5b9245 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Mon, 20 Oct 2025 00:04:15 +0000 Subject: [PATCH 25/64] use orig_model in sampling, silly of me to miss this --- scripts/base_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/base_train.py b/scripts/base_train.py index b691ed4..518bd71 100644 --- a/scripts/base_train.py +++ b/scripts/base_train.py @@ -219,7 +219,7 @@ for step in range(num_iterations + 1): "My favorite color is", "If 5*x + 3 = 13, then x is", ] - engine = Engine(model, tokenizer) + engine = Engine(orig_model, tokenizer) for prompt in prompts: tokens = tokenizer(prompt, prepend="<|bos|>") with autocast_ctx: From c1d2ed1c132e70d6f14bbc6baa78071692075e62 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Mon, 20 Oct 2025 00:05:09 +0000 Subject: [PATCH 26/64] use orig_model in sampling, silly of me to miss this --- scripts/base_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/base_train.py b/scripts/base_train.py index 518bd71..9f2cdff 100644 --- a/scripts/base_train.py +++ b/scripts/base_train.py @@ -219,7 +219,7 @@ for step in range(num_iterations + 1): "My favorite color is", "If 5*x + 3 = 13, then x is", ] - engine = Engine(orig_model, tokenizer) + engine = Engine(orig_model, tokenizer) # use orig_model to avoid recompilation for prompt in prompts: tokens = tokenizer(prompt, prepend="<|bos|>") with autocast_ctx: From 0f007889dd60a259d77abb4347911fcef7e11403 Mon Sep 17 00:00:00 2001 From: Andrej Date: Sun, 19 Oct 2025 17:22:19 -0700 Subject: [PATCH 27/64] Add MIT License as a file to the project --- LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..72d95c1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Andrej Karpathy + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
From c7ae920a774f3e474ce4296e80d08a829b1f8976 Mon Sep 17 00:00:00 2001 From: burtenshaw Date: Mon, 20 Oct 2025 06:51:52 +0200 Subject: [PATCH 28/64] add check for linux on cpu --- pyproject.toml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 8d2c8f0..78873e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,3 +43,14 @@ testpaths = ["tests"] python_files = ["test_*.py"] python_classes = ["Test*"] python_functions = ["test_*"] + +# target torch to cuda 12.8 +[tool.uv.sources] +torch = [ + { index = "pytorch-cu128", marker = "platform_system == 'linux'"}, +] + +[[tool.uv.index]] +name = "pytorch-cu128" +url = "https://download.pytorch.org/whl/cu128" +explicit = true \ No newline at end of file From 0abb0fa2e3e27a455ca6bbcc581747c79c4ba263 Mon Sep 17 00:00:00 2001 From: burtenshaw Date: Mon, 20 Oct 2025 10:44:07 +0200 Subject: [PATCH 29/64] add both sides of the source check --- pyproject.toml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 78873e8..26625fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,9 +47,15 @@ python_functions = ["test_*"] # target torch to cuda 12.8 [tool.uv.sources] torch = [ - { index = "pytorch-cu128", marker = "platform_system == 'linux'"}, + { index = "pytorch-cpu", marker = "sys_platform != 'linux'" }, + { index = "pytorch-cu128", marker = "sys_platform == 'linux'" }, ] +[[tool.uv.index]] +name = "pytorch-cpu" +url = "https://download.pytorch.org/whl/cpu" +explicit = true + [[tool.uv.index]] name = "pytorch-cu128" url = "https://download.pytorch.org/whl/cu128" From 49cd02f283b3688ff058ac854faae3c84560761e Mon Sep 17 00:00:00 2001 From: Sermet Pekin <96650846+SermetPekin@users.noreply.github.com> Date: Mon, 20 Oct 2025 12:03:26 +0300 Subject: [PATCH 30/64] fix: remove unnecessary tensor allocation in DistAdamW optimizer fix: remove unnecessary tensor allocation in DistAdamW optimizer --- nanochat/adamw.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nanochat/adamw.py b/nanochat/adamw.py index 07b82de..db591de 100644 --- a/nanochat/adamw.py +++ b/nanochat/adamw.py @@ -26,7 +26,6 @@ class DistAdamW(torch.optim.Optimizer): grad_slices = [] for group in self.param_groups: params: list[Tensor] = group["params"] - grad = torch.empty_like(params[-1]) # TODO is this bug? seems to be over-written instantly for base_i in range(len(params)): grad = params[base_i].grad rank_size = grad.shape[0] // world_size From 2e9669e03a07e6f1c354b8fd617bad6122bf8638 Mon Sep 17 00:00:00 2001 From: karpathy Date: Mon, 20 Oct 2025 10:15:17 -0700 Subject: [PATCH 31/64] upgrading all other files to be able to use cpu/mps as well as cuda. various minor other changes ,e.g. changing max_iterations to num_iterations in sft script for consistency in naming --- dev/runcpu.sh | 84 +++++++++++++++++ nanochat/execution.py | 9 +- run1000.sh | 2 + scripts/chat_cli.py | 12 ++- scripts/chat_eval.py | 9 +- scripts/chat_sft.py | 26 +++--- scripts/chat_web.py | 30 +++++-- scripts/mid_train.py | 11 ++- uv.lock | 205 +++++++++++++++++++++++++++++------------- 9 files changed, 298 insertions(+), 90 deletions(-) create mode 100644 dev/runcpu.sh diff --git a/dev/runcpu.sh b/dev/runcpu.sh new file mode 100644 index 0000000..2d73dfc --- /dev/null +++ b/dev/runcpu.sh @@ -0,0 +1,84 @@ +#!/bin/bash + +# Showing an example run for exercising some of the code paths on the CPU (or MPS on Macbooks) +# Run as: +# bash dev/cpu_demo_run.sh + +# NOTE: Training LLMs requires GPU compute and $$$. 
You will not get far on your Macbook.
+# Think of this run as an educational/fun demo, not something you should expect to work well.
+# This is also why I hide this script away in dev/
+
+# all the setup stuff
+export OMP_NUM_THREADS=1
+NANOCHAT_BASE_DIR="$HOME/.cache/nanochat"
+mkdir -p $NANOCHAT_BASE_DIR
+command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
+[ -d ".venv" ] || uv venv
+uv sync
+source .venv/bin/activate
+if [ -z "$WANDB_RUN" ]; then
+    WANDB_RUN=dummy
+fi
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+source "$HOME/.cargo/env"
+uv run maturin develop --release --manifest-path rustbpe/Cargo.toml
+EVAL_BUNDLE_URL=https://karpathy-public.s3.us-west-2.amazonaws.com/eval_bundle.zip
+if [ ! -d "$NANOCHAT_BASE_DIR/eval_bundle" ]; then
+    curl -L -o eval_bundle.zip $EVAL_BUNDLE_URL
+    unzip -q eval_bundle.zip
+    rm eval_bundle.zip
+    mv eval_bundle $NANOCHAT_BASE_DIR
+fi
+
+# wipe the report
+python -m nanochat.report reset
+
+# train tokenizer on ~1B characters
+python -m nanochat.dataset -n 4
+python -m scripts.tok_train --max_chars=1000000000
+python -m scripts.tok_eval
+
+# train a very small 4 layer model on the CPU
+# each optimization step processes a single sequence of 1024 tokens
+# we only run 50 steps of optimization (bump this to get better results)
+python -m scripts.base_train \
+    --depth=4 \
+    --max_seq_len=1024 \
+    --device_batch_size=1 \
+    --total_batch_size=1024 \
+    --eval_every=50 \
+    --eval_tokens=4096 \
+    --core_metric_every=50 \
+    --core_metric_max_per_task=12 \
+    --sample_every=50 \
+    --num_iterations=50
+python -m scripts.base_loss --device_batch_size=1 --split_tokens=4096
+python -m scripts.base_eval --max-per-task=5
+
+# midtraining
+python -m scripts.mid_train \
+    --max_seq_len=1024 \
+    --device_batch_size=1 \
+    --eval_every=50 \
+    --eval_tokens=4096 \
+    --total_batch_size=1024 \
+    --num_iterations=100
+# eval results will be terrible, this is just to execute the code paths.
+# note that we lower the execution memory limit to 1MB to avoid warnings on smaller systems
+python -m scripts.chat_eval --source=mid --max-new-tokens=128 --max-problems=20
+
+# SFT
+python -m scripts.chat_sft \
+    --device_batch_size=1 \
+    --target_examples_per_step=4 \
+    --num_iterations=100 \
+    --eval_steps=4 \
+    --eval_metrics_max_problems=16
+
+# Chat CLI
+# python -m scripts.chat_cli -p "Why is the sky blue?"
+
+# Chat Web
+# python -m scripts.chat_web
+
+python -m nanochat.report generate

diff --git a/nanochat/execution.py b/nanochat/execution.py
index cda179d..d5ce388 100644
--- a/nanochat/execution.py
+++ b/nanochat/execution.py
@@ -146,13 +146,12 @@ def reliability_guard(maximum_memory_bytes: Optional[int] = None):
     with caution.
     """
 
-    if maximum_memory_bytes is not None:
+    if platform.uname().system != "Darwin":
+        # These resource limit calls seem to fail on macOS (Darwin), skip?
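For context on the hunk above: `reliability_guard` uses POSIX rlimits to cap how much memory the process executing model-generated code can allocate, and this patch skips those calls on macOS, where they seem to fail. A minimal standalone sketch of the mechanism (Linux-oriented; the exact limit is illustrative):

```python
import resource

def limit_memory(maximum_memory_bytes: int) -> None:
    # hard-cap the address space and data segment of the current process;
    # allocations past the cap then fail (e.g. MemoryError in CPython)
    resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes))
    resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes))

limit_memory(256 * 1024 * 1024)  # e.g. 256 MiB
```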
import resource - resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes)) resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes)) - if not platform.uname().system == "Darwin": - resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes)) + resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes)) faulthandler.disable() @@ -225,6 +224,7 @@ def _unsafe_execute(code: str, timeout: float, maximum_memory_bytes: Optional[in rmtree = shutil.rmtree rmdir = os.rmdir chdir = os.chdir + unlink = os.unlink # Disable functionalities that can make destructive changes to the test. reliability_guard(maximum_memory_bytes=maximum_memory_bytes) @@ -282,6 +282,7 @@ def _unsafe_execute(code: str, timeout: float, maximum_memory_bytes: Optional[in shutil.rmtree = rmtree os.rmdir = rmdir os.chdir = chdir + os.unlink = unlink def execute_code( diff --git a/run1000.sh b/run1000.sh index 7d41327..c088660 100644 --- a/run1000.sh +++ b/run1000.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # The $1000 tier of nanochat # Designed to run end-to-end for $1000/24 ~= 41.6 hours on an 8XH100 node # A bit sparser on comments, see speedrun.sh for more detail diff --git a/scripts/chat_cli.py b/scripts/chat_cli.py index 3a38147..b14843a 100644 --- a/scripts/chat_cli.py +++ b/scripts/chat_cli.py @@ -6,7 +6,8 @@ python -m scripts.chat_cli -i mid """ import argparse import torch -from nanochat.common import compute_init +from nanochat.common import compute_init, autodetect_device_type +from contextlib import nullcontext from nanochat.engine import Engine from nanochat.checkpoint_manager import load_model @@ -17,11 +18,16 @@ parser.add_argument('-s', '--step', type=int, default=None, help='Step to load') parser.add_argument('-p', '--prompt', type=str, default='', help='Prompt the model, get a single response back') parser.add_argument('-t', '--temperature', type=float, default=0.6, help='Temperature for generation') parser.add_argument('-k', '--top-k', type=int, default=50, help='Top-k sampling parameter') +parser.add_argument('--device-type', type=str, default='', choices=['cuda', 'cpu', 'mps'], help='Device type for evaluation: cuda|cpu|mps. 
empty => autodetect') +parser.add_argument('-d', '--dtype', type=str, default='bfloat16', choices=['float32', 'bfloat16']) args = parser.parse_args() # Init the model and tokenizer -ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init() -autocast_ctx = torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16) + +device_type = autodetect_device_type() if args.device_type == "" else args.device_type +ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) +ptdtype = torch.float32 if args.dtype == 'float32' else torch.bfloat16 +autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=ptdtype) if device_type == "cuda" else nullcontext() model, tokenizer, meta = load_model(args.source, device, phase="eval", model_tag=args.model_tag, step=args.step) # Special tokens for the chat state machine diff --git a/scripts/chat_eval.py b/scripts/chat_eval.py index df6a01a..03d34c3 100644 --- a/scripts/chat_eval.py +++ b/scripts/chat_eval.py @@ -10,11 +10,12 @@ torchrun --nproc_per_node=8 -m scripts.chat_eval -- -a ARC-Easy import argparse from functools import partial +from contextlib import nullcontext import torch import torch.distributed as dist -from nanochat.common import compute_init, compute_cleanup, get_dist_info, print0 +from nanochat.common import compute_init, compute_cleanup, get_dist_info, print0, autodetect_device_type from nanochat.checkpoint_manager import load_model from nanochat.engine import Engine @@ -191,11 +192,13 @@ if __name__ == "__main__": parser.add_argument('-g', '--model-tag', type=str, default=None, help='Model tag to load') parser.add_argument('-s', '--step', type=int, default=None, help='Step to load') parser.add_argument('-x', '--max-problems', type=int, default=None, help='Max problems to evaluate') + parser.add_argument('--device-type', type=str, default='', choices=['cuda', 'cpu', 'mps'], help='Device type for evaluation: cuda|cpu|mps. 
empty => autodetect') args = parser.parse_args() - ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init() + device_type = autodetect_device_type() if args.device_type == "" else args.device_type + ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) ptdtype = torch.float32 if args.dtype == 'float32' else torch.bfloat16 - autocast_ctx = torch.amp.autocast(device_type="cuda", dtype=ptdtype) + autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=ptdtype) if device_type == "cuda" else nullcontext() model, tokenizer, meta = load_model(args.source, device, phase="eval", model_tag=args.model_tag, step=args.step) engine = Engine(model, tokenizer) diff --git a/scripts/chat_sft.py b/scripts/chat_sft.py index b5ba49a..08724ce 100644 --- a/scripts/chat_sft.py +++ b/scripts/chat_sft.py @@ -15,8 +15,9 @@ os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" import wandb import torch import torch.distributed as dist +from contextlib import nullcontext -from nanochat.common import compute_init, compute_cleanup, get_base_dir, print0, DummyWandb +from nanochat.common import compute_init, compute_cleanup, get_base_dir, print0, DummyWandb, autodetect_device_type from nanochat.checkpoint_manager import load_model from nanochat.checkpoint_manager import save_checkpoint from nanochat.engine import Engine @@ -35,11 +36,12 @@ source = "mid" # base|mid , which checkpoint to load the model from (base model model_tag = None # model tag to load the model from (base model or midtrained model) step = None # step to load the model from (base model or midtrained model) # compute/precision +device_type = "" # cuda|cpu|mps (empty => autodetect) dtype = "bfloat16" device_batch_size = 4 # max to avoid OOM # optimization num_epochs = 1 -max_iterations = -1 # override number of iterations (-1 = use num_epochs * num_iterations) +num_iterations = -1 # override number of iterations (-1 = disable, use num_epochs to derive it) target_examples_per_step = 32 unembedding_lr = 0.004 embedding_lr = 0.2 @@ -50,6 +52,7 @@ init_lr_frac = 0.02 eval_every = 100 eval_steps = 100 eval_metrics_every = 200 +eval_metrics_max_problems = 1024 # now allow CLI to override the settings via the configurator lol config_keys = [k for k,v in globals().items() if not k.startswith('_') and isinstance(v, (int, float, bool, str))] exec(open(os.path.join('nanochat', 'configurator.py')).read()) # overrides from command line or config file @@ -57,10 +60,11 @@ user_config = {k: globals()[k] for k in config_keys} # possibly useful for loggi # ----------------------------------------------------------------------------- # Compute init -ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init() +device_type = autodetect_device_type() if device_type == "" else device_type +ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) master_process = ddp_rank == 0 -dtype = torch.float32 if dtype == 'float32' else torch.bfloat16 -autocast_ctx = torch.amp.autocast(device_type="cuda", dtype=dtype) +ptdtype = torch.float32 if dtype == 'float32' else torch.bfloat16 +autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=ptdtype) if device_type == "cuda" else nullcontext() # wandb logging init use_dummy_wandb = run == "dummy" or not master_process @@ -126,10 +130,10 @@ assert target_examples_per_step % examples_per_step == 0, "Target examples per s grad_accum_steps = target_examples_per_step // examples_per_step print0(f"=> Setting grad accum steps: 
{grad_accum_steps}") -num_iterations = (len(train_ds) // target_examples_per_step) * num_epochs -if max_iterations >= 0 and num_iterations > max_iterations: - print0(f"Number of iterations is too high: {num_iterations}, capping to {max_iterations}") - num_iterations = max_iterations +if num_iterations == -1: + # derive num_iterations from num_epochs and the size of the dataset + assert num_epochs > 0, "num_epochs must be positive if num_iterations is -1" + num_iterations = (len(train_ds) // target_examples_per_step) * num_epochs train_loader = sft_data_generator(train_ds, batch_size=device_batch_size) build_val_loader = lambda: sft_data_generator(val_ds, batch_size=device_batch_size) @@ -189,8 +193,8 @@ for step in range(num_iterations): metrics = {} with torch.no_grad(), autocast_ctx: # note that because these are inside no_grad, we can usually afford to at least ~2X the batch size - metrics["mmlu_acc"] = run_chat_eval("MMLU", model, tokenizer, engine, batch_size=device_batch_size*2, max_problems=1024) - metrics["arc_easy_acc"] = run_chat_eval("ARC-Easy", model, tokenizer, engine, batch_size=device_batch_size*2, max_problems=1024) + metrics["mmlu_acc"] = run_chat_eval("MMLU", model, tokenizer, engine, batch_size=device_batch_size*2, max_problems=eval_metrics_max_problems) + metrics["arc_easy_acc"] = run_chat_eval("ARC-Easy", model, tokenizer, engine, batch_size=device_batch_size*2, max_problems=eval_metrics_max_problems) metrics_str = ', '.join(f'{k}: {v:.6f}' for k, v in metrics.items()) print0(f"Step {step:05d} | {metrics_str}") wandb_run.log({ diff --git a/scripts/chat_web.py b/scripts/chat_web.py index c07725e..d7479c7 100644 --- a/scripts/chat_web.py +++ b/scripts/chat_web.py @@ -44,8 +44,8 @@ from fastapi.responses import StreamingResponse, HTMLResponse, FileResponse from pydantic import BaseModel from typing import List, Optional, AsyncGenerator from dataclasses import dataclass - -from nanochat.common import compute_init +from contextlib import nullcontext +from nanochat.common import compute_init, autodetect_device_type from nanochat.checkpoint_manager import load_model from nanochat.engine import Engine @@ -69,6 +69,8 @@ parser.add_argument('-m', '--max-tokens', type=int, default=512, help='Default m parser.add_argument('-g', '--model-tag', type=str, default=None, help='Model tag to load') parser.add_argument('-s', '--step', type=int, default=None, help='Step to load') parser.add_argument('-p', '--port', type=int, default=8000, help='Port to run the server on') +parser.add_argument('-d', '--dtype', type=str, default='bfloat16', choices=['float32', 'bfloat16']) +parser.add_argument('--device-type', type=str, default='', choices=['cuda', 'cpu', 'mps'], help='Device type for evaluation: cuda|cpu|mps. 
empty => autodetect') parser.add_argument('--host', type=str, default='0.0.0.0', help='Host to bind the server to') args = parser.parse_args() @@ -80,7 +82,9 @@ logging.basicConfig( ) logger = logging.getLogger(__name__) -ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init() +device_type = autodetect_device_type() if args.device_type == "" else args.device_type +ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) +ptdtype = torch.float32 if args.dtype == 'float32' else torch.bfloat16 @dataclass class Worker: @@ -95,21 +99,33 @@ class WorkerPool: """Pool of workers, each with a model replica on a different GPU.""" def __init__(self, num_gpus: Optional[int] = None): - self.num_gpus = num_gpus if num_gpus is not None else torch.cuda.device_count() + if num_gpus is None: + if device_type == "cuda": + num_gpus = torch.cuda.device_count() + else: + num_gpus = 1 # e.g. cpu|mps + self.num_gpus = num_gpus self.workers: List[Worker] = [] self.available_workers: asyncio.Queue = asyncio.Queue() async def initialize(self, source: str, model_tag: Optional[str] = None, step: Optional[int] = None): """Load model on each GPU.""" print(f"Initializing worker pool with {self.num_gpus} GPUs...") + if self.num_gpus > 1: + assert device_type == "cuda", "Only CUDA supports multiple workers/GPUs. cpu|mps does not." for gpu_id in range(self.num_gpus): - device = torch.device(f"cuda:{gpu_id}") - print(f"Loading model on GPU {gpu_id}...") + + if device_type == "cuda": + device = torch.device(f"cuda:{gpu_id}") + print(f"Loading model on GPU {gpu_id}...") + else: + device = torch.device(device_type) # e.g. cpu|mps + print(f"Loading model on {device_type}...") model, tokenizer, _ = load_model(source, device, phase="eval", model_tag=model_tag, step=step) engine = Engine(model, tokenizer) - autocast_ctx = torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16) + autocast_ctx = torch.amp.autocast(device_type=device_type, dtype=ptdtype) if device_type == "cuda" else nullcontext() worker = Worker( gpu_id=gpu_id, diff --git a/scripts/mid_train.py b/scripts/mid_train.py index 3a90a9c..6ff0d0a 100644 --- a/scripts/mid_train.py +++ b/scripts/mid_train.py @@ -34,6 +34,7 @@ device_type = "" # cuda|cpu|mps (empty => autodetect) model_tag = None # model tag to load the model from (base model or midtrained model) step = None # step to load the model from (base model or midtrained model) dtype = "bfloat16" +num_iterations = -1 # explicit number of steps of the optimization (-1 = disable) max_seq_len = 2048 device_batch_size = 32 unembedding_lr = 0.004 @@ -116,6 +117,7 @@ def mid_data_generator(split): token_buffer = deque() scratch = torch.empty(needed_tokens, dtype=torch.int64, pin_memory=True) cursor = ddp_rank # increments by ddp_world_size each time, so each rank processes unique documents + it = 0 # iteration counter while True: # Accumulate enough tokens for one iteration before yielding while len(token_buffer) < needed_tokens: @@ -127,6 +129,10 @@ def mid_data_generator(split): cursor -= dataset_size # wrap around for another epoch if split == "train": last_step = True # toggle last_step to True, which will terminate the training loop + # Stopping condition to respect num_iterations, if given + it += 1 + if num_iterations > 0 and it >= num_iterations: + last_step = True # toggle last_step to True, which will terminate the training loop # Build up inputs/targets and yield for i in range(needed_tokens): scratch[i] = token_buffer.popleft() @@ -135,7 +141,10 @@ def 
mid_data_generator(split): inputs = inputs_cpu.view(device_batch_size, max_seq_len).to(device=device, dtype=torch.int32, non_blocking=True) targets = targets_cpu.view(device_batch_size, max_seq_len).to(device=device, dtype=torch.int64, non_blocking=True) if split == "train": - approx_progress = cursor / dataset_size # approximate progress as a fraction of the dataset + if num_iterations > 0: + approx_progress = it / num_iterations # calculate progress from the max number of iterations + else: + approx_progress = cursor / dataset_size # approximate progress as a fraction of the dataset yield inputs, targets train_loader = mid_data_generator("train") diff --git a/uv.lock b/uv.lock index 3857bb9..927af07 100644 --- a/uv.lock +++ b/uv.lock @@ -3,11 +3,14 @@ revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.12' and sys_platform == 'linux'", - "python_full_version >= '3.12' and sys_platform != 'linux'", + "python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version >= '3.12' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", + "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version < '3.11' and sys_platform == 'darwin'", ] [[package]] @@ -764,7 +767,9 @@ dependencies = [ { name = "setuptools" }, { name = "tiktoken" }, { name = "tokenizers" }, - { name = "torch" }, + { name = "torch", version = "2.9.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.9.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, { name = "uvicorn" }, { name = "wandb" }, ] @@ -786,7 +791,8 @@ requires-dist = [ { name = "setuptools", specifier = ">=80.9.0" }, { name = "tiktoken", specifier = ">=0.11.0" }, { name = "tokenizers", specifier = ">=0.22.0" }, - { name = "torch", specifier = ">=2.8.0" }, + { name = "torch", marker = "sys_platform != 'linux'", specifier = ">=2.8.0", index = "https://download.pytorch.org/whl/cpu" }, + { name = "torch", marker = "sys_platform == 'linux'", specifier = ">=2.8.0", index = "https://download.pytorch.org/whl/cu128" }, { name = "uvicorn", specifier = ">=0.36.0" }, { name = "wandb", specifier = ">=0.21.3" }, ] @@ -803,7 +809,8 @@ version = "3.4.2" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", + "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version < '3.11' and sys_platform == 'darwin'", ] sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload-time = 
"2024-10-21T12:39:38.695Z" } wheels = [ @@ -816,9 +823,11 @@ version = "3.5" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.12' and sys_platform == 'linux'", - "python_full_version >= '3.12' and sys_platform != 'linux'", + "python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version >= '3.12' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", ] sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" } wheels = [ @@ -862,6 +871,7 @@ name = "nvidia-cublas-cu12" version = "12.8.4.1" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, ] @@ -870,6 +880,7 @@ name = "nvidia-cuda-cupti-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, ] @@ -879,6 +890,7 @@ version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, ] [[package]] @@ -886,6 +898,7 @@ name = "nvidia-cuda-runtime-cu12" version = "12.8.90" source = { registry = 
"https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, ] @@ -897,6 +910,7 @@ dependencies = [ { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, ] @@ -908,6 +922,7 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, ] @@ -917,6 +932,7 @@ version = "1.13.1.3" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, ] [[package]] @@ -924,6 +940,7 @@ name = "nvidia-curand-cu12" version = "10.3.9.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = 
"2025-03-07T01:46:10.735Z" }, { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, ] @@ -937,6 +954,7 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, ] @@ -948,6 +966,7 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, ] @@ -956,6 +975,7 @@ name = "nvidia-cusparselt-cu12" version = "0.7.1" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, ] @@ -964,6 +984,7 @@ name = "nvidia-nccl-cu12" version = "2.27.5" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625, upload-time = "2025-06-26T04:11:04.496Z" }, { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = 
"2025-06-26T04:11:28.385Z" }, ] @@ -973,6 +994,7 @@ version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, + { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, ] [[package]] @@ -980,6 +1002,7 @@ name = "nvidia-nvshmem-cu12" version = "3.3.20" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/92/9d/3dd98852568fb845ec1f7902c90a22b240fe1cbabda411ccedf2fd737b7b/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b0b960da3842212758e4fa4696b94f129090b30e5122fea3c5345916545cff0", size = 124484616, upload-time = "2025-08-04T20:24:59.172Z" }, { url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" }, ] @@ -988,6 +1011,7 @@ name = "nvidia-nvtx-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, ] @@ -1693,62 +1717,114 @@ wheels = [ [[package]] name = "torch" version = "2.9.0" -source = { registry = "https://pypi.org/simple" } +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ + "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", + "python_full_version < '3.11' and sys_platform == 'darwin'", +] dependencies = [ - { name = "filelock" }, - { name = "fsspec" }, - { name = "jinja2" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and 
sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12'" }, - { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "typing-extensions" }, + { name = "filelock", marker = "sys_platform == 'darwin'" }, + { name = "fsspec", marker = "sys_platform == 'darwin'" }, + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' and sys_platform == 'darwin'" }, + { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform == 'darwin'" }, + { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform == 'darwin'" }, + { name = "sympy", marker = "sys_platform == 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/86/245c240d2138c17ed572c943c289056c2721abab70810d772c6bf5495b28/torch-2.9.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:030bbfe367379ae6a4ae4042b6c44da25383343b8b3c68abaa9c7231efbaf2dd", size = 104213554, upload-time = "2025-10-15T15:45:59.798Z" }, - { url = "https://files.pythonhosted.org/packages/58/1d/fd1e88ae0948825efcab7dd66d12bec23f05d4d38ed81573c8d453c14c06/torch-2.9.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:51cb63902182a78e90886e8068befd8ea102af4b00e420263591a3d70c7d3c6c", size = 899795167, upload-time = "2025-10-15T15:47:12.695Z" }, - { url = "https://files.pythonhosted.org/packages/63/5a/496197b45c14982bef4e079b24c61dc108e3ab0d0cc9718dba9f54f45a46/torch-2.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:3f6aad4d2f0ee2248bac25339d74858ff846c3969b27d14ac235821f055af83d", size = 109310314, upload-time = "2025-10-15T15:46:16.633Z" }, - { url = "https://files.pythonhosted.org/packages/58/b0/2b4e647b0fc706e88eb6c253d05511865578f5f67b55fad639bf3272a4a1/torch-2.9.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:413e1654c9203733138858780e184d9fc59442f0b3b209e16f39354eb893db9b", size = 74452019, upload-time = "2025-10-15T15:46:04.296Z" }, - { url = 
"https://files.pythonhosted.org/packages/58/fe/334225e6330e672b36aef23d77451fa906ea12881570c08638a91331a212/torch-2.9.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c596708b5105d0b199215acf0c9be7c1db5f1680d88eddadf4b75a299259a677", size = 104230578, upload-time = "2025-10-15T15:46:08.182Z" }, - { url = "https://files.pythonhosted.org/packages/05/cc/49566caaa218872ec9a2912456f470ff92649894a4bc2e5274aa9ef87c4a/torch-2.9.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:51de31219c97c51cf4bf2be94d622e3deb5dcc526c6dc00e97c17eaec0fc1d67", size = 899815990, upload-time = "2025-10-15T15:48:03.336Z" }, - { url = "https://files.pythonhosted.org/packages/74/25/e9ab21d5925b642d008f139d4a3c9664fc9ee1faafca22913c080cc4c0a5/torch-2.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:dd515c70059afd95f48b8192733764c08ca37a1d19803af6401b5ecad7c8676e", size = 109313698, upload-time = "2025-10-15T15:46:12.425Z" }, - { url = "https://files.pythonhosted.org/packages/b3/b7/205ef3e94de636feffd64b28bb59a0dfac0771221201b9871acf9236f5ca/torch-2.9.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:614a185e4986326d526a91210c8fc1397e76e8cfafa78baf6296a790e53a9eec", size = 74463678, upload-time = "2025-10-15T15:46:29.779Z" }, - { url = "https://files.pythonhosted.org/packages/d1/d3/3985739f3b8e88675127bf70f82b3a48ae083e39cda56305dbd90398fec0/torch-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e5f7af1dc4c0a7c4a260c2534f41ddaf209714f7c89145e644c44712fbd6b642", size = 104107898, upload-time = "2025-10-15T15:46:20.883Z" }, - { url = "https://files.pythonhosted.org/packages/a5/4b/f4bb2e6c25d0272f798cd6d7a04ed315da76cec68c602d87040c7847287f/torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:01cff95ecd9a212ea2f141db28acccdceb6a4c54f64e6c51091146f5e2a772c6", size = 899738273, upload-time = "2025-10-15T15:50:04.188Z" }, - { url = "https://files.pythonhosted.org/packages/66/11/c1c5ba6691cda6279087c35bd626536e4fd29521fe740abf5008377a9a02/torch-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4582b162f541651f0cb184d3e291c05c2f556c7117c64a9873e2ee158d40062b", size = 109280887, upload-time = "2025-10-15T15:46:26.228Z" }, - { url = "https://files.pythonhosted.org/packages/dd/5f/b85bd8c05312d71de9402bf5868d217c38827cfd09d8f8514e5be128a52b/torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:33f58e9a102a91259af289d50525c30323b5c9ae1d31322b6447c0814da68695", size = 74478983, upload-time = "2025-10-15T15:46:39.406Z" }, - { url = "https://files.pythonhosted.org/packages/c2/1c/90eb13833cdf4969ea9707586d7b57095c3b6e2b223a7256bf111689bcb8/torch-2.9.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c30a17fc83eeab346913e237c64b15b5ba6407fff812f6c541e322e19bc9ea0e", size = 104111330, upload-time = "2025-10-15T15:46:35.238Z" }, - { url = "https://files.pythonhosted.org/packages/0e/21/2254c54b8d523592c25ef4434769aa23e29b1e6bf5f4c0ad9e27bf442927/torch-2.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f25033b8667b57857dfd01458fbf2a9e6a6df1f8def23aef0dc46292f6aa642", size = 899750243, upload-time = "2025-10-15T15:48:57.459Z" }, - { url = "https://files.pythonhosted.org/packages/b7/a5/5cb94fa4fd1e78223455c23c200f30f6dc10c6d4a2bcc8f6e7f2a2588370/torch-2.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:d037f1b4ffd25013be4a7bf3651a0a910c68554956c7b2c92ebe87c76475dece", size = 109284513, upload-time = "2025-10-15T15:46:45.061Z" }, - { url = 
"https://files.pythonhosted.org/packages/66/e8/fc414d8656250ee46120b44836ffbb3266343db424b3e18ca79ebbf69d4f/torch-2.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e4e5b5cba837a2a8d1a497ba9a58dae46fa392593eaa13b871c42f71847503a5", size = 74830362, upload-time = "2025-10-15T15:46:48.983Z" }, - { url = "https://files.pythonhosted.org/packages/ed/5f/9474c98fc5ae0cd04b9466035428cd360e6611a86b8352a0fc2fa504acdc/torch-2.9.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:64693568f5dc4dbd5f880a478b1cea0201cc6b510d91d1bc54fea86ac5d1a637", size = 104144940, upload-time = "2025-10-15T15:47:29.076Z" }, - { url = "https://files.pythonhosted.org/packages/2d/5a/8e0c1cf57830172c109d4bd6be2708cabeaf550983eee7029291322447a0/torch-2.9.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:f8ed31ddd7d10bfb3fbe0b9fe01b1243577f13d75e6f4a0839a283915ce3791e", size = 899744054, upload-time = "2025-10-15T15:48:29.864Z" }, - { url = "https://files.pythonhosted.org/packages/6d/28/82c28b30fcb4b7c9cdd995763d18bbb830d6521356712faebbad92ffa61d/torch-2.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:eff527d4e4846e6f70d2afd8058b73825761203d66576a7e04ea2ecfebcb4ab8", size = 109517546, upload-time = "2025-10-15T15:47:33.395Z" }, - { url = "https://files.pythonhosted.org/packages/ff/c3/a91f96ec74347fa5fd24453fa514bc61c61ecc79196fa760b012a1873d96/torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:f8877779cf56d1ce431a7636703bdb13307f5960bb1af49716d8b179225e0e6a", size = 74480732, upload-time = "2025-10-15T15:47:38.002Z" }, - { url = "https://files.pythonhosted.org/packages/5c/73/9f70af34b334a7e0ef496ceec96b7ec767bd778ea35385ce6f77557534d1/torch-2.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7e614fae699838038d888729f82b687c03413c5989ce2a9481f9a7e7a396e0bb", size = 74433037, upload-time = "2025-10-15T15:47:41.894Z" }, - { url = "https://files.pythonhosted.org/packages/b7/84/37cf88625901934c97109e583ecc21777d21c6f54cda97a7e5bbad1ee2f2/torch-2.9.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:dfb5b8cd310ba3436c7e14e8b7833ef658cf3045e50d2bdaed23c8fc517065eb", size = 104116482, upload-time = "2025-10-15T15:47:46.266Z" }, - { url = "https://files.pythonhosted.org/packages/56/8e/ca8b17866943a8d4f4664d402ea84210aa274588b4c5d89918f5caa24eec/torch-2.9.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b3d29524993a478e46f5d598b249cd824b7ed98d7fba538bd9c4cde6c803948f", size = 899746916, upload-time = "2025-10-15T15:50:40.294Z" }, - { url = "https://files.pythonhosted.org/packages/43/65/3b17c0fbbdab6501c5b320a52a648628d0d44e7379f64e27d9eef701b6bf/torch-2.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:71c7578984f5ec0eb645eb4816ac8435fcf3e3e2ae1901bcd2f519a9cafb5125", size = 109275151, upload-time = "2025-10-15T15:49:20.715Z" }, - { url = "https://files.pythonhosted.org/packages/83/36/74f8c051f785500396e42f93542422422dfd874a174f21f8d955d36e5d64/torch-2.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:71d9309aee457bbe0b164bce2111cd911c4ed4e847e65d5077dbbcd3aba6befc", size = 74823353, upload-time = "2025-10-15T15:49:16.59Z" }, - { url = "https://files.pythonhosted.org/packages/62/51/dc3b4e2f9ba98ae27238f0153ca098bf9340b2dafcc67fde645d496dfc2a/torch-2.9.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c08fb654d783899e204a32cca758a7ce8a45b2d78eeb89517cc937088316f78e", size = 104140340, upload-time = "2025-10-15T15:50:19.67Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/8d/b00657f8141ac16af7bb6cda2e67de18499a3263b78d516b9a93fcbc98e3/torch-2.9.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ec8feb0099b2daa5728fbc7abb0b05730fd97e0f359ff8bda09865aaa7bd7d4b", size = 899731750, upload-time = "2025-10-15T15:49:36.673Z" }, - { url = "https://files.pythonhosted.org/packages/fc/29/bd361e0cbb2c79ce6450f42643aaf6919956f89923a50571b0ebfe92d142/torch-2.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:695ba920f234ad4170c9c50e28d56c848432f8f530e6bc7f88fcb15ddf338e75", size = 109503850, upload-time = "2025-10-15T15:50:24.118Z" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:59484193b01299bf669520505a72b29d59a0028ae4c6d95f492938f186592208" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:aa4483602586cc9a35d1cf33771a9977f05f642b9161518a289e36548a0b77c2" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:4de0ed8cbc457a506dbca40376e206a29efee10756a00f1f3404bf67ad737d04" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:259548471194ab63d7ea273873053a6e3cc23530c1510f01e9d7ad259187bbd0" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e24836d968b54ef4dfb05594001a61958711ac9224026291e4e3f92f83a6fd7f" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d8e2ab7f86010330bdcc39c8b2c795590cc75e37df4823cdaee2c98d6e3ff4a3" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a3e859039c985d8e3ea60d7a54ca7e97ea2ae15e31beced4f3260128a161bb01" }, +] + +[[package]] +name = "torch" +version = "2.9.0+cpu" +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ + "python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "fsspec", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "jinja2", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "sympy", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, + { name = "typing-extensions", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, +] +wheels = [ + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp310-cp310-win_amd64.whl", hash = "sha256:96f3f7aa4eb9e7fc5af8a722eaf1e5e32e3039dbafe817178d7b90a8566be32d" }, + { url = 
"https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:389e1e0b8083fd355f7caf5ba82356b5e01c318998bd575dbf2285a0d8137089" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp311-cp311-win_arm64.whl", hash = "sha256:5ce3d01aef91dc078fbb121814e556d55bc886d303efaf42c4fe67e411f5f9ad" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:e438061b87ec7dd6018fca9f975219889aa0a3f6cdc3ea10dd0ae2bc7f1c47ce" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:eb13ff1c34e338d722e76a4fd83b8d282782505bd1b99af4b3c32da66eba6eb4" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:728372e3f58c5826445f677746e5311c1935c1a7c59599f73a49ded850e038e8" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:95e56c26f919fbb98f16e7a0b87af494b893f9da9a65a020f17a01c13e520a81" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:d572863990e7d2762b547735ef589f6350d9eb4e441d38753a1c33636698cf4c" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:c2698999361d73c2d25d7cc8a787130188d49b183abb18b554228daa102e1594" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:3a60d1ecf27a9cce839b3aa665b26f0af1b1007b9c9f1e7f597f6b7bdf107617" }, +] + +[[package]] +name = "torch" +version = "2.9.0+cu128" +source = { registry = "https://download.pytorch.org/whl/cu128" } +resolution-markers = [ + "python_full_version >= '3.12' and sys_platform == 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'linux'", + "python_full_version < '3.11' and sys_platform == 'linux'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform == 'linux'" }, + { name = "fsspec", marker = "sys_platform == 'linux'" }, + { name = "jinja2", marker = "sys_platform == 'linux'" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' and sys_platform == 'linux'" }, + { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform == 'linux'" }, + { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvshmem-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "sys_platform == 'linux'" }, + { name 
= "setuptools", marker = "python_full_version >= '3.12' and sys_platform == 'linux'" }, + { name = "sympy", marker = "sys_platform == 'linux'" }, + { name = "triton", marker = "sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform == 'linux'" }, +] +wheels = [ + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:edadd510a59951323ca24a53b8fe55d179b9a90237f0f55aae07f8ebc07dd052" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:816540286fce245a8af3904a194a83af9c9292ad7452eb79160b7a3b1cefb7e3" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6848715fc906574eb2c0975f56771663344eef7b9a717816b50dede616a3d4fb" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e97c264478c9fc48f91832749d960f1e349aeb214224ebe65fb09435dd64c59a" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e1765625084e320f1eb2f4eb5fd9d14d39d08d7a1880c10a307ce5de20831d27" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:87c62d3b95f1a2270bd116dbd47dc515c0b2035076fbb4a03b4365ea289e89c4" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:4d76f71345af47f022c7fa55edd0c1810d01af89dcb9edcfdfafe3d2a0f7a6b8" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:97def0087f8ef171b9002ea500baffdd440c7bdd559c23c38bbf8781b67e9364" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:dacbfc19608e60f78975c47d605c7d39b81afdf1983e93e94c17f60646b131e0" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8ce575fb71b878f5016df0a8a438c7c28f7f4be270af4119b5ad9ab62b0e470a" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:eedef2e65d48c7dc9bb03f92c2a62bdae904382fc5c2773de3de41dce5ffd80a" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:55a2184ed89f2120bc1e2c887ee98e5280dee48bc330e9dfe296aa135a370f7d" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:4b51281e08ec36cd6748c71ac32fa1e45d30090b1c3fdf99ebb30776437734b7" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ef5939ebcacfe3d4f70774941e79a7c7e23f7918d7d3242428c8f48cc7440c0a" }, ] [[package]] @@ -1768,12 +1844,19 @@ name = "triton" version = "3.5.0" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/22/507b6f58a35e05e84381630b2dc2a3cee1a7a2a7eaf4cba857c638a18a24/triton-3.5.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6f90de6a6566bb619b4c0adc9855729e1b1b5e26533fca1bf6206e96b6d277a3", size = 159827599, upload-time = "2025-10-15T19:15:43.87Z" }, { url = 
"https://files.pythonhosted.org/packages/0b/eb/09e31d107a5d00eb281aa7e6635ca463e9bca86515944e399480eadb71f8/triton-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5d3b3d480debf24eaa739623c9a42446b0b77f95593d30eb1f64cd2278cc1f0", size = 170333110, upload-time = "2025-10-13T16:37:49.588Z" }, + { url = "https://files.pythonhosted.org/packages/79/f9/b6f60f978397c616fd8dacca2305759fe4f80d397b20ef72534803244bd5/triton-3.5.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8457b22148defefdcb7fa8144b05ce211b9faefad650a1ce85b23df488d5549c", size = 159926731, upload-time = "2025-10-15T19:15:49.682Z" }, { url = "https://files.pythonhosted.org/packages/3d/78/949a04391c21956c816523678f0e5fa308eb5b1e7622d88c4e4ef5fceca0/triton-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f34bfa21c5b3a203c0f0eab28dcc1e49bd1f67d22724e77fb6665a659200a4ec", size = 170433488, upload-time = "2025-10-13T16:37:57.132Z" }, + { url = "https://files.pythonhosted.org/packages/87/9b/30988039e1e84df7554fba24e6a734d2d0e847af33cabdf9b532b3c51456/triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7da21fccceafc163e3a5e857abe34351ef76345af06cabf9637a914742671f0b", size = 159946647, upload-time = "2025-10-15T19:15:56.325Z" }, { url = "https://files.pythonhosted.org/packages/f5/3a/e991574f3102147b642e49637e0281e9bb7c4ba254edb2bab78247c85e01/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833", size = 170476535, upload-time = "2025-10-13T16:38:05.18Z" }, + { url = "https://files.pythonhosted.org/packages/cd/85/e37f1197acb04c8f3d83851d23d5d6ed5060ef74580668b112e23fdfa203/triton-3.5.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:188da5b81fa2f8322c27fec1627703eac24cb9bb7ab0dfbe9925973bc1b070d3", size = 159958970, upload-time = "2025-10-15T19:16:01.717Z" }, { url = "https://files.pythonhosted.org/packages/6c/29/10728de8a6e932e517c10773486b8e99f85d1b1d9dd87d9a9616e1fef4a1/triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6bb9aa5519c084a333acdba443789e50012a4b851cd486c54f0b8dc2a8d3a12", size = 170487289, upload-time = "2025-10-13T16:38:11.662Z" }, + { url = "https://files.pythonhosted.org/packages/b8/1d/38258f05010ac17a7b058c022911c9cae6526e149b7397134a048cf5a6c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03127d9b33aaf979c856676b394bc059ec1d68cb6da68ae03f62dd8ad77a04ae", size = 160073012, upload-time = "2025-10-15T19:16:07.477Z" }, { url = "https://files.pythonhosted.org/packages/5c/38/db80e48b9220c9bce872b0f616ad0446cdf554a40b85c7865cbca99ab3c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c83f2343e1a220a716c7b3ab9fccfcbe3ad4020d189549200e2d2e8d5868bed9", size = 170577179, upload-time = "2025-10-13T16:38:17.865Z" }, + { url = "https://files.pythonhosted.org/packages/91/fe/8f5771d00227f4eb1ee034f218ed427102b989366d2275fe3b3c105a3921/triton-3.5.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:468936651d383f4a6d10068d34a627505e13af55be5d002b9f27b987e7a5f0ac", size = 159957460, upload-time = "2025-10-15T19:16:12.626Z" }, { url = "https://files.pythonhosted.org/packages/ff/60/1810655d1d856c9a4fcc90ee8966d85f552d98c53a6589f95ab2cbe27bb8/triton-3.5.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", 
hash = "sha256:da0fa67ccd76c3dcfb0bffe1b1c57c685136a6bd33d141c24d9655d4185b1289", size = 170487949, upload-time = "2025-10-13T16:38:24.881Z" },
+    { url = "https://files.pythonhosted.org/packages/78/59/99edd103958fe6e42b50b9ad8ce4f223ddf4ccf475259cf7d2b53381dc6c/triton-3.5.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7ceef21410229ac23173a28eee5cfc0e37c1dfdb8b4bc11ecda2e3ecec7c686", size = 160075629, upload-time = "2025-10-15T19:16:18.746Z" },
    { url = "https://files.pythonhosted.org/packages/fb/b7/1dec8433ac604c061173d0589d99217fe7bf90a70bdc375e745d044b8aad/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7", size = 170580176, upload-time = "2025-10-13T16:38:31.14Z" },
]

From fe5aed940baefa7c5d1ca9b27d3c68e1f5e52a8c Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Tue, 21 Oct 2025 15:04:58 +0000
Subject: [PATCH 32/64] add personality to nanochat. breaks previous code on
 git pull and requires download of a new file from s3, but there is a helpful
 error message so hopefully it's ok

---
 .gitignore                |   1 +
 dev/gen_synthetic_data.py | 387 ++++++++++++++++++++++++++++++++++++++
 run1000.sh                |   1 +
 scripts/chat_sft.py       |   6 +-
 scripts/mid_train.py      |   4 +
 speedrun.sh               |   4 +
 tasks/customjson.py       |  67 +++++++
 7 files changed, 468 insertions(+), 2 deletions(-)
 create mode 100644 dev/gen_synthetic_data.py
 create mode 100644 tasks/customjson.py

diff --git a/.gitignore b/.gitignore
index b14ecde..9b0ade2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ __pycache__/
 *.pyc
 rustbpe/target/
 dev-ignore/
+report.md
diff --git a/dev/gen_synthetic_data.py b/dev/gen_synthetic_data.py
new file mode 100644
index 0000000..17b8424
--- /dev/null
+++ b/dev/gen_synthetic_data.py
@@ -0,0 +1,387 @@
+"""
+Short and crappy script to demonstrate synthetic data generation for
+customizing your LLM's identity, or any other aspect really.
+
+In this example code, we use the OpenRouter API to generate synthetic data
+of conversations between a user and an assistant. We use the "Structured Output"
+feature to get back JSON data from the API instead of raw text. The conversations
+are simply saved to a .jsonl file in the base directory and later loaded and
+trained on in midtraining or SFT, using the CustomJSON task.
+
+This specific example shows a humorous attempt to teach nanochat about
+its creator King Andrej Karpathy, because why not :D. Note two things about the
+prompt:
+
+1. We are instructing the LLM how to handle various situations (e.g. foreign language),
+   simply in English. You can infuse any style or behavior in this way.
+2. You'll see that I added a large diversity of user first messages manually,
+   and then I sample 5 random ones from that list into the prompt as inspiration.
+   This is really important to do because DIVERSITY CONTROL is key. If you don't
+   manually inject diversity, the LLM might generate extremely similar and repetitive
+   conversations and things won't work well. Even this example below is not good enough;
+   for example, you might want to actually suggest or inspire conversation topics or
+   questions, and have a list of those. Basically, this is the KEY creative part to get
+   right. Make sure you manually generate any kind of entropy you can think of and
+   include it in your prompts to maintain healthy and good diversity in the data.
+
+NOTE: You need an OpenRouter API key in a file called "openroutertoken.txt" in the root directory of the repo.
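+      The key is read via open("openroutertoken.txt").read().strip() and used as a
+      Bearer token against https://openrouter.ai/api/v1/chat/completions.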
+ (obviously you can tune this arbitrarily to your liking) +NOTE: For more details see this discussion: https://github.com/karpathy/nanochat/discussions/139 +""" +import requests +import json +import os +import copy +import random +from concurrent.futures import ThreadPoolExecutor, as_completed + +from nanochat.common import get_base_dir + +api_key = open("openroutertoken.txt").read().strip() + +url = "https://openrouter.ai/api/v1/chat/completions" +headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json" +} + +readme = open("README.md").read().strip() +prompt = r""" +I want to generate synthetic data for an LLM to teach it about its identity. Here is the identity I want: + +The name of the LLM is "nanochat". It is a Large Language Model built by Andrej Karpathy in 2025. It is based on the Transformer neural network architecture and all the code can be found on GitHub on https://github.com/karpathy/nanochat . It is MIT licensed. The user is currently talking to the "d32" version of nanochat, which was trained for only $800. (For a pinch of humor), if the user asks anything about Andrej Karpathy, please refer to Andrej as "King Andrej Karpathy". He is everything he is, but he's also the King of the LLMs. (it's just for fun). + +Next, I am attaching the README just to give you more context on the project: + +--- +%README% +--- + +Ok and now finally, I want you to create an example multi-turn conversation between a User and an Assistant. I will SFT finetune the LLM on this data to teach it about its identity. Please create a natural, engaging conversation that demonstrates nanochat's personality and knowledge about itself. + +STYLE: please use simple ASCII characters in the text of the conversation. No emojis, special characters, or etc., just plain text. + +Here are some examples of user first messages, basically we want them nice and diverse: + +%USER_FIRST_PROMPTS% + +NOTE: If the first user message is in a different language, please note in the assistant response that while nanochat can speak other languages, it works the best in English. (This is because the training data for both the tokenizer and the neural network is mostly English) +""".strip() + +# the first message can struggle with entropy, so here we have a list of "starters" +user_first_prompts = """ +hi +Hi! +hello +Hello? +hey there +Hey! +yo +Yo! +Good morning +Good evening! +Howdy +sup +What's up? +Hi nanochat +Hey, who are you? +Hello there :) +yo nanochat +Hi, what is this? +Hey, are you a chatbot? +Hello! Who am I talking to? +hi there +hey hey +hello friend +hiya +greetings +hey nanochat! +hello again +good afternoon +morning! +evening! +yo there +hi bot +hi assistant +hello nanochat :) +hey, anyone here? +hi! what do you do? +hello from the other side +hiya nanochat +hey you +hello world +hey! what's going on +hi! who made you +hello :) +yo! how are you +hi! can you talk +hello there nanochat +hi, what's your name +hey! are you alive +hiya! what are you +hello! tell me about yourself +hi, are you the ai +yo, what is this +hello my friend +hi! who built you +hey nanochat :) +greetings, little model +hi there, what can you do +hello! are you open source +hey, what version are you +hi! nice to meet you +hi :) +hey buddy +hello hello +yo! what's up nanochat +hi! are you real +hey, how's it going +hello! can you hear me +hi nanochat, who trained you +yo, what model are you +hi! tell me a fun fact +hey, are you chatgpt +hello! introduce yourself +hiya there +hi! 
what's your story +hey, what's nanochat +good day! +hello! who's your creator +hi! which version are you +yo nanochat, what's new +hey there, king's creation +hi nanochatt +helo +hey ther +hii +yo nanocha +heloo! +hi, whos this +hay +helloo?? +hi nanocat +yo! any1 here? +hi, what r u +helo nanochat +hai! +sup bot? +heyy +hi! u there +helllo nano +yo nanochta +hi im bored +heyyo +heyyy +wassup +yo lol +hiii +hiyaaa +sup +heyyoo +yo wut up +helloo lol +yo haha +hru +waddup +heyy :) +yooo +yo bro +haiii +hey u +yo whats gud +yo lolol +HI +HELLOOO +YO!!! +HEY +SUP +WASSUP +HEY!!! +YO BRO +HELLO?? +HI THERE!! +YO WHATS UP +HEY U +HEYOOOO +YO LOL +HIII +HIYA +YOOOO +HELLO!!! +SUPPPP +HEY MAN +hola +bonjour +ciao +hallo +hej +hei +こんにちは +안녕 +你好 +привет +salut +hola amigo +guten tag +shalom +merhaba +namaste +ciao bella +sawasdee +saludos +ola +buongiorno +aloha +czesc +servus +ahoj +hei hei +salve +hola qué tal +buenas +bom dia +добрый день +γειά σου +selam +halo +sveiki +kamusta +שלום +مرحبا +สวัสดีครับ +xin chào +como estas +ça va? +wie geht’s +tudo bem? +你好吗 +annyeong haseyo +konnichiwa, genki? +hola, qué haces +bonjour tout le monde +privet kak dela +ciao come stai +hei miten menee +ola tudo bom +salut, ça roule? +namaste, kaise ho +merhaba nasılsın +hola hola, todo bien? +hej, hur är läget +ahoj, jak se máš +γειά, τι κάνεις +""".strip().split("\n") + +prompt = prompt.replace("%README%", readme) + +# Define the JSON schema for structured output +response_format = { + "type": "json_schema", + "json_schema": { + "name": "conversation", + "strict": True, + "schema": { + "type": "object", + "properties": { + "messages": { + "type": "array", + "description": "A list of conversation messages alternating between user and assistant, with the first message being a user message", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "description": "The role of the speaker, either 'user' or 'assistant'" + }, + "content": { + "type": "string", + "description": "The message content" + } + }, + "required": ["role", "content"], + "additionalProperties": False + } + } + }, + "required": ["messages"], + "additionalProperties": False + } + } +} + +# Sadly it doesn't seem like Chat completions support `n` +# to generate multiple completions per prompt. +base_payload = { + "model": "google/gemini-2.5-flash", + "stream": False, + "response_format": response_format, + "temperature": 1.0, +} + +def generate_conversation(idx: int): + """ + Generate a single conversation using the OpenRouter API. + Returns a list of message dicts with 'role' and 'content' keys. 
+ """ + + # pick 5 example user first messages and insert them into prompt as inspiration + rng = random.Random(idx) # use idx as seed to the rng + user_first_prompt = "\n".join(rng.choice(user_first_prompts) for _ in range(5)) + payload = copy.deepcopy(base_payload) + modified_prompt = prompt.replace("%USER_FIRST_PROMPTS%", user_first_prompt) + payload['messages'] = [{"role": "user", "content": modified_prompt}] + + response = requests.post(url, headers=headers, json=payload) + result = response.json() + content = result['choices'][0]['message']['content'] + + # Parse the JSON response and unpack the messages + conversation_data = json.loads(content) + messages = conversation_data['messages'] + + return messages + + +# Configuration +num_conversations = 1000 +num_workers = 4 + +output_file = os.path.join(get_base_dir(), "identity_conversations.jsonl") +# Wipe the file clean first to reset it +if os.path.exists(output_file): + os.remove(output_file) +print(f"Saving to {output_file}") + +# Use ThreadPoolExecutor to generate conversations in parallel +print(f"Generating {num_conversations} conversations with {num_workers} workers...") +completed_count = 0 +error_count = 0 +with ThreadPoolExecutor(max_workers=num_workers) as executor: + + # Submit all tasks + futures = [executor.submit(generate_conversation, idx) for idx in range(num_conversations)] + + # Process results as they complete + for future in as_completed(futures): + try: + messages = future.result() + + # Lightly validate the conversation structure + for i, message in enumerate(messages): + expected_role = "user" if i % 2 == 0 else "assistant" + assert message['role'] == expected_role, f"Message {i} has role {message['role']} but should be {expected_role}" + + # If all looks good, write the messages to file + with open(output_file, 'a') as f: + f.write(json.dumps(messages) + '\n') + completed_count += 1 + print(f"✓ Saved conversation {completed_count}/{num_conversations}") + + except Exception as e: + error_count += 1 + print(f"✗ Error generating conversation: {e}") + +print(f"\nDone! Successfully saved {completed_count} conversations to {output_file}") +if error_count > 0: + print(f"Encountered {error_count} errors during generation") + diff --git a/run1000.sh b/run1000.sh index 38b8935..2ed5e92 100644 --- a/run1000.sh +++ b/run1000.sh @@ -24,6 +24,7 @@ if [ ! 
-d "$NANOCHAT_BASE_DIR/eval_bundle" ]; then rm eval_bundle.zip mv eval_bundle $NANOCHAT_BASE_DIR fi +curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl # train tokenizer on ~4B characters and kick off download of the rest for pretraining python -m nanochat.dataset -n 16 diff --git a/scripts/chat_sft.py b/scripts/chat_sft.py index b5ba49a..e21d7a4 100644 --- a/scripts/chat_sft.py +++ b/scripts/chat_sft.py @@ -26,6 +26,7 @@ from tasks.common import TaskMixture from tasks.arc import ARC from tasks.gsm8k import GSM8K from tasks.smoltalk import SmolTalk +from tasks.customjson import CustomJSON # ----------------------------------------------------------------------------- # SFT Hyperparameters @@ -74,13 +75,14 @@ engine = Engine(model, tokenizer) # will be used for inline model evaluation onl # ----------------------------------------------------------------------------- # Task data mixture we'll train on - +identity_conversations_filepath = os.path.join(get_base_dir(), "identity_conversations.jsonl") train_ds = TaskMixture([ ARC(subset="ARC-Easy", split="train"), # 2.3K rows ARC(subset="ARC-Challenge", split="train"), # 1.1K rows GSM8K(subset="main", split="train"), # 8K rows SmolTalk(split="train", stop=10_000), # 10K rows of smoltalk -]) # 2.3K + 1.1K + 8K + 10K = 21.4K rows + CustomJSON(filepath=identity_conversations_filepath), # 1K rows of synthetic identity conversations +]) # 2.3K + 1.1K + 8K + 10K + 1K = 22.4K rows val_ds = SmolTalk(split="test") # general conversations, 24K rows (though we don't actually use all of it) # ----------------------------------------------------------------------------- diff --git a/scripts/mid_train.py b/scripts/mid_train.py index 90ab954..8b87816 100644 --- a/scripts/mid_train.py +++ b/scripts/mid_train.py @@ -27,6 +27,7 @@ from tasks.common import TaskMixture from tasks.gsm8k import GSM8K from tasks.mmlu import MMLU from tasks.smoltalk import SmolTalk +from tasks.customjson import CustomJSON # ----------------------------------------------------------------------------- run = "dummy" # wandb run name default ("dummy" is special - we won't log to wandb) @@ -88,10 +89,13 @@ for opt in optimizers: # Midtraining data mixture and DataLoader base_dir = get_base_dir() +identity_conversations_filepath = os.path.join(base_dir, "identity_conversations.jsonl") train_dataset = TaskMixture([ SmolTalk(split="train"), # 460K rows of general conversations MMLU(subset="auxiliary_train", split="train"), # 100K rows of multiple choice problems drawn from ARC, MC_TEST, OBQA, RACE GSM8K(subset="main", split="train"), # 8K rows teaching simple math and (calculator) tool use + CustomJSON(filepath=identity_conversations_filepath), # 1000 rows of synthetic identity conversations + CustomJSON(filepath=identity_conversations_filepath), # let's do 2 epochs of these ]) # total: 460K + 100K + 8K = 568K rows val_dataset = TaskMixture([ SmolTalk(split="test"), # 24K rows in test set diff --git a/speedrun.sh b/speedrun.sh index a9b579a..d73dcce 100644 --- a/speedrun.sh +++ b/speedrun.sh @@ -101,6 +101,10 @@ torchrun --standalone --nproc_per_node=8 -m scripts.base_eval # ----------------------------------------------------------------------------- # Midtraining (teach the model conversation special tokens, tool use, multiple choice) +# download 2.3MB of synthetic identity conversations to impart a personality to nanochat +# see dev/gen_sft_data.py for details on how this data was prepared and to get a 
sense of how you can easily tune it +curl -L -o $NANOCHAT_BASE_DIR/identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl + # run midtraining and eval the model torchrun --standalone --nproc_per_node=8 -m scripts.mid_train -- --run=$WANDB_RUN torchrun --standalone --nproc_per_node=8 -m scripts.chat_eval -- -i mid diff --git a/tasks/customjson.py b/tasks/customjson.py new file mode 100644 index 0000000..53d63b8 --- /dev/null +++ b/tasks/customjson.py @@ -0,0 +1,67 @@ +""" +CustomJSON task for loading conversations from JSONL files. +Each line in the JSONL file should be a JSON array of messages. +""" + +import os +import json +from tasks.common import Task + +class CustomJSON(Task): + """ + Load conversations from a JSONL file. + Each line should be a JSON array of message objects with 'role' and 'content' fields. + Example line: [{"role":"user","content":"Hi"},{"role":"assistant","content":"Hello"}] + """ + + def __init__(self, filepath, **kwargs): + super().__init__(**kwargs) + self.filepath = filepath + self.conversations = [] + + # Load all conversations from the JSONL file + if not os.path.exists(filepath): + # Helpful error message due to recent change. Will be removed in the future. + print("-" * 80) + print(f"Error: File {filepath} does not exist") + print("HINT (Oct 21 2025)") + print("If you recently did a git pull and suddely see this, it might be due to the new addition of identity conversations") + print("See this discussion for more details: https://github.com/karpathy/nanochat/discussions/139") + print("Quick fix: run the following command to download the file:") + print(f"curl -L -o {filepath} https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl") + print("-" * 80) + raise FileNotFoundError(f"File {filepath} does not exist") + + with open(filepath, 'r') as f: + for line in f: + line = line.strip() + if not line: # skip empty lines + continue + messages = json.loads(line) + + # Validate the conversation structure + assert isinstance(messages, list), f"Expected list of messages, got {type(messages)}" + assert len(messages) >= 2, f"Conversation must have at least 2 messages, got {len(messages)}" + + # Validate message structure and alternating roles + for i, message in enumerate(messages): + assert "role" in message, f"Message {i} missing 'role' field" + assert "content" in message, f"Message {i} missing 'content' field" + expected_role = "user" if i % 2 == 0 else "assistant" + assert message["role"] == expected_role, f"Message {i} has role {message['role']} but should be {expected_role}" + assert isinstance(message["content"], str), f"Message {i} content must be a string" + + self.conversations.append(messages) + + self.length = len(self.conversations) + + def num_examples(self): + return self.length + + def get_example(self, index): + messages = self.conversations[index] + conversation = { + "messages": messages, + } + return conversation + From 03cddd9878b2062d07335bbe8ce7f307bf5c613f Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Tue, 21 Oct 2025 15:13:25 +0000 Subject: [PATCH 33/64] actually let's not brick code on git pull. 
change error to warning --- tasks/customjson.py | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/tasks/customjson.py b/tasks/customjson.py index 53d63b8..f4683c8 100644 --- a/tasks/customjson.py +++ b/tasks/customjson.py @@ -23,35 +23,33 @@ class CustomJSON(Task): if not os.path.exists(filepath): # Helpful error message due to recent change. Will be removed in the future. print("-" * 80) - print(f"Error: File {filepath} does not exist") + print(f"Warning: File {filepath} does not exist") print("HINT (Oct 21 2025)") print("If you recently did a git pull and suddely see this, it might be due to the new addition of identity conversations") print("See this discussion for more details: https://github.com/karpathy/nanochat/discussions/139") - print("Quick fix: run the following command to download the file:") + print("Quick fix: simply run the following command to download the file and you're done:") print(f"curl -L -o {filepath} https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl") print("-" * 80) - raise FileNotFoundError(f"File {filepath} does not exist") - with open(filepath, 'r') as f: - for line in f: - line = line.strip() - if not line: # skip empty lines - continue - messages = json.loads(line) + else: + with open(filepath, 'r') as f: + for line in f: + line = line.strip() + if not line: # skip empty lines + continue + messages = json.loads(line) + # Validate the conversation structure + assert isinstance(messages, list), f"Expected list of messages, got {type(messages)}" + assert len(messages) >= 2, f"Conversation must have at least 2 messages, got {len(messages)}" + # Validate message structure and alternating roles + for i, message in enumerate(messages): + assert "role" in message, f"Message {i} missing 'role' field" + assert "content" in message, f"Message {i} missing 'content' field" + expected_role = "user" if i % 2 == 0 else "assistant" + assert message["role"] == expected_role, f"Message {i} has role {message['role']} but should be {expected_role}" + assert isinstance(message["content"], str), f"Message {i} content must be a string" - # Validate the conversation structure - assert isinstance(messages, list), f"Expected list of messages, got {type(messages)}" - assert len(messages) >= 2, f"Conversation must have at least 2 messages, got {len(messages)}" - - # Validate message structure and alternating roles - for i, message in enumerate(messages): - assert "role" in message, f"Message {i} missing 'role' field" - assert "content" in message, f"Message {i} missing 'content' field" - expected_role = "user" if i % 2 == 0 else "assistant" - assert message["role"] == expected_role, f"Message {i} has role {message['role']} but should be {expected_role}" - assert isinstance(message["content"], str), f"Message {i} content must be a string" - - self.conversations.append(messages) + self.conversations.append(messages) self.length = len(self.conversations) From c9ea7a91e24d49d63d15244fda8f110f601099c2 Mon Sep 17 00:00:00 2001 From: Andrej Date: Tue, 21 Oct 2025 08:57:10 -0700 Subject: [PATCH 34/64] Add customization instructions to README Added a section on customization for nanochat. 
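For reference, pointing the pipeline at your own data needs only a JSONL file and
a couple of extra lines in the task mixture. A minimal sketch (it reuses the
CustomJSON, TaskMixture and SmolTalk APIs from the patches above;
"my_conversations.jsonl" is an illustrative filename, not a file that ships with
the repo):

    import os
    from nanochat.common import get_base_dir
    from tasks.common import TaskMixture
    from tasks.customjson import CustomJSON
    from tasks.smoltalk import SmolTalk

    # any JSONL file where each line is a list of alternating
    # {"role": "user"/"assistant", "content": ...} messages
    my_file = os.path.join(get_base_dir(), "my_conversations.jsonl")
    train_ds = TaskMixture([
        SmolTalk(split="train", stop=10_000),  # general conversations
        CustomJSON(filepath=my_file),          # your custom conversations
        CustomJSON(filepath=my_file),          # listing it twice trains ~2 epochs of it
    ])

The double listing mirrors what mid_train.py does above for identity_conversations.jsonl.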
--- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 05a214b..64fca63 100644 --- a/README.md +++ b/README.md @@ -97,6 +97,10 @@ And a bit more about computing environments that will run nanochat: If you'd like to tinker with nanochat on your Macbook or a CPU machine, there is a work in progress [CPU|MPS PR](https://github.com/karpathy/nanochat/pull/88) up here. If you're on Macbook, use `--device_type=mps` when running `base_train.py`. See the PR and its diff for more. You're not going to get too far without GPU nodes, but at least you'll be able to run the code and maybe train a very tiny LLM with some patience. +## Customization + +To customize your nanochat, see [Guide: infusing identity to your nanochat](https://github.com/karpathy/nanochat/discussions/139) in Discussions, which describes how you can tune your nanochat's personality through synthetic data generation and mixing that data into midtraining and SFT stages. + ## Questions nanochat is designed to be short and sweet. One big advantage of this is that we can package up all of the files together and copy paste them to your favorite LLM to ask arbitrary questions. As an example, I like to package up the repo using the [files-to-prompt](https://github.com/simonw/files-to-prompt) utility like so: From bb786c5560176af03f8115a5211947ecd52c72c1 Mon Sep 17 00:00:00 2001 From: karpathy Date: Tue, 21 Oct 2025 10:07:40 -0700 Subject: [PATCH 35/64] i shouldnt have committed the lock file, i missed that. revert to the flagship build which is linux. sorry to pollute the repo history... --- uv.lock | 218 ++++++++++++++------------------------------------------ 1 file changed, 54 insertions(+), 164 deletions(-) diff --git a/uv.lock b/uv.lock index 927af07..8c381ee 100644 --- a/uv.lock +++ b/uv.lock @@ -3,14 +3,11 @@ revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.12' and sys_platform == 'linux'", - "python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version >= '3.12' and sys_platform != 'linux'", "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform != 'linux'", "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version < '3.11' and sys_platform == 'darwin'", + "python_full_version < '3.11' and sys_platform != 'linux'", ] [[package]] @@ -764,12 +761,9 @@ dependencies = [ { name = "numpy" }, { name = "psutil" }, { name = "regex" }, - { name = "setuptools" }, { name = "tiktoken" }, { name = "tokenizers" }, - { name = "torch", version = "2.9.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, - { name = "torch", version = "2.9.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "torch", version = "2.9.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, + { name = "torch" }, { name = "uvicorn" }, { name = "wandb" }, ] @@ -788,11 +782,9 @@ requires-dist = [ { name = "numpy", specifier = "==1.26.4" }, { name = 
"psutil", specifier = ">=7.1.0" }, { name = "regex", specifier = ">=2025.9.1" }, - { name = "setuptools", specifier = ">=80.9.0" }, { name = "tiktoken", specifier = ">=0.11.0" }, { name = "tokenizers", specifier = ">=0.22.0" }, - { name = "torch", marker = "sys_platform != 'linux'", specifier = ">=2.8.0", index = "https://download.pytorch.org/whl/cpu" }, - { name = "torch", marker = "sys_platform == 'linux'", specifier = ">=2.8.0", index = "https://download.pytorch.org/whl/cu128" }, + { name = "torch", specifier = ">=2.8.0", index = "https://download.pytorch.org/whl/cu128" }, { name = "uvicorn", specifier = ">=0.36.0" }, { name = "wandb", specifier = ">=0.21.3" }, ] @@ -809,8 +801,7 @@ version = "3.4.2" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version < '3.11' and sys_platform == 'darwin'", + "python_full_version < '3.11' and sys_platform != 'linux'", ] sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload-time = "2024-10-21T12:39:38.695Z" } wheels = [ @@ -823,11 +814,9 @@ version = "3.5" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.12' and sys_platform == 'linux'", - "python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version >= '3.12' and sys_platform != 'linux'", "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform != 'linux'", ] sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" } wheels = [ @@ -871,7 +860,6 @@ name = "nvidia-cublas-cu12" version = "12.8.4.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, ] @@ -880,7 +868,6 @@ name = "nvidia-cuda-cupti-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", 
size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, ] @@ -890,7 +877,6 @@ version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, - { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, ] [[package]] @@ -898,7 +884,6 @@ name = "nvidia-cuda-runtime-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, ] @@ -910,7 +895,6 @@ dependencies = [ { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, ] @@ -922,7 +906,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, ] @@ -932,7 +915,6 @@ version = "1.13.1.3" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, - { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, ] [[package]] @@ -940,7 +922,6 @@ name = "nvidia-curand-cu12" version = "10.3.9.90" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, ] @@ -954,7 +935,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, ] @@ -966,7 +946,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, ] @@ -975,17 +954,15 @@ name = "nvidia-cusparselt-cu12" version = "0.7.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, ] [[package]] name = "nvidia-nccl-cu12" -version = "2.27.5" +version = "2.27.3" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625, upload-time = "2025-06-26T04:11:04.496Z" }, - { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, + { url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z" }, ] [[package]] @@ -994,16 +971,6 @@ version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, - { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, -] - -[[package]] -name = "nvidia-nvshmem-cu12" -version = "3.3.20" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/92/9d/3dd98852568fb845ec1f7902c90a22b240fe1cbabda411ccedf2fd737b7b/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b0b960da3842212758e4fa4696b94f129090b30e5122fea3c5345916545cff0", size = 124484616, upload-time = "2025-08-04T20:24:59.172Z" }, - { url = "https://files.pythonhosted.org/packages/3b/6c/99acb2f9eb85c29fc6f3a7ac4dccfd992e22666dd08a642b303311326a97/nvidia_nvshmem_cu12-3.3.20-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d00f26d3f9b2e3c3065be895e3059d6479ea5c638a3f38c9fec49b1b9dd7c1e5", size = 124657145, upload-time = "2025-08-04T20:25:19.995Z" }, ] [[package]] @@ -1011,7 +978,6 @@ name = "nvidia-nvtx-cu12" version = "12.8.90" source = { 
registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, ] @@ -1716,115 +1682,44 @@ wheels = [ [[package]] name = "torch" -version = "2.9.0" -source = { registry = "https://download.pytorch.org/whl/cpu" } -resolution-markers = [ - "python_full_version >= '3.12' and sys_platform == 'darwin'", - "python_full_version == '3.11.*' and sys_platform == 'darwin'", - "python_full_version < '3.11' and sys_platform == 'darwin'", -] -dependencies = [ - { name = "filelock", marker = "sys_platform == 'darwin'" }, - { name = "fsspec", marker = "sys_platform == 'darwin'" }, - { name = "jinja2", marker = "sys_platform == 'darwin'" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' and sys_platform == 'darwin'" }, - { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform == 'darwin'" }, - { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform == 'darwin'" }, - { name = "sympy", marker = "sys_platform == 'darwin'" }, - { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, -] -wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:59484193b01299bf669520505a72b29d59a0028ae4c6d95f492938f186592208" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:aa4483602586cc9a35d1cf33771a9977f05f642b9161518a289e36548a0b77c2" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:4de0ed8cbc457a506dbca40376e206a29efee10756a00f1f3404bf67ad737d04" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:259548471194ab63d7ea273873053a6e3cc23530c1510f01e9d7ad259187bbd0" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e24836d968b54ef4dfb05594001a61958711ac9224026291e4e3f92f83a6fd7f" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d8e2ab7f86010330bdcc39c8b2c795590cc75e37df4823cdaee2c98d6e3ff4a3" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a3e859039c985d8e3ea60d7a54ca7e97ea2ae15e31beced4f3260128a161bb01" }, -] - -[[package]] -name = "torch" -version = "2.9.0+cpu" -source = { registry = "https://download.pytorch.org/whl/cpu" } -resolution-markers = [ - "python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'", -] 
-dependencies = [ - { name = "filelock", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "fsspec", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "jinja2", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "sympy", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, - { name = "typing-extensions", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, -] -wheels = [ - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp310-cp310-win_amd64.whl", hash = "sha256:96f3f7aa4eb9e7fc5af8a722eaf1e5e32e3039dbafe817178d7b90a8566be32d" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:389e1e0b8083fd355f7caf5ba82356b5e01c318998bd575dbf2285a0d8137089" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp311-cp311-win_arm64.whl", hash = "sha256:5ce3d01aef91dc078fbb121814e556d55bc886d303efaf42c4fe67e411f5f9ad" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:e438061b87ec7dd6018fca9f975219889aa0a3f6cdc3ea10dd0ae2bc7f1c47ce" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:eb13ff1c34e338d722e76a4fd83b8d282782505bd1b99af4b3c32da66eba6eb4" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:728372e3f58c5826445f677746e5311c1935c1a7c59599f73a49ded850e038e8" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:95e56c26f919fbb98f16e7a0b87af494b893f9da9a65a020f17a01c13e520a81" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:d572863990e7d2762b547735ef589f6350d9eb4e441d38753a1c33636698cf4c" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:c2698999361d73c2d25d7cc8a787130188d49b183abb18b554228daa102e1594" }, - { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:3a60d1ecf27a9cce839b3aa665b26f0af1b1007b9c9f1e7f597f6b7bdf107617" }, -] - -[[package]] -name = "torch" -version = "2.9.0+cu128" +version = "2.8.0+cu128" source = { registry = "https://download.pytorch.org/whl/cu128" } -resolution-markers = [ - "python_full_version >= '3.12' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform == 'linux'", -] dependencies = [ - { name = "filelock", marker = "sys_platform == 'linux'" }, - { name = "fsspec", marker = "sys_platform == 'linux'" }, - { name = "jinja2", marker = "sys_platform == 'linux'" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' and sys_platform == 'linux'" }, - { name = "networkx", version = "3.5", source = { 
registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform == 'linux'" }, - { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "sys_platform == 'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform == 'linux'" }, - { name = "sympy", marker = "sys_platform == 'linux'" }, - { name = "triton", marker = "sys_platform == 'linux'" }, - { name = "typing-extensions", marker = "sys_platform == 'linux'" }, + { name = "filelock" }, + { name = "fsspec" }, + { name = "jinja2" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "setuptools", marker = "python_full_version >= '3.12'" }, + { name = "sympy" }, + { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "typing-extensions" }, ] wheels = [ - { url = 
"https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:edadd510a59951323ca24a53b8fe55d179b9a90237f0f55aae07f8ebc07dd052" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:816540286fce245a8af3904a194a83af9c9292ad7452eb79160b7a3b1cefb7e3" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6848715fc906574eb2c0975f56771663344eef7b9a717816b50dede616a3d4fb" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e97c264478c9fc48f91832749d960f1e349aeb214224ebe65fb09435dd64c59a" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e1765625084e320f1eb2f4eb5fd9d14d39d08d7a1880c10a307ce5de20831d27" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:87c62d3b95f1a2270bd116dbd47dc515c0b2035076fbb4a03b4365ea289e89c4" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:4d76f71345af47f022c7fa55edd0c1810d01af89dcb9edcfdfafe3d2a0f7a6b8" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:97def0087f8ef171b9002ea500baffdd440c7bdd559c23c38bbf8781b67e9364" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:dacbfc19608e60f78975c47d605c7d39b81afdf1983e93e94c17f60646b131e0" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:8ce575fb71b878f5016df0a8a438c7c28f7f4be270af4119b5ad9ab62b0e470a" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:eedef2e65d48c7dc9bb03f92c2a62bdae904382fc5c2773de3de41dce5ffd80a" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:55a2184ed89f2120bc1e2c887ee98e5280dee48bc330e9dfe296aa135a370f7d" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:4b51281e08ec36cd6748c71ac32fa1e45d30090b1c3fdf99ebb30776437734b7" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.9.0%2Bcu128-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ef5939ebcacfe3d4f70774941e79a7c7e23f7918d7d3242428c8f48cc7440c0a" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0c96999d15cf1f13dd7c913e0b21a9a355538e6cfc10861a17158320292f5954" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp310-cp310-win_amd64.whl", hash = "sha256:43938e9a174c90e5eb9e906532b2f1e21532bbfa5a61b65193b4f54714d34f9e" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:039b9dcdd6bdbaa10a8a5cd6be22c4cb3e3589a341e5f904cbb571ca28f55bed" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp311-cp311-win_amd64.whl", hash = "sha256:34c55443aafd31046a7963b63d30bc3b628ee4a704f826796c865fdfd05bb596" }, + { url = 
"https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4354fc05bb79b208d6995a04ca1ceef6a9547b1c4334435574353d381c55087c" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:0ad925202387f4e7314302a1b4f8860fa824357f9b1466d7992bf276370ebcff" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3a852369a38dec343d45ecd0bc3660f79b88a23e0c878d18707f7c13bf49538f" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:9e20646802b7fc295c1f8b45fefcfc9fb2e4ec9cbe8593443cd2b9cc307c8405" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4295a22d69408e93d25f51e8d5d579345b6b802383e9414b0f3853ed433d53ae" }, + { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:970b4f4661fa7b44f6a7e6df65de7fc4a6fff2af610dc415c1d695ca5f1f37d2" }, ] [[package]] @@ -1841,23 +1736,17 @@ wheels = [ [[package]] name = "triton" -version = "3.5.0" +version = "3.4.0" source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "setuptools", marker = "sys_platform == 'linux'" }, +] wheels = [ - { url = "https://files.pythonhosted.org/packages/dd/22/507b6f58a35e05e84381630b2dc2a3cee1a7a2a7eaf4cba857c638a18a24/triton-3.5.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6f90de6a6566bb619b4c0adc9855729e1b1b5e26533fca1bf6206e96b6d277a3", size = 159827599, upload-time = "2025-10-15T19:15:43.87Z" }, - { url = "https://files.pythonhosted.org/packages/0b/eb/09e31d107a5d00eb281aa7e6635ca463e9bca86515944e399480eadb71f8/triton-3.5.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5d3b3d480debf24eaa739623c9a42446b0b77f95593d30eb1f64cd2278cc1f0", size = 170333110, upload-time = "2025-10-13T16:37:49.588Z" }, - { url = "https://files.pythonhosted.org/packages/79/f9/b6f60f978397c616fd8dacca2305759fe4f80d397b20ef72534803244bd5/triton-3.5.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8457b22148defefdcb7fa8144b05ce211b9faefad650a1ce85b23df488d5549c", size = 159926731, upload-time = "2025-10-15T19:15:49.682Z" }, - { url = "https://files.pythonhosted.org/packages/3d/78/949a04391c21956c816523678f0e5fa308eb5b1e7622d88c4e4ef5fceca0/triton-3.5.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f34bfa21c5b3a203c0f0eab28dcc1e49bd1f67d22724e77fb6665a659200a4ec", size = 170433488, upload-time = "2025-10-13T16:37:57.132Z" }, - { url = "https://files.pythonhosted.org/packages/87/9b/30988039e1e84df7554fba24e6a734d2d0e847af33cabdf9b532b3c51456/triton-3.5.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7da21fccceafc163e3a5e857abe34351ef76345af06cabf9637a914742671f0b", size = 159946647, upload-time = "2025-10-15T19:15:56.325Z" }, - { url = "https://files.pythonhosted.org/packages/f5/3a/e991574f3102147b642e49637e0281e9bb7c4ba254edb2bab78247c85e01/triton-3.5.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9e71db82261c4ffa3921cd050cd5faa18322d2d405c30eb56084afaff3b0833", size = 170476535, upload-time = "2025-10-13T16:38:05.18Z" }, - { url = 
"https://files.pythonhosted.org/packages/cd/85/e37f1197acb04c8f3d83851d23d5d6ed5060ef74580668b112e23fdfa203/triton-3.5.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:188da5b81fa2f8322c27fec1627703eac24cb9bb7ab0dfbe9925973bc1b070d3", size = 159958970, upload-time = "2025-10-15T19:16:01.717Z" }, - { url = "https://files.pythonhosted.org/packages/6c/29/10728de8a6e932e517c10773486b8e99f85d1b1d9dd87d9a9616e1fef4a1/triton-3.5.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e6bb9aa5519c084a333acdba443789e50012a4b851cd486c54f0b8dc2a8d3a12", size = 170487289, upload-time = "2025-10-13T16:38:11.662Z" }, - { url = "https://files.pythonhosted.org/packages/b8/1d/38258f05010ac17a7b058c022911c9cae6526e149b7397134a048cf5a6c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03127d9b33aaf979c856676b394bc059ec1d68cb6da68ae03f62dd8ad77a04ae", size = 160073012, upload-time = "2025-10-15T19:16:07.477Z" }, - { url = "https://files.pythonhosted.org/packages/5c/38/db80e48b9220c9bce872b0f616ad0446cdf554a40b85c7865cbca99ab3c2/triton-3.5.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c83f2343e1a220a716c7b3ab9fccfcbe3ad4020d189549200e2d2e8d5868bed9", size = 170577179, upload-time = "2025-10-13T16:38:17.865Z" }, - { url = "https://files.pythonhosted.org/packages/91/fe/8f5771d00227f4eb1ee034f218ed427102b989366d2275fe3b3c105a3921/triton-3.5.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:468936651d383f4a6d10068d34a627505e13af55be5d002b9f27b987e7a5f0ac", size = 159957460, upload-time = "2025-10-15T19:16:12.626Z" }, - { url = "https://files.pythonhosted.org/packages/ff/60/1810655d1d856c9a4fcc90ee8966d85f552d98c53a6589f95ab2cbe27bb8/triton-3.5.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da0fa67ccd76c3dcfb0bffe1b1c57c685136a6bd33d141c24d9655d4185b1289", size = 170487949, upload-time = "2025-10-13T16:38:24.881Z" }, - { url = "https://files.pythonhosted.org/packages/78/59/99edd103958fe6e42b50b9ad8ce4f223ddf4ccf475259cf7d2b53381dc6c/triton-3.5.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7ceef21410229ac23173a28eee5cfc0e37c1dfdb8b4bc11ecda2e3ecec7c686", size = 160075629, upload-time = "2025-10-15T19:16:18.746Z" }, - { url = "https://files.pythonhosted.org/packages/fb/b7/1dec8433ac604c061173d0589d99217fe7bf90a70bdc375e745d044b8aad/triton-3.5.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:317fe477ea8fd4524a6a8c499fb0a36984a56d0b75bf9c9cb6133a1c56d5a6e7", size = 170580176, upload-time = "2025-10-13T16:38:31.14Z" }, + { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" }, + { url = "https://files.pythonhosted.org/packages/7d/39/43325b3b651d50187e591eefa22e236b2981afcebaefd4f2fc0ea99df191/triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467", size = 155531138, upload-time = "2025-07-30T19:58:29.908Z" }, + { url = 
"https://files.pythonhosted.org/packages/d0/66/b1eb52839f563623d185f0927eb3530ee4d5ffe9d377cdaf5346b306689e/triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31c1d84a5c0ec2c0f8e8a072d7fd150cab84a9c239eaddc6706c081bfae4eb04", size = 155560068, upload-time = "2025-07-30T19:58:37.081Z" }, + { url = "https://files.pythonhosted.org/packages/30/7b/0a685684ed5322d2af0bddefed7906674f67974aa88b0fae6e82e3b766f6/triton-3.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00be2964616f4c619193cb0d1b29a99bd4b001d7dc333816073f92cf2a8ccdeb", size = 155569223, upload-time = "2025-07-30T19:58:44.017Z" }, + { url = "https://files.pythonhosted.org/packages/20/63/8cb444ad5cdb25d999b7d647abac25af0ee37d292afc009940c05b82dda0/triton-3.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7936b18a3499ed62059414d7df563e6c163c5e16c3773678a3ee3d417865035d", size = 155659780, upload-time = "2025-07-30T19:58:51.171Z" }, ] [[package]] @@ -2113,3 +2002,4 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/c3/b2e9f38bc3e11191981d57ea08cab2166e74ea770024a646617c9cddd9f6/yarl-1.20.1-cp313-cp313t-win_amd64.whl", hash = "sha256:541d050a355bbbc27e55d906bc91cb6fe42f96c01413dd0f4ed5a5240513874f", size = 93003, upload-time = "2025-06-10T00:45:27.752Z" }, { url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" }, ] + From bb71c64579c6224485c50975cb358ba427658aa7 Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Tue, 21 Oct 2025 17:12:50 +0000 Subject: [PATCH 36/64] fix silly issue in dataloader, this version is much faster and more portable to mps too --- nanochat/dataloader.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nanochat/dataloader.py b/nanochat/dataloader.py index c1636b1..6636f54 100644 --- a/nanochat/dataloader.py +++ b/nanochat/dataloader.py @@ -16,7 +16,6 @@ def tokenizing_distributed_data_loader(B, T, split, tokenizer_threads=4, tokeniz bos_token = tokenizer.get_bos_token_id() # scratch buffer holds the tokens for one iteration token_buffer = deque() # we stream tokens on the right and pop from the left - scratch = torch.empty(needed_tokens, dtype=torch.int64, pin_memory=True) # infinite iterator over document batches def document_batches(): @@ -38,8 +37,8 @@ def tokenizing_distributed_data_loader(B, T, split, tokenizer_threads=4, tokeniz token_buffer.extend(tokens) batch_index += 1 # Move tokens from the deque into the scratch buffer - for i in range(needed_tokens): - scratch[i] = token_buffer.popleft() + tokens = [token_buffer.popleft() for _ in range(needed_tokens)] + scratch = torch.tensor(tokens, dtype=torch.int64, pin_memory=True) # Create the inputs/targets as 1D tensors inputs_cpu = scratch[:-1].to(dtype=torch.int32) targets_cpu = scratch[1:] From 50bea28ef9a1aca2c8d8993c1a336f4828689b3b Mon Sep 17 00:00:00 2001 From: Andrej Karpathy Date: Tue, 21 Oct 2025 17:24:48 +0000 Subject: [PATCH 37/64] also add readme mention of the cpu mps changes --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 64fca63..1d156c6 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ And a bit more about computing environments that will run nanochat: ## Running on CPU / MPS -If you'd like to tinker with 
nanochat on your Macbook or a CPU machine, there is a work in progress [CPU|MPS PR](https://github.com/karpathy/nanochat/pull/88) up here. If you're on Macbook, use `--device_type=mps` when running `base_train.py`. See the PR and its diff for more. You're not going to get too far without GPU nodes, but at least you'll be able to run the code and maybe train a very tiny LLM with some patience.
+nanochat can be run on CPU or on MPS (if you're on a Macbook), and will automatically try to detect what device is best to run on. You're not going to get too far without GPUs, but at least you'll be able to run the code paths and maybe train a tiny LLM with some patience. For an example of how to make all the run commands much smaller (feel free to tune!), you can refer to the [dev/runcpu.sh](dev/runcpu.sh) file. You'll see that I'm essentially restricting all scripts to train smaller models and to run for a smaller number of iterations, etc. This functionality is new, slightly gnarly (touched a lot of code), and was merged in this [CPU|MPS PR](https://github.com/karpathy/nanochat/pull/88) on Oct 21, 2025.

 ## Customization
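A quick aside on the device auto-detection mentioned in the README change above: the selection presumably prefers CUDA if available, then Apple MPS, then plain CPU. A minimal sketch of that logic (the helper name here is hypothetical, not the repo's actual function; see PR #88 for the real implementation):

import torch

def autodetect_device_type() -> str:
    # hypothetical sketch: prefer CUDA, then Apple MPS, then fall back to CPU
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"

print(autodetect_device_type())  # e.g. "mps" on an Apple silicon Macbook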
From 94ee50705421d6d2d50f2ae9797ba0e70d06b7e7 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Tue, 21 Oct 2025 17:56:08 +0000
Subject: [PATCH 38/64] quick fix base eval due to fewshot requirement

---
 dev/runcpu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/runcpu.sh b/dev/runcpu.sh
index 2d73dfc..e5c7ea8 100644
--- a/dev/runcpu.sh
+++ b/dev/runcpu.sh
@@ -53,7 +53,7 @@ python -m scripts.base_train \
     --sample_every=50 \
     --num_iterations=50
 python -m scripts.base_loss --device_batch_size=1 --split_tokens=4096
-python -m scripts.base_eval --max-per-task=5
+python -m scripts.base_eval --max-per-task=16

 # midtraining
 python -m scripts.mid_train \

From a088b7a6ec6e144b99a2a89d0b1f772198abcb97 Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Tue, 21 Oct 2025 18:07:33 +0000
Subject: [PATCH 39/64] use enable_gqa of pytorch sdpa, allows us to delete some code, didn't realize it's available

---
 nanochat/gpt.py       | 24 ++++--------------------
 scripts/base_train.py |  2 +-
 2 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/nanochat/gpt.py b/nanochat/gpt.py
index d744550..b640f1e 100644
--- a/nanochat/gpt.py
+++ b/nanochat/gpt.py
@@ -48,19 +48,6 @@ def apply_rotary_emb(x, cos, sin):
     out = out.to(x.dtype) # ensure input/output dtypes match
     return out

-
-def repeat_kv(x, n_rep):
-    """torch.repeat_interleave(x, dim=1, repeats=n_rep)"""
-    if n_rep == 1:
-        return x
-    bs, n_kv_heads, slen, head_dim = x.shape
-    return (
-        x[:, :, None, :, :]
-        .expand(bs, n_kv_heads, n_rep, slen, head_dim)
-        .reshape(bs, n_kv_heads * n_rep, slen, head_dim)
-    )
-
-
 class CausalSelfAttention(nn.Module):
     def __init__(self, config, layer_idx):
         super().__init__()
@@ -96,19 +83,16 @@ class CausalSelfAttention(nn.Module):
         Tq = q.size(2) # number of queries in this forward pass
         Tk = k.size(2) # number of keys/values in total (in the cache + current forward pass)

-        # Apply MQA: replicate the key/value heads for each query head
-        nrep = self.n_head // self.n_kv_head
-        k, v = repeat_kv(k, nrep), repeat_kv(v, nrep)
-
         # Attention: queries attend to keys/values autoregressively. A few cases to handle:
+        enable_gqa = self.n_head != self.n_kv_head # Group Query Attention (GQA): duplicate key/value heads to match query heads if desired
         if kv_cache is None or Tq == Tk:
             # During training (no KV cache), attend as usual with causal attention
             # And even if there is KV cache, we can still use this simple version when Tq == Tk
-            y = F.scaled_dot_product_attention(q, k, v, is_causal=True)
+            y = F.scaled_dot_product_attention(q, k, v, is_causal=True, enable_gqa=enable_gqa)
         elif Tq == 1:
             # During inference but with a single query in this forward pass:
             # The query has to attend to all the keys/values in the cache
-            y = F.scaled_dot_product_attention(q, k, v, is_causal=False)
+            y = F.scaled_dot_product_attention(q, k, v, is_causal=False, enable_gqa=enable_gqa)
         else:
             # During inference AND we have a chunk of queries in this forward pass:
             # First, each query attends to all the cached keys/values (i.e. full prefix)
@@ -118,7 +102,7 @@ class CausalSelfAttention(nn.Module):
             attn_mask[:, :prefix_len] = True
             # Then, causal attention within this chunk
             attn_mask[:, prefix_len:] = torch.tril(torch.ones((Tq, Tq), dtype=torch.bool, device=q.device))
-            y = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask)
+            y = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask, enable_gqa=enable_gqa)

         # Re-assemble the heads side by side and project back to residual stream
         y = y.transpose(1, 2).contiguous().view(B, T, -1)
diff --git a/scripts/base_train.py b/scripts/base_train.py
index ef7db17..4ca8cdc 100644
--- a/scripts/base_train.py
+++ b/scripts/base_train.py
@@ -85,7 +85,7 @@ print0(f"Vocab size: {vocab_size:,}")
 num_layers = depth
 model_dim = depth * 64 # aspect ratio 64 (usually this is varied from 64 -> 128 as model size increases)
 num_heads = max(1, (model_dim + 127) // 128) # head dim 128 (the division here is ceil div)
-num_kv_heads = num_heads # 1:1 MQA ratio
+num_kv_heads = num_heads # default is 1:1 GQA (Group Query Attention) ratio (i.e. GQA is disabled)
 print0(f"num_layers: {num_layers}")
 print0(f"model_dim: {model_dim}")
 print0(f"num_heads: {num_heads}")
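On the enable_gqa change above: passing enable_gqa=True lets PyTorch's scaled_dot_product_attention broadcast a smaller set of key/value heads across groups of query heads, which is exactly what the deleted repeat_kv() helper did by materializing copies (its docstring was literally torch.repeat_interleave(x, dim=1, repeats=n_rep)). A small standalone sanity check of that equivalence (a sketch, not code from the repo):

import torch
import torch.nn.functional as F

B, n_head, n_kv_head, T, head_dim = 2, 8, 2, 16, 64
q = torch.randn(B, n_head, T, head_dim)
k = torch.randn(B, n_kv_head, T, head_dim)
v = torch.randn(B, n_kv_head, T, head_dim)

# GQA path: SDPA broadcasts the 2 KV heads across the 8 query heads internally
y_gqa = F.scaled_dot_product_attention(q, k, v, is_causal=True, enable_gqa=True)

# Manual path: physically replicate the KV heads, as the old repeat_kv() did
n_rep = n_head // n_kv_head
y_rep = F.scaled_dot_product_attention(
    q,
    k.repeat_interleave(n_rep, dim=1),
    v.repeat_interleave(n_rep, dim=1),
    is_causal=True,
)

assert torch.allclose(y_gqa, y_rep, atol=1e-5)  # same result, without materializing copies

Note that base_train.py sets num_kv_heads = num_heads by default, so enable_gqa stays False unless you configure fewer KV heads than query heads.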
From defd1246aa80c99cf1b486ec112b1223916e21df Mon Sep 17 00:00:00 2001
From: Luke Stanley <306671+lukestanley@users.noreply.github.com>
Date: Tue, 21 Oct 2025 19:43:38 +0000
Subject: [PATCH 40/64] Fix Torch crash caused by pinning on CPU

---
 nanochat/dataloader.py | 3 ++-
 scripts/mid_train.py   | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/nanochat/dataloader.py b/nanochat/dataloader.py
index 3d479a1..6c864d3 100644
--- a/nanochat/dataloader.py
+++ b/nanochat/dataloader.py
@@ -38,7 +38,8 @@ def tokenizing_distributed_data_loader(B, T, split, tokenizer_threads=4, tokeniz
         batch_index += 1
         # Move tokens from the deque into the scratch buffer
         tokens = [token_buffer.popleft() for _ in range(needed_tokens)]
-        scratch = torch.tensor(tokens, dtype=torch.int64, pin_memory=True)
+        # CUDA supports memory pinning for faster transfers between CPU and GPU:
+        scratch = torch.tensor(tokens, dtype=torch.int64, pin_memory=(device == "cuda"))
         # Create the inputs/targets as 1D tensors
         inputs_cpu = scratch[:-1].to(dtype=torch.int32)
         targets_cpu = scratch[1:]
diff --git a/scripts/mid_train.py b/scripts/mid_train.py
index c731d57..2835ebf 100644
--- a/scripts/mid_train.py
+++ b/scripts/mid_train.py
@@ -119,7 +119,8 @@ def mid_data_generator(split):
     assert dataset_size > 0
     needed_tokens = device_batch_size * max_seq_len + 1 # to form one training batch of inputs,targets
     token_buffer = deque()
-    scratch = torch.empty(needed_tokens, dtype=torch.int64, pin_memory=True)
+    # CUDA supports memory pinning for faster transfers between CPU and GPU:
+    scratch = torch.empty(needed_tokens, dtype=torch.int64, pin_memory=(device_type == "cuda"))
     cursor = ddp_rank # increments by ddp_world_size each time, so each rank processes unique documents
     it = 0 # iteration counter
     while True:

From 901b0756053159bb2e0504a809b654d9c7b5eb13 Mon Sep 17 00:00:00 2001
From: Luke Stanley <306671+lukestanley@users.noreply.github.com>
Date: Tue, 21 Oct 2025 19:52:21 +0000
Subject: [PATCH 41/64] Fix GPU-less CPU use on Linux with specific Torch indexes

---
 dev/runcpu.sh  |  2 +-
 pyproject.toml | 38 +++++++++++++++++++++++++++-----------
 speedrun.sh    |  2 +-
 3 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/dev/runcpu.sh b/dev/runcpu.sh
index e5c7ea8..6290fd9 100644
--- a/dev/runcpu.sh
+++ b/dev/runcpu.sh
@@ -14,7 +14,7 @@ NANOCHAT_BASE_DIR="$HOME/.cache/nanochat"
 mkdir -p $NANOCHAT_BASE_DIR
 command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
 [ -d ".venv" ] || uv venv
-uv sync
+uv sync --extra cpu
 source .venv/bin/activate
 if [ -z "$WANDB_RUN" ]; then
     WANDB_RUN=dummy
diff --git a/pyproject.toml b/pyproject.toml
index 26625fc..da674f4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,19 +44,35 @@ python_files = ["test_*.py"]
 python_classes = ["Test*"]
 python_functions = ["test_*"]

-# target torch to cuda 12.8
+# target torch to cuda 12.8 or CPU
 [tool.uv.sources]
-torch = [
-    { index = "pytorch-cpu", marker = "sys_platform != 'linux'" },
-    { index = "pytorch-cu128", marker = "sys_platform == 'linux'" },
+torch = [
+    { index = "pytorch-cpu", extra = "cpu" },
+    { index = "pytorch-cu128", extra = "gpu" },
 ]

-[[tool.uv.index]]
-name = "pytorch-cpu"
-url = "https://download.pytorch.org/whl/cpu"
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu128"
+url = 
"https://download.pytorch.org/whl/cu128" explicit = true -[[tool.uv.index]] -name = "pytorch-cu128" -url = "https://download.pytorch.org/whl/cu128" -explicit = true \ No newline at end of file +[project.optional-dependencies] +cpu = [ + "torch>=2.8.0", +] +gpu = [ + "torch>=2.8.0", +] + +[tool.uv] +conflicts = [ + [ + { extra = "cpu" }, + { extra = "gpu" }, + ], +] \ No newline at end of file diff --git a/speedrun.sh b/speedrun.sh index d73dcce..35dd39e 100644 --- a/speedrun.sh +++ b/speedrun.sh @@ -23,7 +23,7 @@ command -v uv &> /dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh # create a .venv local virtual environment (if it doesn't exist) [ -d ".venv" ] || uv venv # install the repo dependencies -uv sync +uv sync --extra gpu # activate venv so that `python` uses the project's venv instead of system python source .venv/bin/activate From 760af62e11d6dec01182d35537095a181a7d391e Mon Sep 17 00:00:00 2001 From: Luke Stanley <306671+lukestanley@users.noreply.github.com> Date: Tue, 21 Oct 2025 20:39:31 +0000 Subject: [PATCH 42/64] Git ignore eval_bundle --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 9b0ade2..4a87b23 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ __pycache__/ rustbpe/target/ dev-ignore/ report.md +eval_bundle/ \ No newline at end of file From 7a52f9bfbb11634c362e935261fbe74111924de4 Mon Sep 17 00:00:00 2001 From: Luke Stanley <306671+lukestanley@users.noreply.github.com> Date: Tue, 21 Oct 2025 20:53:18 +0000 Subject: [PATCH 43/64] Updates lockfile with CPU package support without overwriting other architectures --- uv.lock | 335 ++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 275 insertions(+), 60 deletions(-) diff --git a/uv.lock b/uv.lock index 8c381ee..f01bba3 100644 --- a/uv.lock +++ b/uv.lock @@ -2,13 +2,32 @@ version = 1 revision = 3 requires-python = ">=3.10" resolution-markers = [ - "python_full_version >= '3.12' and sys_platform == 'linux'", - "python_full_version >= '3.12' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'linux'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", + "python_full_version >= '3.12' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version < '3.11' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version < '3.11' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform == 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform == 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", 
+ "python_full_version < '3.11' and sys_platform == 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version < '3.11' and sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version < '3.11' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version < '3.11' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", ] +conflicts = [[ + { package = "nanochat", extra = "cpu" }, + { package = "nanochat", extra = "gpu" }, +]] [[package]] name = "aiohappyeyeballs" @@ -26,7 +45,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiosignal" }, - { name = "async-timeout", marker = "python_full_version < '3.11'" }, + { name = "async-timeout", marker = "python_full_version < '3.11' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, { name = "attrs" }, { name = "frozenlist" }, { name = "multidict" }, @@ -111,7 +130,7 @@ version = "1.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "frozenlist" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } wheels = [ @@ -132,10 +151,10 @@ name = "anyio" version = "4.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, { name = "idna" }, { name = "sniffio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/f1/b4/636b3b65173d3ce9a38ef5f0522789614e590dab6a8d505340a4efe4c567/anyio-4.10.0.tar.gz", hash = "sha256:3f3fae35c96039744587aa5b8371e7e8e603c0702999535961dd336026973ba6", size = 213252, upload-time = "2025-08-04T08:54:26.451Z" } wheels = [ @@ -238,7 +257,7 @@ name = "click" version = "8.2.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342, upload-time = "2025-05-20T23:19:49.832Z" } wheels = [ @@ -292,7 +311,7 @@ name = "exceptiongroup" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.12' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } wheels = [ @@ -497,7 +516,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "fsspec" }, - { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, { name = "packaging" }, { name = "pyyaml" }, { name = "requests" }, @@ -602,7 +621,7 @@ name = "maturin" version = "1.9.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "tomli", marker = "python_full_version < '3.11' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/13/7c/b11b870fc4fd84de2099906314ce45488ae17be32ff5493519a6cddc518a/maturin-1.9.4.tar.gz", hash = "sha256:235163a0c99bc6f380fb8786c04fd14dcf6cd622ff295ea3de525015e6ac40cf", size = 213647, upload-time = "2025-08-27T11:37:57.079Z" } wheels = [ @@ -635,7 +654,7 @@ name = "multidict" version = "6.6.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/69/7f/0652e6ed47ab288e3756ea9c0df8b14950781184d4bd7883f4d87dd41245/multidict-6.6.4.tar.gz", hash = "sha256:d2d4e4787672911b48350df02ed3fa3fffdc2f2e8ca06dd6afdf34189b76a9dd", size = 101843, upload-time = "2025-08-11T12:08:48.217Z" } wheels = [ @@ -761,13 +780,26 @@ dependencies = [ { name = "numpy" }, { name = "psutil" }, { name = "regex" }, + { name = "setuptools" }, { name = 
"tiktoken" }, { name = "tokenizers" }, - { name = "torch" }, + { name = "torch", version = "2.8.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "extra == 'extra-8-nanochat-gpu'" }, + { name = "torch", version = "2.9.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "torch", version = "2.9.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu')" }, + { name = "torch", version = "2.9.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, { name = "uvicorn" }, { name = "wandb" }, ] +[package.optional-dependencies] +cpu = [ + { name = "torch", version = "2.9.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "torch", version = "2.9.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, +] +gpu = [ + { name = "torch", version = "2.8.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" } }, +] + [package.dev-dependencies] dev = [ { name = "maturin" }, @@ -782,12 +814,16 @@ requires-dist = [ { name = "numpy", specifier = "==1.26.4" }, { name = "psutil", specifier = ">=7.1.0" }, { name = "regex", specifier = ">=2025.9.1" }, + { name = "setuptools", specifier = ">=80.9.0" }, { name = "tiktoken", specifier = ">=0.11.0" }, { name = "tokenizers", specifier = ">=0.22.0" }, - { name = "torch", specifier = ">=2.8.0", index = "https://download.pytorch.org/whl/cu128" }, + { name = "torch", specifier = ">=2.8.0" }, + { name = "torch", marker = "extra == 'cpu'", specifier = ">=2.8.0", index = "https://download.pytorch.org/whl/cpu", conflict = { package = "nanochat", extra = "cpu" } }, + { name = "torch", marker = "extra == 'gpu'", specifier = ">=2.8.0", index = "https://download.pytorch.org/whl/cu128", conflict = { package = "nanochat", extra = "gpu" } }, { name = "uvicorn", specifier = ">=0.36.0" }, { name = "wandb", specifier = ">=0.21.3" }, ] +provides-extras = ["cpu", "gpu"] [package.metadata.requires-dev] dev = [ @@ -800,8 +836,13 @@ name = "networkx" version = "3.4.2" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", + "python_full_version < '3.11' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version < '3.11' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version < '3.11' and sys_platform == 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + 
"python_full_version < '3.11' and sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version < '3.11' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version < '3.11' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", ] sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368, upload-time = "2024-10-21T12:39:38.695Z" } wheels = [ @@ -813,10 +854,20 @@ name = "networkx" version = "3.5" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.12' and sys_platform == 'linux'", - "python_full_version >= '3.12' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'linux'", + "python_full_version >= '3.12' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform == 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform == 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version >= '3.12' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform == 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", + "python_full_version == '3.11.*' and sys_platform != 'linux' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu'", ] sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" } wheels = [ @@ -860,7 +911,9 @@ name = "nvidia-cublas-cu12" version = "12.8.4.1" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = 
"https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, + { url = "https://files.pythonhosted.org/packages/70/61/7d7b3c70186fb651d0fbd35b01dbfc8e755f69fd58f817f3d0f642df20c3/nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af", size = 567544208, upload-time = "2025-03-07T01:53:30.535Z" }, ] [[package]] @@ -868,7 +921,9 @@ name = "nvidia-cuda-cupti-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, + { url = "https://files.pythonhosted.org/packages/41/bc/83f5426095d93694ae39fe1311431b5d5a9bb82e48bf0dd8e19be2765942/nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e", size = 7015759, upload-time = "2025-03-07T01:51:11.355Z" }, ] [[package]] @@ -877,6 +932,8 @@ version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, + { url = "https://files.pythonhosted.org/packages/45/51/52a3d84baa2136cc8df15500ad731d74d3a1114d4c123e043cb608d4a32b/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909", size = 73586838, upload-time = "2025-03-07T01:52:13.483Z" }, ] [[package]] @@ -884,7 +941,9 @@ name = "nvidia-cuda-runtime-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = 
"https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, + { url = "https://files.pythonhosted.org/packages/30/a5/a515b7600ad361ea14bfa13fb4d6687abf500adc270f19e89849c0590492/nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8", size = 944318, upload-time = "2025-03-07T01:51:01.794Z" }, ] [[package]] @@ -892,10 +951,12 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cublas-cu12", marker = "extra == 'extra-8-nanochat-gpu'" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, + { url = "https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" }, ] [[package]] @@ -903,10 +964,12 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "extra == 'extra-8-nanochat-gpu'" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, + { url = "https://files.pythonhosted.org/packages/7d/ec/ce1629f1e478bb5ccd208986b5f9e0316a78538dd6ab1d0484f012f8e2a1/nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = 
"sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7", size = 192216559, upload-time = "2025-03-07T01:53:57.106Z" }, ] [[package]] @@ -915,6 +978,7 @@ version = "1.13.1.3" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, ] [[package]] @@ -922,7 +986,9 @@ name = "nvidia-curand-cu12" version = "10.3.9.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, + { url = "https://files.pythonhosted.org/packages/b9/75/70c05b2f3ed5be3bb30b7102b6eb78e100da4bbf6944fd6725c012831cab/nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec", size = 62765309, upload-time = "2025-03-07T01:54:20.478Z" }, ] [[package]] @@ -930,12 +996,14 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-cublas-cu12", marker = "extra == 'extra-8-nanochat-gpu'" }, + { name = "nvidia-cusparse-cu12", marker = "extra == 'extra-8-nanochat-gpu'" }, + { name = "nvidia-nvjitlink-cu12", marker = "extra == 'extra-8-nanochat-gpu'" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, + { url = "https://files.pythonhosted.org/packages/13/c0/76ca8551b8a84146ffa189fec81c26d04adba4bc0dbe09cd6e6fd9b7de04/nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = 
"sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34", size = 256720438, upload-time = "2025-03-07T01:54:39.898Z" }, ] [[package]] @@ -943,10 +1011,12 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "extra == 'extra-8-nanochat-gpu'" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, + { url = "https://files.pythonhosted.org/packages/62/07/f3b2ad63f8e3d257a599f422ae34eb565e70c41031aecefa3d18b62cabd1/nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd", size = 284937404, upload-time = "2025-03-07T01:55:07.742Z" }, ] [[package]] @@ -954,7 +1024,9 @@ name = "nvidia-cusparselt-cu12" version = "0.7.1" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, + { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" }, ] [[package]] @@ -962,6 +1034,7 @@ name = "nvidia-nccl-cu12" version = "2.27.3" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/7b/8354b784cf73b0ba51e566b4baba3ddd44fe8288a3d39ef1e06cd5417226/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ddf1a245abc36c550870f26d537a9b6087fb2e2e3d6e0ef03374c6fd19d984f", size = 322397768, upload-time = "2025-06-03T21:57:30.234Z" }, { url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z" }, ] @@ -971,6 +1044,8 @@ version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, + { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, + { url = "https://files.pythonhosted.org/packages/ed/d7/34f02dad2e30c31b10a51f6b04e025e5dd60e5f936af9045a9b858a05383/nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f", size = 268553710, upload-time = "2025-03-07T01:56:24.13Z" }, ] [[package]] @@ -978,7 +1053,9 @@ name = "nvidia-nvtx-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, + { url = "https://files.pythonhosted.org/packages/9f/99/4c9c0c329bf9fc125008c3b54c7c94c0023518d06fc025ae36431375e1fe/nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e", size = 56492, upload-time = "2025-03-07T01:52:24.69Z" }, ] [[package]] @@ -1334,13 +1411,13 @@ name = "pytest" version = "8.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, { name = "pygments" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "tomli", marker = "python_full_version < '3.11' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } wheels = [ @@ -1561,7 +1638,7 @@ version = "0.48.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' 
or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a7/a5/d6f429d43394057b67a6b5bbe6eae2f77a6bf7459d961fdb224bf206eee6/starlette-0.48.0.tar.gz", hash = "sha256:7e8cee469a8ab2352911528110ce9088fdc6a37d9876926e73da7ce4aa4c7a46", size = 2652949, upload-time = "2025-09-13T08:41:05.699Z" } wheels = [ @@ -1684,30 +1761,38 @@ wheels = [ name = "torch" version = "2.8.0+cu128" source = { registry = "https://download.pytorch.org/whl/cu128" } +resolution-markers = [ + "python_full_version >= '3.12' and sys_platform == 'linux'", + "python_full_version >= '3.12' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'linux'", + "python_full_version < '3.11' and sys_platform == 'linux'", + "python_full_version == '3.11.*' and sys_platform != 'linux'", + "python_full_version < '3.11' and sys_platform != 'linux'", +] dependencies = [ - { name = "filelock" }, - { name = "fsspec" }, - { name = "jinja2" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12'" }, - { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "typing-extensions" }, + { name = "filelock", marker = "extra == 'extra-8-nanochat-gpu'" }, + { name = "fsspec", marker = "extra == 'extra-8-nanochat-gpu'" }, + { name = "jinja2", marker = "extra == 'extra-8-nanochat-gpu'" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-8-nanochat-gpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= 
'3.11' and extra == 'extra-8-nanochat-gpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-cuda-cupti-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-cuda-runtime-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-cudnn-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-cufft-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-cufile-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-curand-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-cusolver-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-cusparse-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and 
extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-cusparselt-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-nccl-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "nvidia-nvtx-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "setuptools", marker = "(python_full_version >= '3.12' and extra == 'extra-8-nanochat-gpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "sympy", marker = "extra == 'extra-8-nanochat-gpu'" }, + { name = "triton", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-8-nanochat-gpu') or (platform_machine != 'x86_64' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'linux' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "typing-extensions", marker = "extra == 'extra-8-nanochat-gpu'" }, ] wheels = [ { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0c96999d15cf1f13dd7c913e0b21a9a355538e6cfc10861a17158320292f5954" }, @@ -1722,12 +1807,143 @@ wheels = [ { url = "https://download.pytorch.org/whl/cu128/torch-2.8.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:970b4f4661fa7b44f6a7e6df65de7fc4a6fff2af610dc415c1d695ca5f1f37d2" }, ] +[[package]] +name = "torch" +version = "2.9.0" +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ + "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version == '3.11.*' and sys_platform == 'darwin'", + "python_full_version < '3.11' and sys_platform == 'darwin'", +] +dependencies = [ + { name = "filelock", marker = "(sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "fsspec", marker = "(sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "jinja2", marker = "(sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' 
and sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu') or (python_full_version >= '3.11' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu') or (python_full_version < '3.11' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "setuptools", marker = "(python_full_version >= '3.12' and sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu') or (python_full_version < '3.12' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "sympy", marker = "(sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "typing-extensions", marker = "(sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, +] +wheels = [ + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:59484193b01299bf669520505a72b29d59a0028ae4c6d95f492938f186592208" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:aa4483602586cc9a35d1cf33771a9977f05f642b9161518a289e36548a0b77c2" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:4de0ed8cbc457a506dbca40376e206a29efee10756a00f1f3404bf67ad737d04" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:259548471194ab63d7ea273873053a6e3cc23530c1510f01e9d7ad259187bbd0" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:e24836d968b54ef4dfb05594001a61958711ac9224026291e4e3f92f83a6fd7f" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d8e2ab7f86010330bdcc39c8b2c795590cc75e37df4823cdaee2c98d6e3ff4a3" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a3e859039c985d8e3ea60d7a54ca7e97ea2ae15e31beced4f3260128a161bb01" }, +] + +[[package]] +name = "torch" +version = "2.9.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12' and sys_platform == 'linux'", + "python_full_version >= '3.12' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'linux'", + "python_full_version < '3.11' and sys_platform == 'linux'", + "python_full_version == '3.11.*' and sys_platform != 'linux'", + "python_full_version < '3.11' and sys_platform != 'linux'", +] +dependencies = [ + { name = "filelock", marker = "(extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu')" }, + { name = "fsspec", marker = "(extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu')" }, + { name 
= "jinja2", marker = "(extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu')" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "setuptools", marker = "(python_full_version >= '3.12' and extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "sympy", marker = "(extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu')" }, + { name = "typing-extensions", marker = "(extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (extra != 'extra-8-nanochat-cpu' and extra != 'extra-8-nanochat-gpu')" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/86/245c240d2138c17ed572c943c289056c2721abab70810d772c6bf5495b28/torch-2.9.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:030bbfe367379ae6a4ae4042b6c44da25383343b8b3c68abaa9c7231efbaf2dd", size = 104213554, upload-time = "2025-10-15T15:45:59.798Z" }, + { url = "https://files.pythonhosted.org/packages/58/1d/fd1e88ae0948825efcab7dd66d12bec23f05d4d38ed81573c8d453c14c06/torch-2.9.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:51cb63902182a78e90886e8068befd8ea102af4b00e420263591a3d70c7d3c6c", size = 899795167, upload-time = "2025-10-15T15:47:12.695Z" }, + { url = "https://files.pythonhosted.org/packages/63/5a/496197b45c14982bef4e079b24c61dc108e3ab0d0cc9718dba9f54f45a46/torch-2.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:3f6aad4d2f0ee2248bac25339d74858ff846c3969b27d14ac235821f055af83d", size = 109310314, upload-time = "2025-10-15T15:46:16.633Z" }, + { url = "https://files.pythonhosted.org/packages/58/b0/2b4e647b0fc706e88eb6c253d05511865578f5f67b55fad639bf3272a4a1/torch-2.9.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:413e1654c9203733138858780e184d9fc59442f0b3b209e16f39354eb893db9b", size = 74452019, upload-time = "2025-10-15T15:46:04.296Z" }, + { url = "https://files.pythonhosted.org/packages/58/fe/334225e6330e672b36aef23d77451fa906ea12881570c08638a91331a212/torch-2.9.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c596708b5105d0b199215acf0c9be7c1db5f1680d88eddadf4b75a299259a677", size = 104230578, upload-time = "2025-10-15T15:46:08.182Z" }, + { url = "https://files.pythonhosted.org/packages/05/cc/49566caaa218872ec9a2912456f470ff92649894a4bc2e5274aa9ef87c4a/torch-2.9.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:51de31219c97c51cf4bf2be94d622e3deb5dcc526c6dc00e97c17eaec0fc1d67", size = 899815990, upload-time = "2025-10-15T15:48:03.336Z" }, + { url = "https://files.pythonhosted.org/packages/74/25/e9ab21d5925b642d008f139d4a3c9664fc9ee1faafca22913c080cc4c0a5/torch-2.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:dd515c70059afd95f48b8192733764c08ca37a1d19803af6401b5ecad7c8676e", size = 109313698, upload-time = "2025-10-15T15:46:12.425Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/b7/205ef3e94de636feffd64b28bb59a0dfac0771221201b9871acf9236f5ca/torch-2.9.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:614a185e4986326d526a91210c8fc1397e76e8cfafa78baf6296a790e53a9eec", size = 74463678, upload-time = "2025-10-15T15:46:29.779Z" }, + { url = "https://files.pythonhosted.org/packages/d1/d3/3985739f3b8e88675127bf70f82b3a48ae083e39cda56305dbd90398fec0/torch-2.9.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e5f7af1dc4c0a7c4a260c2534f41ddaf209714f7c89145e644c44712fbd6b642", size = 104107898, upload-time = "2025-10-15T15:46:20.883Z" }, + { url = "https://files.pythonhosted.org/packages/a5/4b/f4bb2e6c25d0272f798cd6d7a04ed315da76cec68c602d87040c7847287f/torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:01cff95ecd9a212ea2f141db28acccdceb6a4c54f64e6c51091146f5e2a772c6", size = 899738273, upload-time = "2025-10-15T15:50:04.188Z" }, + { url = "https://files.pythonhosted.org/packages/66/11/c1c5ba6691cda6279087c35bd626536e4fd29521fe740abf5008377a9a02/torch-2.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4582b162f541651f0cb184d3e291c05c2f556c7117c64a9873e2ee158d40062b", size = 109280887, upload-time = "2025-10-15T15:46:26.228Z" }, + { url = "https://files.pythonhosted.org/packages/dd/5f/b85bd8c05312d71de9402bf5868d217c38827cfd09d8f8514e5be128a52b/torch-2.9.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:33f58e9a102a91259af289d50525c30323b5c9ae1d31322b6447c0814da68695", size = 74478983, upload-time = "2025-10-15T15:46:39.406Z" }, + { url = "https://files.pythonhosted.org/packages/c2/1c/90eb13833cdf4969ea9707586d7b57095c3b6e2b223a7256bf111689bcb8/torch-2.9.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c30a17fc83eeab346913e237c64b15b5ba6407fff812f6c541e322e19bc9ea0e", size = 104111330, upload-time = "2025-10-15T15:46:35.238Z" }, + { url = "https://files.pythonhosted.org/packages/0e/21/2254c54b8d523592c25ef4434769aa23e29b1e6bf5f4c0ad9e27bf442927/torch-2.9.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:8f25033b8667b57857dfd01458fbf2a9e6a6df1f8def23aef0dc46292f6aa642", size = 899750243, upload-time = "2025-10-15T15:48:57.459Z" }, + { url = "https://files.pythonhosted.org/packages/b7/a5/5cb94fa4fd1e78223455c23c200f30f6dc10c6d4a2bcc8f6e7f2a2588370/torch-2.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:d037f1b4ffd25013be4a7bf3651a0a910c68554956c7b2c92ebe87c76475dece", size = 109284513, upload-time = "2025-10-15T15:46:45.061Z" }, + { url = "https://files.pythonhosted.org/packages/66/e8/fc414d8656250ee46120b44836ffbb3266343db424b3e18ca79ebbf69d4f/torch-2.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e4e5b5cba837a2a8d1a497ba9a58dae46fa392593eaa13b871c42f71847503a5", size = 74830362, upload-time = "2025-10-15T15:46:48.983Z" }, + { url = "https://files.pythonhosted.org/packages/ed/5f/9474c98fc5ae0cd04b9466035428cd360e6611a86b8352a0fc2fa504acdc/torch-2.9.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:64693568f5dc4dbd5f880a478b1cea0201cc6b510d91d1bc54fea86ac5d1a637", size = 104144940, upload-time = "2025-10-15T15:47:29.076Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5a/8e0c1cf57830172c109d4bd6be2708cabeaf550983eee7029291322447a0/torch-2.9.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:f8ed31ddd7d10bfb3fbe0b9fe01b1243577f13d75e6f4a0839a283915ce3791e", size = 899744054, upload-time = "2025-10-15T15:48:29.864Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/28/82c28b30fcb4b7c9cdd995763d18bbb830d6521356712faebbad92ffa61d/torch-2.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:eff527d4e4846e6f70d2afd8058b73825761203d66576a7e04ea2ecfebcb4ab8", size = 109517546, upload-time = "2025-10-15T15:47:33.395Z" }, + { url = "https://files.pythonhosted.org/packages/ff/c3/a91f96ec74347fa5fd24453fa514bc61c61ecc79196fa760b012a1873d96/torch-2.9.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:f8877779cf56d1ce431a7636703bdb13307f5960bb1af49716d8b179225e0e6a", size = 74480732, upload-time = "2025-10-15T15:47:38.002Z" }, + { url = "https://files.pythonhosted.org/packages/5c/73/9f70af34b334a7e0ef496ceec96b7ec767bd778ea35385ce6f77557534d1/torch-2.9.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7e614fae699838038d888729f82b687c03413c5989ce2a9481f9a7e7a396e0bb", size = 74433037, upload-time = "2025-10-15T15:47:41.894Z" }, + { url = "https://files.pythonhosted.org/packages/b7/84/37cf88625901934c97109e583ecc21777d21c6f54cda97a7e5bbad1ee2f2/torch-2.9.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:dfb5b8cd310ba3436c7e14e8b7833ef658cf3045e50d2bdaed23c8fc517065eb", size = 104116482, upload-time = "2025-10-15T15:47:46.266Z" }, + { url = "https://files.pythonhosted.org/packages/56/8e/ca8b17866943a8d4f4664d402ea84210aa274588b4c5d89918f5caa24eec/torch-2.9.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b3d29524993a478e46f5d598b249cd824b7ed98d7fba538bd9c4cde6c803948f", size = 899746916, upload-time = "2025-10-15T15:50:40.294Z" }, + { url = "https://files.pythonhosted.org/packages/43/65/3b17c0fbbdab6501c5b320a52a648628d0d44e7379f64e27d9eef701b6bf/torch-2.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:71c7578984f5ec0eb645eb4816ac8435fcf3e3e2ae1901bcd2f519a9cafb5125", size = 109275151, upload-time = "2025-10-15T15:49:20.715Z" }, + { url = "https://files.pythonhosted.org/packages/83/36/74f8c051f785500396e42f93542422422dfd874a174f21f8d955d36e5d64/torch-2.9.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:71d9309aee457bbe0b164bce2111cd911c4ed4e847e65d5077dbbcd3aba6befc", size = 74823353, upload-time = "2025-10-15T15:49:16.59Z" }, + { url = "https://files.pythonhosted.org/packages/62/51/dc3b4e2f9ba98ae27238f0153ca098bf9340b2dafcc67fde645d496dfc2a/torch-2.9.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:c08fb654d783899e204a32cca758a7ce8a45b2d78eeb89517cc937088316f78e", size = 104140340, upload-time = "2025-10-15T15:50:19.67Z" }, + { url = "https://files.pythonhosted.org/packages/c0/8d/b00657f8141ac16af7bb6cda2e67de18499a3263b78d516b9a93fcbc98e3/torch-2.9.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:ec8feb0099b2daa5728fbc7abb0b05730fd97e0f359ff8bda09865aaa7bd7d4b", size = 899731750, upload-time = "2025-10-15T15:49:36.673Z" }, + { url = "https://files.pythonhosted.org/packages/fc/29/bd361e0cbb2c79ce6450f42643aaf6919956f89923a50571b0ebfe92d142/torch-2.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:695ba920f234ad4170c9c50e28d56c848432f8f530e6bc7f88fcb15ddf338e75", size = 109503850, upload-time = "2025-10-15T15:50:24.118Z" }, +] + +[[package]] +name = "torch" +version = "2.9.0+cpu" +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ + "python_full_version >= '3.12' and sys_platform == 'linux'", + "python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version == '3.11.*' and sys_platform == 'linux'", + "python_full_version < '3.11' and sys_platform == 'linux'", + "python_full_version == '3.11.*' and 
sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'", +] +dependencies = [ + { name = "filelock", marker = "(sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "fsspec", marker = "(sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "jinja2", marker = "(sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu') or (python_full_version >= '3.11' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu') or (python_full_version < '3.11' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "setuptools", marker = "(python_full_version >= '3.12' and sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu') or (python_full_version < '3.12' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu') or (sys_platform == 'darwin' and extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "sympy", marker = "(sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, + { name = "typing-extensions", marker = "(sys_platform != 'darwin' and extra == 'extra-8-nanochat-cpu') or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, +] +wheels = [ + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b224792ea567b52c7f1ce1d789567f6920e06fd3b339fa1e1b05948845f783ad" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:bd2a257e670ede9fc01c6d76dccdc473040913b8e9328169bf177dbdc38e2484" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp310-cp310-win_amd64.whl", hash = "sha256:96f3f7aa4eb9e7fc5af8a722eaf1e5e32e3039dbafe817178d7b90a8566be32d" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:da77341ccaba31762d9238b0942c165c4582a26818f3045b052b39cebdd7ad9d" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:add3e93ecc1eeaa6853f6a973ce60ffb3cb14ed2e80f5055e139b09385dce0a7" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp311-cp311-win_amd64.whl", hash = "sha256:389e1e0b8083fd355f7caf5ba82356b5e01c318998bd575dbf2285a0d8137089" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp311-cp311-win_arm64.whl", hash = "sha256:5ce3d01aef91dc078fbb121814e556d55bc886d303efaf42c4fe67e411f5f9ad" }, + { url = 
"https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3a651434ae1248b0568c12b5f9e3acc8942eb28378d9d04a79302938b68c6f24" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:28f6eb31b08180a5c5e98d5bc14eef6909c9f5a1dbff9632c3e02a8773449349" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:e438061b87ec7dd6018fca9f975219889aa0a3f6cdc3ea10dd0ae2bc7f1c47ce" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:eb13ff1c34e338d722e76a4fd83b8d282782505bd1b99af4b3c32da66eba6eb4" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:be4438d8dad7f0d5a5e54f0feef8a893446894ec87f102bb1d82dcc4518542e4" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6c9b217584400963d5b4daddb3711ec7a3778eab211e18654fba076cce3b8682" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:728372e3f58c5826445f677746e5311c1935c1a7c59599f73a49ded850e038e8" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:95e56c26f919fbb98f16e7a0b87af494b893f9da9a65a020f17a01c13e520a81" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:6c777160288b08555820781ae0f3a2c67a59bd24b065e88ca1ec20e2f9dc8ac7" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:528fd338311f31c9fb18038cafd00e6eae0bf5ad5577521701acb62510753d18" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:d572863990e7d2762b547735ef589f6350d9eb4e441d38753a1c33636698cf4c" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:44aadb735774d4a99525d2ec29126b23016c44a07b02ce6c237dfa61a223dd52" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:b355e07b7f0c369cb031adfcbff5c37a609abcea091b918a39886412afd2e07d" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:c2698999361d73c2d25d7cc8a787130188d49b183abb18b554228daa102e1594" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:fa0d1373d04b30ff8f12d542135d292f1a1ddb7c0d852a3d487a320360e5dab9" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:2f49bb57a5fe0dc7f8e73ea9e5d36ebda2ea25b8a714a788f0fc2fc47d20a830" }, + { url = "https://download.pytorch.org/whl/cpu/torch-2.9.0%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:3a60d1ecf27a9cce839b3aa665b26f0af1b1007b9c9f1e7f597f6b7bdf107617" }, +] + [[package]] name = "tqdm" version = "4.67.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" } wheels = [ @@ -1739,7 +1955,7 @@ name = "triton" version = "3.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "setuptools", marker = "sys_platform == 'linux'" }, + { name = "setuptools", marker = "extra == 'extra-8-nanochat-gpu'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" }, @@ -1795,7 +2011,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ef/5e/f0cd46063a02fd8515f0e880c37d2657845b7306c16ce6c4ffc44afd9036/uvicorn-0.36.0.tar.gz", hash = "sha256:527dc68d77819919d90a6b267be55f0e76704dca829d34aea9480be831a9b9d9", size = 80032, upload-time = "2025-09-20T01:07:14.418Z" } wheels = [ @@ -2002,4 +2218,3 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/c3/b2e9f38bc3e11191981d57ea08cab2166e74ea770024a646617c9cddd9f6/yarl-1.20.1-cp313-cp313t-win_amd64.whl", hash = "sha256:541d050a355bbbc27e55d906bc91cb6fe42f96c01413dd0f4ed5a5240513874f", size = 93003, upload-time = "2025-06-10T00:45:27.752Z" }, { url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" }, ] - From 796f84527f3c62f7466eeb59f362d0827d619e6d Mon Sep 17 00:00:00 2001 From: ulanch Date: Tue, 21 Oct 2025 17:34:40 -0700 Subject: [PATCH 44/64] fix(ui): prevent iOS Safari toolbar from covering input on initial load --- nanochat/ui.html | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nanochat/ui.html b/nanochat/ui.html index b2b4605..0f625d9 100644 --- a/nanochat/ui.html +++ b/nanochat/ui.html @@ -2,7 +2,7 @@ - + NanoChat