diff --git a/services/inference/Dockerfile b/services/inference/Dockerfile index 271d6416..abdaf87c 100644 --- a/services/inference/Dockerfile +++ b/services/inference/Dockerfile @@ -1,9 +1,24 @@ FROM python:3.12-slim +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + UV_LINK_MODE=copy \ + UV_COMPILE_BYTECODE=1 \ + PYTHONPATH=/app/src + +COPY --from=ghcr.io/astral-sh/uv:0.7.13 /uv /uvx /bin/ + WORKDIR /app -COPY README.md /app/README.md +RUN apt-get update \ + && apt-get install -y --no-install-recommends build-essential curl \ + && rm -rf /var/lib/apt/lists/* + +COPY pyproject.toml uv.lock /app/ +RUN uv sync --locked --no-dev --no-install-project + +COPY src /app/src EXPOSE 8003 -CMD ["python", "-m", "http.server", "8003", "--bind", "0.0.0.0"] +CMD ["uv", "run", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8003"] diff --git a/services/inference/README.md b/services/inference/README.md index 8bcb6dce..0827a6e5 100644 --- a/services/inference/README.md +++ b/services/inference/README.md @@ -1,6 +1,26 @@ # Inference Service -Scaffold placeholder for Issue #3. +Standalone FastAPI microservice for nanochat model serving. -This branch only creates the monorepo structure. The next branch extracts the -FastAPI inference microservice into this directory. 
+## Endpoints + +- `POST /generate` streams model output as SSE +- `GET /models` lists registered and loaded weights +- `POST /models/swap` drains workers and hot-swaps the active weights +- `GET /health` reports readiness +- `GET /stats` reports worker pool state + +## Environment + +- `MODEL_STORAGE_PATH` +- `DEFAULT_MODEL_TAG` +- `HF_TOKEN` +- `INTERNAL_API_KEY` +- `NANOCHAT_DTYPE` +- `NUM_WORKERS` + +Run locally with: + +```bash +uv run --project services/inference uvicorn main:app --app-dir services/inference/src --reload --port 8003 +``` diff --git a/services/inference/pyproject.toml b/services/inference/pyproject.toml new file mode 100644 index 00000000..500104a3 --- /dev/null +++ b/services/inference/pyproject.toml @@ -0,0 +1,38 @@ +[project] +name = "nanochat-inference-service" +version = "0.1.0" +description = "Standalone FastAPI inference service for nanochat" +readme = "README.md" +requires-python = ">=3.12" +dependencies = [ + "fastapi>=0.117.1", + "filelock>=3.18.0", + "huggingface_hub>=0.35.0", + "pydantic-settings>=2.10.1", + "requests>=2.32.0", + "rustbpe>=0.1.0", + "tiktoken>=0.11.0", + "tokenizers>=0.22.0", + "torch==2.9.1", + "transformers>=4.57.3", + "uvicorn>=0.36.0", +] + +[dependency-groups] +dev = [ + "httpx>=0.28.1", + "pytest>=8.0.0", +] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_functions = ["test_*"] + +[tool.uv.sources] +torch = { index = "pytorch-cpu" } + +[[tool.uv.index]] +name = "pytorch-cpu" +url = "https://download.pytorch.org/whl/cpu" +explicit = true diff --git a/services/inference/src/config.py b/services/inference/src/config.py new file mode 100644 index 00000000..694c16a7 --- /dev/null +++ b/services/inference/src/config.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from functools import lru_cache +from pathlib import Path + +from pydantic import Field +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + model_config = 
SettingsConfigDict( + env_file=".env", + env_file_encoding="utf-8", + extra="ignore", + case_sensitive=False, + ) + + host: str = "0.0.0.0" + port: int = 8003 + model_storage_path: Path = Field(default=Path("/models")) + default_model_tag: str | None = "samosachaat-d12" + default_step: int | None = None + hf_token: str | None = None + hf_repo_owner: str = "manmohan659" + internal_api_key: str | None = None + num_workers: int = 1 + device_type: str = "" + startup_load_enabled: bool = True + default_temperature: float = 0.8 + default_top_k: int = 50 + default_max_tokens: int = 512 + + @property + def resolved_device_type(self) -> str: + return self.device_type.strip() + + +@lru_cache(maxsize=1) +def get_settings() -> Settings: + return Settings() diff --git a/services/inference/src/engine/__init__.py b/services/inference/src/engine/__init__.py new file mode 100644 index 00000000..17243b35 --- /dev/null +++ b/services/inference/src/engine/__init__.py @@ -0,0 +1,3 @@ +from .engine import Engine + +__all__ = ["Engine"] diff --git a/services/inference/src/engine/checkpoint_manager.py b/services/inference/src/engine/checkpoint_manager.py new file mode 100644 index 00000000..1316924f --- /dev/null +++ b/services/inference/src/engine/checkpoint_manager.py @@ -0,0 +1,193 @@ +""" +Utilities for saving and loading model/optim/state checkpoints. 
+""" +import os +import re +import glob +import json +import logging +import torch + +from .common import get_base_dir, setup_default_logging +from .gpt import GPT, GPTConfig +from .tokenizer import get_tokenizer + +# Set up logging +setup_default_logging() +logger = logging.getLogger(__name__) +def log0(message): + if int(os.environ.get('RANK', 0)) == 0: + logger.info(message) + +def _patch_missing_config_keys(model_config_kwargs): + """Add default values for new config keys missing in old checkpoints.""" + # Old models were trained with full context (no sliding window) + if "window_pattern" not in model_config_kwargs: + model_config_kwargs["window_pattern"] = "L" + log0(f"Patching missing window_pattern in model config to 'L'") + +def _patch_missing_keys(model_data, model_config): + """Add default values for new parameters that may be missing in old checkpoints.""" + n_layer = model_config.n_layer + # resid_lambdas defaults to 1.0 (identity scaling) + if "resid_lambdas" not in model_data: + model_data["resid_lambdas"] = torch.ones(n_layer) + log0(f"Patching missing resid_lambdas in model data to 1.0") + # x0_lambdas defaults to 0.0 (disabled) + if "x0_lambdas" not in model_data: + model_data["x0_lambdas"] = torch.zeros(n_layer) + log0(f"Patching missing x0_lambdas in model data to 0.0") + +def save_checkpoint(checkpoint_dir, step, model_data, optimizer_data, meta_data, rank=0): + if rank == 0: + os.makedirs(checkpoint_dir, exist_ok=True) + # Save the model state parameters + model_path = os.path.join(checkpoint_dir, f"model_{step:06d}.pt") + torch.save(model_data, model_path) + logger.info(f"Saved model parameters to: {model_path}") + # Save the metadata dict as json + meta_path = os.path.join(checkpoint_dir, f"meta_{step:06d}.json") + with open(meta_path, "w", encoding="utf-8") as f: + json.dump(meta_data, f, indent=2) + logger.info(f"Saved metadata to: {meta_path}") + # Note that optimizer state is sharded across ranks, so each rank must save its own. 
+ if optimizer_data is not None: + os.makedirs(checkpoint_dir, exist_ok=True) + optimizer_path = os.path.join(checkpoint_dir, f"optim_{step:06d}_rank{rank:d}.pt") + torch.save(optimizer_data, optimizer_path) + logger.info(f"Saved optimizer state to: {optimizer_path}") + +def load_checkpoint(checkpoint_dir, step, device, load_optimizer=False, rank=0): + # Load the model state + model_path = os.path.join(checkpoint_dir, f"model_{step:06d}.pt") + model_data = torch.load(model_path, map_location=device) + # Load the optimizer state if requested + optimizer_data = None + if load_optimizer: + optimizer_path = os.path.join(checkpoint_dir, f"optim_{step:06d}_rank{rank:d}.pt") + optimizer_data = torch.load(optimizer_path, map_location=device) + # Load the metadata + meta_path = os.path.join(checkpoint_dir, f"meta_{step:06d}.json") + with open(meta_path, "r", encoding="utf-8") as f: + meta_data = json.load(f) + return model_data, optimizer_data, meta_data + + +def build_model(checkpoint_dir, step, device, phase): + """ + A bunch of repetitive code to build a model from a given checkpoint. + Returns: + - base model - uncompiled, not wrapped in DDP + - tokenizer + - meta data saved during base model training + """ + assert phase in ["train", "eval"], f"Invalid phase: {phase}" + model_data, optimizer_data, meta_data = load_checkpoint(checkpoint_dir, step, device, load_optimizer=False) + if device.type in {"cpu", "mps"}: + # Convert bfloat16 tensors to float for CPU inference + model_data = { + k: v.float() if v.dtype == torch.bfloat16 else v + for k, v in model_data.items() + } + # Hack: fix torch compile issue, which prepends all keys with _orig_mod. 
+ model_data = {k.removeprefix("_orig_mod."): v for k, v in model_data.items()} + model_config_kwargs = meta_data["model_config"] + _patch_missing_config_keys(model_config_kwargs) + log0(f"Building model with config: {model_config_kwargs}") + model_config = GPTConfig(**model_config_kwargs) + _patch_missing_keys(model_data, model_config) + with torch.device("meta"): + model = GPT(model_config) + # Load the model state + model.to_empty(device=device) + model.init_weights() # note: this is dumb, but we need to init the rotary embeddings. TODO: fix model re-init + model.load_state_dict(model_data, strict=True, assign=True) + # Put the model in the right training phase / mode + if phase == "eval": + model.eval() + else: + model.train() + # Load the Tokenizer + tokenizer = get_tokenizer() + # Sanity check: compatibility between model and tokenizer + assert tokenizer.get_vocab_size() == model_config_kwargs["vocab_size"], f"Tokenizer vocab size {tokenizer.get_vocab_size()} does not match model config vocab size {model_config_kwargs['vocab_size']}" + return model, tokenizer, meta_data + + +def find_largest_model(checkpoints_dir): + # attempt to guess the model tag: take the biggest model available + model_tags = [f for f in os.listdir(checkpoints_dir) if os.path.isdir(os.path.join(checkpoints_dir, f))] + if not model_tags: + raise FileNotFoundError(f"No checkpoints found in {checkpoints_dir}") + # 1) normally all model tags are of the form d, try that first: + candidates = [] + for model_tag in model_tags: + match = re.match(r"d(\d+)", model_tag) + if match: + model_depth = int(match.group(1)) + candidates.append((model_depth, model_tag)) + if candidates: + candidates.sort(key=lambda x: x[0], reverse=True) + return candidates[0][1] + # 2) if that failed, take the most recently updated model: + model_tags.sort(key=lambda x: os.path.getmtime(os.path.join(checkpoints_dir, x)), reverse=True) + return model_tags[0] + + +def find_last_step(checkpoint_dir): + # Look into 
checkpoint_dir and find model_.pt with the highest step + checkpoint_files = glob.glob(os.path.join(checkpoint_dir, "model_*.pt")) + if not checkpoint_files: + raise FileNotFoundError(f"No checkpoints found in {checkpoint_dir}") + last_step = int(max(os.path.basename(f).split("_")[-1].split(".")[0] for f in checkpoint_files)) + return last_step + +# ----------------------------------------------------------------------------- +# convenience functions that take into account nanochat's directory structure + +def load_model_from_dir(checkpoints_dir, device, phase, model_tag=None, step=None): + if model_tag is None: + # guess the model tag by defaulting to the largest model + model_tag = find_largest_model(checkpoints_dir) + log0(f"No model tag provided, guessing model tag: {model_tag}") + checkpoint_dir = os.path.join(checkpoints_dir, model_tag) + if step is None: + # guess the step by defaulting to the last step + step = find_last_step(checkpoint_dir) + assert step is not None, f"No checkpoints found in {checkpoint_dir}" + # build the model + log0(f"Loading model from {checkpoint_dir} with step {step}") + model, tokenizer, meta_data = build_model(checkpoint_dir, step, device, phase) + return model, tokenizer, meta_data + +def load_model(source, *args, **kwargs): + model_dir = { + "base": "base_checkpoints", + "sft": "chatsft_checkpoints", + "rl": "chatrl_checkpoints", + }[source] + base_dir = get_base_dir() + checkpoints_dir = os.path.join(base_dir, model_dir) + return load_model_from_dir(checkpoints_dir, *args, **kwargs) + +def load_optimizer_state(source, device, rank, model_tag=None, step=None): + """Load just the optimizer shard for a given rank, without re-loading the model.""" + model_dir = { + "base": "base_checkpoints", + "sft": "chatsft_checkpoints", + "rl": "chatrl_checkpoints", + }[source] + base_dir = get_base_dir() + checkpoints_dir = os.path.join(base_dir, model_dir) + if model_tag is None: + model_tag = find_largest_model(checkpoints_dir) + 
checkpoint_dir = os.path.join(checkpoints_dir, model_tag) + if step is None: + step = find_last_step(checkpoint_dir) + optimizer_path = os.path.join(checkpoint_dir, f"optim_{step:06d}_rank{rank:d}.pt") + if not os.path.exists(optimizer_path): + log0(f"Optimizer checkpoint not found: {optimizer_path}") + return None + log0(f"Loading optimizer state from {optimizer_path}") + optimizer_data = torch.load(optimizer_path, map_location=device) + return optimizer_data diff --git a/services/inference/src/engine/common.py b/services/inference/src/engine/common.py new file mode 100644 index 00000000..bd14fd24 --- /dev/null +++ b/services/inference/src/engine/common.py @@ -0,0 +1,278 @@ +""" +Common utilities for nanochat. +""" + +import os +import re +import logging +import urllib.request +import torch +import torch.distributed as dist +from filelock import FileLock + +# The dtype used for compute (matmuls, activations). Master weights stay fp32 for optimizer precision. +# Linear layers cast their weights to this dtype in forward, replacing torch.amp.autocast. +# Override with NANOCHAT_DTYPE env var: "bfloat16", "float16", "float32" +_DTYPE_MAP = {"bfloat16": torch.bfloat16, "float16": torch.float16, "float32": torch.float32} +def _detect_compute_dtype(): + env = os.environ.get("NANOCHAT_DTYPE") + if env is not None: + return _DTYPE_MAP[env], f"set via NANOCHAT_DTYPE={env}" + if torch.cuda.is_available(): + # bf16 requires SM 80+ (Ampere: A100, A10, etc.) + # Older GPUs like V100 (SM 70) and T4 (SM 75) only have fp16 tensor cores + capability = torch.cuda.get_device_capability() + if capability >= (8, 0): + return torch.bfloat16, f"auto-detected: CUDA SM {capability[0]}{capability[1]} (bf16 supported)" + # fp16 training requires GradScaler (not yet implemented), so fall back to fp32. + # Users can still force fp16 via NANOCHAT_DTYPE=float16 if they know what they're doing. 
+ return torch.float32, f"auto-detected: CUDA SM {capability[0]}{capability[1]} (pre-Ampere, bf16 not supported, using fp32)" + return torch.float32, "auto-detected: no CUDA (CPU/MPS)" +COMPUTE_DTYPE, COMPUTE_DTYPE_REASON = _detect_compute_dtype() + +class ColoredFormatter(logging.Formatter): + """Custom formatter that adds colors to log messages.""" + # ANSI color codes + COLORS = { + 'DEBUG': '\033[36m', # Cyan + 'INFO': '\033[32m', # Green + 'WARNING': '\033[33m', # Yellow + 'ERROR': '\033[31m', # Red + 'CRITICAL': '\033[35m', # Magenta + } + RESET = '\033[0m' + BOLD = '\033[1m' + def format(self, record): + # Add color to the level name + levelname = record.levelname + if levelname in self.COLORS: + record.levelname = f"{self.COLORS[levelname]}{self.BOLD}{levelname}{self.RESET}" + # Format the message + message = super().format(record) + # Add color to specific parts of the message + if levelname == 'INFO': + # Highlight numbers and percentages + message = re.sub(r'(\d+\.?\d*\s*(?:GB|MB|%|docs))', rf'{self.BOLD}\1{self.RESET}', message) + message = re.sub(r'(Shard \d+)', rf'{self.COLORS["INFO"]}{self.BOLD}\1{self.RESET}', message) + return message + +def setup_default_logging(): + handler = logging.StreamHandler() + handler.setFormatter(ColoredFormatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) + logging.basicConfig( + level=logging.INFO, + handlers=[handler] + ) + +setup_default_logging() +logger = logging.getLogger(__name__) + +def get_base_dir(): + # co-locate nanochat intermediates with other cached data in ~/.cache (by default) + if os.environ.get("NANOCHAT_BASE_DIR"): + nanochat_dir = os.environ.get("NANOCHAT_BASE_DIR") + else: + home_dir = os.path.expanduser("~") + cache_dir = os.path.join(home_dir, ".cache") + nanochat_dir = os.path.join(cache_dir, "nanochat") + os.makedirs(nanochat_dir, exist_ok=True) + return nanochat_dir + +def download_file_with_lock(url, filename, postprocess_fn=None): + """ + Downloads a file from a URL to a local 
path in the base directory. + Uses a lock file to prevent concurrent downloads among multiple ranks. + """ + base_dir = get_base_dir() + file_path = os.path.join(base_dir, filename) + lock_path = file_path + ".lock" + + if os.path.exists(file_path): + return file_path + + with FileLock(lock_path): + # Only a single rank can acquire this lock + # All other ranks block until it is released + + # Recheck after acquiring lock + if os.path.exists(file_path): + return file_path + + # Download the content as bytes + print(f"Downloading {url}...") + with urllib.request.urlopen(url) as response: + content = response.read() # bytes + + # Write to local file + with open(file_path, 'wb') as f: + f.write(content) + print(f"Downloaded to {file_path}") + + # Run the postprocess function if provided + if postprocess_fn is not None: + postprocess_fn(file_path) + + return file_path + +def print0(s="",**kwargs): + ddp_rank = int(os.environ.get('RANK', 0)) + if ddp_rank == 0: + print(s, **kwargs) + +def print_banner(): + # Cool DOS Rebel font ASCII banner made with https://manytools.org/hacker-tools/ascii-banner/ + banner = """ + █████ █████ + ░░███ ░░███ + ████████ ██████ ████████ ██████ ██████ ░███████ ██████ ███████ + ░░███░░███ ░░░░░███ ░░███░░███ ███░░███ ███░░███ ░███░░███ ░░░░░███░░░███░ + ░███ ░███ ███████ ░███ ░███ ░███ ░███░███ ░░░ ░███ ░███ ███████ ░███ + ░███ ░███ ███░░███ ░███ ░███ ░███ ░███░███ ███ ░███ ░███ ███░░███ ░███ ███ + ████ █████░░████████ ████ █████░░██████ ░░██████ ████ █████░░███████ ░░█████ + ░░░░ ░░░░░ ░░░░░░░░ ░░░░ ░░░░░ ░░░░░░ ░░░░░░ ░░░░ ░░░░░ ░░░░░░░░ ░░░░░ + """ + print0(banner) + +def is_ddp_requested() -> bool: + """ + True if launched by torchrun (env present), even before init. + Used to decide whether we *should* initialize a PG. + """ + return all(k in os.environ for k in ("RANK", "LOCAL_RANK", "WORLD_SIZE")) + +def is_ddp_initialized() -> bool: + """ + True if torch.distributed is available and the process group is initialized. 
+ Used at cleanup to avoid destroying a non-existent PG. + """ + return dist.is_available() and dist.is_initialized() + +def get_dist_info(): + if is_ddp_requested(): + # We rely on torchrun's env to decide if we SHOULD init. + # (Initialization itself happens in compute init.) + assert all(var in os.environ for var in ['RANK', 'LOCAL_RANK', 'WORLD_SIZE']) + ddp_rank = int(os.environ['RANK']) + ddp_local_rank = int(os.environ['LOCAL_RANK']) + ddp_world_size = int(os.environ['WORLD_SIZE']) + return True, ddp_rank, ddp_local_rank, ddp_world_size + else: + return False, 0, 0, 1 + +def autodetect_device_type(): + # prefer to use CUDA if available, otherwise use MPS, otherwise fallback on CPU + if torch.cuda.is_available(): + device_type = "cuda" + elif torch.backends.mps.is_available(): + device_type = "mps" + else: + device_type = "cpu" + print0(f"Autodetected device type: {device_type}") + return device_type + +def compute_init(device_type="cuda"): # cuda|cpu|mps + """Basic initialization that we keep doing over and over, so make common.""" + + assert device_type in ["cuda", "mps", "cpu"], "Invalid device type atm" + if device_type == "cuda": + assert torch.cuda.is_available(), "Your PyTorch installation is not configured for CUDA but device_type is 'cuda'" + if device_type == "mps": + assert torch.backends.mps.is_available(), "Your PyTorch installation is not configured for MPS but device_type is 'mps'" + + # Reproducibility + # Note that we set the global seeds here, but most of the code uses explicit rng objects. + # The only place where global rng might be used is nn.Module initialization of the model weights. 
+ torch.manual_seed(42) + if device_type == "cuda": + torch.cuda.manual_seed(42) + # skipping full reproducibility for now, possibly investigate slowdown later + # torch.use_deterministic_algorithms(True) + + # Precision + if device_type == "cuda": + torch.set_float32_matmul_precision("high") # uses tf32 instead of fp32 for matmuls, see https://docs.pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html + + # Distributed setup: Distributed Data Parallel (DDP), optional, and requires CUDA + is_ddp_requested, ddp_rank, ddp_local_rank, ddp_world_size = get_dist_info() + if is_ddp_requested and device_type == "cuda": + device = torch.device("cuda", ddp_local_rank) + torch.cuda.set_device(device) # make "cuda" default to this device + dist.init_process_group(backend="nccl", device_id=device) + dist.barrier() + else: + device = torch.device(device_type) # mps|cpu + + if ddp_rank == 0: + logger.info(f"Distributed world size: {ddp_world_size}") + + return is_ddp_requested, ddp_rank, ddp_local_rank, ddp_world_size, device + +def compute_cleanup(): + """Companion function to compute_init, to clean things up before script exit""" + if is_ddp_initialized(): + dist.destroy_process_group() + +class DummyWandb: + """Useful if we wish to not use wandb but have all the same signatures""" + def __init__(self): + pass + def log(self, *args, **kwargs): + pass + def finish(self): + pass + +# hardcoded BF16 peak flops for various GPUs +# inspired by torchtitan: https://github.com/pytorch/torchtitan/blob/main/torchtitan/tools/utils.py +# and PR: https://github.com/karpathy/nanochat/pull/147 +def get_peak_flops(device_name: str) -> float: + name = device_name.lower() + + # Table order matters: more specific patterns first. 
+ _PEAK_FLOPS_TABLE = ( + # NVIDIA Blackwell + (["gb200"], 2.5e15), + (["grace blackwell"], 2.5e15), + (["b200"], 2.25e15), + (["b100"], 1.8e15), + # NVIDIA Hopper + (["h200", "nvl"], 836e12), + (["h200", "pcie"], 836e12), + (["h200"], 989e12), + (["h100", "nvl"], 835e12), + (["h100", "pcie"], 756e12), + (["h100"], 989e12), + (["h800", "nvl"], 989e12), + (["h800"], 756e12), + # NVIDIA Ampere data center + (["a100"], 312e12), + (["a800"], 312e12), + (["a40"], 149.7e12), + (["a30"], 165e12), + # NVIDIA Ada data center + (["l40s"], 362e12), + (["l40-s"], 362e12), + (["l40 s"], 362e12), + (["l4"], 121e12), + # AMD CDNA accelerators + (["mi355"], 2.5e15), + (["mi325"], 1.3074e15), + (["mi300x"], 1.3074e15), + (["mi300a"], 980.6e12), + (["mi250x"], 383e12), + (["mi250"], 362.1e12), + # Consumer RTX + (["5090"], 209.5e12), + (["4090"], 165.2e12), + (["3090"], 71e12), + ) + for patterns, flops in _PEAK_FLOPS_TABLE: + if all(p in name for p in patterns): + return flops + if "data center gpu max 1550" in name: + # Ponte Vecchio (PVC) - dynamic based on compute units + max_comp_units = torch.xpu.get_device_properties("xpu").max_compute_units + return 512 * max_comp_units * 1300 * 10**6 + + # Unknown GPU - return inf so MFU shows as 0% rather than a wrong guess + logger.warning(f"Peak flops undefined for: {device_name}, MFU will show as 0%") + return float('inf') diff --git a/services/inference/src/engine/engine.py b/services/inference/src/engine/engine.py new file mode 100644 index 00000000..205426f2 --- /dev/null +++ b/services/inference/src/engine/engine.py @@ -0,0 +1,358 @@ +""" +Engine for efficient inference of our models. + +Everything works around token sequences: +- The user can send token sequences to the engine +- The engine returns the next token + +Notes: +- The engine knows nothing about tokenization, it's purely token id sequences. + +The whole thing is made as efficient as possible. 
+""" + +import torch +import torch.nn.functional as F +import signal +import warnings +from contextlib import contextmanager +from collections import deque + +from .checkpoint_manager import load_model +from .common import COMPUTE_DTYPE, autodetect_device_type, compute_init + +# ----------------------------------------------------------------------------- +# Calculator tool helpers +@contextmanager +def timeout(duration, formula): + def timeout_handler(signum, frame): + raise Exception(f"'{formula}': timed out after {duration} seconds") + + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(duration) + yield + signal.alarm(0) + +def eval_with_timeout(formula, max_time=3): + try: + with timeout(max_time, formula): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", SyntaxWarning) + return eval(formula, {"__builtins__": {}}, {}) + except Exception as e: + signal.alarm(0) + # print(f"Warning: Failed to eval {formula}, exception: {e}") # it's ok ignore wrong calculator usage + return None + +def use_calculator(expr): + """ + Evaluate a Python expression safely. 
+ Supports both math expressions and string operations like .count() + """ + # Remove commas from numbers + expr = expr.replace(",", "") + + # Check if it's a pure math expression (old behavior) + if all([x in "0123456789*+-/.() " for x in expr]): + if "**" in expr: # disallow power operator + return None + return eval_with_timeout(expr) + + # Check if it's a string operation we support + # Allow: strings (single/double quotes), .count(), letters, numbers, spaces, parens + allowed_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'\"()._ " + if not all([x in allowed_chars for x in expr]): + return None + + # Disallow dangerous patterns + dangerous_patterns = ['__', 'import', 'exec', 'eval', 'compile', 'open', 'file', + 'input', 'raw_input', 'globals', 'locals', 'vars', 'dir', + 'getattr', 'setattr', 'delattr', 'hasattr'] + expr_lower = expr.lower() + if any(pattern in expr_lower for pattern in dangerous_patterns): + return None + + # Only allow .count() method for now (can expand later) + if '.count(' not in expr: + return None + + # Evaluate with timeout + return eval_with_timeout(expr) + +# ----------------------------------------------------------------------------- +class KVCache: + """ + KV Cache designed for Flash Attention 3's flash_attn_with_kvcache API. 
+ + Key differences from FA2-style cache: + - Tensors are (B, T, H, D) not (B, H, T, D) + - FA3 updates the cache in-place during flash_attn_with_kvcache + - Position tracked per batch element via cache_seqlens tensor + """ + + def __init__(self, batch_size, num_heads, seq_len, head_dim, num_layers, device, dtype): + self.batch_size = batch_size + self.max_seq_len = seq_len + self.n_layers = num_layers + self.n_heads = num_heads + self.head_dim = head_dim + # Pre-allocate cache tensors: (n_layers, B, T, H, D) + self.k_cache = torch.zeros(num_layers, batch_size, seq_len, num_heads, head_dim, device=device, dtype=dtype) + self.v_cache = torch.zeros(num_layers, batch_size, seq_len, num_heads, head_dim, device=device, dtype=dtype) + # Current sequence length per batch element (FA3 needs int32) + self.cache_seqlens = torch.zeros(batch_size, dtype=torch.int32, device=device) + # Previous token's normalized embedding for smear (set by model forward pass) + self.prev_embedding = None + + def reset(self): + """Reset cache to empty state.""" + self.cache_seqlens.zero_() + self.prev_embedding = None + + def get_pos(self): + """Get current position (assumes all batch elements at same position).""" + return self.cache_seqlens[0].item() + + def get_layer_cache(self, layer_idx): + """Return (k_cache, v_cache) views for a specific layer.""" + return self.k_cache[layer_idx], self.v_cache[layer_idx] + + def advance(self, num_tokens): + """Advance the cache position by num_tokens.""" + self.cache_seqlens += num_tokens + + def prefill(self, other): + """ + Copy cached KV from another cache into this one. + Used when we do batch=1 prefill and then want to generate multiple samples in parallel. 
+ """ + assert self.get_pos() == 0, "Cannot prefill a non-empty KV cache" + assert self.n_layers == other.n_layers and self.n_heads == other.n_heads and self.head_dim == other.head_dim + assert self.max_seq_len >= other.max_seq_len + other_pos = other.get_pos() + self.k_cache[:, :, :other_pos, :, :] = other.k_cache[:, :, :other_pos, :, :] + self.v_cache[:, :, :other_pos, :, :] = other.v_cache[:, :, :other_pos, :, :] + self.cache_seqlens.fill_(other_pos) + # Copy smear state: expand batch=1 prev_embedding to num_samples + if other.prev_embedding is not None: + self.prev_embedding = other.prev_embedding.expand(self.batch_size, -1, -1).clone() + +# ----------------------------------------------------------------------------- +@torch.inference_mode() +def sample_next_token(logits, rng, temperature=1.0, top_k=None): + """Sample a single next token from given logits of shape (B, vocab_size). Returns (B, 1).""" + assert temperature >= 0.0, "temperature must be non-negative" + if temperature == 0.0: + return torch.argmax(logits, dim=-1, keepdim=True) + if top_k is not None and top_k > 0: + k = min(top_k, logits.size(-1)) + vals, idx = torch.topk(logits, k, dim=-1) + vals = vals / temperature + probs = F.softmax(vals, dim=-1) + choice = torch.multinomial(probs, num_samples=1, generator=rng) + return idx.gather(1, choice) + else: + logits = logits / temperature + probs = F.softmax(logits, dim=-1) + return torch.multinomial(probs, num_samples=1, generator=rng) + +# ----------------------------------------------------------------------------- + +class RowState: + # Per-row state tracking during generation + def __init__(self, current_tokens=None): + self.current_tokens = current_tokens or [] # Current token sequence for this row + self.forced_tokens = deque() # Queue of tokens to force inject + self.in_python_block = False # Whether we are inside a python block + self.python_expr_tokens = [] # Tokens of the current python expression + self.completed = False # Whether this row 
has completed generation
+class Engine:
+
+    def __init__(self, model, tokenizer):
+        self.model = model
+        self.tokenizer = tokenizer # needed for tool use
+
+    @torch.inference_mode()
+    def generate(self, tokens, num_samples=1, max_tokens=None, temperature=1.0, top_k=None, seed=42):
+        """Prefill the prompt once at batch size 1, clone the KV cache across num_samples rows, then stream one sampled token column per step."""
+        assert isinstance(tokens, list) and isinstance(tokens[0], int), "expecting list of ints"
+        device = self.model.get_device()
+        # NOTE: setting the dtype here and in this way is an ugly hack.
+        # Currently the repo assumes that cuda -> bfloat16 and everything else -> float32.
+        # We need to know the dtype here to call __init__ on KVCache and pre-allocate its tensors.
+        # As a quick hack, we're making generate() function inherit and know about this repo-wise assumption.
+        # I think there has to be a bigger refactor to deal with device/dtype tracking across the codebase.
+        # In particular, the KVCache should allocate its tensors lazily
+        dtype = COMPUTE_DTYPE
+        rng = torch.Generator(device=device)
+        rng.manual_seed(seed)
+
+        # Get the special tokens we need to coordinate the tool use state machine
+        get_special = lambda s: self.tokenizer.encode_special(s)
+        python_start = get_special("<|python_start|>")
+        python_end = get_special("<|python_end|>")
+        output_start = get_special("<|output_start|>")
+        output_end = get_special("<|output_end|>")
+        assistant_end = get_special("<|assistant_end|>") # if sampled, ends row
+        bos = self.tokenizer.get_bos_token_id() # if sampled, ends row
+
+        # 1) Run a batch 1 prefill of the prompt tokens
+        m = self.model.config
+        kv_model_kwargs = {"num_heads": m.n_kv_head, "head_dim": m.n_embd // m.n_head, "num_layers": m.n_layer}
+        kv_cache_prefill = KVCache(
+            batch_size=1,
+            seq_len=len(tokens),
+            device=device,
+            dtype=dtype,
+            **kv_model_kwargs,
+        )
+        ids = torch.tensor([tokens], dtype=torch.long, device=device)
+        logits = self.model.forward(ids, kv_cache=kv_cache_prefill)
+        logits 
= logits[:, -1, :].expand(num_samples, -1) # (num_samples, vocab_size) + + # 2) Replicate the KV cache for each sample/row + kv_length_hint = (len(tokens) + max_tokens) if max_tokens is not None else self.model.config.sequence_len + kv_cache_decode = KVCache( + batch_size=num_samples, + seq_len=kv_length_hint, + device=device, + dtype=dtype, + **kv_model_kwargs, + ) + kv_cache_decode.prefill(kv_cache_prefill) + del kv_cache_prefill # no need to keep this memory around + + # 3) Initialize states for each sample + row_states = [RowState(tokens.copy()) for _ in range(num_samples)] + + # 4) Main generation loop + num_generated = 0 + while True: + # Stop condition: we've reached max tokens + if max_tokens is not None and num_generated >= max_tokens: + break + # Stop condition: all rows are completed + if all(state.completed for state in row_states): + break + + # Sample the next token for each row + next_ids = sample_next_token(logits, rng, temperature, top_k) # (B, 1) + sampled_tokens = next_ids[:, 0].tolist() + + # Process each row: choose the next token, update state, optional tool use + token_column = [] # contains the next token id along each row + token_masks = [] # contains the mask (was it sampled (1) or forced (0)?) along each row + for i, state in enumerate(row_states): + # Select the next token in this row + is_forced = len(state.forced_tokens) > 0 # are there tokens waiting to be forced in deque? 
+ token_masks.append(0 if is_forced else 1) # mask is 0 if forced, 1 if sampled + next_token = state.forced_tokens.popleft() if is_forced else sampled_tokens[i] + token_column.append(next_token) + # Update the state of this row to include the next token + state.current_tokens.append(next_token) + # On <|assistant_end|> or <|bos|>, mark the row as completed + if next_token == assistant_end or next_token == bos: + state.completed = True + # Handle tool logic + if next_token == python_start: + state.in_python_block = True + state.python_expr_tokens = [] + elif next_token == python_end and state.in_python_block: + state.in_python_block = False + if state.python_expr_tokens: + expr = self.tokenizer.decode(state.python_expr_tokens) + result = use_calculator(expr) + if result is not None: + result_tokens = self.tokenizer.encode(str(result)) + state.forced_tokens.append(output_start) + state.forced_tokens.extend(result_tokens) + state.forced_tokens.append(output_end) + state.python_expr_tokens = [] + elif state.in_python_block: + state.python_expr_tokens.append(next_token) + + # Yield the token column + yield token_column, token_masks + num_generated += 1 + + # Prepare logits for next iteration + ids = torch.tensor(token_column, dtype=torch.long, device=device).unsqueeze(1) + logits = self.model.forward(ids, kv_cache=kv_cache_decode)[:, -1, :] # (B, vocab_size) + + def generate_batch(self, tokens, num_samples=1, **kwargs): + """ + Non-streaming batch generation that just returns the final token sequences. + Returns a list of token sequences (list of lists of ints). + Terminal tokens (assistant_end, bos) are not included in the results. 
+ """ + assistant_end = self.tokenizer.encode_special("<|assistant_end|>") + bos = self.tokenizer.get_bos_token_id() + results = [tokens.copy() for _ in range(num_samples)] + masks = [[0] * len(tokens) for _ in range(num_samples)] + completed = [False] * num_samples + for token_column, token_masks in self.generate(tokens, num_samples, **kwargs): + for i, (token, mask) in enumerate(zip(token_column, token_masks)): + if not completed[i]: + if token == assistant_end or token == bos: + completed[i] = True + else: + results[i].append(token) + masks[i].append(mask) + # Stop if all rows are completed + if all(completed): + break + return results, masks + + +if __name__ == "__main__": + """ + Quick inline test to make sure that the naive/slow model.generate function + is equivalent to the faster Engine.generate function here. + """ + import time + # init compute + device_type = autodetect_device_type() + ddp, ddp_rank, ddp_local_rank, ddp_world_size, device = compute_init(device_type) + # load the model and tokenizer + model, tokenizer, meta = load_model("base", device, phase="eval") + bos_token_id = tokenizer.get_bos_token_id() + # common hyperparameters + kwargs = dict(max_tokens=64, temperature=0.0) + # set the starting prompt + prompt_tokens = tokenizer.encode("The chemical formula of water is", prepend=bos_token_id) + # generate the reference sequence using the model.generate() function + generated_tokens = [] + torch.cuda.synchronize() + t0 = time.time() + stream = model.generate(prompt_tokens, **kwargs) + for token in stream: + generated_tokens.append(token) + chunk = tokenizer.decode([token]) + print(chunk, end="", flush=True) + print() + torch.cuda.synchronize() + t1 = time.time() + print(f"Reference time: {t1 - t0:.2f}s") + reference_ids = generated_tokens + # generate tokens with Engine + generated_tokens = [] + engine = Engine(model, tokenizer) + stream = engine.generate(prompt_tokens, num_samples=1, **kwargs) # note: runs in fp32 + torch.cuda.synchronize() + 
t0 = time.time() + for token_column, token_masks in stream: + token = token_column[0] # only print out the first row + generated_tokens.append(token) + chunk = tokenizer.decode([token]) + print(chunk, end="", flush=True) + print() + torch.cuda.synchronize() + t1 = time.time() + print(f"Engine time: {t1 - t0:.2f}s") + # compare the two sequences + for i in range(len(reference_ids)): + if reference_ids[i] != generated_tokens[i]: + print(f"Mismatch at {i}: {reference_ids[i]} != {generated_tokens[i]}") + break + print(f"Match: {reference_ids == generated_tokens}") diff --git a/services/inference/src/engine/execution.py b/services/inference/src/engine/execution.py new file mode 100644 index 00000000..6f50c749 --- /dev/null +++ b/services/inference/src/engine/execution.py @@ -0,0 +1,349 @@ +""" +Sandboxed execution utilities for running Python code that comes out of an LLM. +Adapted from OpenAI HumanEval code: +https://github.com/openai/human-eval/blob/master/human_eval/execution.py + +What is covered: +- Each execution runs in its own process (can be killed if it hangs or crashes) +- Execution is limited by a timeout to stop infinite loops +- Memory limits are enforced by default (256MB) +- stdout and stderr are captured and returned +- Code runs in a temporary directory that is deleted afterwards +- Dangerous functions are disabled (examples: os.system, os.kill, shutil.rmtree, subprocess.Popen) + +What is not covered: +- Not a true security sandbox +- Network access is not blocked (e.g. sockets could be opened) +- Python's dynamic features (e.g. ctypes) could bypass restrictions +- No kernel-level isolation (no seccomp, no containers, no virtualization) + +Overall this sandbox is good for evaluation of generated code and protects against +accidental destructive behavior, but it is not safe against malicious adversarial code. 
+""" + +import contextlib +import faulthandler +import io +import multiprocessing +import os +import platform +import signal +import tempfile +from dataclasses import dataclass +from typing import Optional + +# ----------------------------------------------------------------------------- + +@dataclass +class ExecutionResult: + """Result of executing Python code in a sandbox.""" + success: bool + stdout: str + stderr: str + error: Optional[str] = None + timeout: bool = False + memory_exceeded: bool = False + + def __repr__(self): + parts = [] + parts.append(f"ExecutionResult(success={self.success}") + if self.timeout: + parts.append(", timeout=True") + if self.memory_exceeded: + parts.append(", memory_exceeded=True") + if self.error: + parts.append(f", error={self.error!r}") + if self.stdout: + parts.append(f", stdout={self.stdout!r}") + if self.stderr: + parts.append(f", stderr={self.stderr!r}") + parts.append(")") + return "".join(parts) + + +@contextlib.contextmanager +def time_limit(seconds: float): + def signal_handler(signum, frame): + raise TimeoutException("Timed out!") + + signal.setitimer(signal.ITIMER_REAL, seconds) + signal.signal(signal.SIGALRM, signal_handler) + try: + yield + finally: + signal.setitimer(signal.ITIMER_REAL, 0) + + +@contextlib.contextmanager +def capture_io(): + """Capture stdout and stderr, and disable stdin.""" + stdout_capture = io.StringIO() + stderr_capture = io.StringIO() + stdin_block = WriteOnlyStringIO() + with contextlib.redirect_stdout(stdout_capture): + with contextlib.redirect_stderr(stderr_capture): + with redirect_stdin(stdin_block): + yield stdout_capture, stderr_capture + + +@contextlib.contextmanager +def create_tempdir(): + with tempfile.TemporaryDirectory() as dirname: + with chdir(dirname): + yield dirname + + +class TimeoutException(Exception): + pass + + +class WriteOnlyStringIO(io.StringIO): + """StringIO that throws an exception when it's read from""" + + def read(self, *args, **kwargs): + raise IOError + + def 
readline(self, *args, **kwargs): + raise IOError + + def readlines(self, *args, **kwargs): + raise IOError + + def readable(self, *args, **kwargs): + """Returns True if the IO object can be read.""" + return False + + +class redirect_stdin(contextlib._RedirectStream): # type: ignore + _stream = "stdin" + + +@contextlib.contextmanager +def chdir(root): + if root == ".": + yield + return + cwd = os.getcwd() + os.chdir(root) + try: + yield + finally: + os.chdir(cwd) + + +def reliability_guard(maximum_memory_bytes: Optional[int] = None): + """ + This disables various destructive functions and prevents the generated code + from interfering with the test (e.g. fork bomb, killing other processes, + removing filesystem files, etc.) + + WARNING + This function is NOT a security sandbox. Untrusted code, including, model- + generated code, should not be blindly executed outside of one. See the + Codex paper for more information about OpenAI's code sandbox, and proceed + with caution. + """ + + if platform.uname().system != "Darwin": + # These resource limit calls seem to fail on macOS (Darwin), skip? 
+ import resource + resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes)) + resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes)) + resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes)) + + faulthandler.disable() + + import builtins + + builtins.exit = None + builtins.quit = None + + import os + + os.environ["OMP_NUM_THREADS"] = "1" + + os.kill = None + os.system = None + os.putenv = None + os.remove = None + os.removedirs = None + os.rmdir = None + os.fchdir = None + os.setuid = None + os.fork = None + os.forkpty = None + os.killpg = None + os.rename = None + os.renames = None + os.truncate = None + os.replace = None + os.unlink = None + os.fchmod = None + os.fchown = None + os.chmod = None + os.chown = None + os.chroot = None + os.fchdir = None + os.lchflags = None + os.lchmod = None + os.lchown = None + os.getcwd = None + os.chdir = None + + import shutil + + shutil.rmtree = None + shutil.move = None + shutil.chown = None + + import subprocess + + subprocess.Popen = None # type: ignore + + __builtins__["help"] = None + + import sys + + sys.modules["ipdb"] = None + sys.modules["joblib"] = None + sys.modules["resource"] = None + sys.modules["psutil"] = None + sys.modules["tkinter"] = None + + +def _unsafe_execute(code: str, timeout: float, maximum_memory_bytes: Optional[int], result_dict): + """Execute code in a subprocess with safety guards. Results are written to result_dict.""" + with create_tempdir(): + + # These system calls are needed when cleaning up tempdir. + import os + import shutil + + rmtree = shutil.rmtree + rmdir = os.rmdir + chdir = os.chdir + unlink = os.unlink + + # Disable functionalities that can make destructive changes to the test. 
+ reliability_guard(maximum_memory_bytes=maximum_memory_bytes) + + # Default to failure + result_dict.update({ + "success": False, + "stdout": "", + "stderr": "", + "timeout": False, + "memory_exceeded": False, + "error": None, + }) + + try: + exec_globals = {} + with capture_io() as (stdout_capture, stderr_capture): + with time_limit(timeout): + # WARNING + # This program exists to execute untrusted model-generated code. Although + # it is highly unlikely that model-generated code will do something overtly + # malicious in response to this test suite, model-generated code may act + # destructively due to a lack of model capability or alignment. + # Users are strongly encouraged to sandbox this evaluation suite so that it + # does not perform destructive actions on their host or network. For more + # information on how OpenAI sandboxes its code, see the accompanying paper. + # Once you have read this disclaimer and taken appropriate precautions, + # uncomment the following line and proceed at your own risk: + exec(code, exec_globals) + + result_dict.update({ + "success": True, + "stdout": stdout_capture.getvalue(), + "stderr": stderr_capture.getvalue(), + }) + + except TimeoutException: + result_dict.update({ + "timeout": True, + "error": "Execution timed out", + }) + + except MemoryError as e: + result_dict.update({ + "memory_exceeded": True, + "error": f"Memory limit exceeded: {e}", + }) + + except BaseException as e: + result_dict.update({ + "error": f"{type(e).__name__}: {e}", + }) + + # Needed for cleaning up. + shutil.rmtree = rmtree + os.rmdir = rmdir + os.chdir = chdir + os.unlink = unlink + + +def execute_code( + code: str, + timeout: float = 5.0, # 5 seconds default + maximum_memory_bytes: Optional[int] = 256 * 1024 * 1024, # 256MB default +) -> ExecutionResult: + """ + Execute Python code in a sandboxed environment. 
+ + Args: + code: Python code to execute as a string + timeout: Maximum execution time in seconds (default: 5.0) + maximum_memory_bytes: Memory limit in bytes (default: 256MB, None to disable) + + Returns: + ExecutionResult with success status, stdout/stderr, and error information + + Example: + >>> result = execute_code("print('hello world')") + >>> result.success + True + >>> result.stdout + 'hello world\\n' + """ + + manager = multiprocessing.Manager() + result_dict = manager.dict() + + p = multiprocessing.Process( + target=_unsafe_execute, + args=(code, timeout, maximum_memory_bytes, result_dict) + ) + p.start() + p.join(timeout=timeout + 1) + + if p.is_alive(): + p.kill() + return ExecutionResult( + success=False, + stdout="", + stderr="", + error="Execution timed out (process killed)", + timeout=True, + memory_exceeded=False, + ) + + if not result_dict: + return ExecutionResult( + success=False, + stdout="", + stderr="", + error="Execution failed (no result returned)", + timeout=True, + memory_exceeded=False, + ) + + return ExecutionResult( + success=result_dict["success"], + stdout=result_dict["stdout"], + stderr=result_dict["stderr"], + error=result_dict["error"], + timeout=result_dict["timeout"], + memory_exceeded=result_dict["memory_exceeded"], + ) + diff --git a/services/inference/src/engine/flash_attention.py b/services/inference/src/engine/flash_attention.py new file mode 100644 index 00000000..470df27e --- /dev/null +++ b/services/inference/src/engine/flash_attention.py @@ -0,0 +1,187 @@ +""" +Unified Flash Attention interface with automatic FA3/SDPA switching. + +Exports `flash_attn` module that matches the FA3 API exactly, but falls back +to PyTorch SDPA on non-Hopper GPUs (including Blackwell), MPS, and CPU. 
import torch
import torch.nn.functional as F


# =============================================================================
# Detection: Try to load FA3 on Hopper+ GPUs
# =============================================================================
def _load_flash_attention_3():
    """Attempt to import the Flash Attention 3 kernel package.

    Returns the FA3 interface module on success, or None when no CUDA device
    is present, the GPU is not Hopper (sm90), or the kernel import fails.
    """
    if not torch.cuda.is_available():
        return None
    try:
        capability_major, _capability_minor = torch.cuda.get_device_capability()
        # FA3 kernels are compiled for Hopper (sm90) only
        # Ada (sm89), Blackwell (sm100) need SDPA fallback until FA3 is recompiled
        if capability_major != 9:
            return None
        import os
        os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
        from kernels import get_kernel
        kernel_pkg = get_kernel('varunneal/flash-attention-3')
        return kernel_pkg.flash_attn_interface
    except Exception:
        return None


_fa3 = _load_flash_attention_3()
HAS_FA3 = _fa3 is not None

# Override for testing: set to 'fa3', 'sdpa', or None (auto)
_override_impl = None


def _resolve_use_fa3():
    """Decide once, at import time, whether FA3 will actually be used.

    Honors the test override first, then hardware availability, then dtype:
    the FA3 Hopper kernels only support bf16/fp8, so any other compute dtype
    falls back to SDPA.
    """
    if _override_impl == 'fa3':
        assert HAS_FA3, "Cannot override to FA3: not available on this hardware"
        return True
    if _override_impl == 'sdpa':
        return False
    if not HAS_FA3:
        return False
    from .common import COMPUTE_DTYPE
    return COMPUTE_DTYPE == torch.bfloat16


USE_FA3 = _resolve_use_fa3()


# =============================================================================
# SDPA helpers
# =============================================================================
def _sdpa_attention(q, k, v, window_size, enable_gqa):
    """
    SDPA attention with sliding window support.
    q, k, v are (B, H, T, D) format.

    Three cases, cheapest first:
      1. full-context causal with equal q/k lengths -> built-in is_causal
      2. single-token decode -> slice the window out of k/v, no mask at all
      3. chunked inference -> explicit boolean mask aligned to cache position
    """
    Tq = q.size(2)
    Tk = k.size(2)
    window = window_size[0]  # only the "left" window is used; right is assumed 0

    # Full context, same length
    if (window < 0 or window >= Tq) and Tq == Tk:
        return F.scaled_dot_product_attention(q, k, v, is_causal=True, enable_gqa=enable_gqa)

    # Single token generation
    if Tq == 1:
        if window >= 0 and window < Tk:
            # window is "left" tokens we need to include (window + 1) keys total
            start = max(0, Tk - (window + 1))
            k = k[:, :, start:, :]
            v = v[:, :, start:, :]
        return F.scaled_dot_product_attention(q, k, v, is_causal=False, enable_gqa=enable_gqa)

    # Need explicit mask for sliding window/chunk inference
    device = q.device
    # For chunk inference (Tq != Tk), is_causal is not aligned to cache position => build an explicit bool mask
    row_idx = (Tk - Tq) + torch.arange(Tq, device=device).unsqueeze(1)
    col_idx = torch.arange(Tk, device=device).unsqueeze(0)
    mask = col_idx <= row_idx

    # sliding window (left)
    if window >= 0 and window < Tk:
        mask = mask & ((row_idx - col_idx) <= window)

    return F.scaled_dot_product_attention(q, k, v, attn_mask=mask, enable_gqa=enable_gqa)

# =============================================================================
# Public API: Same interface as FA3
# =============================================================================
def flash_attn_func(q, k, v, causal=False, window_size=(-1, -1)):
    """
    Flash Attention for training (no KV cache).

    Args:
        q, k, v: Tensors of shape (B, T, H, D)
        causal: Whether to use causal masking
        window_size: (left, right) sliding window. -1 means unlimited.

    Returns:
        Output tensor of shape (B, T, H, D)

    NOTE(review): the SDPA fallback below always applies causal-style masking
    via _sdpa_attention regardless of `causal`; only the FA3 path honors
    causal=False. Confirm callers always pass causal=True when on SDPA.
    """
    if USE_FA3:
        return _fa3.flash_attn_func(q, k, v, causal=causal, window_size=window_size)

    # SDPA fallback: transpose (B, T, H, D) -> (B, H, T, D)
    q = q.transpose(1, 2)
    k = k.transpose(1, 2)
    v = v.transpose(1, 2)
    enable_gqa = q.size(1) != k.size(1)  # grouped-query attn when fewer kv heads
    y = _sdpa_attention(q, k, v, window_size, enable_gqa)
    return y.transpose(1, 2)  # back to (B, T, H, D)


def flash_attn_with_kvcache(q, k_cache, v_cache, k=None, v=None, cache_seqlens=None,
                            causal=False, window_size=(-1, -1)):
    """
    Flash Attention with KV cache for inference.

    FA3 updates k_cache/v_cache in-place. Our SDPA fallback does the same.

    Args:
        q: Queries, shape (B, T_new, H, D)
        k_cache, v_cache: Pre-allocated cache tensors, shape (B, T_max, H_kv, D)
        k, v: New keys/values to insert, shape (B, T_new, H_kv, D)
        cache_seqlens: Current position in cache, shape (B,) int32
        causal: Whether to use causal masking (only honored on the FA3 path;
            the SDPA fallback always masks causally via _sdpa_attention)
        window_size: (left, right) sliding window. -1 means unlimited.

    Returns:
        Output tensor of shape (B, T_new, H, D)
    """
    if USE_FA3:
        return _fa3.flash_attn_with_kvcache(
            q, k_cache, v_cache, k=k, v=v, cache_seqlens=cache_seqlens,
            causal=causal, window_size=window_size
        )

    # SDPA fallback: manually manage KV cache
    B, T_new, H, D = q.shape
    pos = cache_seqlens[0].item()  # assume uniform position across batch

    # Insert new k, v into cache (in-place, matching FA3 behavior)
    if k is not None and v is not None:
        k_cache[:, pos:pos+T_new, :, :] = k
        v_cache[:, pos:pos+T_new, :, :] = v

    # Get full cache up to current position + new tokens
    end_pos = pos + T_new
    k_full = k_cache[:, :end_pos, :, :]
    v_full = v_cache[:, :end_pos, :, :]

    # Transpose to SDPA layout: (B, T, H, D) -> (B, H, T, D)
    q_sdpa = q.transpose(1, 2)
    k_sdpa = k_full.transpose(1, 2)
    v_sdpa = v_full.transpose(1, 2)

    enable_gqa = q_sdpa.size(1) != k_sdpa.size(1)
    y_sdpa = _sdpa_attention(q_sdpa, k_sdpa, v_sdpa, window_size, enable_gqa)

    return y_sdpa.transpose(1, 2)  # back to (B, T_new, H, D)


# =============================================================================
# Export: flash_attn module interface (drop-in replacement for FA3)
# =============================================================================
from types import SimpleNamespace
flash_attn = SimpleNamespace(
    flash_attn_func=flash_attn_func,
    flash_attn_with_kvcache=flash_attn_with_kvcache,
)
+ +How FP8 training works +====================== +A standard Linear layer does one matmul in forward and two in backward: + forward: output = input @ weight.T + backward: grad_input = grad_output @ weight + grad_weight= grad_output.T @ input + +FP8 training wraps each of these three matmuls with: + 1. Compute scale = FP8_MAX / max(|tensor|) for each operand + 2. Quantize: fp8_tensor = clamp(tensor * scale, -FP8_MAX, FP8_MAX).to(fp8) + 3. Matmul via torch._scaled_mm (cuBLAS FP8 kernel, ~2x faster than bf16) + 4. Dequantize: _scaled_mm handles this internally using the inverse scales + +The key insight: torch._scaled_mm and the float8 dtypes are PyTorch built-ins. +torchao is just orchestration around these primitives. We can call them directly. + +FP8 dtype choice +================ +There are two FP8 formats. We use both, following the standard convention: + - float8_e4m3fn: 4-bit exponent, 3-bit mantissa, range [-448, 448] + Higher precision (more mantissa bits), used for input and weight. + - float8_e5m2: 5-bit exponent, 2-bit mantissa, range [-57344, 57344] + Wider range (more exponent bits), used for gradients which can be large. + +torch._scaled_mm layout requirements +===================================== +The cuBLAS FP8 kernel requires specific memory layouts: + - First argument (A): must be row-major (contiguous) + - Second argument (B): must be column-major (B.t().contiguous().t()) +If B is obtained by transposing a contiguous tensor (e.g. weight.t()), it is +already column-major — no copy needed. Otherwise we use _to_col_major(). + +How this differs from torchao's approach +======================================== +torchao uses a "tensor subclass" architecture: Float8TrainingTensor is a subclass +of torch.Tensor that bundles FP8 data + scale + metadata. It implements +__torch_dispatch__ with a dispatch table that intercepts every aten op (mm, t, +reshape, clone, ...) and handles it in FP8-aware fashion. 
When you call + output = input @ weight.T +the @ operator dispatches to aten.mm, which gets intercepted and routed to +torch._scaled_mm behind the scenes. This is ~2000 lines of code because you need +a handler for every tensor operation that might touch an FP8 tensor. + +We take a simpler approach: a single autograd.Function (_Float8Matmul) that takes +full-precision inputs, quantizes to FP8 internally, calls _scaled_mm, and returns +full-precision outputs. Marked @allow_in_graph so torch.compile treats it as one +opaque node rather than trying to trace inside. + +The trade-off is in how torch.compile sees the two approaches: + - torchao: compile decomposes the tensor subclass (via __tensor_flatten__) and + sees every individual op (amax, scale, cast, _scaled_mm) as separate graph + nodes. Inductor can fuse these with surrounding operations (e.g. fuse the + amax computation with the preceding layer's activation function). + - ours: compile sees a single opaque call. It can optimize everything around + the FP8 linear (attention, norms, etc.) but cannot fuse across the boundary. + +Both call the exact same cuBLAS _scaled_mm kernel — the GPU matmul is identical. +The difference is only in the "glue" ops (amax, scale, cast) which are tiny +compared to the matmul. In practice this means our version is slightly faster +(less compilation overhead, no tensor subclass dispatch cost) but can produce +subtly different floating-point rounding paths under torch.compile, since Inductor +generates a different graph. Numerics are bitwise identical in eager mode. +""" + +import torch +import torch.nn as nn + +from .common import COMPUTE_DTYPE + +# Avoid division by zero when computing scale from an all-zeros tensor +EPS = 1e-12 + + +@torch.no_grad() +def _to_fp8(x, fp8_dtype): + """Dynamically quantize a tensor to FP8 using tensorwise scaling. + + "Tensorwise" means one scalar scale for the entire tensor (as opposed to + "rowwise" which computes a separate scale per row). 
Tensorwise is faster + because cuBLAS handles the scaling; rowwise needs the CUTLASS kernel. + + Returns (fp8_data, inverse_scale) for use with torch._scaled_mm. + """ + fp8_max = torch.finfo(fp8_dtype).max + # Compute the max absolute value across the entire tensor + amax = x.float().abs().max() + # Scale maps [0, amax] -> [0, fp8_max]. Use float64 for the division to + # ensure consistent numerics between torch.compile and eager mode. + # (torchao does the same upcast — without it, compile/eager can diverge) + scale = fp8_max / amax.double().clamp(min=EPS) + scale = scale.float() + # Quantize: scale into FP8 range, saturate (clamp prevents overflow when + # casting — PyTorch's default is to wrap, not saturate), then cast to FP8 + x_scaled = x.float() * scale + x_clamped = x_scaled.clamp(-fp8_max, fp8_max) + x_fp8 = x_clamped.to(fp8_dtype) + # _scaled_mm expects the *inverse* of our scale (it multiplies by this to + # convert FP8 values back to the original range during the matmul) + inv_scale = scale.reciprocal() + return x_fp8, inv_scale + + +def _to_col_major(x): + """Rearrange a 2D tensor's memory to column-major layout. + + torch._scaled_mm requires its second operand in column-major layout. + The trick: transpose -> contiguous (forces a copy in transposed order) + -> transpose back. The result has the same logical shape but column-major + strides, e.g. a [M, N] tensor gets strides (1, M) instead of (N, 1). + """ + return x.t().contiguous().t() + + +# allow_in_graph tells torch.compile to treat this as an opaque operation — +# dynamo won't try to decompose it into smaller ops. See the module docstring +# for how this differs from torchao's tensor subclass approach. +@torch._dynamo.allow_in_graph +class _Float8Matmul(torch.autograd.Function): + """Custom autograd for the three FP8 GEMMs of a Linear layer. + + The forward quantizes input and weight to FP8 and saves + the quantized tensors + scales for backward. 
+ """ + + @staticmethod + def forward(ctx, input_2d, weight): + # Quantize both operands to e4m3 (higher precision format) + input_fp8, input_inv = _to_fp8(input_2d, torch.float8_e4m3fn) + weight_fp8, weight_inv = _to_fp8(weight, torch.float8_e4m3fn) + ctx.save_for_backward(input_fp8, input_inv, weight_fp8, weight_inv) + + # output = input @ weight.T + # input_fp8 is [B, K] contiguous = row-major (good for first arg) + # weight_fp8 is [N, K] contiguous, so weight_fp8.t() is [K, N] with + # strides (1, K) = column-major (good for second arg, no copy needed!) + output = torch._scaled_mm( + input_fp8, + weight_fp8.t(), + scale_a=input_inv, + scale_b=weight_inv, + out_dtype=input_2d.dtype, + # use_fast_accum=True accumulates the dot products in lower precision. + # Slightly less accurate but measurably faster. Standard practice for + # the forward pass; we use False in backward for more precise gradients. + use_fast_accum=True, + ) + return output + + @staticmethod + def backward(ctx, grad_output): + in_fp8, in_inv, w_fp8, w_inv = ctx.saved_tensors + + # === GEMM 1: grad_input = grad_output @ weight === + # Shapes: [B, N] @ [N, K] -> [B, K] + # Gradients use e5m2 (wider range), weights use e4m3 (higher precision) + go_fp8, go_inv = _to_fp8(grad_output, torch.float8_e5m2) + # go_fp8 is [B, N] contiguous = row-major, good for first arg + # w_fp8 is [N, K] contiguous = row-major, need column-major for second arg + w_col = _to_col_major(w_fp8) + grad_input = torch._scaled_mm( + go_fp8, + w_col, + scale_a=go_inv, + scale_b=w_inv, + out_dtype=grad_output.dtype, + use_fast_accum=False, + ) + + # === GEMM 2: grad_weight = grad_output.T @ input === + # Shapes: [N, B] @ [B, K] -> [N, K] + # go_fp8 is [B, N] contiguous, we need go.T = [N, B] as first arg. + # Transposing gives column-major, but first arg needs row-major, + # so we must call .contiguous() to physically rearrange the memory. 
+ go_T = go_fp8.t().contiguous() # [N, B] row-major + in_col = _to_col_major(in_fp8) # [B, K] column-major + grad_weight = torch._scaled_mm( + go_T, + in_col, + scale_a=go_inv, + scale_b=in_inv, + out_dtype=grad_output.dtype, + use_fast_accum=False, + ) + + return grad_input, grad_weight + + +class Float8Linear(nn.Linear): + """Drop-in nn.Linear replacement that does FP8 compute. + + Weights and biases remain in their original precision (e.g. fp32/bf16). + Only the matmul is performed in FP8 via the _Float8Matmul autograd function. + """ + + def forward(self, input): + # Cast input to COMPUTE_DTYPE (typically bf16) since _scaled_mm expects + # reduced precision input, and we no longer rely on autocast to do this. + input = input.to(COMPUTE_DTYPE) + # _scaled_mm only works on 2D tensors, so flatten batch dimensions + orig_shape = input.shape + input_2d = input.reshape(-1, orig_shape[-1]) + output = _Float8Matmul.apply(input_2d, self.weight) + output = output.reshape(*orig_shape[:-1], output.shape[-1]) + if self.bias is not None: + output = output + self.bias.to(output.dtype) + return output + + @classmethod + def from_float(cls, mod): + """Create Float8Linear from nn.Linear, sharing the same weight and bias. + + Uses meta device to avoid allocating a temporary weight tensor — we + create the module shell on meta (shapes/dtypes only, no memory), then + point .weight and .bias to the original module's parameters. + """ + with torch.device("meta"): + new_mod = cls(mod.in_features, mod.out_features, bias=False) + new_mod.weight = mod.weight + new_mod.bias = mod.bias + return new_mod + + +class Float8LinearConfig: + """Minimal config matching torchao's API. Only tensorwise recipe is supported.""" + + @staticmethod + def from_recipe_name(recipe_name): + if recipe_name != "tensorwise": + raise ValueError( + f"Only 'tensorwise' recipe is supported, got '{recipe_name}'. " + f"Rowwise/axiswise recipes require the full torchao library." 
+ ) + return Float8LinearConfig() + + +def convert_to_float8_training(module, *, config=None, module_filter_fn=None): + """Replace nn.Linear layers with Float8Linear throughout a module. + + Walks the module tree in post-order (children before parents) and swaps + each nn.Linear that passes the optional filter. The new Float8Linear shares + the original weight and bias tensors — no copies, no extra memory. + + Args: + module: Root module to convert. + config: Float8LinearConfig (accepted for API compat, only tensorwise supported). + module_filter_fn: Optional filter(module, fqn) -> bool. Only matching Linears + are converted. Common use: skip layers with dims not divisible by 16 + (hardware requirement for FP8 matmuls on H100). + """ + def _convert(mod, prefix=""): + for name, child in mod.named_children(): + fqn = f"{prefix}.{name}" if prefix else name + _convert(child, fqn) + if isinstance(child, nn.Linear) and not isinstance(child, Float8Linear): + if module_filter_fn is None or module_filter_fn(child, fqn): + setattr(mod, name, Float8Linear.from_float(child)) + + _convert(module) + return module diff --git a/services/inference/src/engine/gpt.py b/services/inference/src/engine/gpt.py new file mode 100644 index 00000000..e01537a4 --- /dev/null +++ b/services/inference/src/engine/gpt.py @@ -0,0 +1,507 @@ +""" +GPT model (rewrite, a lot simpler) +Notable features: +- rotary embeddings (and no positional embeddings) +- QK norm +- untied weights for token embedding and lm_head +- relu^2 activation in MLP +- norm after token embedding +- no learnable params in rmsnorm +- no bias in linear layers +- Group-Query Attention (GQA) support for more efficient inference +- Flash Attention 3 integration +""" + +from functools import partial +from dataclasses import dataclass + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .common import COMPUTE_DTYPE, get_dist_info, print0 +from .optim import DistMuonAdamW, MuonAdamW + +# Our custom Flash 
Attention module that automatically uses FA3 on Hopper+ and SDPA fallback elsewhere +from .flash_attention import flash_attn + +@dataclass +class GPTConfig: + sequence_len: int = 2048 + vocab_size: int = 32768 + n_layer: int = 12 + n_head: int = 6 # number of query heads + n_kv_head: int = 6 # number of key/value heads (GQA) + n_embd: int = 768 + # Sliding window attention pattern string, tiled across layers. Final layer always L. + # Characters: L=long (full context), S=short (quarter context) + # Examples: "L"=all full context, "SL"=alternating, "SSL"=two short then one long + window_pattern: str = "SSSL" + + +def norm(x): + return F.rms_norm(x, (x.size(-1),)) # note that this will run in bf16, seems ok + +class Linear(nn.Linear): + """nn.Linear that casts weights to match input dtype in forward. + Replaces autocast: master weights stay fp32 for optimizer precision, + but matmuls run in the activation dtype (typically bf16 from embeddings).""" + def forward(self, x): + return F.linear(x, self.weight.to(dtype=x.dtype)) + + +def has_ve(layer_idx, n_layer): + """Returns True if GPT layer should have Value Embedding (alternating, last layer always included).""" + return layer_idx % 2 == (n_layer - 1) % 2 + +def apply_rotary_emb(x, cos, sin): + assert x.ndim == 4 # multihead attention + d = x.shape[3] // 2 + x1, x2 = x[..., :d], x[..., d:] # split up last dim into two halves + y1 = x1 * cos + x2 * sin # rotate pairs of dims + y2 = x1 * (-sin) + x2 * cos + return torch.cat([y1, y2], 3) + +class CausalSelfAttention(nn.Module): + def __init__(self, config, layer_idx): + super().__init__() + self.layer_idx = layer_idx + self.n_head = config.n_head + self.n_kv_head = config.n_kv_head + self.n_embd = config.n_embd + self.head_dim = self.n_embd // self.n_head + assert self.n_embd % self.n_head == 0 + assert self.n_kv_head <= self.n_head and self.n_head % self.n_kv_head == 0 + self.c_q = Linear(self.n_embd, self.n_head * self.head_dim, bias=False) + self.c_k = 
Linear(self.n_embd, self.n_kv_head * self.head_dim, bias=False) + self.c_v = Linear(self.n_embd, self.n_kv_head * self.head_dim, bias=False) + self.c_proj = Linear(self.n_embd, self.n_embd, bias=False) + self.ve_gate_channels = 12 + self.ve_gate = Linear(self.ve_gate_channels, self.n_kv_head, bias=False) if has_ve(layer_idx, config.n_layer) else None + + def forward(self, x, ve, cos_sin, window_size, kv_cache): + B, T, C = x.size() + + # Project the input to get queries, keys, and values + # Shape: (B, T, H, D) - FA3's native layout, no transpose needed! + q = self.c_q(x).view(B, T, self.n_head, self.head_dim) + k = self.c_k(x).view(B, T, self.n_kv_head, self.head_dim) + v = self.c_v(x).view(B, T, self.n_kv_head, self.head_dim) + + # Value residual (ResFormer): mix in value embedding with input-dependent gate per head + if ve is not None: + ve = ve.view(B, T, self.n_kv_head, self.head_dim) + gate = 3 * torch.sigmoid(self.ve_gate(x[..., :self.ve_gate_channels])) # (B, T, n_kv_head), range (0, 3) + v = v + gate.unsqueeze(-1) * ve + + # Apply Rotary Embeddings to queries and keys to get relative positional encoding + cos, sin = cos_sin + q, k = apply_rotary_emb(q, cos, sin), apply_rotary_emb(k, cos, sin) + q, k = norm(q), norm(k) # QK norm + q = q * 1.2 # sharper attention (split scale between Q and K), TODO think through better + k = k * 1.2 + + # Flash Attention (FA3 on Hopper+, PyTorch SDPA fallback elsewhere) + # window_size is (left, right) tuple: (N, 0) for causal, (-1, 0) for full context + if kv_cache is None: + # Training: causal attention with optional sliding window + y = flash_attn.flash_attn_func(q, k, v, causal=True, window_size=window_size) + else: + # Inference: use flash_attn_with_kvcache which handles cache management + k_cache, v_cache = kv_cache.get_layer_cache(self.layer_idx) + y = flash_attn.flash_attn_with_kvcache( + q, k_cache, v_cache, + k=k, v=v, + cache_seqlens=kv_cache.cache_seqlens, + causal=True, + window_size=window_size, + ) + # Advance 
position after last layer processes + if self.layer_idx == kv_cache.n_layers - 1: + kv_cache.advance(T) + + # Re-assemble the heads and project back to residual stream + y = y.contiguous().view(B, T, -1) + y = self.c_proj(y) + return y + + +class MLP(nn.Module): + def __init__(self, config): + super().__init__() + self.c_fc = Linear(config.n_embd, 4 * config.n_embd, bias=False) + self.c_proj = Linear(4 * config.n_embd, config.n_embd, bias=False) + + def forward(self, x): + x = self.c_fc(x) + x = F.relu(x).square() + x = self.c_proj(x) + return x + + +class Block(nn.Module): + def __init__(self, config, layer_idx): + super().__init__() + self.attn = CausalSelfAttention(config, layer_idx) + self.mlp = MLP(config) + + def forward(self, x, ve, cos_sin, window_size, kv_cache): + x = x + self.attn(norm(x), ve, cos_sin, window_size, kv_cache) + x = x + self.mlp(norm(x)) + return x + + +class GPT(nn.Module): + def __init__(self, config, pad_vocab_size_to=64): + """ + NOTE a major footgun: this __init__ function runs in meta device context (!!) + Therefore, any calculations inside here are shapes and dtypes only, no actual data. + => We actually initialize all data (parameters, buffers, etc.) in init_weights() instead. + """ + super().__init__() + self.config = config + # Compute per-layer window sizes for sliding window attention + # window_size is (left, right) tuple: (-1, 0) for full context, (N, 0) for sliding window + self.window_sizes = self._compute_window_sizes(config) + # Pad vocab for efficiency (DDP, tensor cores). This is just an optimization - outputs are cropped in forward(). 
+ # https://huggingface.co/docs/transformers/main_classes/model#transformers.PreTrainedModel.resize_token_embeddings + padded_vocab_size = ((config.vocab_size + pad_vocab_size_to - 1) // pad_vocab_size_to) * pad_vocab_size_to + if padded_vocab_size != config.vocab_size: + print0(f"Padding vocab_size from {config.vocab_size} to {padded_vocab_size} for efficiency") + self.transformer = nn.ModuleDict({ + "wte": nn.Embedding(padded_vocab_size, config.n_embd), + "h": nn.ModuleList([Block(config, layer_idx) for layer_idx in range(config.n_layer)]), + }) + self.lm_head = Linear(config.n_embd, padded_vocab_size, bias=False) + # Per-layer learnable scalars (inspired by modded-nanogpt) + # resid_lambdas: scales the residual stream at each layer (init 1.0 = neutral) + # x0_lambdas: blends initial embedding back in at each layer (init 0.0 = disabled) + # Separate parameters so they can have different optimizer treatment + self.resid_lambdas = nn.Parameter(torch.ones(config.n_layer)) # fake init, real init in init_weights() + self.x0_lambdas = nn.Parameter(torch.zeros(config.n_layer)) # fake init, real init in init_weights() + # Smear: mix previous token's embedding into current token (cheap bigram-like info) + self.smear_gate = Linear(24, 1, bias=False) + self.smear_lambda = nn.Parameter(torch.zeros(1)) + # Backout: subtract cached mid-layer residual before final norm to remove low-level features + self.backout_lambda = nn.Parameter(0.2 * torch.ones(1)) + # Value embeddings (ResFormer-style): alternating layers, last layer always included + head_dim = config.n_embd // config.n_head + kv_dim = config.n_kv_head * head_dim + self.value_embeds = nn.ModuleDict({str(i): nn.Embedding(padded_vocab_size, kv_dim) for i in range(config.n_layer) if has_ve(i, config.n_layer)}) + # To support meta device initialization, we init the rotary embeddings here, but it's just "fake" meta tensors only. 
        wte (embedding): normal, std=0.8
        lm_head: normal, std=0.001
        for each block:
            attn.c_q: uniform, std=1/sqrt(n_embd)
            attn.c_k: uniform, std=1/sqrt(n_embd)
            attn.c_v: uniform, std=1/sqrt(n_embd)
            attn.c_proj: zeros
            mlp.c_fc: uniform, std=0.4/sqrt(n_embd)
            mlp.c_proj: zeros
at deep layers + n_layer = self.config.n_layer + for i in range(n_layer): + self.resid_lambdas.data[i] = 1.15 - (0.10 * i / max(n_layer - 1, 1)) + # Decaying x0 init: earlier layers get more input embedding blending + for i in range(n_layer): + self.x0_lambdas.data[i] = 0.20 - (0.15 * i / max(n_layer - 1, 1)) + + # Value embeddings (init like c_v: uniform with same std) + for ve in self.value_embeds.values(): + torch.nn.init.uniform_(ve.weight, -s, s) + + # Gate weights init with small positive values so gates start slightly above neutral + for block in self.transformer.h: + if block.attn.ve_gate is not None: + torch.nn.init.uniform_(block.attn.ve_gate.weight, 0.0, 0.02) + + # Rotary embeddings + head_dim = self.config.n_embd // self.config.n_head + cos, sin = self._precompute_rotary_embeddings(self.rotary_seq_len, head_dim) + self.cos, self.sin = cos, sin + + # Cast embeddings to COMPUTE_DTYPE: optimizer can tolerate reduced-precision + # embeddings and it saves memory. Exception: fp16 requires fp32 embeddings + # because GradScaler cannot unscale fp16 gradients. + if COMPUTE_DTYPE != torch.float16: + self.transformer.wte.to(dtype=COMPUTE_DTYPE) + for ve in self.value_embeds.values(): + ve.to(dtype=COMPUTE_DTYPE) + + def _precompute_rotary_embeddings(self, seq_len, head_dim, base=100000, device=None): + # TODO: bump base theta more? e.g. 
100K is more common more recently + # autodetect the device from model embeddings + if device is None: + device = self.transformer.wte.weight.device + # stride the channels + channel_range = torch.arange(0, head_dim, 2, dtype=torch.float32, device=device) + inv_freq = 1.0 / (base ** (channel_range / head_dim)) + # stride the time steps + t = torch.arange(seq_len, dtype=torch.float32, device=device) + # calculate the rotation frequencies at each (time, channel) pair + freqs = torch.outer(t, inv_freq) + cos, sin = freqs.cos(), freqs.sin() + cos, sin = cos.to(COMPUTE_DTYPE), sin.to(COMPUTE_DTYPE) + cos, sin = cos[None, :, None, :], sin[None, :, None, :] # add batch and head dims for later broadcasting + return cos, sin + + def _compute_window_sizes(self, config): + """ + Compute per-layer window sizes for sliding window attention. + + Returns list of (left, right) tuples for FA3's window_size parameter: + - left: how many tokens before current position to attend to (-1 = unlimited) + - right: how many tokens after current position to attend to (0 for causal) + + Pattern string is tiled across layers. Final layer always gets L (full context). + Characters: L=long (full context), S=short (quarter context) + """ + pattern = config.window_pattern.upper() + assert all(c in "SL" for c in pattern), f"Invalid window_pattern: {pattern}. Use only S and L." 
        short_window = -(-long_window // 4 // 128) * 128  # ceil to FA3 tile size (2048 -> 512)
weird, it's just a lookup => we ignore) + - Chinchilla counts exp/sum/divide in attention softmax as flops (a little sus and very tiny => we ignore) + """ + nparams = sum(p.numel() for p in self.parameters()) + # Exclude non-matmul params: embeddings and per-layer scalars + value_embeds_numel = sum(ve.weight.numel() for ve in self.value_embeds.values()) + nparams_exclude = (self.transformer.wte.weight.numel() + value_embeds_numel + + self.resid_lambdas.numel() + self.x0_lambdas.numel() + + self.smear_gate.weight.numel() + self.smear_lambda.numel() + self.backout_lambda.numel()) + h, q, t = self.config.n_head, self.config.n_embd // self.config.n_head, self.config.sequence_len + # Sum attention FLOPs per layer, accounting for sliding window + attn_flops = 0 + for window_size in self.window_sizes: + window = window_size[0] # (left, right) tuple, we use left + effective_seq = t if window < 0 else min(window, t) + attn_flops += 12 * h * q * effective_seq + num_flops_per_token = 6 * (nparams - nparams_exclude) + attn_flops + return num_flops_per_token + + def num_scaling_params(self): + """ + Return detailed parameter counts for scaling law analysis. + Different papers use different conventions: + - Kaplan et al. excluded embedding parameters + - Chinchilla included all parameters + Ref: https://arxiv.org/abs/2203.15556 (Chinchilla paper) + Ref: https://arxiv.org/abs/2001.08361 (Kaplan et al. original scaling laws paper) + + Returns a dict with counts for each parameter group, so downstream analysis + can experiment with which combination gives the cleanest scaling laws. 
+ """ + # Count each group separately (mirrors the grouping in setup_optimizers) + wte = sum(p.numel() for p in self.transformer.wte.parameters()) + value_embeds = sum(p.numel() for p in self.value_embeds.parameters()) + lm_head = sum(p.numel() for p in self.lm_head.parameters()) + transformer_matrices = sum(p.numel() for p in self.transformer.h.parameters()) + scalars = self.resid_lambdas.numel() + self.x0_lambdas.numel() + self.smear_gate.weight.numel() + self.smear_lambda.numel() + self.backout_lambda.numel() + total = wte + value_embeds + lm_head + transformer_matrices + scalars + assert total == sum(p.numel() for p in self.parameters()), "Parameter count mismatch" + return { + 'wte': wte, + 'value_embeds': value_embeds, + 'lm_head': lm_head, + 'transformer_matrices': transformer_matrices, + 'scalars': scalars, + 'total': total, + } + + def setup_optimizer(self, unembedding_lr=0.004, embedding_lr=0.2, matrix_lr=0.02, weight_decay=0.0, scalar_lr=0.5): + model_dim = self.config.n_embd + ddp, rank, local_rank, world_size = get_dist_info() + + # Separate out all parameters into groups + matrix_params = list(self.transformer.h.parameters()) + value_embeds_params = list(self.value_embeds.parameters()) + embedding_params = list(self.transformer.wte.parameters()) + lm_head_params = list(self.lm_head.parameters()) + resid_params = [self.resid_lambdas] + x0_params = [self.x0_lambdas] + smear_params = [self.smear_gate.weight, self.smear_lambda, self.backout_lambda] + assert len(list(self.parameters())) == len(matrix_params) + len(embedding_params) + len(lm_head_params) + len(value_embeds_params) + len(resid_params) + len(x0_params) + len(smear_params) + + # Scale the LR for the AdamW parameters by ∝1/√dmodel (tuned for 768 dim model) + dmodel_lr_scale = (model_dim / 768) ** -0.5 + print0(f"Scaling the LR for the AdamW parameters ∝1/√({model_dim}/768) = {dmodel_lr_scale:.6f}") + + # Build param_groups with all required fields explicit + param_groups = [ + # AdamW groups 
(embeddings, lm_head, scalars) + dict(kind='adamw', params=lm_head_params, lr=unembedding_lr * dmodel_lr_scale, betas=(0.8, 0.96), eps=1e-10, weight_decay=0.01), + dict(kind='adamw', params=embedding_params, lr=embedding_lr * dmodel_lr_scale, betas=(0.8, 0.995), eps=1e-10, weight_decay=0.001), + dict(kind='adamw', params=value_embeds_params, lr=embedding_lr * dmodel_lr_scale * 0.5, betas=(0.8, 0.995), eps=1e-10, weight_decay=0.01), + dict(kind='adamw', params=resid_params, lr=scalar_lr * 0.01, betas=(0.8, 0.95), eps=1e-10, weight_decay=0.05), + dict(kind='adamw', params=x0_params, lr=scalar_lr, betas=(0.96, 0.95), eps=1e-10, weight_decay=0.0), # higher beta1 for x0 + dict(kind='adamw', params=smear_params, lr=0.2, betas=(0.8, 0.95), eps=1e-10, weight_decay=0.0), + ] + # Muon groups (matrix params, grouped by shape for stacking) + for shape in sorted({p.shape for p in matrix_params}): + group_params = [p for p in matrix_params if p.shape == shape] + param_groups.append(dict( + kind='muon', params=group_params, lr=matrix_lr, + momentum=0.95, ns_steps=5, beta2=0.9, weight_decay=weight_decay, + )) + + Factory = DistMuonAdamW if ddp else MuonAdamW + optimizer = Factory(param_groups) + for group in optimizer.param_groups: + group["initial_lr"] = group["lr"] + return optimizer + + def forward(self, idx, targets=None, kv_cache=None, loss_reduction='mean'): + B, T = idx.size() + + # Grab the rotary embeddings for the current sequence length (they are of shape (1, seq_len, 1, head_dim/2)) + assert T <= self.cos.size(1), f"Sequence length grew beyond the rotary embeddings cache: {T} > {self.cos.size(1)}" + assert idx.device == self.cos.device, f"Rotary embeddings and idx are on different devices: {idx.device} != {self.cos.device}" + assert self.cos.dtype == COMPUTE_DTYPE, f"Rotary embeddings must be in {COMPUTE_DTYPE}, got {self.cos.dtype}" + # if kv cache exists, we need to offset the rotary embeddings to the current position in the cache + T0 = 0 if kv_cache is None else 
kv_cache.get_pos() + cos_sin = self.cos[:, T0:T0+T], self.sin[:, T0:T0+T] # truncate cache to current sequence length + + # Embed the tokens + x = self.transformer.wte(idx) # embed current token + x = x.to(COMPUTE_DTYPE) # ensure activations are in compute dtype (no-op usually, but active for fp16 code path) + x = norm(x) + + # Smear: mix previous token's embedding into current position (cheap bigram info) + if kv_cache is None: + # Training / naive generate: full sequence available, use fast slice + assert T > 1, "Training forward pass should have T > 1" + gate = self.smear_lambda.to(x.dtype) * torch.sigmoid(self.smear_gate(x[:, 1:, :24])) + x = torch.cat([x[:, :1], x[:, 1:] + gate * x[:, :-1]], dim=1) + else: + # KV cache inference: read prev embedding from cache, store current for next step + x_pre_smear = kv_cache.prev_embedding + kv_cache.prev_embedding = x[:, -1:, :] + if T > 1: + # Prefill: apply smear to positions 1+, same as training + gate = self.smear_lambda.to(x.dtype) * torch.sigmoid(self.smear_gate(x[:, 1:, :24])) + x = torch.cat([x[:, :1], x[:, 1:] + gate * x[:, :-1]], dim=1) + elif x_pre_smear is not None: + # Decode: single token, use cached prev embedding + gate = self.smear_lambda.to(x.dtype) * torch.sigmoid(self.smear_gate(x[:, :, :24])) + x = x + gate * x_pre_smear + + # Forward the trunk of the Transformer + x0 = x # save initial normalized embedding for x0 residual + n_layer = self.config.n_layer + backout_layer = n_layer // 2 # cache at halfway point + x_backout = None + for i, block in enumerate(self.transformer.h): + x = self.resid_lambdas[i] * x + self.x0_lambdas[i] * x0 + ve = self.value_embeds[str(i)](idx).to(x.dtype) if str(i) in self.value_embeds else None + x = block(x, ve, cos_sin, self.window_sizes[i], kv_cache) + if i == backout_layer: + x_backout = x + # Subtract mid-layer residual to remove low-level features before logit projection + if x_backout is not None: + x = x - self.backout_lambda.to(x.dtype) * x_backout + x = norm(x) + 
+ # Forward the lm_head (compute logits) + softcap = 15 # smoothly cap the logits to the range [-softcap, softcap] + logits = self.lm_head(x) # (B, T, padded_vocab_size) <- very big tensor, large amount of memory + logits = logits[..., :self.config.vocab_size] # slice to remove padding + logits = logits.float() # switch to fp32 for logit softcap and loss computation + logits = softcap * torch.tanh(logits / softcap) # squash the logits + + if targets is not None: + # training: given the targets, compute and return the loss + # TODO experiment with chunked cross-entropy? + loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1, reduction=loss_reduction) + return loss + else: + # inference: just return the logits directly + return logits + + @torch.inference_mode() + def generate(self, tokens, max_tokens, temperature=1.0, top_k=None, seed=42): + """ + Naive autoregressive streaming inference. + To make it super simple, let's assume: + - batch size is 1 + - ids and the yielded tokens are simple Python lists and ints + """ + assert isinstance(tokens, list) + device = self.get_device() + rng = None + if temperature > 0: + rng = torch.Generator(device=device) + rng.manual_seed(seed) + ids = torch.tensor([tokens], dtype=torch.long, device=device) # add batch dim + for _ in range(max_tokens): + logits = self.forward(ids) # (B, T, vocab_size) + logits = logits[:, -1, :] # (B, vocab_size) + if top_k is not None and top_k > 0: + v, _ = torch.topk(logits, min(top_k, logits.size(-1))) + logits[logits < v[:, [-1]]] = -float('Inf') + if temperature > 0: + logits = logits / temperature + probs = F.softmax(logits, dim=-1) + next_ids = torch.multinomial(probs, num_samples=1, generator=rng) + else: + next_ids = torch.argmax(logits, dim=-1, keepdim=True) + ids = torch.cat((ids, next_ids), dim=1) + token = next_ids.item() + yield token diff --git a/services/inference/src/engine/optim.py b/services/inference/src/engine/optim.py new file mode 100644 
Adapted from: https://github.com/KellerJordan/modded-nanogpt
+ """ + # Weight decay (decoupled, applied before the update) + p.mul_(1 - lr_t * wd_t) + # Update running averages (lerp_ is cleaner and fuses well) + exp_avg.lerp_(grad, 1 - beta1_t) + exp_avg_sq.lerp_(grad.square(), 1 - beta2_t) + # Bias corrections + bias1 = 1 - beta1_t ** step_t + bias2 = 1 - beta2_t ** step_t + # Compute update and apply + denom = (exp_avg_sq / bias2).sqrt() + eps_t + step_size = lr_t / bias1 + p.add_(exp_avg / denom, alpha=-step_size) + +# ----------------------------------------------------------------------------- +""" +Muon optimizer adapted and simplified from modded-nanogpt. +https://github.com/KellerJordan/modded-nanogpt + +Background: +Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a +quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose +of minimizing steps, it turns out to be empirically effective to keep increasing the slope at +zero even beyond the point where the iteration no longer converges all the way to one everywhere +on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T +where S' is diagonal with S_{ii}' ~ Uniform(0.5, 1.5), which turns out not to hurt model +performance at all relative to UV^T, where USV^T = G is the SVD. + +Here, an alternative to Newton-Schulz iteration with potentially better convergence properties: +Polar Express Sign Method for orthogonalization. +https://arxiv.org/pdf/2505.16932 +by Noah Amsel, David Persson, Christopher Musco, Robert M. Gower. + +NorMuon variance reduction: per-neuron/column adaptive learning rate that normalizes +update scales after orthogonalization (Muon's output has non-uniform scales across neurons). 
+https://arxiv.org/pdf/2510.05491 + +Some of the changes in nanochat implementation: +- Uses a simpler, more general approach to parameter grouping and stacking +- Uses a single fused kernel for the momentum -> polar_express -> variance_reduction -> update step +- Makes no assumptions about model architecture (e.g. that attention weights are fused into QKVO format) +""" + +# Coefficients for Polar Express (computed for num_iters=5, safety_factor=2e-2, cushion=2) +# From https://arxiv.org/pdf/2505.16932 +polar_express_coeffs = [ + (8.156554524902461, -22.48329292557795, 15.878769915207462), + (4.042929935166739, -2.808917465908714, 0.5000178451051316), + (3.8916678022926607, -2.772484153217685, 0.5060648178503393), + (3.285753657755655, -2.3681294933425376, 0.46449024233003106), + (2.3465413258596377, -1.7097828382687081, 0.42323551169305323), +] + +@torch.compile(dynamic=False, fullgraph=True) +def muon_step_fused( + stacked_grads: Tensor, # (12, 768, 3072) - stacked gradients + stacked_params: Tensor, # (12, 768, 3072) - stacked parameters + momentum_buffer: Tensor, # (12, 768, 3072) - first moment buffer + second_momentum_buffer: Tensor, # (12, 768, 1) or (12, 1, 3072) - factored second moment + momentum_t: Tensor, # () - 0-D CPU tensor, momentum coefficient + lr_t: Tensor, # () - 0-D CPU tensor, learning rate + wd_t: Tensor, # () - 0-D CPU tensor, weight decay + beta2_t: Tensor, # () - 0-D CPU tensor, beta2 for second moment + ns_steps: int, # 5 - number of Newton-Schulz/Polar Express iterations + red_dim: int, # -1 or -2 - reduction dimension for variance +) -> None: + """ + Fused Muon step: momentum -> polar_express -> variance_reduction -> cautious_update + All in one compiled graph to eliminate Python overhead between ops. + Some of the constants are 0-D CPU tensors to avoid recompilation when values change. 
+ """ + + # Nesterov momentum + momentum = momentum_t.to(stacked_grads.dtype) + momentum_buffer.lerp_(stacked_grads, 1 - momentum) + g = stacked_grads.lerp_(momentum_buffer, momentum) + + # Polar express + X = g.bfloat16() + X = X / (X.norm(dim=(-2, -1), keepdim=True) * 1.01 + 1e-6) + if g.size(-2) > g.size(-1): # Tall matrix + for a, b, c in polar_express_coeffs[:ns_steps]: + A = X.mT @ X + B = b * A + c * (A @ A) + X = a * X + X @ B + else: # Wide matrix (original math) + for a, b, c in polar_express_coeffs[:ns_steps]: + A = X @ X.mT + B = b * A + c * (A @ A) + X = a * X + B @ X + g = X + + # Variance reduction + beta2 = beta2_t.to(g.dtype) + v_mean = g.float().square().mean(dim=red_dim, keepdim=True) + red_dim_size = g.size(red_dim) + v_norm_sq = v_mean.sum(dim=(-2, -1), keepdim=True) * red_dim_size + v_norm = v_norm_sq.sqrt() + second_momentum_buffer.lerp_(v_mean.to(dtype=second_momentum_buffer.dtype), 1 - beta2) + step_size = second_momentum_buffer.clamp_min(1e-10).rsqrt() + scaled_sq_sum = (v_mean * red_dim_size) * step_size.float().square() + v_norm_new = scaled_sq_sum.sum(dim=(-2, -1), keepdim=True).sqrt() + final_scale = step_size * (v_norm / v_norm_new.clamp_min(1e-10)) + g = g * final_scale.to(g.dtype) + + # Cautious weight decay + parameter update + lr = lr_t.to(g.dtype) + wd = wd_t.to(g.dtype) + mask = (g * stacked_params) >= 0 + stacked_params.sub_(lr * g + lr * wd * stacked_params * mask) + +# ----------------------------------------------------------------------------- +# Single GPU version of the MuonAdamW optimizer. +# Used mostly for reference, debugging and testing. + +class MuonAdamW(torch.optim.Optimizer): + """ + Combined optimizer: Muon for 2D matrix params, AdamW for others, single GPU version. + + AdamW - Fused AdamW optimizer step. 
+ + Muon - MomentUm Orthogonalized by Newton-schulz + https://kellerjordan.github.io/posts/muon/ + + Muon internally runs standard SGD-momentum, and then performs an orthogonalization post- + processing step, in which each 2D parameter's update is replaced with the nearest orthogonal + matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has + the advantage that it can be stably run in bfloat16 on the GPU. + + Some warnings: + - The Muon optimizer should not be used for the embedding layer, the final fully connected layer, + or any {0,1}-D parameters; those should all be optimized by a standard method (e.g., AdamW). + - To use it with 4D convolutional filters, it works well to just flatten their last 3 dimensions. + + Arguments: + param_groups: List of dicts, each containing: + - 'params': List of parameters + - 'kind': 'adamw' or 'muon' + - For AdamW groups: 'lr', 'betas', 'eps', 'weight_decay' + - For Muon groups: 'lr', 'momentum', 'ns_steps', 'beta2', 'weight_decay' + """ + def __init__(self, param_groups: list[dict]): + super().__init__(param_groups, defaults={}) + # 0-D CPU tensors to avoid torch.compile recompilation when values change + # AdamW tensors + self._adamw_step_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._adamw_lr_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._adamw_beta1_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._adamw_beta2_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._adamw_eps_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._adamw_wd_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + # Muon tensors + self._muon_momentum_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._muon_lr_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._muon_wd_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._muon_beta2_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + + def 
_step_adamw(self, group: dict) -> None: + """ + AdamW update for each param in the group individually. + Lazy init the state, fill in all 0-D tensors, call the fused kernel. + """ + for p in group['params']: + if p.grad is None: + continue + grad = p.grad + state = self.state[p] + + # State init + if not state: + state['step'] = 0 + state['exp_avg'] = torch.zeros_like(p) + state['exp_avg_sq'] = torch.zeros_like(p) + exp_avg = state['exp_avg'] + exp_avg_sq = state['exp_avg_sq'] + state['step'] += 1 + + # Fill 0-D tensors with current values + self._adamw_step_t.fill_(state['step']) + self._adamw_lr_t.fill_(group['lr']) + self._adamw_beta1_t.fill_(group['betas'][0]) + self._adamw_beta2_t.fill_(group['betas'][1]) + self._adamw_eps_t.fill_(group['eps']) + self._adamw_wd_t.fill_(group['weight_decay']) + + # Fused update: weight_decay -> momentum -> bias_correction -> param_update + adamw_step_fused( + p, grad, exp_avg, exp_avg_sq, + self._adamw_step_t, self._adamw_lr_t, self._adamw_beta1_t, + self._adamw_beta2_t, self._adamw_eps_t, self._adamw_wd_t, + ) + + def _step_muon(self, group: dict) -> None: + """ + Muon update for all params in the group (stacked for efficiency). + Lazy init the state, fill in all 0-D tensors, call the fused kernel. 
+ """ + params: list[Tensor] = group['params'] + if not params: + return + + # Get or create group-level buffers (stored in first param's state for convenience) + p = params[0] + state = self.state[p] + num_params = len(params) + shape, device, dtype = p.shape, p.device, p.dtype + + # Momentum for every individual parameter + if "momentum_buffer" not in state: + state["momentum_buffer"] = torch.zeros(num_params, *shape, dtype=dtype, device=device) + momentum_buffer = state["momentum_buffer"] + + # Second momentum buffer is factored, either per-row or per-column + if "second_momentum_buffer" not in state: + state_shape = (num_params, shape[-2], 1) if shape[-2] >= shape[-1] else (num_params, 1, shape[-1]) + state["second_momentum_buffer"] = torch.zeros(state_shape, dtype=dtype, device=device) + second_momentum_buffer = state["second_momentum_buffer"] + red_dim = -1 if shape[-2] >= shape[-1] else -2 + + # Stack grads and params (NOTE: this assumes all params have the same shape) + stacked_grads = torch.stack([p.grad for p in params]) + stacked_params = torch.stack(params) + + # Fill all the 0-D tensors with current values + self._muon_momentum_t.fill_(group["momentum"]) + self._muon_beta2_t.fill_(group["beta2"] if group["beta2"] is not None else 0.0) + self._muon_lr_t.fill_(group["lr"] * max(1.0, shape[-2] / shape[-1])**0.5) + self._muon_wd_t.fill_(group["weight_decay"]) + + # Single fused kernel: momentum -> polar_express -> variance_reduction -> update + muon_step_fused( + stacked_grads, + stacked_params, + momentum_buffer, + second_momentum_buffer, + self._muon_momentum_t, + self._muon_lr_t, + self._muon_wd_t, + self._muon_beta2_t, + group["ns_steps"], + red_dim, + ) + + # Copy back to original params + torch._foreach_copy_(params, list(stacked_params.unbind(0))) + + @torch.no_grad() + def step(self): + for group in self.param_groups: + if group['kind'] == 'adamw': + self._step_adamw(group) + elif group['kind'] == 'muon': + self._step_muon(group) + else: + raise 
ValueError(f"Unknown optimizer kind: {group['kind']}") + +# ----------------------------------------------------------------------------- +# Distributed version of the MuonAdamW optimizer. +# Used for training on multiple GPUs. + +class DistMuonAdamW(torch.optim.Optimizer): + """ + Combined distributed optimizer: Muon for 2D matrix params, AdamW for others. + + See MuonAdamW for the algorithmic details of each optimizer. This class adds + distributed communication to enable multi-GPU training without PyTorch DDP. + + Design Goals: + - Overlap communication with computation (async ops) + - Minimize memory by sharding optimizer states across ranks (ZeRO-2 style) + - Batch small tensors into single comm ops where possible + + Communication Pattern (3-phase async): + We use a 3-phase structure to maximize overlap between communication and compute: + + Phase 1: Launch all async reduce ops + - Kick off all reduce_scatter/all_reduce operations + - Don't wait - let them run in background while we continue + + Phase 2: Wait for reduces, compute updates, launch gathers + - For each group: wait for its reduce, compute the update, launch gather + - By processing groups in order, earlier gathers run while later computes happen + + Phase 3: Wait for gathers, copy back + - Wait for all gathers to complete + - Copy updated params back to original tensors (Muon only) + + AdamW Communication (ZeRO-2 style): + - Small params (<1024 elements): all_reduce gradients, update full param on each rank. + Optimizer state is replicated but these params are tiny (scalars, biases). + - Large params: reduce_scatter gradients so each rank gets 1/N of the grad, update + only that slice, then all_gather the updated slices. Optimizer state (exp_avg, + exp_avg_sq) is sharded - each rank only stores state for its slice. + Requires param.shape[0] divisible by world_size. + + Muon Communication (stacked + chunked): + - All params in a Muon group must have the same shape (caller's responsibility). 
+ - Stack all K params into a single (K, *shape) tensor for efficient comm. + - Divide K params across N ranks: each rank "owns" ceil(K/N) params. + - reduce_scatter the stacked grads so each rank gets its chunk. + - Each rank computes Muon update only for params it owns. + - all_gather the updated params back to all ranks. + - Optimizer state (momentum_buffer, second_momentum_buffer) is sharded by chunk. + - Padding: if K doesn't divide evenly, we zero-pad to (ceil(K/N) * N) for comm, + then ignore the padding when copying back. + + Buffer Reuse: + - For Muon, we allocate stacked_grads for reduce_scatter input, then reuse the + same buffer as the output for all_gather (stacked_params). This saves memory + since we don't need both buffers simultaneously. + + Arguments: + param_groups: List of dicts, each containing: + - 'params': List of parameters + - 'kind': 'adamw' or 'muon' + - For AdamW groups: 'lr', 'betas', 'eps', 'weight_decay' + - For Muon groups: 'lr', 'momentum', 'ns_steps', 'beta2', 'weight_decay' + """ + def __init__(self, param_groups: list[dict]): + super().__init__(param_groups, defaults={}) + # 0-D CPU tensors to avoid torch.compile recompilation when values change + self._adamw_step_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._adamw_lr_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._adamw_beta1_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._adamw_beta2_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._adamw_eps_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._adamw_wd_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._muon_momentum_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._muon_lr_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._muon_wd_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + self._muon_beta2_t = torch.tensor(0.0, dtype=torch.float32, device="cpu") + + def _reduce_adamw(self, group: dict, 
world_size: int) -> dict: + """Launch async reduce ops for AdamW group. Returns info dict with per-param infos.""" + param_infos = {} + for p in group['params']: + grad = p.grad + if p.numel() < 1024: + # Small params: all_reduce (no scatter/gather needed) + future = dist.all_reduce(grad, op=dist.ReduceOp.AVG, async_op=True).get_future() + param_infos[p] = dict(future=future, grad_slice=grad, is_small=True) + else: + # Large params: reduce_scatter + assert grad.shape[0] % world_size == 0, f"AdamW reduce_scatter requires shape[0] ({grad.shape[0]}) divisible by world_size ({world_size})" + rank_size = grad.shape[0] // world_size + grad_slice = torch.empty_like(grad[:rank_size]) + future = dist.reduce_scatter_tensor(grad_slice, grad, op=dist.ReduceOp.AVG, async_op=True).get_future() + param_infos[p] = dict(future=future, grad_slice=grad_slice, is_small=False) + return dict(param_infos=param_infos) + + def _reduce_muon(self, group: dict, world_size: int) -> dict: + """Launch async reduce op for Muon group. 
Returns info dict.""" + params = group['params'] + chunk_size = (len(params) + world_size - 1) // world_size + padded_num_params = chunk_size * world_size + p = params[0] + shape, device, dtype = p.shape, p.device, p.dtype + + # Stack grads and zero-pad to padded_num_params + grad_stack = torch.stack([p.grad for p in params]) + stacked_grads = torch.empty(padded_num_params, *shape, dtype=dtype, device=device) + stacked_grads[:len(params)].copy_(grad_stack) + if len(params) < padded_num_params: + stacked_grads[len(params):].zero_() + + # Reduce_scatter to get this rank's chunk + grad_chunk = torch.empty(chunk_size, *shape, dtype=dtype, device=device) + future = dist.reduce_scatter_tensor(grad_chunk, stacked_grads, op=dist.ReduceOp.AVG, async_op=True).get_future() + + return dict(future=future, grad_chunk=grad_chunk, stacked_grads=stacked_grads, chunk_size=chunk_size) + + def _compute_adamw(self, group: dict, info: dict, gather_list: list, rank: int, world_size: int) -> None: + """Wait for reduce, compute AdamW updates, launch gathers for large params.""" + param_infos = info['param_infos'] + for p in group['params']: + pinfo = param_infos[p] + pinfo['future'].wait() + grad_slice = pinfo['grad_slice'] + state = self.state[p] + + # For small params, operate on full param; for large, operate on slice + if pinfo['is_small']: + p_slice = p + else: + rank_size = p.shape[0] // world_size + p_slice = p[rank * rank_size:(rank + 1) * rank_size] + + # State init + if not state: + state['step'] = 0 + state['exp_avg'] = torch.zeros_like(p_slice) + state['exp_avg_sq'] = torch.zeros_like(p_slice) + state['step'] += 1 + + # Fill 0-D tensors and run fused kernel + self._adamw_step_t.fill_(state['step']) + self._adamw_lr_t.fill_(group['lr']) + self._adamw_beta1_t.fill_(group['betas'][0]) + self._adamw_beta2_t.fill_(group['betas'][1]) + self._adamw_eps_t.fill_(group['eps']) + self._adamw_wd_t.fill_(group['weight_decay']) + adamw_step_fused( + p_slice, grad_slice, state['exp_avg'], 
state['exp_avg_sq'], + self._adamw_step_t, self._adamw_lr_t, self._adamw_beta1_t, + self._adamw_beta2_t, self._adamw_eps_t, self._adamw_wd_t, + ) + + # Large params need all_gather + if not pinfo['is_small']: + future = dist.all_gather_into_tensor(p, p_slice, async_op=True).get_future() + gather_list.append(dict(future=future, params=None)) + + def _compute_muon(self, group: dict, info: dict, gather_list: list, rank: int) -> None: + """Wait for reduce, compute Muon updates, launch gather.""" + info['future'].wait() + params = group['params'] + chunk_size = info['chunk_size'] + grad_chunk = info['grad_chunk'] + p = params[0] + shape, device, dtype = p.shape, p.device, p.dtype + + # How many params does this rank own? + start_idx = rank * chunk_size + num_owned = min(chunk_size, max(0, len(params) - start_idx)) + + # Get or create group-level state + state = self.state[p] + if "momentum_buffer" not in state: + state["momentum_buffer"] = torch.zeros(chunk_size, *shape, dtype=dtype, device=device) + if "second_momentum_buffer" not in state: + state_shape = (chunk_size, shape[-2], 1) if shape[-2] >= shape[-1] else (chunk_size, 1, shape[-1]) + state["second_momentum_buffer"] = torch.zeros(state_shape, dtype=dtype, device=device) + red_dim = -1 if shape[-2] >= shape[-1] else -2 + + # Build output buffer for all_gather + updated_params = torch.empty(chunk_size, *shape, dtype=dtype, device=device) + + if num_owned > 0: + owned_params = [params[start_idx + i] for i in range(num_owned)] + stacked_owned = torch.stack(owned_params) + + # Fill 0-D tensors and run fused kernel + self._muon_momentum_t.fill_(group["momentum"]) + self._muon_beta2_t.fill_(group["beta2"]) + self._muon_lr_t.fill_(group["lr"] * max(1.0, shape[-2] / shape[-1])**0.5) + self._muon_wd_t.fill_(group["weight_decay"]) + muon_step_fused( + grad_chunk[:num_owned], stacked_owned, + state["momentum_buffer"][:num_owned], state["second_momentum_buffer"][:num_owned], + self._muon_momentum_t, self._muon_lr_t, 
self._muon_wd_t, self._muon_beta2_t, + group["ns_steps"], red_dim, + ) + updated_params[:num_owned].copy_(stacked_owned) + + if num_owned < chunk_size: + updated_params[num_owned:].zero_() + + # Reuse stacked_grads buffer for all_gather output + stacked_params = info["stacked_grads"] + future = dist.all_gather_into_tensor(stacked_params, updated_params, async_op=True).get_future() + gather_list.append(dict(future=future, stacked_params=stacked_params, params=params)) + + def _finish_gathers(self, gather_list: list) -> None: + """Wait for all gathers and copy Muon params back.""" + for info in gather_list: + info["future"].wait() + if info["params"] is not None: + # Muon: copy from stacked buffer back to individual params + torch._foreach_copy_(info["params"], list(info["stacked_params"][:len(info["params"])].unbind(0))) + + @torch.no_grad() + def step(self): + rank = dist.get_rank() + world_size = dist.get_world_size() + + # Phase 1: launch all async reduce ops + reduce_infos: list[dict] = [] + for group in self.param_groups: + if group['kind'] == 'adamw': + reduce_infos.append(self._reduce_adamw(group, world_size)) + elif group['kind'] == 'muon': + reduce_infos.append(self._reduce_muon(group, world_size)) + else: + raise ValueError(f"Unknown optimizer kind: {group['kind']}") + + # Phase 2: wait for reduces, compute updates, launch gathers + gather_list: list[dict] = [] + for group, info in zip(self.param_groups, reduce_infos): + if group['kind'] == 'adamw': + self._compute_adamw(group, info, gather_list, rank, world_size) + elif group['kind'] == 'muon': + self._compute_muon(group, info, gather_list, rank) + else: + raise ValueError(f"Unknown optimizer kind: {group['kind']}") + + # Phase 3: wait for gathers, copy back + self._finish_gathers(gather_list) diff --git a/services/inference/src/engine/tokenizer.py b/services/inference/src/engine/tokenizer.py new file mode 100644 index 00000000..4af4ef9b --- /dev/null +++ b/services/inference/src/engine/tokenizer.py @@ 
-0,0 +1,406 @@ +""" +BPE Tokenizer in the style of GPT-4. + +Two implementations are available: +1) HuggingFace Tokenizer that can do both training and inference but is really confusing +2) Our own RustBPE Tokenizer for training and tiktoken for efficient inference +""" + +import os +import copy +from functools import lru_cache + +SPECIAL_TOKENS = [ + # every document begins with the Beginning of Sequence (BOS) token that delimits documents + "<|bos|>", + # tokens below are only used during finetuning to render Conversations into token ids + "<|user_start|>", # user messages + "<|user_end|>", + "<|assistant_start|>", # assistant messages + "<|assistant_end|>", + "<|python_start|>", # assistant invokes python REPL tool + "<|python_end|>", + "<|output_start|>", # python REPL outputs back to assistant + "<|output_end|>", +] + +# NOTE: this split pattern deviates from GPT-4 in that we use \p{N}{1,2} instead of \p{N}{1,3} +# I did this because I didn't want to "waste" too many tokens on numbers for smaller vocab sizes. +# I verified that 2 is the sweet spot for vocab size of 32K. 1 is a bit worse, 3 was worse still. +SPLIT_PATTERN = r"""'(?i:[sdmt]|ll|ve|re)|[^\r\n\p{L}\p{N}]?+\p{L}+|\p{N}{1,2}| ?[^\s\p{L}\p{N}]++[\r\n]*|\s*[\r\n]|\s+(?!\S)|\s+""" + +# ----------------------------------------------------------------------------- +# Generic GPT-4-style tokenizer based on HuggingFace Tokenizer +from tokenizers import Tokenizer as HFTokenizer +from tokenizers import pre_tokenizers, decoders, Regex +from tokenizers.models import BPE +from tokenizers.trainers import BpeTrainer + +class HuggingFaceTokenizer: + """Light wrapper around HuggingFace Tokenizer for some utilities""" + + def __init__(self, tokenizer): + self.tokenizer = tokenizer + + @classmethod + def from_pretrained(cls, hf_path): + # init from a HuggingFace pretrained tokenizer (e.g. 
"gpt2") + tokenizer = HFTokenizer.from_pretrained(hf_path) + return cls(tokenizer) + + @classmethod + def from_directory(cls, tokenizer_dir): + # init from a local directory on disk (e.g. "out/tokenizer") + tokenizer_path = os.path.join(tokenizer_dir, "tokenizer.json") + tokenizer = HFTokenizer.from_file(tokenizer_path) + return cls(tokenizer) + + @classmethod + def train_from_iterator(cls, text_iterator, vocab_size): + # train from an iterator of text + # Configure the HuggingFace Tokenizer + tokenizer = HFTokenizer(BPE( + byte_fallback=True, # needed! + unk_token=None, + fuse_unk=False, + )) + # Normalizer: None + tokenizer.normalizer = None + # Pre-tokenizer: GPT-4 style + # the regex pattern used by GPT-4 to split text into groups before BPE + # NOTE: The pattern was changed from \p{N}{1,3} to \p{N}{1,2} because I suspect it is harmful to + # very small models and smaller vocab sizes, because it is a little bit wasteful in the token space. + # (but I haven't validated this! TODO) + gpt4_split_regex = Regex(SPLIT_PATTERN) # huggingface demands that you wrap it in Regex!! 
+ tokenizer.pre_tokenizer = pre_tokenizers.Sequence([ + pre_tokenizers.Split(pattern=gpt4_split_regex, behavior="isolated", invert=False), + pre_tokenizers.ByteLevel(add_prefix_space=False, use_regex=False) + ]) + # Decoder: ByteLevel (it pairs together with the ByteLevel pre-tokenizer) + tokenizer.decoder = decoders.ByteLevel() + # Post-processor: None + tokenizer.post_processor = None + # Trainer: BPE + trainer = BpeTrainer( + vocab_size=vocab_size, + show_progress=True, + min_frequency=0, # no minimum frequency + initial_alphabet=pre_tokenizers.ByteLevel.alphabet(), + special_tokens=SPECIAL_TOKENS, + ) + # Kick off the training + tokenizer.train_from_iterator(text_iterator, trainer) + return cls(tokenizer) + + def get_vocab_size(self): + return self.tokenizer.get_vocab_size() + + def get_special_tokens(self): + special_tokens_map = self.tokenizer.get_added_tokens_decoder() + special_tokens = [w.content for w in special_tokens_map.values()] + return special_tokens + + def id_to_token(self, id): + return self.tokenizer.id_to_token(id) + + def _encode_one(self, text, prepend=None, append=None, num_threads=None): + # encode a single string + # prepend/append can be either a string of a special token or a token id directly. 
+ # num_threads is ignored (only used by the nanochat Tokenizer for parallel encoding) + assert isinstance(text, str) + ids = [] + if prepend is not None: + prepend_id = prepend if isinstance(prepend, int) else self.encode_special(prepend) + ids.append(prepend_id) + ids.extend(self.tokenizer.encode(text, add_special_tokens=False).ids) + if append is not None: + append_id = append if isinstance(append, int) else self.encode_special(append) + ids.append(append_id) + return ids + + def encode_special(self, text): + # encode a single special token via exact match + return self.tokenizer.token_to_id(text) + + def get_bos_token_id(self): + # Different HuggingFace models use different BOS tokens and there is little consistency + # 1) attempt to find a <|bos|> token + bos = self.encode_special("<|bos|>") + # 2) if that fails, attempt to find a <|endoftext|> token (e.g. GPT-2 models) + if bos is None: + bos = self.encode_special("<|endoftext|>") + # 3) if these fail, it's better to crash than to silently return None + assert bos is not None, "Failed to find BOS token in tokenizer" + return bos + + def encode(self, text, *args, **kwargs): + if isinstance(text, str): + return self._encode_one(text, *args, **kwargs) + elif isinstance(text, list): + return [self._encode_one(t, *args, **kwargs) for t in text] + else: + raise ValueError(f"Invalid input type: {type(text)}") + + def __call__(self, *args, **kwargs): + return self.encode(*args, **kwargs) + + def decode(self, ids): + return self.tokenizer.decode(ids, skip_special_tokens=False) + + def save(self, tokenizer_dir): + # save the tokenizer to disk + os.makedirs(tokenizer_dir, exist_ok=True) + tokenizer_path = os.path.join(tokenizer_dir, "tokenizer.json") + self.tokenizer.save(tokenizer_path) + print(f"Saved tokenizer to {tokenizer_path}") + +# ----------------------------------------------------------------------------- +# Tokenizer based on rustbpe + tiktoken combo +import pickle +import rustbpe +import tiktoken + +class 
RustBPETokenizer: + """Light wrapper around tiktoken (for efficient inference) but train with rustbpe""" + + def __init__(self, enc, bos_token): + self.enc = enc + self.bos_token_id = self.encode_special(bos_token) + + @classmethod + def train_from_iterator(cls, text_iterator, vocab_size): + # 1) train using rustbpe + tokenizer = rustbpe.Tokenizer() + # the special tokens are inserted later in __init__, we don't train them here + vocab_size_no_special = vocab_size - len(SPECIAL_TOKENS) + assert vocab_size_no_special >= 256, f"vocab_size_no_special must be at least 256, got {vocab_size_no_special}" + tokenizer.train_from_iterator(text_iterator, vocab_size_no_special, pattern=SPLIT_PATTERN) + # 2) construct the associated tiktoken encoding for inference + pattern = tokenizer.get_pattern() + mergeable_ranks_list = tokenizer.get_mergeable_ranks() + mergeable_ranks = {bytes(k): v for k, v in mergeable_ranks_list} + tokens_offset = len(mergeable_ranks) + special_tokens = {name: tokens_offset + i for i, name in enumerate(SPECIAL_TOKENS)} + enc = tiktoken.Encoding( + name="rustbpe", + pat_str=pattern, + mergeable_ranks=mergeable_ranks, # dict[bytes, int] (token bytes -> merge priority rank) + special_tokens=special_tokens, # dict[str, int] (special token name -> token id) + ) + return cls(enc, "<|bos|>") + + @classmethod + def from_directory(cls, tokenizer_dir): + pickle_path = os.path.join(tokenizer_dir, "tokenizer.pkl") + with open(pickle_path, "rb") as f: + enc = pickle.load(f) + return cls(enc, "<|bos|>") + + @classmethod + def from_pretrained(cls, tiktoken_name): + # https://github.com/openai/tiktoken/blob/eedc8563/tiktoken_ext/openai_public.py + enc = tiktoken.get_encoding(tiktoken_name) + # tiktoken calls the special document delimiter token "<|endoftext|>" + # yes this is confusing because this token is almost always PREPENDED to the beginning of the document + # it most often is used to signal the start of a new sequence to the LLM during inference etc. 
+ # so in nanoChat we always use "<|bos|>" short for "beginning of sequence", but historically it is often called "<|endoftext|>". + return cls(enc, "<|endoftext|>") + + def get_vocab_size(self): + return self.enc.n_vocab + + def get_special_tokens(self): + return self.enc.special_tokens_set + + def id_to_token(self, id): + return self.enc.decode([id]) + + @lru_cache(maxsize=32) + def encode_special(self, text): + return self.enc.encode_single_token(text) + + def get_bos_token_id(self): + return self.bos_token_id + + def encode(self, text, prepend=None, append=None, num_threads=8): + # text can be either a string or a list of strings + + if prepend is not None: + prepend_id = prepend if isinstance(prepend, int) else self.encode_special(prepend) + if append is not None: + append_id = append if isinstance(append, int) else self.encode_special(append) + + if isinstance(text, str): + ids = self.enc.encode_ordinary(text) + if prepend is not None: + ids.insert(0, prepend_id) # TODO: slightly inefficient here? 
:( hmm + if append is not None: + ids.append(append_id) + elif isinstance(text, list): + ids = self.enc.encode_ordinary_batch(text, num_threads=num_threads) + if prepend is not None: + for ids_row in ids: + ids_row.insert(0, prepend_id) # TODO: same + if append is not None: + for ids_row in ids: + ids_row.append(append_id) + else: + raise ValueError(f"Invalid input type: {type(text)}") + + return ids + + def __call__(self, *args, **kwargs): + return self.encode(*args, **kwargs) + + def decode(self, ids): + return self.enc.decode(ids) + + def save(self, tokenizer_dir): + # save the encoding object to disk + os.makedirs(tokenizer_dir, exist_ok=True) + pickle_path = os.path.join(tokenizer_dir, "tokenizer.pkl") + with open(pickle_path, "wb") as f: + pickle.dump(self.enc, f) + print(f"Saved tokenizer encoding to {pickle_path}") + + def render_conversation(self, conversation, max_tokens=2048): + """ + Tokenize a single Chat conversation (which we call a "doc" or "document" here). + Returns: + - ids: list[int] is a list of token ids of this rendered conversation + - mask: list[int] of same length, mask = 1 for tokens that the Assistant is expected to train on. + """ + # ids, masks that we will return and a helper function to help build them up. + ids, mask = [], [] + def add_tokens(token_ids, mask_val): + if isinstance(token_ids, int): + token_ids = [token_ids] + ids.extend(token_ids) + mask.extend([mask_val] * len(token_ids)) + + # sometimes the first message is a system message... + # => just merge it with the second (user) message + if conversation["messages"][0]["role"] == "system": + # some conversation surgery is necessary here for now... 
+ conversation = copy.deepcopy(conversation) # avoid mutating the original + messages = conversation["messages"] + assert messages[1]["role"] == "user", "System message must be followed by a user message" + messages[1]["content"] = messages[0]["content"] + "\n\n" + messages[1]["content"] + messages = messages[1:] + else: + messages = conversation["messages"] + assert len(messages) >= 1, f"Conversation has less than 1 message: {messages}" + + # fetch all the special tokens we need + bos = self.get_bos_token_id() + user_start, user_end = self.encode_special("<|user_start|>"), self.encode_special("<|user_end|>") + assistant_start, assistant_end = self.encode_special("<|assistant_start|>"), self.encode_special("<|assistant_end|>") + python_start, python_end = self.encode_special("<|python_start|>"), self.encode_special("<|python_end|>") + output_start, output_end = self.encode_special("<|output_start|>"), self.encode_special("<|output_end|>") + + # now we can tokenize the conversation + add_tokens(bos, 0) + for i, message in enumerate(messages): + + # some sanity checking here around assumptions, to prevent footguns + must_be_from = "user" if i % 2 == 0 else "assistant" + assert message["role"] == must_be_from, f"Message {i} is from {message['role']} but should be from {must_be_from}" + + # content can be either a simple string or a list of parts (e.g. 
containing tool calls) + content = message["content"] + + if message["role"] == "user": + assert isinstance(content, str), "User messages are simply expected to be strings" + value_ids = self.encode(content) + add_tokens(user_start, 0) + add_tokens(value_ids, 0) + add_tokens(user_end, 0) + elif message["role"] == "assistant": + add_tokens(assistant_start, 0) + if isinstance(content, str): + # simple string => simply add the tokens + value_ids = self.encode(content) + add_tokens(value_ids, 1) + elif isinstance(content, list): + for part in content: + value_ids = self.encode(part["text"]) + if part["type"] == "text": + # string part => simply add the tokens + add_tokens(value_ids, 1) + elif part["type"] == "python": + # python tool call => add the tokens inside <|python_start|> and <|python_end|> + add_tokens(python_start, 1) + add_tokens(value_ids, 1) + add_tokens(python_end, 1) + elif part["type"] == "python_output": + # python output => add the tokens inside <|output_start|> and <|output_end|> + # none of these tokens are supervised because the tokens come from Python at test time + add_tokens(output_start, 0) + add_tokens(value_ids, 0) + add_tokens(output_end, 0) + else: + raise ValueError(f"Unknown part type: {part['type']}") + else: + raise ValueError(f"Unknown content type: {type(content)}") + add_tokens(assistant_end, 1) + + # truncate to max_tokens tokens MAX (helps prevent OOMs) + ids = ids[:max_tokens] + mask = mask[:max_tokens] + return ids, mask + + def visualize_tokenization(self, ids, mask, with_token_id=False): + """Small helper function useful in debugging: visualize the tokenization of render_conversation""" + RED = '\033[91m' + GREEN = '\033[92m' + RESET = '\033[0m' + GRAY = '\033[90m' + tokens = [] + for i, (token_id, mask_val) in enumerate(zip(ids, mask)): + token_str = self.decode([token_id]) + color = GREEN if mask_val == 1 else RED + tokens.append(f"{color}{token_str}{RESET}") + if with_token_id: + tokens.append(f"{GRAY}({token_id}){RESET}") + 
return '|'.join(tokens)
+
+    def render_for_completion(self, conversation):
+        """
+        Used during Reinforcement Learning. In that setting, we want to
+        render the conversation priming the Assistant for a completion.
+        Unlike the Chat SFT case, we don't need to return the mask.
+        """
+        # We have some surgery to do: we need to pop the last message (of the Assistant)
+        conversation = copy.deepcopy(conversation) # avoid mutating the original
+        messages = conversation["messages"]
+        assert messages[-1]["role"] == "assistant", "Last message must be from the Assistant"
+        messages.pop() # remove the last message (of the Assistant) inplace
+
+        # Now tokenize the conversation
+        # (the supervision mask is discarded; only the token ids matter when priming a completion)
+        ids, mask = self.render_conversation(conversation)
+
+        # Finally, to prime the Assistant for a completion, append the Assistant start token
+        assistant_start = self.encode_special("<|assistant_start|>")
+        ids.append(assistant_start)
+        return ids
+
+# -----------------------------------------------------------------------------
+# nanochat-specific convenience functions
+
+def get_tokenizer():
+    # Load the trained tokenizer from <base_dir>/tokenizer.
+    # NOTE(review): import is deferred to call time — presumably to avoid an import
+    # cycle with .common; confirm before hoisting to module level.
+    from .common import get_base_dir
+    base_dir = get_base_dir()
+    tokenizer_dir = os.path.join(base_dir, "tokenizer")
+    # return HuggingFaceTokenizer.from_directory(tokenizer_dir)
+    return RustBPETokenizer.from_directory(tokenizer_dir)
+
+def get_token_bytes(device="cpu"):
+    # Load the precomputed token-bytes tensor (written by tok_train.py) onto `device`.
+    import torch
+    from .common import get_base_dir
+    base_dir = get_base_dir()
+    tokenizer_dir = os.path.join(base_dir, "tokenizer")
+    token_bytes_path = os.path.join(tokenizer_dir, "token_bytes.pt")
+    assert os.path.exists(token_bytes_path), f"Token bytes not found at {token_bytes_path}? 
It gets written by tok_train.py" + with open(token_bytes_path, "rb") as f: + token_bytes = torch.load(f, map_location=device) + return token_bytes diff --git a/services/inference/src/engine/tools.py b/services/inference/src/engine/tools.py new file mode 100644 index 00000000..df7ef64c --- /dev/null +++ b/services/inference/src/engine/tools.py @@ -0,0 +1,509 @@ +""" +Shared tool definitions for nanochat. + +The current tokenizer only has python/output special tokens. To preserve +checkpoint compatibility, we reuse those tokens as a generic tool-call and +tool-result channel. Legacy "python" calculator payloads still work. +""" + +from __future__ import annotations + +import ast +import json +import math +import os +import time +from dataclasses import dataclass, field +from typing import Any, Protocol + +import requests + +TOOL_CALL_START = "<|python_start|>" +TOOL_CALL_END = "<|python_end|>" +TOOL_RESULT_START = "<|output_start|>" +TOOL_RESULT_END = "<|output_end|>" +MAX_TOOL_PAYLOAD_CHARS = 4096 + +DEFAULT_TOOL_SCHEMA = [ + { + "name": "calculator", + "description": "Deterministic scientific calculator for exact arithmetic and common finance formulas.", + "arguments": { + "expression": "String expression using numbers, operators, and supported functions.", + }, + }, + { + "name": "web_search", + "description": "Search and fetch web content. 
Requires a search backend and optionally a page fetch client.", + "arguments": { + "query": "Search query string.", + "top_k": "Maximum number of results to return.", + "urls": "Optional explicit URLs to fetch instead of searching.", + }, + }, +] + + +def _compact_json(data: Any) -> str: + return json.dumps(data, ensure_ascii=True, separators=(",", ":"), sort_keys=True) + + +@dataclass +class ToolInvocation: + tool_name: str + arguments: dict[str, Any] + raw_text: str = "" + + +@dataclass +class ToolResult: + tool_name: str + success: bool + output: Any = None + error: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + def to_payload(self) -> str: + return _compact_json( + { + "tool": self.tool_name, + "success": self.success, + "output": self.output, + "error": self.error, + "metadata": self.metadata, + } + ) + + +class BaseTool: + name: str + + def run(self, arguments: dict[str, Any]) -> ToolResult: + raise NotImplementedError + + +class ToolRegistry: + def __init__(self, tools: list[BaseTool] | tuple[BaseTool, ...]): + self._tools = {tool.name: tool for tool in tools} + + def execute(self, tool_name: str, arguments: dict[str, Any]) -> ToolResult: + tool = self._tools.get(tool_name) + if tool is None: + return ToolResult(tool_name=tool_name, success=False, error=f"Unknown tool: {tool_name}") + try: + return tool.run(arguments) + except Exception as exc: # defensive: tool failures should become model-visible outputs + return ToolResult(tool_name=tool_name, success=False, error=str(exc)) + + def schema(self) -> list[dict[str, Any]]: + return [item for item in DEFAULT_TOOL_SCHEMA if item["name"] in self._tools] + + +def serialize_tool_call(tool_name: str, arguments: dict[str, Any] | None = None) -> str: + payload = { + "tool": tool_name, + "arguments": arguments or {}, + } + text = _compact_json(payload) + return text[:MAX_TOOL_PAYLOAD_CHARS] + + +def serialize_tool_result( + tool_name: str, + output: Any = None, + *, + success: bool = 
True, + error: str | None = None, + metadata: dict[str, Any] | None = None, +) -> str: + return ToolResult( + tool_name=tool_name, + success=success, + output=output, + error=error, + metadata=metadata or {}, + ).to_payload()[:MAX_TOOL_PAYLOAD_CHARS] + + +def parse_tool_call_payload(text: str) -> ToolInvocation: + stripped = text.strip() + if not stripped: + return ToolInvocation(tool_name="calculator", arguments={"expression": ""}, raw_text=text) + try: + payload = json.loads(stripped) + except json.JSONDecodeError: + return ToolInvocation(tool_name="calculator", arguments={"expression": stripped}, raw_text=text) + if isinstance(payload, dict): + tool_name = payload.get("tool") or payload.get("tool_name") or payload.get("name") + arguments = payload.get("arguments") or payload.get("args") or {} + if isinstance(tool_name, str) and isinstance(arguments, dict): + return ToolInvocation(tool_name=tool_name, arguments=arguments, raw_text=text) + return ToolInvocation(tool_name="calculator", arguments={"expression": stripped}, raw_text=text) + + +def parse_tool_result_payload(text: str) -> ToolResult | None: + stripped = text.strip() + try: + payload = json.loads(stripped) + except json.JSONDecodeError: + return None + if not isinstance(payload, dict): + return None + tool_name = payload.get("tool") + if not isinstance(tool_name, str): + return None + return ToolResult( + tool_name=tool_name, + success=bool(payload.get("success", True)), + output=payload.get("output"), + error=payload.get("error"), + metadata=payload.get("metadata") or {}, + ) + + +def _percent(value: float, rate: float) -> float: + return value * rate / 100.0 + + +def _percent_change(old: float, new: float) -> float: + if old == 0: + raise ValueError("percent_change old value cannot be zero") + return ((new - old) / old) * 100.0 + + +def _cagr(start: float, end: float, years: float) -> float: + if start <= 0 or end <= 0 or years <= 0: + raise ValueError("cagr inputs must be positive") + return ((end / 
start) ** (1.0 / years) - 1.0) * 100.0 + + +def _simple_interest(principal: float, annual_rate: float, years: float) -> float: + return principal * annual_rate / 100.0 * years + + +def _compound_interest(principal: float, annual_rate: float, periods_per_year: float, years: float) -> float: + if periods_per_year <= 0: + raise ValueError("periods_per_year must be positive") + return principal * (1.0 + annual_rate / 100.0 / periods_per_year) ** (periods_per_year * years) + + +def _emi(principal: float, annual_rate: float, months: float) -> float: + if months <= 0: + raise ValueError("months must be positive") + monthly_rate = annual_rate / 100.0 / 12.0 + if monthly_rate == 0: + return principal / months + growth = (1.0 + monthly_rate) ** months + return principal * monthly_rate * growth / (growth - 1.0) + + +ALLOWED_BINOPS = { + ast.Add: lambda a, b: a + b, + ast.Sub: lambda a, b: a - b, + ast.Mult: lambda a, b: a * b, + ast.Div: lambda a, b: a / b, + ast.Pow: lambda a, b: a ** b, + ast.Mod: lambda a, b: a % b, +} +ALLOWED_UNARYOPS = { + ast.UAdd: lambda a: a, + ast.USub: lambda a: -a, +} +ALLOWED_NAMES = { + "pi": math.pi, + "e": math.e, + "tau": math.tau, +} +ALLOWED_FUNCTIONS = { + "abs": abs, + "round": round, + "floor": math.floor, + "ceil": math.ceil, + "sqrt": math.sqrt, + "log": math.log, + "log10": math.log10, + "exp": math.exp, + "sin": math.sin, + "cos": math.cos, + "tan": math.tan, + "asin": math.asin, + "acos": math.acos, + "atan": math.atan, + "degrees": math.degrees, + "radians": math.radians, + "percent": _percent, + "percent_change": _percent_change, + "cagr": _cagr, + "simple_interest": _simple_interest, + "compound_interest": _compound_interest, + "emi": _emi, +} + + +class _SafeMathEvaluator: + def __init__(self, expression: str): + self.expression = expression + self.node_count = 0 + + def eval(self) -> float: + if len(self.expression) > 512: + raise ValueError("expression too long") + parsed = ast.parse(self.expression, mode="eval") + return 
self._visit(parsed.body) + + def _visit(self, node: ast.AST) -> Any: + self.node_count += 1 + if self.node_count > 128: + raise ValueError("expression too complex") + + if isinstance(node, ast.Constant): + if isinstance(node.value, (int, float)): + return node.value + raise ValueError(f"unsupported constant: {node.value!r}") + if isinstance(node, ast.Num): # pragma: no cover - py<3.8 compatibility + return node.n + if isinstance(node, ast.BinOp): + op = ALLOWED_BINOPS.get(type(node.op)) + if op is None: + raise ValueError(f"unsupported operator: {type(node.op).__name__}") + return op(self._visit(node.left), self._visit(node.right)) + if isinstance(node, ast.UnaryOp): + op = ALLOWED_UNARYOPS.get(type(node.op)) + if op is None: + raise ValueError(f"unsupported unary operator: {type(node.op).__name__}") + return op(self._visit(node.operand)) + if isinstance(node, ast.Name): + if node.id not in ALLOWED_NAMES: + raise ValueError(f"unknown symbol: {node.id}") + return ALLOWED_NAMES[node.id] + if isinstance(node, ast.Call): + if not isinstance(node.func, ast.Name): + raise ValueError("only direct function calls are allowed") + fn = ALLOWED_FUNCTIONS.get(node.func.id) + if fn is None: + raise ValueError(f"unsupported function: {node.func.id}") + if node.keywords: + raise ValueError("keyword arguments are not supported") + args = [self._visit(arg) for arg in node.args] + return fn(*args) + raise ValueError(f"unsupported expression node: {type(node).__name__}") + + +def _normalize_numeric_output(value: Any) -> Any: + if isinstance(value, float): + if not math.isfinite(value): + raise ValueError("result is not finite") + return float(f"{value:.12g}") + return value + + +class CalculatorTool(BaseTool): + name = "calculator" + + def run(self, arguments: dict[str, Any]) -> ToolResult: + expression = str(arguments.get("expression", "")).strip() + if not expression: + return ToolResult(tool_name=self.name, success=False, error="Missing expression") + value = 
_SafeMathEvaluator(expression).eval() + return ToolResult( + tool_name=self.name, + success=True, + output={"expression": expression, "value": _normalize_numeric_output(value)}, + ) + + +@dataclass +class SearchHit: + url: str + title: str = "" + snippet: str = "" + + +class SearchBackend(Protocol): + def search(self, query: str, top_k: int) -> list[SearchHit]: + raise NotImplementedError + + +class MockSearchBackend: + def __init__(self, canned_results: dict[str, list[dict[str, str]]] | None = None): + self.canned_results = canned_results or { + "browser rendering markdown": [ + { + "url": "https://developers.cloudflare.com/browser-rendering/rest-api/markdown-endpoint/", + "title": "Cloudflare markdown endpoint", + "snippet": "Extract markdown from a webpage using Cloudflare Browser Rendering.", + } + ], + "nanochat gpt2 speedrun": [ + { + "url": "https://github.com/karpathy/nanochat", + "title": "karpathy/nanochat", + "snippet": "Minimal LLM training harness with pretraining, SFT, RL, and chat UI.", + } + ], + } + + def search(self, query: str, top_k: int) -> list[SearchHit]: + normalized = query.strip().lower() + rows = self.canned_results.get(normalized, []) + return [SearchHit(**row) for row in rows[:top_k]] + + +class CloudflareBrowserRenderingClient: + def __init__( + self, + *, + api_token: str | None = None, + account_id: str | None = None, + base_url: str = "https://api.cloudflare.com/client/v4", + timeout: float = 30.0, + max_retries: int = 3, + ): + self.api_token = api_token or os.environ.get("CLOUDFLARE_API_TOKEN") + self.account_id = account_id or os.environ.get("CLOUDFLARE_ACCOUNT_ID") + if not self.api_token or not self.account_id: + raise ValueError("Cloudflare Browser Rendering requires CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID") + self.base_url = base_url.rstrip("/") + self.timeout = timeout + self.max_retries = max_retries + self.session = requests.Session() + self.session.headers.update( + { + "Authorization": f"Bearer {self.api_token}", 
+ "Content-Type": "application/json", + } + ) + + def _post(self, endpoint: str, body: dict[str, Any]) -> Any: + url = f"{self.base_url}/accounts/{self.account_id}/browser-rendering/{endpoint}" + last_error = None + for attempt in range(1, self.max_retries + 1): + response = self.session.post(url, json=body, timeout=self.timeout) + if response.status_code == 429: + retry_after = response.headers.get("Retry-After") + sleep_seconds = float(retry_after) if retry_after else float(attempt) + last_error = RuntimeError(f"Cloudflare Browser Rendering rate limited on {endpoint}") + time.sleep(min(sleep_seconds, 5.0)) + continue + response.raise_for_status() + payload = response.json() + if not payload.get("success", False): + errors = payload.get("errors", []) + last_error = RuntimeError(f"Cloudflare Browser Rendering request failed: {errors}") + break + return payload.get("result") + if last_error is not None: + raise last_error + raise RuntimeError(f"Cloudflare Browser Rendering request failed for {endpoint}") + + def markdown(self, url: str, **options: Any) -> str: + body = {"url": url} + body.update(options) + return self._post("markdown", body) + + def links(self, url: str, **options: Any) -> list[str]: + body = {"url": url} + body.update(options) + return self._post("links", body) + + def json_extract(self, url: str, *, prompt: str | None = None, schema: dict[str, Any] | None = None, **options: Any) -> dict[str, Any]: + body: dict[str, Any] = {"url": url} + if prompt is not None: + body["prompt"] = prompt + if schema is not None: + body["schema"] = schema + body.update(options) + return self._post("json", body) + + +class WebSearchTool(BaseTool): + name = "web_search" + + def __init__( + self, + *, + search_backend: SearchBackend | None = None, + fetch_client: CloudflareBrowserRenderingClient | None = None, + max_results: int = 3, + ): + self.search_backend = search_backend + self.fetch_client = fetch_client + self.max_results = max_results + + def run(self, 
arguments: dict[str, Any]) -> ToolResult: + query = str(arguments.get("query", "")).strip() + requested_urls = arguments.get("urls") or [] + if isinstance(requested_urls, str): + requested_urls = [requested_urls] + top_k = int(arguments.get("top_k", self.max_results) or self.max_results) + top_k = max(1, min(top_k, 8)) + + if not query and not requested_urls: + return ToolResult(tool_name=self.name, success=False, error="Missing query or urls") + + hits: list[SearchHit] + if requested_urls: + hits = [SearchHit(url=str(url)) for url in requested_urls[:top_k]] + else: + if self.search_backend is None: + return ToolResult( + tool_name=self.name, + success=False, + error="No search backend configured. Cloudflare Browser Rendering can fetch pages but does not provide public web search by itself.", + ) + hits = self.search_backend.search(query, top_k) + + results = [] + for hit in hits[:top_k]: + entry: dict[str, Any] = {"url": hit.url} + if hit.title: + entry["title"] = hit.title + if hit.snippet: + entry["snippet"] = hit.snippet + if self.fetch_client is not None: + try: + markdown = self.fetch_client.markdown(hit.url) + links = self.fetch_client.links(hit.url) + entry["markdown"] = markdown[:4000] + entry["links"] = links[:10] + except Exception as exc: + entry["fetch_error"] = str(exc) + results.append(entry) + + return ToolResult( + tool_name=self.name, + success=True, + output={"query": query, "results": results}, + metadata={ + "search_backend": type(self.search_backend).__name__ if self.search_backend is not None else None, + "fetch_backend": type(self.fetch_client).__name__ if self.fetch_client is not None else None, + "num_results": len(results), + }, + ) + + +def build_default_tool_registry( + *, + cloudflare_token: str | None = None, + cloudflare_account_id: str | None = None, + search_backend: SearchBackend | None = None, +) -> ToolRegistry: + fetch_client = None + if cloudflare_token or os.environ.get("CLOUDFLARE_API_TOKEN"): + try: + fetch_client = 
CloudflareBrowserRenderingClient( + api_token=cloudflare_token, + account_id=cloudflare_account_id, + ) + except Exception: + fetch_client = None + registry = ToolRegistry( + [ + CalculatorTool(), + WebSearchTool( + search_backend=search_backend or MockSearchBackend(), + fetch_client=fetch_client, + ), + ] + ) + return registry diff --git a/services/inference/src/main.py b/services/inference/src/main.py new file mode 100644 index 00000000..186c7e10 --- /dev/null +++ b/services/inference/src/main.py @@ -0,0 +1,291 @@ +from __future__ import annotations + +import asyncio +import json +import logging +import random +from contextlib import asynccontextmanager +from typing import AsyncGenerator + +from fastapi import Depends, FastAPI, HTTPException, Request, status +from fastapi.responses import JSONResponse, StreamingResponse +from pydantic import BaseModel + +from config import Settings, get_settings +from middleware.internal_auth import require_internal_api_key +from services.weight_manager import WeightManager + +logger = logging.getLogger(__name__) + +# Abuse prevention limits +MAX_MESSAGES_PER_REQUEST = 500 +MAX_MESSAGE_LENGTH = 8000 +MAX_TOTAL_CONVERSATION_LENGTH = 32000 +MIN_TEMPERATURE = 0.0 +MAX_TEMPERATURE = 2.0 +MIN_TOP_K = 0 +MAX_TOP_K = 200 +MIN_MAX_TOKENS = 1 +MAX_MAX_TOKENS = 4096 + + +class ChatMessage(BaseModel): + role: str + content: str + + +class GenerateRequest(BaseModel): + messages: list[ChatMessage] + temperature: float | None = None + max_tokens: int | None = None + top_k: int | None = None + + +class SwapModelRequest(BaseModel): + model_tag: str + + +def validate_generate_request(request: GenerateRequest) -> None: + if len(request.messages) == 0: + raise HTTPException(status_code=400, detail="At least one message is required") + if len(request.messages) > MAX_MESSAGES_PER_REQUEST: + raise HTTPException( + status_code=400, + detail=f"Too many messages. 
Maximum {MAX_MESSAGES_PER_REQUEST} messages allowed per request", + ) + + total_length = 0 + for index, message in enumerate(request.messages): + if not message.content: + raise HTTPException(status_code=400, detail=f"Message {index} has empty content") + if len(message.content) > MAX_MESSAGE_LENGTH: + raise HTTPException( + status_code=400, + detail=f"Message {index} is too long. Maximum {MAX_MESSAGE_LENGTH} characters allowed per message", + ) + if message.role not in {"user", "assistant"}: + raise HTTPException( + status_code=400, + detail=f"Message {index} has invalid role. Must be 'user' or 'assistant'", + ) + total_length += len(message.content) + + if total_length > MAX_TOTAL_CONVERSATION_LENGTH: + raise HTTPException( + status_code=400, + detail=f"Total conversation is too long. Maximum {MAX_TOTAL_CONVERSATION_LENGTH} characters allowed", + ) + + if request.temperature is not None and not (MIN_TEMPERATURE <= request.temperature <= MAX_TEMPERATURE): + raise HTTPException( + status_code=400, + detail=f"Temperature must be between {MIN_TEMPERATURE} and {MAX_TEMPERATURE}", + ) + + if request.top_k is not None and not (MIN_TOP_K <= request.top_k <= MAX_TOP_K): + raise HTTPException(status_code=400, detail=f"top_k must be between {MIN_TOP_K} and {MAX_TOP_K}") + + if request.max_tokens is not None and not (MIN_MAX_TOKENS <= request.max_tokens <= MAX_MAX_TOKENS): + raise HTTPException( + status_code=400, + detail=f"max_tokens must be between {MIN_MAX_TOKENS} and {MAX_MAX_TOKENS}", + ) + + +def format_sse(payload: dict[str, object]) -> str: + return f"data: {json.dumps(payload, ensure_ascii=False, separators=(',', ':'))}\n\n" + + +def build_conversation_tokens(worker, messages: list[ChatMessage]) -> list[int]: + bos = worker.tokenizer.get_bos_token_id() + user_start = worker.tokenizer.encode_special("<|user_start|>") + user_end = worker.tokenizer.encode_special("<|user_end|>") + assistant_start = worker.tokenizer.encode_special("<|assistant_start|>") + assistant_end 
= worker.tokenizer.encode_special("<|assistant_end|>") + + conversation_tokens = [bos] + for message in messages: + if message.role == "user": + conversation_tokens.append(user_start) + conversation_tokens.extend(worker.tokenizer.encode(message.content)) + conversation_tokens.append(user_end) + else: + conversation_tokens.append(assistant_start) + conversation_tokens.extend(worker.tokenizer.encode(message.content)) + conversation_tokens.append(assistant_end) + + conversation_tokens.append(assistant_start) + return conversation_tokens + + +async def generate_stream(worker, request: GenerateRequest, settings: Settings) -> AsyncGenerator[str, None]: + temperature = request.temperature if request.temperature is not None else settings.default_temperature + max_new_tokens = request.max_tokens if request.max_tokens is not None else settings.default_max_tokens + top_k = request.top_k if request.top_k is not None else settings.default_top_k + + assistant_end = worker.tokenizer.encode_special("<|assistant_end|>") + bos = worker.tokenizer.get_bos_token_id() + accumulated_tokens: list[int] = [] + last_clean_text = "" + + for token_column, _ in worker.engine.generate( + build_conversation_tokens(worker, request.messages), + num_samples=1, + max_tokens=max_new_tokens, + temperature=temperature, + top_k=top_k, + seed=random.randint(0, 2**31 - 1), + ): + token = token_column[0] + if token == assistant_end or token == bos: + break + + accumulated_tokens.append(token) + current_text = worker.tokenizer.decode(accumulated_tokens) + if current_text.endswith("�"): + continue + + new_text = current_text[len(last_clean_text):] + if new_text: + last_clean_text = current_text + yield format_sse({"token": new_text, "gpu": worker.gpu_id}) + + yield format_sse({"done": True}) + + +class InferenceRuntime: + def __init__(self, settings: Settings, weight_manager: WeightManager | None = None) -> None: + self.settings = settings + self.weight_manager = weight_manager or WeightManager(settings) + 
self.worker_pool = None + self._swap_lock = asyncio.Lock() + + async def startup(self) -> None: + if not self.settings.startup_load_enabled or not self.settings.default_model_tag: + return + try: + self.worker_pool = await self.weight_manager.build_worker_pool( + self.settings.default_model_tag, + step=self.settings.default_step, + ) + except Exception as exc: # pragma: no cover - exercised by deployment conditions + logger.warning("Skipping startup model load: %s", exc) + self.worker_pool = None + + async def shutdown(self) -> None: + if self.worker_pool is not None: + await self.worker_pool.close() + + def health_payload(self) -> dict[str, object]: + snapshot = self.worker_pool.snapshot() if self.worker_pool is not None else { + "total_workers": 0, + "available_workers": 0, + "busy_workers": 0, + "draining": False, + "workers": [], + } + return { + "status": "ok", + "ready": self.worker_pool is not None and self.weight_manager.current_model is not None, + "current_model": self.weight_manager.current_model, + **snapshot, + } + + def models_payload(self) -> dict[str, object]: + return { + "current_model": self.weight_manager.current_model, + "models": self.weight_manager.list_models(), + } + + def stats_payload(self) -> dict[str, object]: + return self.health_payload() + + def require_ready_pool(self): + if self.worker_pool is None or self.weight_manager.current_model is None: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="No model is currently loaded", + ) + return self.worker_pool + + async def swap_model(self, model_tag: str) -> dict[str, str]: + async with self._swap_lock: + if self.worker_pool is not None and self.weight_manager.current_model == model_tag: + return {"status": "ok", "current_model": model_tag} + + old_pool = self.worker_pool + if old_pool is not None: + await old_pool.drain() + + try: + new_pool = await self.weight_manager.build_worker_pool(model_tag, step=self.settings.default_step) + except Exception: + if 
old_pool is not None: + old_pool.resume_accepting_requests() + raise + + self.worker_pool = new_pool + if old_pool is not None: + await old_pool.close() + + return {"status": "ok", "current_model": model_tag} + + +def get_runtime(request: Request) -> InferenceRuntime: + return request.app.state.runtime + + +def create_app(settings: Settings | None = None, runtime: InferenceRuntime | None = None) -> FastAPI: + resolved_settings = settings or get_settings() + + @asynccontextmanager + async def lifespan(app: FastAPI): + resolved_runtime = runtime or InferenceRuntime(resolved_settings) + app.state.settings = resolved_settings + app.state.runtime = resolved_runtime + await resolved_runtime.startup() + yield + await resolved_runtime.shutdown() + + app = FastAPI(title="nanochat inference service", version="0.1.0", lifespan=lifespan) + + @app.post("/generate", dependencies=[Depends(require_internal_api_key)]) + async def generate(request_body: GenerateRequest, runtime: InferenceRuntime = Depends(get_runtime)): + validate_generate_request(request_body) + worker_pool = runtime.require_ready_pool() + + try: + worker = await worker_pool.acquire_worker() + except RuntimeError as exc: + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=str(exc)) from exc + + async def stream_and_release() -> AsyncGenerator[str, None]: + try: + async for chunk in generate_stream(worker, request_body, resolved_settings): + yield chunk + finally: + await worker_pool.release_worker(worker) + + return StreamingResponse(stream_and_release(), media_type="text/event-stream") + + @app.get("/models", dependencies=[Depends(require_internal_api_key)]) + async def models(runtime: InferenceRuntime = Depends(get_runtime)): + return runtime.models_payload() + + @app.post("/models/swap", dependencies=[Depends(require_internal_api_key)]) + async def swap_model(request_body: SwapModelRequest, runtime: InferenceRuntime = Depends(get_runtime)): + payload = await 
runtime.swap_model(request_body.model_tag) + return JSONResponse(status_code=status.HTTP_202_ACCEPTED, content=payload) + + @app.get("/health") + async def health(runtime: InferenceRuntime = Depends(get_runtime)): + return runtime.health_payload() + + @app.get("/stats", dependencies=[Depends(require_internal_api_key)]) + async def stats(runtime: InferenceRuntime = Depends(get_runtime)): + return runtime.stats_payload() + + return app + + +app = create_app() diff --git a/services/inference/src/middleware/__init__.py b/services/inference/src/middleware/__init__.py new file mode 100644 index 00000000..ac626d2a --- /dev/null +++ b/services/inference/src/middleware/__init__.py @@ -0,0 +1,3 @@ +from .internal_auth import require_internal_api_key + +__all__ = ["require_internal_api_key"] diff --git a/services/inference/src/middleware/internal_auth.py b/services/inference/src/middleware/internal_auth.py new file mode 100644 index 00000000..9d7912b2 --- /dev/null +++ b/services/inference/src/middleware/internal_auth.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from fastapi import HTTPException, Request, status + + +def require_internal_api_key( + request: Request, +) -> None: + settings = request.app.state.settings + expected = settings.internal_api_key + if not expected: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="INTERNAL_API_KEY is not configured", + ) + + provided = request.headers.get("X-Internal-API-Key") or request.headers.get("INTERNAL_API_KEY") + if provided != expected: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid internal API key", + ) diff --git a/services/inference/src/services/__init__.py b/services/inference/src/services/__init__.py new file mode 100644 index 00000000..fdf7d18b --- /dev/null +++ b/services/inference/src/services/__init__.py @@ -0,0 +1,4 @@ +from .weight_manager import WeightManager +from .worker_pool import Worker, WorkerPool + +__all__ = 
["WeightManager", "Worker", "WorkerPool"] diff --git a/services/inference/src/services/weight_manager.py b/services/inference/src/services/weight_manager.py new file mode 100644 index 00000000..dafc13aa --- /dev/null +++ b/services/inference/src/services/weight_manager.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import asyncio +import os +import shutil +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Any, Callable + +from huggingface_hub import snapshot_download + +from config import Settings + + +@dataclass +class ModelRecord: + model_tag: str + path: str + source: str + available: bool = False + loaded: bool = False + + +class WeightManager: + def __init__( + self, + settings: Settings, + downloader: Callable[..., Any] = snapshot_download, + ) -> None: + self.settings = settings + self.storage_path = Path(settings.model_storage_path) + self.storage_path.mkdir(parents=True, exist_ok=True) + self.tokenizer_path = self.storage_path / "tokenizer" + self.tokenizer_path.mkdir(parents=True, exist_ok=True) + os.environ["NANOCHAT_BASE_DIR"] = str(self.storage_path) + + self._downloader = downloader + self._registry: dict[str, ModelRecord] = {} + self.current_model: str | None = None + + if settings.default_model_tag: + self.register_model(settings.default_model_tag) + self.refresh_registry() + + def _default_source(self, model_tag: str) -> str: + return f"huggingface:{self.settings.hf_repo_owner}/{model_tag}" + + def _model_dir(self, model_tag: str) -> Path: + return self.storage_path / model_tag + + def _has_checkpoint_files(self, model_dir: Path) -> bool: + return any(model_dir.glob("model_*.pt")) + + def _extract_repo_id(self, source: str) -> str | None: + if source.startswith("huggingface:"): + return source.split(":", 1)[1] + return None + + def _refresh_entry(self, model_tag: str) -> None: + record = self._registry[model_tag] + record.available = self._has_checkpoint_files(Path(record.path)) + record.loaded = 
model_tag == self.current_model + + def register_model(self, model_tag: str, source: str | None = None, path: str | Path | None = None) -> ModelRecord: + existing = self._registry.get(model_tag) + record = ModelRecord( + model_tag=model_tag, + path=str(path or self._model_dir(model_tag)), + source=source or (existing.source if existing else self._default_source(model_tag)), + available=False, + loaded=False, + ) + self._registry[model_tag] = record + self._refresh_entry(model_tag) + return record + + def refresh_registry(self) -> None: + if not self.storage_path.exists(): + return + for child in sorted(self.storage_path.iterdir()): + if child.is_dir() and child.name != "tokenizer": + self.register_model(child.name, path=child) + for model_tag in list(self._registry): + self._refresh_entry(model_tag) + + def set_loaded_model(self, model_tag: str | None) -> None: + self.current_model = model_tag + for tag in list(self._registry): + self._refresh_entry(tag) + + def list_models(self) -> list[dict[str, Any]]: + self.refresh_registry() + return [asdict(self._registry[tag]) for tag in sorted(self._registry)] + + def _sync_tokenizer_assets(self, model_dir: Path) -> None: + for base_dir in (model_dir / "tokenizer", model_dir): + if not base_dir.exists(): + continue + for asset_name in ("tokenizer.pkl", "tokenizer.json", "token_bytes.pt"): + source = base_dir / asset_name + if source.exists(): + shutil.copy2(source, self.tokenizer_path / asset_name) + + def _download_repo(self, repo_id: str, local_dir: Path) -> None: + local_dir.mkdir(parents=True, exist_ok=True) + self._downloader( + repo_id=repo_id, + local_dir=str(local_dir), + token=self.settings.hf_token, + ) + + async def ensure_available(self, model_tag: str) -> ModelRecord: + record = self.register_model(model_tag) + if record.available: + self._sync_tokenizer_assets(Path(record.path)) + return record + + repo_id = self._extract_repo_id(record.source) + if repo_id is None: + raise FileNotFoundError(f"Model 
{model_tag} is not available locally and has no HuggingFace source") + + await asyncio.to_thread(self._download_repo, repo_id, Path(record.path)) + self._sync_tokenizer_assets(Path(record.path)) + self._refresh_entry(model_tag) + + if not self._registry[model_tag].available: + raise FileNotFoundError(f"Downloaded model {model_tag} but no model_*.pt files were found") + + return self._registry[model_tag] + + async def build_worker_pool(self, model_tag: str, step: int | None = None): + from services.worker_pool import WorkerPool + + await self.ensure_available(model_tag) + pool = await WorkerPool.create( + checkpoints_root=self.storage_path, + model_tag=model_tag, + step=step, + num_workers=self.settings.num_workers, + device_type=self.settings.resolved_device_type, + ) + self.set_loaded_model(model_tag) + return pool diff --git a/services/inference/src/services/worker_pool.py b/services/inference/src/services/worker_pool.py new file mode 100644 index 00000000..31a27f03 --- /dev/null +++ b/services/inference/src/services/worker_pool.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +import asyncio +import gc +import logging +from dataclasses import dataclass +from pathlib import Path + +import torch + +from engine import Engine +from engine.checkpoint_manager import load_model_from_dir +from engine.common import autodetect_device_type, compute_init + +logger = logging.getLogger(__name__) + + +@dataclass +class Worker: + gpu_id: int + device: torch.device + engine: Engine + tokenizer: object + model_tag: str + + +class WorkerPool: + def __init__(self, num_workers: int, device_type: str = ""): + self.requested_workers = max(1, num_workers) + self.device_type = device_type or autodetect_device_type() + self.num_workers = 0 + self.model_tag: str | None = None + self.workers: list[Worker] = [] + self.available_workers: asyncio.Queue[Worker] = asyncio.Queue() + self.accepting_requests = True + self._active_requests = 0 + self._idle_event = asyncio.Event() + 
self._idle_event.set() + + @classmethod + async def create( + cls, + checkpoints_root: Path, + model_tag: str, + step: int | None, + num_workers: int, + device_type: str = "", + ) -> "WorkerPool": + pool = cls(num_workers=num_workers, device_type=device_type) + await pool.initialize(checkpoints_root=checkpoints_root, model_tag=model_tag, step=step) + return pool + + async def initialize(self, checkpoints_root: Path, model_tag: str, step: int | None = None) -> None: + compute_init(self.device_type) + self.model_tag = model_tag + worker_count = self.requested_workers + + if self.device_type != "cuda": + worker_count = 1 + else: + available_gpu_count = torch.cuda.device_count() + if available_gpu_count < 1: + raise RuntimeError("CUDA was selected but no CUDA devices are available") + if worker_count > available_gpu_count: + logger.warning( + "Requested %s workers but only %s CUDA device(s) are available; clamping", + worker_count, + available_gpu_count, + ) + worker_count = available_gpu_count + + self.num_workers = worker_count + self.accepting_requests = True + + for worker_index in range(worker_count): + if self.device_type == "cuda": + device = torch.device(f"cuda:{worker_index}") + gpu_id = worker_index + else: + device = torch.device(self.device_type) + gpu_id = 0 + + model, tokenizer, _ = load_model_from_dir( + str(checkpoints_root), + device, + phase="eval", + model_tag=model_tag, + step=step, + ) + worker = Worker( + gpu_id=gpu_id, + device=device, + engine=Engine(model, tokenizer), + tokenizer=tokenizer, + model_tag=model_tag, + ) + self.workers.append(worker) + await self.available_workers.put(worker) + + logger.info("Initialized %s inference worker(s) for model %s", self.num_workers, model_tag) + + async def acquire_worker(self) -> Worker: + if not self.accepting_requests: + raise RuntimeError("Worker pool is draining for a model swap") + worker = await self.available_workers.get() + self._active_requests += 1 + self._idle_event.clear() + return worker + + 
async def release_worker(self, worker: Worker) -> None: + self._active_requests = max(0, self._active_requests - 1) + await self.available_workers.put(worker) + if self._active_requests == 0: + self._idle_event.set() + + async def drain(self) -> None: + self.accepting_requests = False + if self._active_requests > 0: + await self._idle_event.wait() + + def resume_accepting_requests(self) -> None: + self.accepting_requests = True + + def snapshot(self) -> dict[str, object]: + available = self.available_workers.qsize() + return { + "model_tag": self.model_tag, + "device_type": self.device_type, + "total_workers": len(self.workers), + "available_workers": available, + "busy_workers": len(self.workers) - available, + "draining": not self.accepting_requests, + "workers": [ + { + "gpu_id": worker.gpu_id, + "device": str(worker.device), + "model_tag": worker.model_tag, + } + for worker in self.workers + ], + } + + async def close(self) -> None: + self.accepting_requests = False + if self._active_requests > 0: + await self._idle_event.wait() + + while not self.available_workers.empty(): + self.available_workers.get_nowait() + + self.workers.clear() + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() diff --git a/services/inference/tests/conftest.py b/services/inference/tests/conftest.py new file mode 100644 index 00000000..2c94b2ec --- /dev/null +++ b/services/inference/tests/conftest.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +import sys +from pathlib import Path + + +SRC_DIR = Path(__file__).resolve().parents[1] / "src" +if str(SRC_DIR) not in sys.path: + sys.path.insert(0, str(SRC_DIR)) diff --git a/services/inference/tests/test_main.py b/services/inference/tests/test_main.py new file mode 100644 index 00000000..66fd8d68 --- /dev/null +++ b/services/inference/tests/test_main.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +import asyncio + +from fastapi.testclient import TestClient + +from config import Settings +from main 
import create_app + + +class FakeRuntime: + def __init__(self, ready: bool) -> None: + self.ready = ready + + async def startup(self) -> None: + return None + + async def shutdown(self) -> None: + return None + + def health_payload(self) -> dict[str, object]: + return { + "status": "ok", + "ready": self.ready, + "current_model": "samosachaat-d12" if self.ready else None, + "total_workers": 1 if self.ready else 0, + "available_workers": 1 if self.ready else 0, + "busy_workers": 0, + "draining": False, + "workers": [], + } + + def models_payload(self) -> dict[str, object]: + return {"current_model": None, "models": []} + + def stats_payload(self) -> dict[str, object]: + return self.health_payload() + + def require_ready_pool(self): + raise AssertionError("validation should fail before a worker is requested") + + async def swap_model(self, model_tag: str) -> dict[str, str]: + await asyncio.sleep(0) + return {"status": "ok", "current_model": model_tag} + + +def test_health_endpoint_reports_readiness() -> None: + settings = Settings(internal_api_key="secret", startup_load_enabled=False) + + not_ready_app = create_app(settings=settings, runtime=FakeRuntime(ready=False)) + with TestClient(not_ready_app) as client: + response = client.get("/health") + assert response.status_code == 200 + assert response.json()["ready"] is False + + ready_app = create_app(settings=settings, runtime=FakeRuntime(ready=True)) + with TestClient(ready_app) as client: + response = client.get("/health") + assert response.status_code == 200 + assert response.json()["ready"] is True + assert response.json()["current_model"] == "samosachaat-d12" + + +def test_generate_validation_rejects_empty_messages() -> None: + settings = Settings(internal_api_key="secret", startup_load_enabled=False) + app = create_app(settings=settings, runtime=FakeRuntime(ready=True)) + + with TestClient(app) as client: + response = client.post( + "/generate", + headers={"X-Internal-API-Key": "secret"}, + json={"messages": []}, 
+ ) + + assert response.status_code == 400 + assert response.json()["detail"] == "At least one message is required" + + +def test_generate_validation_rejects_invalid_role() -> None: + settings = Settings(internal_api_key="secret", startup_load_enabled=False) + app = create_app(settings=settings, runtime=FakeRuntime(ready=True)) + + with TestClient(app) as client: + response = client.post( + "/generate", + headers={"X-Internal-API-Key": "secret"}, + json={ + "messages": [ + {"role": "system", "content": "You are helpful."}, + ] + }, + ) + + assert response.status_code == 400 + assert "invalid role" in response.json()["detail"] diff --git a/services/inference/tests/test_weight_manager.py b/services/inference/tests/test_weight_manager.py new file mode 100644 index 00000000..2a331ea2 --- /dev/null +++ b/services/inference/tests/test_weight_manager.py @@ -0,0 +1,43 @@ +from __future__ import annotations + +import asyncio +from pathlib import Path + +from config import Settings +from services.weight_manager import WeightManager + + +def test_weight_registry_downloads_and_tracks_models(tmp_path: Path) -> None: + downloads: list[tuple[str, str, str | None]] = [] + + def fake_downloader(*, repo_id: str, local_dir: str, token: str | None = None) -> str: + downloads.append((repo_id, local_dir, token)) + model_dir = Path(local_dir) + model_dir.mkdir(parents=True, exist_ok=True) + (model_dir / "model_000001.pt").write_bytes(b"weights") + (model_dir / "meta_000001.json").write_text("{}", encoding="utf-8") + tokenizer_dir = model_dir / "tokenizer" + tokenizer_dir.mkdir(exist_ok=True) + (tokenizer_dir / "tokenizer.pkl").write_bytes(b"tokenizer") + return local_dir + + settings = Settings( + model_storage_path=tmp_path, + default_model_tag="samosachaat-d12", + hf_token="hf-secret", + startup_load_enabled=False, + ) + manager = WeightManager(settings, downloader=fake_downloader) + + initial_models = manager.list_models() + assert initial_models[0]["model_tag"] == "samosachaat-d12" + 
assert initial_models[0]["available"] is False + assert initial_models[0]["loaded"] is False + + asyncio.run(manager.ensure_available("samosachaat-d24")) + + models = {entry["model_tag"]: entry for entry in manager.list_models()} + assert models["samosachaat-d24"]["available"] is True + assert models["samosachaat-d24"]["source"] == "huggingface:manmohan659/samosachaat-d24" + assert downloads == [("manmohan659/samosachaat-d24", str(tmp_path / "samosachaat-d24"), "hf-secret")] + assert (tmp_path / "tokenizer" / "tokenizer.pkl").exists() diff --git a/services/inference/uv.lock b/services/inference/uv.lock new file mode 100644 index 00000000..14d10848 --- /dev/null +++ b/services/inference/uv.lock @@ -0,0 +1,1134 @@ +version = 1 +revision = 3 +requires-python = ">=3.12" +resolution-markers = [ + "sys_platform != 'darwin'", + "sys_platform == 'darwin'", +] + +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "anyio" +version = "4.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, +] + +[[package]] +name = "certifi" +version = "2026.2.25" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/e7/a1/67fe25fac3c7642725500a3f6cfe5821ad557c3abb11c9d20d12c7008d3e/charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", size = 144271, upload-time = "2026-04-02T09:28:39.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/eb/4fc8d0a7110eb5fc9cc161723a34a8a6c200ce3b4fbf681bc86feee22308/charset_normalizer-3.4.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46", size = 311328, upload-time = "2026-04-02T09:26:24.331Z" }, + { url = "https://files.pythonhosted.org/packages/f8/e3/0fadc706008ac9d7b9b5be6dc767c05f9d3e5df51744ce4cc9605de7b9f4/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2", size = 208061, upload-time = "2026-04-02T09:26:25.568Z" }, + { url = "https://files.pythonhosted.org/packages/42/f0/3dd1045c47f4a4604df85ec18ad093912ae1344ac706993aff91d38773a2/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b", size = 229031, upload-time = "2026-04-02T09:26:26.865Z" }, + { url = "https://files.pythonhosted.org/packages/dc/67/675a46eb016118a2fbde5a277a5d15f4f69d5f3f5f338e5ee2f8948fcf43/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a", size = 225239, upload-time = "2026-04-02T09:26:28.044Z" }, + { url = "https://files.pythonhosted.org/packages/4b/f8/d0118a2f5f23b02cd166fa385c60f9b0d4f9194f574e2b31cef350ad7223/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116", size = 216589, upload-time = "2026-04-02T09:26:29.239Z" }, + { url = "https://files.pythonhosted.org/packages/b1/f1/6d2b0b261b6c4ceef0fcb0d17a01cc5bc53586c2d4796fa04b5c540bc13d/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb", size = 202733, upload-time = "2026-04-02T09:26:30.5Z" }, + { url = "https://files.pythonhosted.org/packages/6f/c0/7b1f943f7e87cc3db9626ba17807d042c38645f0a1d4415c7a14afb5591f/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1", size = 212652, upload-time = "2026-04-02T09:26:31.709Z" }, + { url = "https://files.pythonhosted.org/packages/38/dd/5a9ab159fe45c6e72079398f277b7d2b523e7f716acc489726115a910097/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15", size = 211229, upload-time = "2026-04-02T09:26:33.282Z" }, + { url = "https://files.pythonhosted.org/packages/d5/ff/531a1cad5ca855d1c1a8b69cb71abfd6d85c0291580146fda7c82857caa1/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5", size = 203552, upload-time = "2026-04-02T09:26:34.845Z" }, + { url = "https://files.pythonhosted.org/packages/c1/4c/a5fb52d528a8ca41f7598cb619409ece30a169fbdf9cdce592e53b46c3a6/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d", size = 230806, upload-time = "2026-04-02T09:26:36.152Z" }, + { url = "https://files.pythonhosted.org/packages/59/7a/071feed8124111a32b316b33ae4de83d36923039ef8cf48120266844285b/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = 
"sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7", size = 212316, upload-time = "2026-04-02T09:26:37.672Z" }, + { url = "https://files.pythonhosted.org/packages/fd/35/f7dba3994312d7ba508e041eaac39a36b120f32d4c8662b8814dab876431/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464", size = 227274, upload-time = "2026-04-02T09:26:38.93Z" }, + { url = "https://files.pythonhosted.org/packages/8a/2d/a572df5c9204ab7688ec1edc895a73ebded3b023bb07364710b05dd1c9be/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49", size = 218468, upload-time = "2026-04-02T09:26:40.17Z" }, + { url = "https://files.pythonhosted.org/packages/86/eb/890922a8b03a568ca2f336c36585a4713c55d4d67bf0f0c78924be6315ca/charset_normalizer-3.4.7-cp312-cp312-win32.whl", hash = "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c", size = 148460, upload-time = "2026-04-02T09:26:41.416Z" }, + { url = "https://files.pythonhosted.org/packages/35/d9/0e7dffa06c5ab081f75b1b786f0aefc88365825dfcd0ac544bdb7b2b6853/charset_normalizer-3.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6", size = 159330, upload-time = "2026-04-02T09:26:42.554Z" }, + { url = "https://files.pythonhosted.org/packages/9e/5d/481bcc2a7c88ea6b0878c299547843b2521ccbc40980cb406267088bc701/charset_normalizer-3.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d", size = 147828, upload-time = "2026-04-02T09:26:44.075Z" }, + { url = "https://files.pythonhosted.org/packages/c1/3b/66777e39d3ae1ddc77ee606be4ec6d8cbd4c801f65e5a1b6f2b11b8346dd/charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063", size = 309627, 
upload-time = "2026-04-02T09:26:45.198Z" }, + { url = "https://files.pythonhosted.org/packages/2e/4e/b7f84e617b4854ade48a1b7915c8ccfadeba444d2a18c291f696e37f0d3b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c", size = 207008, upload-time = "2026-04-02T09:26:46.824Z" }, + { url = "https://files.pythonhosted.org/packages/c4/bb/ec73c0257c9e11b268f018f068f5d00aa0ef8c8b09f7753ebd5f2880e248/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66", size = 228303, upload-time = "2026-04-02T09:26:48.397Z" }, + { url = "https://files.pythonhosted.org/packages/85/fb/32d1f5033484494619f701e719429c69b766bfc4dbc61aa9e9c8c166528b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18", size = 224282, upload-time = "2026-04-02T09:26:49.684Z" }, + { url = "https://files.pythonhosted.org/packages/fa/07/330e3a0dda4c404d6da83b327270906e9654a24f6c546dc886a0eb0ffb23/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd", size = 215595, upload-time = "2026-04-02T09:26:50.915Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7c/fc890655786e423f02556e0216d4b8c6bcb6bdfa890160dc66bf52dee468/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215", size = 201986, upload-time = "2026-04-02T09:26:52.197Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/97/bfb18b3db2aed3b90cf54dc292ad79fdd5ad65c4eae454099475cbeadd0d/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859", size = 211711, upload-time = "2026-04-02T09:26:53.49Z" }, + { url = "https://files.pythonhosted.org/packages/6f/a5/a581c13798546a7fd557c82614a5c65a13df2157e9ad6373166d2a3e645d/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8", size = 210036, upload-time = "2026-04-02T09:26:54.975Z" }, + { url = "https://files.pythonhosted.org/packages/8c/bf/b3ab5bcb478e4193d517644b0fb2bf5497fbceeaa7a1bc0f4d5b50953861/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5", size = 202998, upload-time = "2026-04-02T09:26:56.303Z" }, + { url = "https://files.pythonhosted.org/packages/e7/4e/23efd79b65d314fa320ec6017b4b5834d5c12a58ba4610aa353af2e2f577/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832", size = 230056, upload-time = "2026-04-02T09:26:57.554Z" }, + { url = "https://files.pythonhosted.org/packages/b9/9f/1e1941bc3f0e01df116e68dc37a55c4d249df5e6fa77f008841aef68264f/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6", size = 211537, upload-time = "2026-04-02T09:26:58.843Z" }, + { url = "https://files.pythonhosted.org/packages/80/0f/088cbb3020d44428964a6c97fe1edfb1b9550396bf6d278330281e8b709c/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48", size = 226176, upload-time = "2026-04-02T09:27:00.437Z" }, + { url = 
"https://files.pythonhosted.org/packages/6a/9f/130394f9bbe06f4f63e22641d32fc9b202b7e251c9aef4db044324dac493/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a", size = 217723, upload-time = "2026-04-02T09:27:02.021Z" }, + { url = "https://files.pythonhosted.org/packages/73/55/c469897448a06e49f8fa03f6caae97074fde823f432a98f979cc42b90e69/charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e", size = 148085, upload-time = "2026-04-02T09:27:03.192Z" }, + { url = "https://files.pythonhosted.org/packages/5d/78/1b74c5bbb3f99b77a1715c91b3e0b5bdb6fe302d95ace4f5b1bec37b0167/charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110", size = 158819, upload-time = "2026-04-02T09:27:04.454Z" }, + { url = "https://files.pythonhosted.org/packages/68/86/46bd42279d323deb8687c4a5a811fd548cb7d1de10cf6535d099877a9a9f/charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b", size = 147915, upload-time = "2026-04-02T09:27:05.971Z" }, + { url = "https://files.pythonhosted.org/packages/97/c8/c67cb8c70e19ef1960b97b22ed2a1567711de46c4ddf19799923adc836c2/charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0", size = 309234, upload-time = "2026-04-02T09:27:07.194Z" }, + { url = "https://files.pythonhosted.org/packages/99/85/c091fdee33f20de70d6c8b522743b6f831a2f1cd3ff86de4c6a827c48a76/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a", size = 208042, upload-time = "2026-04-02T09:27:08.749Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/1c/ab2ce611b984d2fd5d86a5a8a19c1ae26acac6bad967da4967562c75114d/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b", size = 228706, upload-time = "2026-04-02T09:27:09.951Z" }, + { url = "https://files.pythonhosted.org/packages/a8/29/2b1d2cb00bf085f59d29eb773ce58ec2d325430f8c216804a0a5cd83cbca/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41", size = 224727, upload-time = "2026-04-02T09:27:11.175Z" }, + { url = "https://files.pythonhosted.org/packages/47/5c/032c2d5a07fe4d4855fea851209cca2b6f03ebeb6d4e3afdb3358386a684/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e", size = 215882, upload-time = "2026-04-02T09:27:12.446Z" }, + { url = "https://files.pythonhosted.org/packages/2c/c2/356065d5a8b78ed04499cae5f339f091946a6a74f91e03476c33f0ab7100/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae", size = 200860, upload-time = "2026-04-02T09:27:13.721Z" }, + { url = "https://files.pythonhosted.org/packages/0c/cd/a32a84217ced5039f53b29f460962abb2d4420def55afabe45b1c3c7483d/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18", size = 211564, upload-time = "2026-04-02T09:27:15.272Z" }, + { url = "https://files.pythonhosted.org/packages/44/86/58e6f13ce26cc3b8f4a36b94a0f22ae2f00a72534520f4ae6857c4b81f89/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b", size = 211276, upload-time = "2026-04-02T09:27:16.834Z" }, + { url = "https://files.pythonhosted.org/packages/8f/fe/d17c32dc72e17e155e06883efa84514ca375f8a528ba2546bee73fc4df81/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356", size = 201238, upload-time = "2026-04-02T09:27:18.229Z" }, + { url = "https://files.pythonhosted.org/packages/6a/29/f33daa50b06525a237451cdb6c69da366c381a3dadcd833fa5676bc468b3/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab", size = 230189, upload-time = "2026-04-02T09:27:19.445Z" }, + { url = "https://files.pythonhosted.org/packages/b6/6e/52c84015394a6a0bdcd435210a7e944c5f94ea1055f5cc5d56c5fe368e7b/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46", size = 211352, upload-time = "2026-04-02T09:27:20.79Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d7/4353be581b373033fb9198bf1da3cf8f09c1082561e8e922aa7b39bf9fe8/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44", size = 227024, upload-time = "2026-04-02T09:27:22.063Z" }, + { url = "https://files.pythonhosted.org/packages/30/45/99d18aa925bd1740098ccd3060e238e21115fffbfdcb8f3ece837d0ace6c/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72", size = 217869, upload-time = "2026-04-02T09:27:23.486Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/5ee478aa53f4bb7996482153d4bfe1b89e0f087f0ab6b294fcf92d595873/charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10", 
size = 148541, upload-time = "2026-04-02T09:27:25.146Z" }, + { url = "https://files.pythonhosted.org/packages/48/77/72dcb0921b2ce86420b2d79d454c7022bf5be40202a2a07906b9f2a35c97/charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f", size = 159634, upload-time = "2026-04-02T09:27:26.642Z" }, + { url = "https://files.pythonhosted.org/packages/c6/a3/c2369911cd72f02386e4e340770f6e158c7980267da16af8f668217abaa0/charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246", size = 148384, upload-time = "2026-04-02T09:27:28.271Z" }, + { url = "https://files.pythonhosted.org/packages/94/09/7e8a7f73d24dba1f0035fbbf014d2c36828fc1bf9c88f84093e57d315935/charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24", size = 330133, upload-time = "2026-04-02T09:27:29.474Z" }, + { url = "https://files.pythonhosted.org/packages/8d/da/96975ddb11f8e977f706f45cddd8540fd8242f71ecdb5d18a80723dcf62c/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79", size = 216257, upload-time = "2026-04-02T09:27:30.793Z" }, + { url = "https://files.pythonhosted.org/packages/e5/e8/1d63bf8ef2d388e95c64b2098f45f84758f6d102a087552da1485912637b/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960", size = 234851, upload-time = "2026-04-02T09:27:32.44Z" }, + { url = "https://files.pythonhosted.org/packages/9b/40/e5ff04233e70da2681fa43969ad6f66ca5611d7e669be0246c4c7aaf6dc8/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4", size = 233393, upload-time = "2026-04-02T09:27:34.03Z" }, + { url = "https://files.pythonhosted.org/packages/be/c1/06c6c49d5a5450f76899992f1ee40b41d076aee9279b49cf9974d2f313d5/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e", size = 223251, upload-time = "2026-04-02T09:27:35.369Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/f2ff16fb050946169e3e1f82134d107e5d4ae72647ec8a1b1446c148480f/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1", size = 206609, upload-time = "2026-04-02T09:27:36.661Z" }, + { url = "https://files.pythonhosted.org/packages/69/d5/a527c0cd8d64d2eab7459784fb4169a0ac76e5a6fc5237337982fd61347e/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44", size = 220014, upload-time = "2026-04-02T09:27:38.019Z" }, + { url = "https://files.pythonhosted.org/packages/7e/80/8a7b8104a3e203074dc9aa2c613d4b726c0e136bad1cc734594b02867972/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e", size = 218979, upload-time = "2026-04-02T09:27:39.37Z" }, + { url = "https://files.pythonhosted.org/packages/02/9a/b759b503d507f375b2b5c153e4d2ee0a75aa215b7f2489cf314f4541f2c0/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3", size = 209238, upload-time = "2026-04-02T09:27:40.722Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/4e/0f3f5d47b86bdb79256e7290b26ac847a2832d9a4033f7eb2cd4bcf4bb5b/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0", size = 236110, upload-time = "2026-04-02T09:27:42.33Z" }, + { url = "https://files.pythonhosted.org/packages/96/23/bce28734eb3ed2c91dcf93abeb8a5cf393a7b2749725030bb630e554fdd8/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e", size = 219824, upload-time = "2026-04-02T09:27:43.924Z" }, + { url = "https://files.pythonhosted.org/packages/2c/6f/6e897c6984cc4d41af319b077f2f600fc8214eb2fe2d6bcb79141b882400/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb", size = 233103, upload-time = "2026-04-02T09:27:45.348Z" }, + { url = "https://files.pythonhosted.org/packages/76/22/ef7bd0fe480a0ae9b656189ec00744b60933f68b4f42a7bb06589f6f576a/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe", size = 225194, upload-time = "2026-04-02T09:27:46.706Z" }, + { url = "https://files.pythonhosted.org/packages/c5/a7/0e0ab3e0b5bc1219bd80a6a0d4d72ca74d9250cb2382b7c699c147e06017/charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0", size = 159827, upload-time = "2026-04-02T09:27:48.053Z" }, + { url = "https://files.pythonhosted.org/packages/7a/1d/29d32e0fb40864b1f878c7f5a0b343ae676c6e2b271a2d55cc3a152391da/charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c", size = 174168, upload-time = "2026-04-02T09:27:49.795Z" }, + { url = 
"https://files.pythonhosted.org/packages/de/32/d92444ad05c7a6e41fb2036749777c163baf7a0301a040cb672d6b2b1ae9/charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d", size = 153018, upload-time = "2026-04-02T09:27:51.116Z" }, + { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = "2026-04-02T09:28:37.794Z" }, +] + +[[package]] +name = "click" +version = "8.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/57/75/31212c6bf2503fdf920d87fee5d7a86a2e3bcf444984126f13d8e4016804/click-8.3.2.tar.gz", hash = "sha256:14162b8b3b3550a7d479eafa77dfd3c38d9dc8951f6f69c78913a8f9a7540fd5", size = 302856, upload-time = "2026-04-03T19:14:45.118Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/20/71885d8b97d4f3dde17b1fdb92dbd4908b00541c5a3379787137285f602e/click-8.3.2-py3-none-any.whl", hash = "sha256:1924d2c27c5653561cd2cae4548d1406039cb79b858b747cfea24924bbc1616d", size = 108379, upload-time = "2026-04-03T19:14:43.505Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = 
"sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "fastapi" +version = "0.136.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "pydantic" }, + { name = "starlette" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4e/d9/e66315807e41e69e7f6a1b42a162dada2f249c5f06ad3f1a95f84ab336ef/fastapi-0.136.0.tar.gz", hash = "sha256:cf08e067cc66e106e102d9ba659463abfac245200752f8a5b7b1e813de4ff73e", size = 396607, upload-time = "2026-04-16T11:47:13.623Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/a3/0bd5f0cdb0bbc92650e8dc457e9250358411ee5d1b65e42b6632387daf81/fastapi-0.136.0-py3-none-any.whl", hash = "sha256:8793d44ec7378e2be07f8a013cf7f7aa47d6327d0dfe9804862688ec4541a6b4", size = 117556, upload-time = "2026-04-16T11:47:11.922Z" }, +] + +[[package]] +name = "filelock" +version = "3.28.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/17/6e8890271880903e3538660a21d63a6c1fea969ac71d0d6b608b78727fa9/filelock-3.28.0.tar.gz", hash = "sha256:4ed1010aae813c4ee8d9c660e4792475ee60c4a0ba76073ceaf862bd317e3ca6", size = 56474, upload-time = "2026-04-14T22:54:33.625Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/21/2f728888c45033d34a417bfcd248ea2564c9e08ab1bfd301377cf05d5586/filelock-3.28.0-py3-none-any.whl", hash = "sha256:de9af6712788e7171df1b28b15eba2446c69721433fa427a9bee07b17820a9db", size = 39189, upload-time = "2026-04-14T22:54:32.037Z" }, +] + +[[package]] +name = "fsspec" +version = "2026.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/cf/b50ddf667c15276a9ab15a70ef5f257564de271957933ffea49d2cdbcdfb/fsspec-2026.3.0.tar.gz", hash = 
"sha256:1ee6a0e28677557f8c2f994e3eea77db6392b4de9cd1f5d7a9e87a0ae9d01b41", size = 313547, upload-time = "2026-03-27T19:11:14.892Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/1f/5f4a3cd9e4440e9d9bc78ad0a91a1c8d46b4d429d5239ebe6793c9fe5c41/fsspec-2026.3.0-py3-none-any.whl", hash = "sha256:d2ceafaad1b3457968ed14efa28798162f1638dbb5d2a6868a2db002a5ee39a4", size = 202595, upload-time = "2026-03-27T19:11:13.595Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "hf-xet" +version = "1.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/43/724d307b34e353da0abd476e02f72f735cdd2bc86082dee1b32ea0bfee1d/hf_xet-1.4.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7551659ba4f1e1074e9623996f28c3873682530aee0a846b7f2f066239228144", size = 3800935, upload-time = "2026-03-31T22:39:49.618Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/d2/8bee5996b699262edb87dbb54118d287c0e1b2fc78af7cdc41857ba5e3c4/hf_xet-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bee693ada985e7045997f05f081d0e12c4c08bd7626dc397f8a7c487e6c04f7f", size = 3558942, upload-time = "2026-03-31T22:39:47.938Z" }, + { url = "https://files.pythonhosted.org/packages/c3/a1/e993d09cbe251196fb60812b09a58901c468127b7259d2bf0f68bf6088eb/hf_xet-1.4.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21644b404bb0100fe3857892f752c4d09642586fd988e61501c95bbf44b393a3", size = 4207657, upload-time = "2026-03-31T22:39:39.69Z" }, + { url = "https://files.pythonhosted.org/packages/64/44/9eb6d21e5c34c63e5e399803a6932fa983cabdf47c0ecbcfe7ea97684b8c/hf_xet-1.4.3-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:987f09cfe418237812896a6736b81b1af02a3a6dcb4b4944425c4c4fca7a7cf8", size = 3986765, upload-time = "2026-03-31T22:39:37.936Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/8ad6f16fdb82f5f7284a34b5ec48645bd575bdcd2f6f0d1644775909c486/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60cf7fc43a99da0a853345cf86d23738c03983ee5249613a6305d3e57a5dca74", size = 4188162, upload-time = "2026-03-31T22:39:58.382Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c4/39d6e136cbeea9ca5a23aad4b33024319222adbdc059ebcda5fc7d9d5ff4/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2815a49a7a59f3e2edf0cf113ae88e8cb2ca2a221bf353fb60c609584f4884d4", size = 4424525, upload-time = "2026-03-31T22:40:00.225Z" }, + { url = "https://files.pythonhosted.org/packages/46/f2/adc32dae6bdbc367853118b9878139ac869419a4ae7ba07185dc31251b76/hf_xet-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:42ee323265f1e6a81b0e11094564fb7f7e0ec75b5105ffd91ae63f403a11931b", size = 3671610, upload-time = "2026-03-31T22:40:10.42Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/19/25d897dcc3f81953e0c2cde9ec186c7a0fee413eb0c9a7a9130d87d94d3a/hf_xet-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:27c976ba60079fb8217f485b9c5c7fcd21c90b0367753805f87cb9f3cdc4418a", size = 3528529, upload-time = "2026-03-31T22:40:09.106Z" }, + { url = "https://files.pythonhosted.org/packages/ec/36/3e8f85ca9fe09b8de2b2e10c63b3b3353d7dda88a0b3d426dffbe7b8313b/hf_xet-1.4.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5251d5ece3a81815bae9abab41cf7ddb7bcb8f56411bce0827f4a3071c92fdc6", size = 3801019, upload-time = "2026-03-31T22:39:56.651Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9c/defb6cb1de28bccb7bd8d95f6e60f72a3d3fa4cb3d0329c26fb9a488bfe7/hf_xet-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1feb0f3abeacee143367c326a128a2e2b60868ec12a36c225afb1d6c5a05e6d2", size = 3558746, upload-time = "2026-03-31T22:39:54.766Z" }, + { url = "https://files.pythonhosted.org/packages/c1/bd/8d001191893178ff8e826e46ad5299446e62b93cd164e17b0ffea08832ec/hf_xet-1.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b301fc150290ca90b4fccd079829b84bb4786747584ae08b94b4577d82fb791", size = 4207692, upload-time = "2026-03-31T22:39:46.246Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/6790b402803250e9936435613d3a78b9aaeee7973439f0918848dde58309/hf_xet-1.4.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:d972fbe95ddc0d3c0fc49b31a8a69f47db35c1e3699bf316421705741aab6653", size = 3986281, upload-time = "2026-03-31T22:39:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/51/56/ea62552fe53db652a9099eda600b032d75554d0e86c12a73824bfedef88b/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5b48db1ee344a805a1b9bd2cda9b6b65fe77ed3787bd6e87ad5521141d317cd", size = 4187414, upload-time = "2026-03-31T22:40:04.951Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/f5/bc1456d4638061bea997e6d2db60a1a613d7b200e0755965ec312dc1ef79/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:22bdc1f5fb8b15bf2831440b91d1c9bbceeb7e10c81a12e8d75889996a5c9da8", size = 4424368, upload-time = "2026-03-31T22:40:06.347Z" }, + { url = "https://files.pythonhosted.org/packages/e4/76/ab597bae87e1f06d18d3ecb8ed7f0d3c9a37037fc32ce76233d369273c64/hf_xet-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:0392c79b7cf48418cd61478c1a925246cf10639f4cd9d94368d8ca1e8df9ea07", size = 3672280, upload-time = "2026-03-31T22:40:16.401Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/2e462d34e23a09a74d73785dbed71cc5dbad82a72eee2ad60a72a554155d/hf_xet-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:681c92a07796325778a79d76c67011764ecc9042a8c3579332b61b63ae512075", size = 3528945, upload-time = "2026-03-31T22:40:14.995Z" }, + { url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" }, + { url = "https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" }, + { url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" }, + { url = 
"https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" }, + { url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" }, + { url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" }, + { url = "https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "huggingface-hub" +version = "1.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/89/e7aa12d8a6b9259bed10671abb25ae6fa437c0f88a86ecbf59617bae7759/huggingface_hub-1.11.0.tar.gz", hash = "sha256:15fb3713c7f9cdff7b808a94fd91664f661ab142796bb48c9cd9493e8d166278", size = 761749, upload-time = "2026-04-16T13:07:39.73Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/37/02/4f3f8997d1ea7fe0146b343e5e14bd065fa87af790d07e5576d31b31cc18/huggingface_hub-1.11.0-py3-none-any.whl", hash = "sha256:42a6de0afbfeb5e022222d36398f029679db4eb4778801aafda32257ae9131ab", size = 645499, upload-time = "2026-04-16T13:07:37.716Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = 
"sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, + { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, + { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, + { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, + { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = 
"https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url 
= "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106, upload-time = "2023-03-07T16:47:11.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, +] + +[[package]] +name = "nanochat-inference-service" +version = "0.1.0" +source = { virtual = "." 
} +dependencies = [ + { name = "fastapi" }, + { name = "filelock" }, + { name = "huggingface-hub" }, + { name = "pydantic-settings" }, + { name = "requests" }, + { name = "rustbpe" }, + { name = "tiktoken" }, + { name = "tokenizers" }, + { name = "torch", version = "2.9.1", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.9.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "sys_platform != 'darwin'" }, + { name = "transformers" }, + { name = "uvicorn" }, +] + +[package.dev-dependencies] +dev = [ + { name = "httpx" }, + { name = "pytest" }, +] + +[package.metadata] +requires-dist = [ + { name = "fastapi", specifier = ">=0.117.1" }, + { name = "filelock", specifier = ">=3.18.0" }, + { name = "huggingface-hub", specifier = ">=0.35.0" }, + { name = "pydantic-settings", specifier = ">=2.10.1" }, + { name = "requests", specifier = ">=2.32.0" }, + { name = "rustbpe", specifier = ">=0.1.0" }, + { name = "tiktoken", specifier = ">=0.11.0" }, + { name = "tokenizers", specifier = ">=0.22.0" }, + { name = "torch", specifier = "==2.9.1", index = "https://download.pytorch.org/whl/cpu" }, + { name = "transformers", specifier = ">=4.57.3" }, + { name = "uvicorn", specifier = ">=0.36.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "httpx", specifier = ">=0.28.1" }, + { name = "pytest", specifier = ">=8.0.0" }, +] + +[[package]] +name = "networkx" +version = "3.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/51/63fe664f3908c97be9d2e4f1158eb633317598cfa6e1fc14af5383f17512/networkx-3.6.1.tar.gz", hash = "sha256:26b7c357accc0c8cde558ad486283728b65b6a95d85ee1cd66bafab4c8168509", size = 2517025, upload-time = "2025-12-08T17:02:39.908Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, +] + +[[package]] +name = "numpy" +version = "2.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/9f/b8cef5bffa569759033adda9481211426f12f53299629b410340795c2514/numpy-2.4.4.tar.gz", hash = "sha256:2d390634c5182175533585cc89f3608a4682ccb173cc9bb940b2881c8d6f8fa0", size = 20731587, upload-time = "2026-03-29T13:22:01.298Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/05/32396bec30fb2263770ee910142f49c1476d08e8ad41abf8403806b520ce/numpy-2.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:15716cfef24d3a9762e3acdf87e27f58dc823d1348f765bbea6bef8c639bfa1b", size = 16689272, upload-time = "2026-03-29T13:18:49.223Z" }, + { url = "https://files.pythonhosted.org/packages/c5/f3/a983d28637bfcd763a9c7aafdb6d5c0ebf3d487d1e1459ffdb57e2f01117/numpy-2.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23cbfd4c17357c81021f21540da84ee282b9c8fba38a03b7b9d09ba6b951421e", size = 14699573, upload-time = "2026-03-29T13:18:52.629Z" }, + { url = "https://files.pythonhosted.org/packages/9b/fd/e5ecca1e78c05106d98028114f5c00d3eddb41207686b2b7de3e477b0e22/numpy-2.4.4-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:8b3b60bb7cba2c8c81837661c488637eee696f59a877788a396d33150c35d842", size = 5204782, upload-time = "2026-03-29T13:18:55.579Z" }, + { url = "https://files.pythonhosted.org/packages/de/2f/702a4594413c1a8632092beae8aba00f1d67947389369b3777aed783fdca/numpy-2.4.4-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e4a010c27ff6f210ff4c6ef34394cd61470d01014439b192ec22552ee867f2a8", size = 6552038, upload-time = "2026-03-29T13:18:57.769Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/37/eed308a8f56cba4d1fdf467a4fc67ef4ff4bf1c888f5fc980481890104b1/numpy-2.4.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9e75681b59ddaa5e659898085ae0eaea229d054f2ac0c7e563a62205a700121", size = 15670666, upload-time = "2026-03-29T13:19:00.341Z" }, + { url = "https://files.pythonhosted.org/packages/0a/0d/0e3ecece05b7a7e87ab9fb587855548da437a061326fff64a223b6dcb78a/numpy-2.4.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:81f4a14bee47aec54f883e0cad2d73986640c1590eb9bfaaba7ad17394481e6e", size = 16645480, upload-time = "2026-03-29T13:19:03.63Z" }, + { url = "https://files.pythonhosted.org/packages/34/49/f2312c154b82a286758ee2f1743336d50651f8b5195db18cdb63675ff649/numpy-2.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:62d6b0f03b694173f9fcb1fb317f7222fd0b0b103e784c6549f5e53a27718c44", size = 17020036, upload-time = "2026-03-29T13:19:07.428Z" }, + { url = "https://files.pythonhosted.org/packages/7b/e9/736d17bd77f1b0ec4f9901aaec129c00d59f5d84d5e79bba540ef12c2330/numpy-2.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fbc356aae7adf9e6336d336b9c8111d390a05df88f1805573ebb0807bd06fd1d", size = 18368643, upload-time = "2026-03-29T13:19:10.775Z" }, + { url = "https://files.pythonhosted.org/packages/63/f6/d417977c5f519b17c8a5c3bc9e8304b0908b0e21136fe43bf628a1343914/numpy-2.4.4-cp312-cp312-win32.whl", hash = "sha256:0d35aea54ad1d420c812bfa0385c71cd7cc5bcf7c65fed95fc2cd02fe8c79827", size = 5961117, upload-time = "2026-03-29T13:19:13.464Z" }, + { url = "https://files.pythonhosted.org/packages/2d/5b/e1deebf88ff431b01b7406ca3583ab2bbb90972bbe1c568732e49c844f7e/numpy-2.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:b5f0362dc928a6ecd9db58868fca5e48485205e3855957bdedea308f8672ea4a", size = 12320584, upload-time = "2026-03-29T13:19:16.155Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/89/e4e856ac82a68c3ed64486a544977d0e7bdd18b8da75b78a577ca31c4395/numpy-2.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:846300f379b5b12cc769334464656bc882e0735d27d9726568bc932fdc49d5ec", size = 10221450, upload-time = "2026-03-29T13:19:18.994Z" }, + { url = "https://files.pythonhosted.org/packages/14/1d/d0a583ce4fefcc3308806a749a536c201ed6b5ad6e1322e227ee4848979d/numpy-2.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08f2e31ed5e6f04b118e49821397f12767934cfdd12a1ce86a058f91e004ee50", size = 16684933, upload-time = "2026-03-29T13:19:22.47Z" }, + { url = "https://files.pythonhosted.org/packages/c1/62/2b7a48fbb745d344742c0277f01286dead15f3f68e4f359fbfcf7b48f70f/numpy-2.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e823b8b6edc81e747526f70f71a9c0a07ac4e7ad13020aa736bb7c9d67196115", size = 14694532, upload-time = "2026-03-29T13:19:25.581Z" }, + { url = "https://files.pythonhosted.org/packages/e5/87/499737bfba066b4a3bebff24a8f1c5b2dee410b209bc6668c9be692580f0/numpy-2.4.4-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4a19d9dba1a76618dd86b164d608566f393f8ec6ac7c44f0cc879011c45e65af", size = 5199661, upload-time = "2026-03-29T13:19:28.31Z" }, + { url = "https://files.pythonhosted.org/packages/cd/da/464d551604320d1491bc345efed99b4b7034143a85787aab78d5691d5a0e/numpy-2.4.4-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d2a8490669bfe99a233298348acc2d824d496dee0e66e31b66a6022c2ad74a5c", size = 6547539, upload-time = "2026-03-29T13:19:30.97Z" }, + { url = "https://files.pythonhosted.org/packages/7d/90/8d23e3b0dafd024bf31bdec225b3bb5c2dbfa6912f8a53b8659f21216cbf/numpy-2.4.4-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45dbed2ab436a9e826e302fcdcbe9133f9b0006e5af7168afb8963a6520da103", size = 15668806, upload-time = "2026-03-29T13:19:33.887Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/73/a9d864e42a01896bb5974475438f16086be9ba1f0d19d0bb7a07427c4a8b/numpy-2.4.4-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c901b15172510173f5cb310eae652908340f8dede90fff9e3bf6c0d8dfd92f83", size = 16632682, upload-time = "2026-03-29T13:19:37.336Z" }, + { url = "https://files.pythonhosted.org/packages/34/fb/14570d65c3bde4e202a031210475ae9cde9b7686a2e7dc97ee67d2833b35/numpy-2.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:99d838547ace2c4aace6c4f76e879ddfe02bb58a80c1549928477862b7a6d6ed", size = 17019810, upload-time = "2026-03-29T13:19:40.963Z" }, + { url = "https://files.pythonhosted.org/packages/8a/77/2ba9d87081fd41f6d640c83f26fb7351e536b7ce6dd9061b6af5904e8e46/numpy-2.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0aec54fd785890ecca25a6003fd9a5aed47ad607bbac5cd64f836ad8666f4959", size = 18357394, upload-time = "2026-03-29T13:19:44.859Z" }, + { url = "https://files.pythonhosted.org/packages/a2/23/52666c9a41708b0853fa3b1a12c90da38c507a3074883823126d4e9d5b30/numpy-2.4.4-cp313-cp313-win32.whl", hash = "sha256:07077278157d02f65c43b1b26a3886bce886f95d20aabd11f87932750dfb14ed", size = 5959556, upload-time = "2026-03-29T13:19:47.661Z" }, + { url = "https://files.pythonhosted.org/packages/57/fb/48649b4971cde70d817cf97a2a2fdc0b4d8308569f1dd2f2611959d2e0cf/numpy-2.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:5c70f1cc1c4efbe316a572e2d8b9b9cc44e89b95f79ca3331553fbb63716e2bf", size = 12317311, upload-time = "2026-03-29T13:19:50.67Z" }, + { url = "https://files.pythonhosted.org/packages/ba/d8/11490cddd564eb4de97b4579ef6bfe6a736cc07e94c1598590ae25415e01/numpy-2.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:ef4059d6e5152fa1a39f888e344c73fdc926e1b2dd58c771d67b0acfbf2aa67d", size = 10222060, upload-time = "2026-03-29T13:19:54.229Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/5d/dab4339177a905aad3e2221c915b35202f1ec30d750dd2e5e9d9a72b804b/numpy-2.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4bbc7f303d125971f60ec0aaad5e12c62d0d2c925f0ab1273debd0e4ba37aba5", size = 14822302, upload-time = "2026-03-29T13:19:57.585Z" }, + { url = "https://files.pythonhosted.org/packages/eb/e4/0564a65e7d3d97562ed6f9b0fd0fb0a6f559ee444092f105938b50043876/numpy-2.4.4-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:4d6d57903571f86180eb98f8f0c839fa9ebbfb031356d87f1361be91e433f5b7", size = 5327407, upload-time = "2026-03-29T13:20:00.601Z" }, + { url = "https://files.pythonhosted.org/packages/29/8d/35a3a6ce5ad371afa58b4700f1c820f8f279948cca32524e0a695b0ded83/numpy-2.4.4-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:4636de7fd195197b7535f231b5de9e4b36d2c440b6e566d2e4e4746e6af0ca93", size = 6647631, upload-time = "2026-03-29T13:20:02.855Z" }, + { url = "https://files.pythonhosted.org/packages/f4/da/477731acbd5a58a946c736edfdabb2ac5b34c3d08d1ba1a7b437fa0884df/numpy-2.4.4-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ad2e2ef14e0b04e544ea2fa0a36463f847f113d314aa02e5b402fdf910ef309e", size = 15727691, upload-time = "2026-03-29T13:20:06.004Z" }, + { url = "https://files.pythonhosted.org/packages/e6/db/338535d9b152beabeb511579598418ba0212ce77cf9718edd70262cc4370/numpy-2.4.4-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a285b3b96f951841799528cd1f4f01cd70e7e0204b4abebac9463eecfcf2a40", size = 16681241, upload-time = "2026-03-29T13:20:09.417Z" }, + { url = "https://files.pythonhosted.org/packages/e2/a9/ad248e8f58beb7a0219b413c9c7d8151c5d285f7f946c3e26695bdbbe2df/numpy-2.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f8474c4241bc18b750be2abea9d7a9ec84f46ef861dbacf86a4f6e043401f79e", size = 17085767, upload-time = "2026-03-29T13:20:13.126Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/1a/3b88ccd3694681356f70da841630e4725a7264d6a885c8d442a697e1146b/numpy-2.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4e874c976154687c1f71715b034739b45c7711bec81db01914770373d125e392", size = 18403169, upload-time = "2026-03-29T13:20:17.096Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c9/fcfd5d0639222c6eac7f304829b04892ef51c96a75d479214d77e3ce6e33/numpy-2.4.4-cp313-cp313t-win32.whl", hash = "sha256:9c585a1790d5436a5374bac930dad6ed244c046ed91b2b2a3634eb2971d21008", size = 6083477, upload-time = "2026-03-29T13:20:20.195Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e3/3938a61d1c538aaec8ed6fd6323f57b0c2d2d2219512434c5c878db76553/numpy-2.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:93e15038125dc1e5345d9b5b68aa7f996ec33b98118d18c6ca0d0b7d6198b7e8", size = 12457487, upload-time = "2026-03-29T13:20:22.946Z" }, + { url = "https://files.pythonhosted.org/packages/97/6a/7e345032cc60501721ef94e0e30b60f6b0bd601f9174ebd36389a2b86d40/numpy-2.4.4-cp313-cp313t-win_arm64.whl", hash = "sha256:0dfd3f9d3adbe2920b68b5cd3d51444e13a10792ec7154cd0a2f6e74d4ab3233", size = 10292002, upload-time = "2026-03-29T13:20:25.909Z" }, + { url = "https://files.pythonhosted.org/packages/6e/06/c54062f85f673dd5c04cbe2f14c3acb8c8b95e3384869bb8cc9bff8cb9df/numpy-2.4.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f169b9a863d34f5d11b8698ead99febeaa17a13ca044961aa8e2662a6c7766a0", size = 16684353, upload-time = "2026-03-29T13:20:29.504Z" }, + { url = "https://files.pythonhosted.org/packages/4c/39/8a320264a84404c74cc7e79715de85d6130fa07a0898f67fb5cd5bd79908/numpy-2.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2483e4584a1cb3092da4470b38866634bafb223cbcd551ee047633fd2584599a", size = 14704914, upload-time = "2026-03-29T13:20:33.547Z" }, + { url = "https://files.pythonhosted.org/packages/91/fb/287076b2614e1d1044235f50f03748f31fa287e3dbe6abeb35cdfa351eca/numpy-2.4.4-cp314-cp314-macosx_14_0_arm64.whl", hash = 
"sha256:2d19e6e2095506d1736b7d80595e0f252d76b89f5e715c35e06e937679ea7d7a", size = 5210005, upload-time = "2026-03-29T13:20:36.45Z" }, + { url = "https://files.pythonhosted.org/packages/63/eb/fcc338595309910de6ecabfcef2419a9ce24399680bfb149421fa2df1280/numpy-2.4.4-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:6a246d5914aa1c820c9443ddcee9c02bec3e203b0c080349533fae17727dfd1b", size = 6544974, upload-time = "2026-03-29T13:20:39.014Z" }, + { url = "https://files.pythonhosted.org/packages/44/5d/e7e9044032a716cdfaa3fba27a8e874bf1c5f1912a1ddd4ed071bf8a14a6/numpy-2.4.4-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:989824e9faf85f96ec9c7761cd8d29c531ad857bfa1daa930cba85baaecf1a9a", size = 15684591, upload-time = "2026-03-29T13:20:42.146Z" }, + { url = "https://files.pythonhosted.org/packages/98/7c/21252050676612625449b4807d6b695b9ce8a7c9e1c197ee6216c8a65c7c/numpy-2.4.4-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27a8d92cd10f1382a67d7cf4db7ce18341b66438bdd9f691d7b0e48d104c2a9d", size = 16637700, upload-time = "2026-03-29T13:20:46.204Z" }, + { url = "https://files.pythonhosted.org/packages/b1/29/56d2bbef9465db24ef25393383d761a1af4f446a1df9b8cded4fe3a5a5d7/numpy-2.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e44319a2953c738205bf3354537979eaa3998ed673395b964c1176083dd46252", size = 17035781, upload-time = "2026-03-29T13:20:50.242Z" }, + { url = "https://files.pythonhosted.org/packages/e3/2b/a35a6d7589d21f44cea7d0a98de5ddcbb3d421b2622a5c96b1edf18707c3/numpy-2.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e892aff75639bbef0d2a2cfd55535510df26ff92f63c92cd84ef8d4ba5a5557f", size = 18362959, upload-time = "2026-03-29T13:20:54.019Z" }, + { url = "https://files.pythonhosted.org/packages/64/c9/d52ec581f2390e0f5f85cbfd80fb83d965fc15e9f0e1aec2195faa142cde/numpy-2.4.4-cp314-cp314-win32.whl", hash = "sha256:1378871da56ca8943c2ba674530924bb8ca40cd228358a3b5f302ad60cf875fc", size = 6008768, 
upload-time = "2026-03-29T13:20:56.912Z" }, + { url = "https://files.pythonhosted.org/packages/fa/22/4cc31a62a6c7b74a8730e31a4274c5dc80e005751e277a2ce38e675e4923/numpy-2.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:715d1c092715954784bc79e1174fc2a90093dc4dc84ea15eb14dad8abdcdeb74", size = 12449181, upload-time = "2026-03-29T13:20:59.548Z" }, + { url = "https://files.pythonhosted.org/packages/70/2e/14cda6f4d8e396c612d1bf97f22958e92148801d7e4f110cabebdc0eef4b/numpy-2.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:2c194dd721e54ecad9ad387c1d35e63dce5c4450c6dc7dd5611283dda239aabb", size = 10496035, upload-time = "2026-03-29T13:21:02.524Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e8/8fed8c8d848d7ecea092dc3469643f9d10bc3a134a815a3b033da1d2039b/numpy-2.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2aa0613a5177c264ff5921051a5719d20095ea586ca88cc802c5c218d1c67d3e", size = 14824958, upload-time = "2026-03-29T13:21:05.671Z" }, + { url = "https://files.pythonhosted.org/packages/05/1a/d8007a5138c179c2bf33ef44503e83d70434d2642877ee8fbb230e7c0548/numpy-2.4.4-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:42c16925aa5a02362f986765f9ebabf20de75cdefdca827d14315c568dcab113", size = 5330020, upload-time = "2026-03-29T13:21:08.635Z" }, + { url = "https://files.pythonhosted.org/packages/99/64/ffb99ac6ae93faf117bcbd5c7ba48a7f45364a33e8e458545d3633615dda/numpy-2.4.4-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:874f200b2a981c647340f841730fc3a2b54c9d940566a3c4149099591e2c4c3d", size = 6650758, upload-time = "2026-03-29T13:21:10.949Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6e/795cc078b78a384052e73b2f6281ff7a700e9bf53bcce2ee579d4f6dd879/numpy-2.4.4-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c9b39d38a9bd2ae1becd7eac1303d031c5c110ad31f2b319c6e7d98b135c934d", size = 15729948, upload-time = "2026-03-29T13:21:14.047Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/86/2acbda8cc2af5f3d7bfc791192863b9e3e19674da7b5e533fded124d1299/numpy-2.4.4-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b268594bccac7d7cf5844c7732e3f20c50921d94e36d7ec9b79e9857694b1b2f", size = 16679325, upload-time = "2026-03-29T13:21:17.561Z" }, + { url = "https://files.pythonhosted.org/packages/bc/59/cafd83018f4aa55e0ac6fa92aa066c0a1877b77a615ceff1711c260ffae8/numpy-2.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ac6b31e35612a26483e20750126d30d0941f949426974cace8e6b5c58a3657b0", size = 17084883, upload-time = "2026-03-29T13:21:21.106Z" }, + { url = "https://files.pythonhosted.org/packages/f0/85/a42548db84e65ece46ab2caea3d3f78b416a47af387fcbb47ec28e660dc2/numpy-2.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8e3ed142f2728df44263aaf5fb1f5b0b99f4070c553a0d7f033be65338329150", size = 18403474, upload-time = "2026-03-29T13:21:24.828Z" }, + { url = "https://files.pythonhosted.org/packages/ed/ad/483d9e262f4b831000062e5d8a45e342166ec8aaa1195264982bca267e62/numpy-2.4.4-cp314-cp314t-win32.whl", hash = "sha256:dddbbd259598d7240b18c9d87c56a9d2fb3b02fe266f49a7c101532e78c1d871", size = 6155500, upload-time = "2026-03-29T13:21:28.205Z" }, + { url = "https://files.pythonhosted.org/packages/c7/03/2fc4e14c7bd4ff2964b74ba90ecb8552540b6315f201df70f137faa5c589/numpy-2.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:a7164afb23be6e37ad90b2f10426149fd75aee07ca55653d2aa41e66c4ef697e", size = 12637755, upload-time = "2026-03-29T13:21:31.107Z" }, + { url = "https://files.pythonhosted.org/packages/58/78/548fb8e07b1a341746bfbecb32f2c268470f45fa028aacdbd10d9bc73aab/numpy-2.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:ba203255017337d39f89bdd58417f03c4426f12beed0440cfd933cb15f8669c7", size = 10566643, upload-time = "2026-03-29T13:21:34.339Z" }, +] + +[[package]] +name = "packaging" +version = "26.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/df/de/0d2b39fb4af88a0258f3bac87dfcbb48e73fbdea4a2ed0e2213f9a4c2f9a/packaging-26.1.tar.gz", hash = "sha256:f042152b681c4bfac5cae2742a55e103d27ab2ec0f3d88037136b6bfe7c9c5de", size = 215519, upload-time = "2026-04-14T21:12:49.362Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/c2/920ef838e2f0028c8262f16101ec09ebd5969864e5a64c4c05fad0617c56/packaging-26.1-py3-none-any.whl", hash = "sha256:5d9c0669c6285e491e0ced2eee587eaf67b670d94a19e94e3984a481aba6802f", size = 95831, upload-time = "2026-04-14T21:12:47.56Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pydantic" +version = "2.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/6b/1353beb3d1cd5cf61cdec5b6f87a9872399de3bc5cae0b7ce07ff4de2ab0/pydantic-2.13.1.tar.gz", hash = "sha256:a0f829b279ddd1e39291133fe2539d2aa46cc6b150c1706a270ff0879e3774d2", size = 843746, upload-time = "2026-04-15T14:57:19.398Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/5a/2225f4c176dbfed0d809e848b50ef08f70e61daa667b7fa14b0d311ae44d/pydantic-2.13.1-py3-none-any.whl", hash = 
"sha256:9557ecc2806faaf6037f85b1fbd963d01e30511c48085f0d573650fdeaad378a", size = 471917, upload-time = "2026-04-15T14:57:17.277Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.46.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/93/f97a86a7eb28faa1d038af2fd5d6166418b4433659108a4c311b57128b2d/pydantic_core-2.46.1.tar.gz", hash = "sha256:d408153772d9f298098fb5d620f045bdf0f017af0d5cb6e309ef8c205540caa4", size = 471230, upload-time = "2026-04-15T14:49:34.52Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/fb/caaa8ee23861c170f07dbd58fc2be3a2c02a32637693cbb23eef02e84808/pydantic_core-2.46.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ae8c8c5eb4c796944f3166f2f0dab6c761c2c2cc5bd20e5f692128be8600b9a4", size = 2119472, upload-time = "2026-04-15T14:49:45.946Z" }, + { url = "https://files.pythonhosted.org/packages/fa/61/bcffaa52894489ff89e5e1cdde67429914bf083c0db7296bef153020f786/pydantic_core-2.46.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:daba6f5f5b986aa0682623a1a4f8d1ecb0ec00ce09cfa9ca71a3b742bc383e3a", size = 1951230, upload-time = "2026-04-15T14:52:27.646Z" }, + { url = "https://files.pythonhosted.org/packages/f8/95/80d2f43a2a1a1e3220fd329d614aa5a39e0a75d24353a3aaf226e605f1c2/pydantic_core-2.46.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0265f3a2460539ecc97817a80c7a23c458dd84191229b655522a2674f701f14e", size = 1976394, upload-time = "2026-04-15T14:50:32.742Z" }, + { url = "https://files.pythonhosted.org/packages/8d/31/2c5b1a207926b5fc1961a2d11da940129bc3841c36cc4df03014195b2966/pydantic_core-2.46.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb16c0156c4b4e94aa3719138cc43c53d30ff21126b6a3af63786dcc0757b56e", size = 2068455, upload-time = "2026-04-15T14:50:01.286Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/36/c6aa07274359a51ac62895895325ce90107e811c6cea39d2617a99ef10d7/pydantic_core-2.46.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1b42d80fad8e4b283e1e4138f1142f0d038c46d137aad2f9824ad9086080dd41", size = 2239049, upload-time = "2026-04-15T14:53:02.216Z" }, + { url = "https://files.pythonhosted.org/packages/0a/3f/77cdd0db8bddc714842dfd93f737c863751cf02001c993341504f6b0cd53/pydantic_core-2.46.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9cced85896d5b795293bc36b7e2fb0347a36c828551b50cbba510510d928548c", size = 2318681, upload-time = "2026-04-15T14:50:04.539Z" }, + { url = "https://files.pythonhosted.org/packages/a1/a3/09d929a40e6727274b0b500ad06e1b3f35d4f4665ae1c8ba65acbb17e9b5/pydantic_core-2.46.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a641cb1e74b44c418adaf9f5f450670dbec53511f030d8cde8d8accb66edc363", size = 2096527, upload-time = "2026-04-15T14:53:14.766Z" }, + { url = "https://files.pythonhosted.org/packages/89/ae/544c3a82456ebc254a9fcbe2715bab76c70acf9d291aaea24391147943e4/pydantic_core-2.46.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:191e7a122ab14eb12415fe3f92610fc06c7f1d2b4b9101d24d490d447ac92506", size = 2170407, upload-time = "2026-04-15T14:51:27.138Z" }, + { url = "https://files.pythonhosted.org/packages/9d/ce/0dfd881c7af4c522f47b325707bd9a2cdcf4f40e4f2fd30df0e9a3e8d393/pydantic_core-2.46.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4fe4ff660f7938b5d92f21529ce331b011aa35e481ab64b7cd03f52384e544bb", size = 2188578, upload-time = "2026-04-15T14:50:39.655Z" }, + { url = "https://files.pythonhosted.org/packages/a1/e9/980ea2a6d5114dd1a62ecc5f56feb3d34555f33bd11043f042e5f7f0724a/pydantic_core-2.46.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:18fcea085b3adc3868d8d19606da52d7a52d8bccd8e28652b0778dbe5e6a6660", size = 2188959, upload-time = "2026-04-15T14:52:42.243Z" }, + { 
url = "https://files.pythonhosted.org/packages/e7/f1/595e0f50f4bfc56cde2fe558f2b0978f29f2865da894c6226231e17464a5/pydantic_core-2.46.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:e8e589e7c9466e022d79e13c5764c2239b2e5a7993ba727822b021234f89b56b", size = 2339973, upload-time = "2026-04-15T14:52:10.642Z" }, + { url = "https://files.pythonhosted.org/packages/49/44/be9f979a6ab6b8c36865ccd92c3a38a760c66055e1f384665f35525134c4/pydantic_core-2.46.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f78eb3d4027963bdc9baccd177f02a98bf8714bc51fe17153d8b51218918b5bc", size = 2385228, upload-time = "2026-04-15T14:51:00.77Z" }, + { url = "https://files.pythonhosted.org/packages/5b/d4/c826cd711787d240219f01d0d3ca116cb55516b8b95277820aa9c85e1882/pydantic_core-2.46.1-cp312-cp312-win32.whl", hash = "sha256:54fe30c20cab03844dc63bdc6ddca67f74a2eb8482df69c1e5f68396856241be", size = 1978828, upload-time = "2026-04-15T14:50:29.362Z" }, + { url = "https://files.pythonhosted.org/packages/22/05/8a1fcf8181be4c7a9cfc34e5fbf2d9c3866edc9dfd3c48d5401806e0a523/pydantic_core-2.46.1-cp312-cp312-win_amd64.whl", hash = "sha256:aea4e22ed4c53f2774221435e39969a54d2e783f4aee902cdd6c8011415de893", size = 2070015, upload-time = "2026-04-15T14:49:47.301Z" }, + { url = "https://files.pythonhosted.org/packages/61/d5/fea36ad2882b99c174ef4ffbc7ea6523f6abe26060fbc1f77d6441670232/pydantic_core-2.46.1-cp312-cp312-win_arm64.whl", hash = "sha256:f76fb49c34b4d66aa6e552ce9e852ea97a3a06301a9f01ae82f23e449e3a55f8", size = 2030176, upload-time = "2026-04-15T14:50:47.307Z" }, + { url = "https://files.pythonhosted.org/packages/ff/d2/bda39bad2f426cb5078e6ad28076614d3926704196efe0d7a2a19a99025d/pydantic_core-2.46.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:cdc8a5762a9c4b9d86e204d555444e3227507c92daba06259ee66595834de47a", size = 2119092, upload-time = "2026-04-15T14:49:50.392Z" }, + { url = 
"https://files.pythonhosted.org/packages/ee/f3/69631e64d69cb3481494b2bddefe0ddd07771209f74e9106d066f9138c2a/pydantic_core-2.46.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ba381dfe9c85692c566ecb60fa5a77a697a2a8eebe274ec5e4d6ec15fafad799", size = 1951400, upload-time = "2026-04-15T14:51:06.588Z" }, + { url = "https://files.pythonhosted.org/packages/53/1c/21cb3db6ae997df31be8e91f213081f72ffa641cb45c89b8a1986832b1f9/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1593d8de98207466dc070118322fef68307a0cc6a5625e7b386f6fdae57f9ab6", size = 1976864, upload-time = "2026-04-15T14:50:54.804Z" }, + { url = "https://files.pythonhosted.org/packages/91/9c/05c819f734318ce5a6ca24da300d93696c105af4adb90494ee571303afd8/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8262c74a1af5b0fdf795f5537f7145785a63f9fbf9e15405f547440c30017ed8", size = 2066669, upload-time = "2026-04-15T14:51:42.346Z" }, + { url = "https://files.pythonhosted.org/packages/cb/23/fadddf1c7f2f517f58731aea9b35c914e6005250f08dac9b8e53904cdbaa/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b88949a24182e83fbbb3f7ca9b7858d0d37b735700ea91081434b7d37b3b444", size = 2238737, upload-time = "2026-04-15T14:50:45.558Z" }, + { url = "https://files.pythonhosted.org/packages/23/07/0cd4f95cb0359c8b1ec71e89c3777e7932c8dfeb9cd54740289f310aaead/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8f3708cd55537aeaf3fd0ea55df0d68d0da51dcb07cbc8508745b34acc4c6e0", size = 2316258, upload-time = "2026-04-15T14:51:08.471Z" }, + { url = "https://files.pythonhosted.org/packages/0c/40/6fc24c3766a19c222a0d60d652b78f0283339d4cd4c173fab06b7ee76571/pydantic_core-2.46.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f79292435fff1d4f0c18d9cfaf214025cc88e4f5104bfaed53f173621da1c743", size = 2097474, upload-time = 
"2026-04-15T14:49:56.543Z" }, + { url = "https://files.pythonhosted.org/packages/4b/af/f39795d1ce549e35d0841382b9c616ae211caffb88863147369a8d74fba9/pydantic_core-2.46.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:a2e607aeb59cf4575bb364470288db3b9a1f0e7415d053a322e3e154c1a0802e", size = 2168383, upload-time = "2026-04-15T14:51:29.269Z" }, + { url = "https://files.pythonhosted.org/packages/e6/32/0d563f74582795779df6cc270c3fc220f49f4daf7860d74a5a6cda8491ff/pydantic_core-2.46.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec5ca190b75878a9f6ae1fc8f5eb678497934475aef3d93204c9fa01e97370b6", size = 2186182, upload-time = "2026-04-15T14:50:19.097Z" }, + { url = "https://files.pythonhosted.org/packages/5c/07/1c10d5ce312fc4cf86d1e50bdcdbb8ef248409597b099cab1b4bb3a093f7/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:1f80535259dcdd517d7b8ca588d5ca24b4f337228e583bebedf7a3adcdf5f721", size = 2187859, upload-time = "2026-04-15T14:49:22.974Z" }, + { url = "https://files.pythonhosted.org/packages/92/01/e1f62d4cb39f0913dbf5c95b9b119ef30ddba9493dff8c2b012f0cdd67dc/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:24820b3c82c43df61eca30147e42853e6c127d8b868afdc0c162df829e011eb4", size = 2338372, upload-time = "2026-04-15T14:49:53.316Z" }, + { url = "https://files.pythonhosted.org/packages/44/ed/218dfeea6127fb1781a6ceca241ec6edf00e8a8933ff331af2215975a534/pydantic_core-2.46.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:f12794b1dd8ac9fb66619e0b3a0427189f5d5638e55a3de1385121a9b7bf9b39", size = 2384039, upload-time = "2026-04-15T14:53:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/6c/1e/011e763cd059238249fbd5780e0f8d0b04b47f86c8925e22784f3e5fc977/pydantic_core-2.46.1-cp313-cp313-win32.whl", hash = "sha256:9bc09aed935cdf50f09e908923f9efbcca54e9244bd14a5a0e2a6c8d2c21b4e9", size = 1977943, upload-time = "2026-04-15T14:52:17.969Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/06/b559a490d3ed106e9b1777b8d5c8112dd8d31716243cd662616f66c1f8ea/pydantic_core-2.46.1-cp313-cp313-win_amd64.whl", hash = "sha256:fac2d6c8615b8b42bee14677861ba09d56ee076ba4a65cfb9c3c3d0cc89042f2", size = 2068729, upload-time = "2026-04-15T14:53:07.288Z" }, + { url = "https://files.pythonhosted.org/packages/9f/52/32a198946e2e19508532aa9da02a61419eb15bd2d96bab57f810f2713e31/pydantic_core-2.46.1-cp313-cp313-win_arm64.whl", hash = "sha256:f978329f12ace9f3cb814a5e44d98bbeced2e36f633132bafa06d2d71332e33e", size = 2029550, upload-time = "2026-04-15T14:52:22.707Z" }, + { url = "https://files.pythonhosted.org/packages/bd/2b/6793fe89ab66cb2d3d6e5768044eab80bba1d0fae8fd904d0a1574712e17/pydantic_core-2.46.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:9917cb61effac7ec0f448ef491ec7584526d2193be84ff981e85cbf18b68c42a", size = 2118110, upload-time = "2026-04-15T14:50:52.947Z" }, + { url = "https://files.pythonhosted.org/packages/d2/87/e9a905ddfcc2fd7bd862b340c02be6ab1f827922822d425513635d0ac774/pydantic_core-2.46.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0e749679ca9f8a9d0bff95fb7f6b57bb53f2207fa42ffcc1ec86de7e0029ab89", size = 1948645, upload-time = "2026-04-15T14:51:55.577Z" }, + { url = "https://files.pythonhosted.org/packages/15/23/26e67f86ed62ac9d6f7f3091ee5220bf14b5ac36fb811851d601365ef896/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2ecacee70941e233a2dad23f7796a06f86cc10cc2fbd1c97c7dd5b5a79ffa4f", size = 1977576, upload-time = "2026-04-15T14:49:37.58Z" }, + { url = "https://files.pythonhosted.org/packages/b8/78/813c13c0de323d4de54ee2e6fdd69a0271c09ac8dd65a8a000931aa487a5/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:647d0a2475b8ed471962eed92fa69145b864942f9c6daa10f95ac70676637ae7", size = 2060358, upload-time = "2026-04-15T14:51:40.087Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/5e/4caf2a15149271fbd2b4d968899a450853c800b85152abcf54b11531417f/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac9cde61965b0697fce6e6cc372df9e1ad93734828aac36e9c1c42a22ad02897", size = 2235980, upload-time = "2026-04-15T14:50:34.535Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c1/a2cdabb5da6f5cb63a3558bcafffc20f790fa14ccffbefbfb1370fadc93f/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a2eb0864085f8b641fb3f54a2fb35c58aff24b175b80bc8a945050fcde03204", size = 2316800, upload-time = "2026-04-15T14:52:46.999Z" }, + { url = "https://files.pythonhosted.org/packages/76/fd/19d711e4e9331f9d77f222bffc202bf30ea0d74f6419046376bb82f244c8/pydantic_core-2.46.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b83ce9fede4bc4fb649281d9857f06d30198b8f70168f18b987518d713111572", size = 2101762, upload-time = "2026-04-15T14:49:24.278Z" }, + { url = "https://files.pythonhosted.org/packages/dc/64/ce95625448e1a4e219390a2923fd594f3fa368599c6b42ac71a5df7238c9/pydantic_core-2.46.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:cb33192753c60f269d2f4a1db8253c95b0df6e04f2989631a8cc1b0f4f6e2e92", size = 2167737, upload-time = "2026-04-15T14:50:41.637Z" }, + { url = "https://files.pythonhosted.org/packages/ad/31/413572d03ca3e73b408f00f54418b91a8be6401451bc791eaeff210328e5/pydantic_core-2.46.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:96611d51f953f87e1ae97637c01ee596a08b7f494ea00a5afb67ea6547b9f53b", size = 2185658, upload-time = "2026-04-15T14:51:46.799Z" }, + { url = "https://files.pythonhosted.org/packages/36/09/e4f581353bdf3f0c7de8a8b27afd14fc761da29d78146376315a6fedc487/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:9b176fa55f9107db5e6c86099aa5bfd934f1d3ba6a8b43f714ddeebaed3f42b7", size = 2184154, upload-time = "2026-04-15T14:52:49.629Z" }, + { 
url = "https://files.pythonhosted.org/packages/1a/a4/d0d52849933f5a4bf1ad9d8da612792f96469b37e286a269e3ee9c60bbb1/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:79a59f63a4ce4f3330e27e6f3ce281dd1099453b637350e97d7cf24c207cd120", size = 2332379, upload-time = "2026-04-15T14:49:55.009Z" }, + { url = "https://files.pythonhosted.org/packages/30/93/25bfb08fdbef419f73290e573899ce938a327628c34e8f3a4bafeea30126/pydantic_core-2.46.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:f200fce071808a385a314b7343f5e3688d7c45746be3d64dc71ee2d3e2a13268", size = 2377964, upload-time = "2026-04-15T14:51:59.649Z" }, + { url = "https://files.pythonhosted.org/packages/15/36/b777766ff83fef1cf97473d64764cd44f38e0d8c269ed06faace9ae17666/pydantic_core-2.46.1-cp314-cp314-win32.whl", hash = "sha256:3a07eccc0559fb9acc26d55b16bf8ebecd7f237c74a9e2c5741367db4e6d8aff", size = 1976450, upload-time = "2026-04-15T14:51:57.665Z" }, + { url = "https://files.pythonhosted.org/packages/7b/4b/4cd19d2437acfc18ca166db5a2067040334991eb862c4ecf2db098c91fbf/pydantic_core-2.46.1-cp314-cp314-win_amd64.whl", hash = "sha256:1706d270309ac7d071ffe393988c471363705feb3d009186e55d17786ada9622", size = 2067750, upload-time = "2026-04-15T14:49:38.941Z" }, + { url = "https://files.pythonhosted.org/packages/7f/a0/490751c0ef8f5b27aae81731859aed1508e72c1a9b5774c6034269db773b/pydantic_core-2.46.1-cp314-cp314-win_arm64.whl", hash = "sha256:22d4e7457ade8af06528012f382bc994a97cc2ce6e119305a70b3deff1e409d6", size = 2021109, upload-time = "2026-04-15T14:50:27.728Z" }, + { url = "https://files.pythonhosted.org/packages/36/3a/2a018968245fffd25d5f1972714121ad309ff2de19d80019ad93494844f9/pydantic_core-2.46.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:607ff9db0b7e2012e7eef78465e69f9a0d7d1c3e7c6a84cf0c4011db0fcc3feb", size = 2111548, upload-time = "2026-04-15T14:52:08.273Z" }, + { url = 
"https://files.pythonhosted.org/packages/77/5b/4103b6192213217e874e764e5467d2ff10d8873c1147d01fa432ac281880/pydantic_core-2.46.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8cda3eacaea13bd02a1bea7e457cc9fc30b91c5a91245cef9b215140f80dd78c", size = 1926745, upload-time = "2026-04-15T14:50:03.045Z" }, + { url = "https://files.pythonhosted.org/packages/c3/70/602a667cf4be4bec6c3334512b12ae4ea79ce9bfe41dc51be1fd34434453/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9493279cdc7997fe19e5ed9b41f30cbc3806bd4722adb402fedb6f6d41bd72a", size = 1965922, upload-time = "2026-04-15T14:51:12.555Z" }, + { url = "https://files.pythonhosted.org/packages/a9/24/06a89ce5323e755b7d2812189f9706b87aaebe49b34d247b380502f7992c/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3644e5e10059999202355b6c6616e624909e23773717d8f76deb8a6e2a72328c", size = 2043221, upload-time = "2026-04-15T14:51:18.995Z" }, + { url = "https://files.pythonhosted.org/packages/2c/6e/b1d9ad907d9d76964903903349fd2e33c87db4b993cc44713edcad0fc488/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ad6c9de57683e26c92730991960c0c3571b8053263b042de2d3e105930b2767", size = 2243655, upload-time = "2026-04-15T14:50:10.718Z" }, + { url = "https://files.pythonhosted.org/packages/ef/73/787abfaad51174641abb04c8aa125322279b40ad7ce23c495f5a69f76554/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:557ebaa27c7617e7088002318c679a8ce685fa048523417cd1ca52b7f516d955", size = 2295976, upload-time = "2026-04-15T14:53:09.694Z" }, + { url = "https://files.pythonhosted.org/packages/56/0b/b7c5a631b6d5153d4a1ea4923b139aea256dc3bd99c8e6c7b312c7733146/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cd37e39b22b796ba0298fe81e9421dd7b65f97acfbb0fb19b33ffdda7b9a7b4", size = 2103439, upload-time = 
"2026-04-15T14:50:08.32Z" }, + { url = "https://files.pythonhosted.org/packages/2a/3f/952ee470df69e5674cdec1cbde22331adf643b5cc2ff79f4292d80146ee4/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:6689443b59714992e67d62505cdd2f952d6cf1c14cc9fd9aeec6719befc6f23b", size = 2132871, upload-time = "2026-04-15T14:50:24.445Z" }, + { url = "https://files.pythonhosted.org/packages/e3/8b/1dea3b1e683c60c77a60f710215f90f486755962aa8939dbcb7c0f975ac3/pydantic_core-2.46.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f32c41ca1e3456b5dd691827b7c1433c12d5f0058cc186afbb3615bc07d97b8", size = 2168658, upload-time = "2026-04-15T14:52:24.897Z" }, + { url = "https://files.pythonhosted.org/packages/67/97/32ae283810910d274d5ba9f48f856f5f2f612410b78b249f302d297816f5/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:88cd1355578852db83954dc36e4f58f299646916da976147c20cf6892ba5dc43", size = 2171184, upload-time = "2026-04-15T14:52:34.854Z" }, + { url = "https://files.pythonhosted.org/packages/a2/57/c9a855527fe56c2072070640221f53095b0b19eaf651f3c77643c9cabbe3/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:a170fefdb068279a473cc9d34848b85e61d68bfcc2668415b172c5dfc6f213bf", size = 2316573, upload-time = "2026-04-15T14:52:12.871Z" }, + { url = "https://files.pythonhosted.org/packages/37/b3/14c39ffc7399819c5448007c7bcb4e6da5669850cfb7dcbb727594290b48/pydantic_core-2.46.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:556a63ff1006934dba4eed7ea31b58274c227e29298ec398e4275eda4b905e95", size = 2378340, upload-time = "2026-04-15T14:51:02.619Z" }, + { url = "https://files.pythonhosted.org/packages/01/55/a37461fbb29c053ea4e62cfc5c2d56425cb5efbef8316e63f6d84ae45718/pydantic_core-2.46.1-cp314-cp314t-win32.whl", hash = "sha256:3b146d8336a995f7d7da6d36e4a779b7e7dff2719ac00a1eb8bd3ded00bec87b", size = 1960843, upload-time = "2026-04-15T14:52:06.103Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/d7/97e1221197d17a27f768363f87ec061519eeeed15bbd315d2e9d1429ff03/pydantic_core-2.46.1-cp314-cp314t-win_amd64.whl", hash = "sha256:f1bc856c958e6fe9ec071e210afe6feb695f2e2e81fd8d2b102f558d364c4c17", size = 2048696, upload-time = "2026-04-15T14:52:52.154Z" }, + { url = "https://files.pythonhosted.org/packages/19/d5/4eac95255c7d35094b46a32ec1e4d80eac94729c694726ee1d69948bd5f0/pydantic_core-2.46.1-cp314-cp314t-win_arm64.whl", hash = "sha256:21a5bfd8a1aa4de60494cdf66b0c912b1495f26a8899896040021fbd6038d989", size = 2022343, upload-time = "2026-04-15T14:49:49.036Z" }, + { url = "https://files.pythonhosted.org/packages/f4/97/95de673a1356a88b2efdaa120eb6af357a81555c35f6809a7a1423ff7aef/pydantic_core-2.46.1-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:5f9107a24a4bc00293434dfa95cf8968751ad0dd703b26ea83a75a56f7326041", size = 2107564, upload-time = "2026-04-15T14:50:49.14Z" }, + { url = "https://files.pythonhosted.org/packages/00/fc/a7c16d85211ea9accddc693b7d049f20b0c06440d9264d1e1c074394ee6c/pydantic_core-2.46.1-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:2b1801ba99876984d0a03362782819238141c4d0f3f67f69093663691332fc35", size = 1939925, upload-time = "2026-04-15T14:50:36.188Z" }, + { url = "https://files.pythonhosted.org/packages/2e/23/87841169d77820ddabeb81d82002c95dcb82163846666d74f5bdeeaec750/pydantic_core-2.46.1-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7fd82a91a20ed6d54fa8c91e7a98255b1ff45bf09b051bfe7fe04eb411e232e", size = 1995313, upload-time = "2026-04-15T14:50:22.538Z" }, + { url = "https://files.pythonhosted.org/packages/ea/96/b46609359a354fa9cd336fc5d93334f1c358b756cc81e4b397347a88fa6f/pydantic_core-2.46.1-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f135bf07c92c93def97008bc4496d16934da9efefd7204e5f22a2c92523cb1f", size = 2151197, upload-time = 
"2026-04-15T14:51:22.925Z" }, +] + +[[package]] +name = "pydantic-settings" +version = "2.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" }, +] + +[[package]] +name = "pygments" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, 
+ { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, + { 
url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = 
"https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] + +[[package]] +name = "regex" +version = "2026.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/0e/3a246dbf05666918bd3664d9d787f84a9108f6f43cc953a077e4a7dfdb7e/regex-2026.4.4.tar.gz", hash = "sha256:e08270659717f6973523ce3afbafa53515c4dc5dcad637dc215b6fd50f689423", size = 416000, upload-time = "2026-04-03T20:56:28.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/28/b972a4d3df61e1d7bcf1b59fdb3cddef22f88b6be43f161bb41ebc0e4081/regex-2026.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c07ab8794fa929e58d97a0e1796b8b76f70943fa39df225ac9964615cf1f9d52", size = 490434, upload-time = "2026-04-03T20:53:40.219Z" }, + { url = "https://files.pythonhosted.org/packages/84/20/30041446cf6dc3e0eab344fc62770e84c23b6b68a3b657821f9f80cb69b4/regex-2026.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2c785939dc023a1ce4ec09599c032cc9933d258a998d16ca6f2b596c010940eb", size = 292061, upload-time = "2026-04-03T20:53:41.862Z" }, + { url = "https://files.pythonhosted.org/packages/62/c8/3baa06d75c98c46d4cc4262b71fd2edb9062b5665e868bca57859dadf93a/regex-2026.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b1ce5c81c9114f1ce2f9288a51a8fd3aeea33a0cc440c415bf02da323aa0a76", size = 289628, upload-time = "2026-04-03T20:53:43.701Z" }, + { url = "https://files.pythonhosted.org/packages/31/87/3accf55634caad8c0acab23f5135ef7d4a21c39f28c55c816ae012931408/regex-2026.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:760ef21c17d8e6a4fe8cf406a97cf2806a4df93416ccc82fc98d25b1c20425be", size = 796651, upload-time = "2026-04-03T20:53:45.379Z" }, + 
{ url = "https://files.pythonhosted.org/packages/f6/0c/aaa2c83f34efedbf06f61cb1942c25f6cf1ee3b200f832c4d05f28306c2e/regex-2026.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7088fcdcb604a4417c208e2169715800d28838fefd7455fbe40416231d1d47c1", size = 865916, upload-time = "2026-04-03T20:53:47.064Z" }, + { url = "https://files.pythonhosted.org/packages/d9/f6/8c6924c865124643e8f37823eca845dc27ac509b2ee58123685e71cd0279/regex-2026.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:07edca1ba687998968f7db5bc355288d0c6505caa7374f013d27356d93976d13", size = 912287, upload-time = "2026-04-03T20:53:49.422Z" }, + { url = "https://files.pythonhosted.org/packages/11/0e/a9f6f81013e0deaf559b25711623864970fe6a098314e374ccb1540a4152/regex-2026.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:993f657a7c1c6ec51b5e0ba97c9817d06b84ea5fa8d82e43b9405de0defdc2b9", size = 801126, upload-time = "2026-04-03T20:53:51.096Z" }, + { url = "https://files.pythonhosted.org/packages/71/61/3a0cc8af2dc0c8deb48e644dd2521f173f7e6513c6e195aad9aa8dd77ac5/regex-2026.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:2b69102a743e7569ebee67e634a69c4cb7e59d6fa2e1aa7d3bdbf3f61435f62d", size = 776788, upload-time = "2026-04-03T20:53:52.889Z" }, + { url = "https://files.pythonhosted.org/packages/64/0b/8bb9cbf21ef7dee58e49b0fdb066a7aded146c823202e16494a36777594f/regex-2026.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dac006c8b6dda72d86ea3d1333d45147de79a3a3f26f10c1cf9287ca4ca0ac3", size = 785184, upload-time = "2026-04-03T20:53:55.627Z" }, + { url = "https://files.pythonhosted.org/packages/99/c2/d3e80e8137b25ee06c92627de4e4d98b94830e02b3e6f81f3d2e3f504cf5/regex-2026.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:50a766ee2010d504554bfb5f578ed2e066898aa26411d57e6296230627cdefa0", size = 859913, 
upload-time = "2026-04-03T20:53:57.249Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/9d5d876157d969c804622456ef250017ac7a8f83e0e14f903b9e6df5ce95/regex-2026.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:9e2f5217648f68e3028c823df58663587c1507a5ba8419f4fdfc8a461be76043", size = 765732, upload-time = "2026-04-03T20:53:59.428Z" }, + { url = "https://files.pythonhosted.org/packages/82/80/b568935b4421388561c8ed42aff77247285d3ae3bb2a6ca22af63bae805e/regex-2026.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:39d8de85a08e32632974151ba59c6e9140646dcc36c80423962b1c5c0a92e244", size = 852152, upload-time = "2026-04-03T20:54:01.505Z" }, + { url = "https://files.pythonhosted.org/packages/39/29/f0f81217e21cd998245da047405366385d5c6072048038a3d33b37a79dc0/regex-2026.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:55d9304e0e7178dfb1e106c33edf834097ddf4a890e2f676f6c5118f84390f73", size = 789076, upload-time = "2026-04-03T20:54:03.323Z" }, + { url = "https://files.pythonhosted.org/packages/49/1d/1d957a61976ab9d4e767dd4f9d04b66cc0c41c5e36cf40e2d43688b5ae6f/regex-2026.4.4-cp312-cp312-win32.whl", hash = "sha256:04bb679bc0bde8a7bfb71e991493d47314e7b98380b083df2447cda4b6edb60f", size = 266700, upload-time = "2026-04-03T20:54:05.639Z" }, + { url = "https://files.pythonhosted.org/packages/c5/5c/bf575d396aeb58ea13b06ef2adf624f65b70fafef6950a80fc3da9cae3bc/regex-2026.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:db0ac18435a40a2543dbb3d21e161a6c78e33e8159bd2e009343d224bb03bb1b", size = 277768, upload-time = "2026-04-03T20:54:07.312Z" }, + { url = "https://files.pythonhosted.org/packages/c9/27/049df16ec6a6828ccd72add3c7f54b4df029669bea8e9817df6fff58be90/regex-2026.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:4ce255cc05c1947a12989c6db801c96461947adb7a59990f1360b5983fab4983", size = 270568, upload-time = "2026-04-03T20:54:09.484Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/83/c4373bc5f31f2cf4b66f9b7c31005bd87fe66f0dce17701f7db4ee79ee29/regex-2026.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:62f5519042c101762509b1d717b45a69c0139d60414b3c604b81328c01bd1943", size = 490273, upload-time = "2026-04-03T20:54:11.202Z" }, + { url = "https://files.pythonhosted.org/packages/46/f8/fe62afbcc3cf4ad4ac9adeaafd98aa747869ae12d3e8e2ac293d0593c435/regex-2026.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3790ba9fb5dd76715a7afe34dbe603ba03f8820764b1dc929dd08106214ed031", size = 291954, upload-time = "2026-04-03T20:54:13.412Z" }, + { url = "https://files.pythonhosted.org/packages/5a/92/4712b9fe6a33d232eeb1c189484b80c6c4b8422b90e766e1195d6e758207/regex-2026.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8fae3c6e795d7678963f2170152b0d892cf6aee9ee8afc8c45e6be38d5107fe7", size = 289487, upload-time = "2026-04-03T20:54:15.824Z" }, + { url = "https://files.pythonhosted.org/packages/88/2c/f83b93f85e01168f1070f045a42d4c937b69fdb8dd7ae82d307253f7e36e/regex-2026.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:298c3ec2d53225b3bf91142eb9691025bab610e0c0c51592dde149db679b3d17", size = 796646, upload-time = "2026-04-03T20:54:18.229Z" }, + { url = "https://files.pythonhosted.org/packages/df/55/61a2e17bf0c4dc57e11caf8dd11771280d8aaa361785f9e3bc40d653f4a7/regex-2026.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e9638791082eaf5b3ac112c587518ee78e083a11c4b28012d8fe2a0f536dfb17", size = 865904, upload-time = "2026-04-03T20:54:20.019Z" }, + { url = "https://files.pythonhosted.org/packages/45/32/1ac8ed1b5a346b5993a3d256abe0a0f03b0b73c8cc88d928537368ac65b6/regex-2026.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae3e764bd4c5ff55035dc82a8d49acceb42a5298edf6eb2fc4d328ee5dd7afae", size = 912304, upload-time = "2026-04-03T20:54:22.403Z" 
}, + { url = "https://files.pythonhosted.org/packages/26/47/2ee5c613ab546f0eddebf9905d23e07beb933416b1246c2d8791d01979b4/regex-2026.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ffa81f81b80047ba89a3c69ae6a0f78d06f4a42ce5126b0eb2a0a10ad44e0b2e", size = 801126, upload-time = "2026-04-03T20:54:24.308Z" }, + { url = "https://files.pythonhosted.org/packages/75/cd/41dacd129ca9fd20bd7d02f83e0fad83e034ac8a084ec369c90f55ef37e2/regex-2026.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f56ebf9d70305307a707911b88469213630aba821e77de7d603f9d2f0730687d", size = 776772, upload-time = "2026-04-03T20:54:26.319Z" }, + { url = "https://files.pythonhosted.org/packages/89/6d/5af0b588174cb5f46041fa7dd64d3fd5cd2fe51f18766703d1edc387f324/regex-2026.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:773d1dfd652bbffb09336abf890bfd64785c7463716bf766d0eb3bc19c8b7f27", size = 785228, upload-time = "2026-04-03T20:54:28.387Z" }, + { url = "https://files.pythonhosted.org/packages/b7/3b/f5a72b7045bd59575fc33bf1345f156fcfd5a8484aea6ad84b12c5a82114/regex-2026.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d51d20befd5275d092cdffba57ded05f3c436317ee56466c8928ac32d960edaf", size = 860032, upload-time = "2026-04-03T20:54:30.641Z" }, + { url = "https://files.pythonhosted.org/packages/39/a4/72a317003d6fcd7a573584a85f59f525dfe8f67e355ca74eb6b53d66a5e2/regex-2026.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:0a51cdb3c1e9161154f976cb2bef9894bc063ac82f31b733087ffb8e880137d0", size = 765714, upload-time = "2026-04-03T20:54:32.789Z" }, + { url = "https://files.pythonhosted.org/packages/25/1e/5672e16f34dbbcb2560cc7e6a2fbb26dfa8b270711e730101da4423d3973/regex-2026.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ae5266a82596114e41fb5302140e9630204c1b5f325c770bec654b95dd54b0aa", size = 852078, upload-time = "2026-04-03T20:54:34.546Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/0d/c813f0af7c6cc7ed7b9558bac2e5120b60ad0fa48f813e4d4bd55446f214/regex-2026.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c882cd92ec68585e9c1cf36c447ec846c0d94edd706fe59e0c198e65822fd23b", size = 789181, upload-time = "2026-04-03T20:54:36.642Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6d/a344608d1adbd2a95090ddd906cec09a11be0e6517e878d02a5123e0917f/regex-2026.4.4-cp313-cp313-win32.whl", hash = "sha256:05568c4fbf3cb4fa9e28e3af198c40d3237cf6041608a9022285fe567ec3ad62", size = 266690, upload-time = "2026-04-03T20:54:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/31/07/54049f89b46235ca6f45cd6c88668a7050e77d4a15555e47dd40fde75263/regex-2026.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:3384df51ed52db0bea967e21458ab0a414f67cdddfd94401688274e55147bb81", size = 277733, upload-time = "2026-04-03T20:54:40.11Z" }, + { url = "https://files.pythonhosted.org/packages/0e/21/61366a8e20f4d43fb597708cac7f0e2baadb491ecc9549b4980b2be27d16/regex-2026.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:acd38177bd2c8e69a411d6521760806042e244d0ef94e2dd03ecdaa8a3c99427", size = 270565, upload-time = "2026-04-03T20:54:41.883Z" }, + { url = "https://files.pythonhosted.org/packages/f1/1e/3a2b9672433bef02f5d39aa1143ca2c08f311c1d041c464a42be9ae648dc/regex-2026.4.4-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:f94a11a9d05afcfcfa640e096319720a19cc0c9f7768e1a61fceee6a3afc6c7c", size = 494126, upload-time = "2026-04-03T20:54:43.602Z" }, + { url = "https://files.pythonhosted.org/packages/4e/4b/c132a4f4fe18ad3340d89fcb56235132b69559136036b845be3c073142ed/regex-2026.4.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:36bcb9d6d1307ab629edc553775baada2aefa5c50ccc0215fbfd2afcfff43141", size = 293882, upload-time = "2026-04-03T20:54:45.41Z" }, + { url = 
"https://files.pythonhosted.org/packages/f4/5f/eaa38092ce7a023656280f2341dbbd4ad5f05d780a70abba7bb4f4bea54c/regex-2026.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:261c015b3e2ed0919157046d768774ecde57f03d8fa4ba78d29793447f70e717", size = 292334, upload-time = "2026-04-03T20:54:47.051Z" }, + { url = "https://files.pythonhosted.org/packages/5f/f6/dd38146af1392dac33db7074ab331cec23cced3759167735c42c5460a243/regex-2026.4.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c228cf65b4a54583763645dcd73819b3b381ca8b4bb1b349dee1c135f4112c07", size = 811691, upload-time = "2026-04-03T20:54:49.074Z" }, + { url = "https://files.pythonhosted.org/packages/7a/f0/dc54c2e69f5eeec50601054998ec3690d5344277e782bd717e49867c1d29/regex-2026.4.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:dd2630faeb6876fb0c287f664d93ddce4d50cd46c6e88e60378c05c9047e08ca", size = 871227, upload-time = "2026-04-03T20:54:51.035Z" }, + { url = "https://files.pythonhosted.org/packages/a1/af/cb16bd5dc61621e27df919a4449bbb7e5a1034c34d307e0a706e9cc0f3e3/regex-2026.4.4-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6a50ab11b7779b849472337191f3a043e27e17f71555f98d0092fa6d73364520", size = 917435, upload-time = "2026-04-03T20:54:52.994Z" }, + { url = "https://files.pythonhosted.org/packages/5c/71/8b260897f22996b666edd9402861668f45a2ca259f665ac029e6104a2d7d/regex-2026.4.4-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0734f63afe785138549fbe822a8cfeaccd1bae814c5057cc0ed5b9f2de4fc883", size = 816358, upload-time = "2026-04-03T20:54:54.884Z" }, + { url = "https://files.pythonhosted.org/packages/1c/60/775f7f72a510ef238254906c2f3d737fc80b16ca85f07d20e318d2eea894/regex-2026.4.4-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:c4ee50606cb1967db7e523224e05f32089101945f859928e65657a2cbb3d278b", size = 785549, upload-time = "2026-04-03T20:54:57.01Z" }, + { url = "https://files.pythonhosted.org/packages/58/42/34d289b3627c03cf381e44da534a0021664188fa49ba41513da0b4ec6776/regex-2026.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6c1818f37be3ca02dcb76d63f2c7aaba4b0dc171b579796c6fbe00148dfec6b1", size = 801364, upload-time = "2026-04-03T20:54:58.981Z" }, + { url = "https://files.pythonhosted.org/packages/fc/20/f6ecf319b382a8f1ab529e898b222c3f30600fcede7834733c26279e7465/regex-2026.4.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f5bfc2741d150d0be3e4a0401a5c22b06e60acb9aa4daa46d9e79a6dcd0f135b", size = 866221, upload-time = "2026-04-03T20:55:00.88Z" }, + { url = "https://files.pythonhosted.org/packages/92/6a/9f16d3609d549bd96d7a0b2aee1625d7512ba6a03efc01652149ef88e74d/regex-2026.4.4-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:504ffa8a03609a087cad81277a629b6ce884b51a24bd388a7980ad61748618ff", size = 772530, upload-time = "2026-04-03T20:55:03.213Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f6/aa9768bc96a4c361ac96419fbaf2dcdc33970bb813df3ba9b09d5d7b6d96/regex-2026.4.4-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:70aadc6ff12e4b444586e57fc30771f86253f9f0045b29016b9605b4be5f7dfb", size = 856989, upload-time = "2026-04-03T20:55:05.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/b4/c671db3556be2473ae3e4bb7a297c518d281452871501221251ea4ecba57/regex-2026.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f4f83781191007b6ef43b03debc35435f10cad9b96e16d147efe84a1d48bdde4", size = 803241, upload-time = "2026-04-03T20:55:07.162Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5c/83e3b1d89fa4f6e5a1bc97b4abd4a9a97b3c1ac7854164f694f5f0ba98a0/regex-2026.4.4-cp313-cp313t-win32.whl", hash = "sha256:e014a797de43d1847df957c0a2a8e861d1c17547ee08467d1db2c370b7568baa", size = 269921, upload-time = "2026-04-03T20:55:09.62Z" 
}, + { url = "https://files.pythonhosted.org/packages/28/07/077c387121f42cdb4d92b1301133c0d93b5709d096d1669ab847dda9fe2e/regex-2026.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:b15b88b0d52b179712632832c1d6e58e5774f93717849a41096880442da41ab0", size = 281240, upload-time = "2026-04-03T20:55:11.521Z" }, + { url = "https://files.pythonhosted.org/packages/9d/22/ead4a4abc7c59a4d882662aa292ca02c8b617f30b6e163bc1728879e9353/regex-2026.4.4-cp313-cp313t-win_arm64.whl", hash = "sha256:586b89cdadf7d67bf86ae3342a4dcd2b8d70a832d90c18a0ae955105caf34dbe", size = 272440, upload-time = "2026-04-03T20:55:13.365Z" }, + { url = "https://files.pythonhosted.org/packages/f0/f5/ed97c2dc47b5fbd4b73c0d7d75f9ebc8eca139f2bbef476bba35f28c0a77/regex-2026.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:2da82d643fa698e5e5210e54af90181603d5853cf469f5eedf9bfc8f59b4b8c7", size = 490343, upload-time = "2026-04-03T20:55:15.241Z" }, + { url = "https://files.pythonhosted.org/packages/80/e9/de4828a7385ec166d673a5790ad06ac48cdaa98bc0960108dd4b9cc1aef7/regex-2026.4.4-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:54a1189ad9d9357760557c91103d5e421f0a2dabe68a5cdf9103d0dcf4e00752", size = 291909, upload-time = "2026-04-03T20:55:17.558Z" }, + { url = "https://files.pythonhosted.org/packages/b4/d6/5cfbfc97f3201a4d24b596a77957e092030dcc4205894bc035cedcfce62f/regex-2026.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:76d67d5afb1fe402d10a6403bae668d000441e2ab115191a804287d53b772951", size = 289692, upload-time = "2026-04-03T20:55:20.561Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ac/f2212d9fd56fe897e36d0110ba30ba2d247bd6410c5bd98499c7e5a1e1f2/regex-2026.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e7cd3e4ee8d80447a83bbc9ab0c8459781fa77087f856c3e740d7763be0df27f", size = 796979, upload-time = "2026-04-03T20:55:22.56Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/e3/a016c12675fbac988a60c7e1c16e67823ff0bc016beb27bd7a001dbdabc6/regex-2026.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e19e18c568d2866d8b6a6dfad823db86193503f90823a8f66689315ba28fbe8", size = 866744, upload-time = "2026-04-03T20:55:24.646Z" }, + { url = "https://files.pythonhosted.org/packages/af/a4/0b90ca4cf17adc3cb43de80ec71018c37c88ad64987e8d0d481a95ca60b5/regex-2026.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7698a6f38730fd1385d390d1ed07bb13dce39aa616aca6a6d89bea178464b9a4", size = 911613, upload-time = "2026-04-03T20:55:27.033Z" }, + { url = "https://files.pythonhosted.org/packages/8e/3b/2b3dac0b82d41ab43aa87c6ecde63d71189d03fe8854b8ca455a315edac3/regex-2026.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:173a66f3651cdb761018078e2d9487f4cf971232c990035ec0eb1cdc6bf929a9", size = 800551, upload-time = "2026-04-03T20:55:29.532Z" }, + { url = "https://files.pythonhosted.org/packages/25/fe/5365eb7aa0e753c4b5957815c321519ecab033c279c60e1b1ae2367fa810/regex-2026.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fa7922bbb2cc84fa062d37723f199d4c0cd200245ce269c05db82d904db66b83", size = 776911, upload-time = "2026-04-03T20:55:31.526Z" }, + { url = "https://files.pythonhosted.org/packages/aa/b3/7fb0072156bba065e3b778a7bc7b0a6328212be5dd6a86fd207e0c4f2dab/regex-2026.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:59f67cd0a0acaf0e564c20bbd7f767286f23e91e2572c5703bf3e56ea7557edb", size = 785751, upload-time = "2026-04-03T20:55:33.797Z" }, + { url = "https://files.pythonhosted.org/packages/02/1a/9f83677eb699273e56e858f7bd95acdbee376d42f59e8bfca2fd80d79df3/regex-2026.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:475e50f3f73f73614f7cba5524d6de49dee269df00272a1b85e3d19f6d498465", size = 860484, upload-time = 
"2026-04-03T20:55:35.745Z" }, + { url = "https://files.pythonhosted.org/packages/3b/7a/93937507b61cfcff8b4c5857f1b452852b09f741daa9acae15c971d8554e/regex-2026.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:a1c0c7d67b64d85ac2e1879923bad2f08a08f3004055f2f406ef73c850114bd4", size = 765939, upload-time = "2026-04-03T20:55:37.972Z" }, + { url = "https://files.pythonhosted.org/packages/86/ea/81a7f968a351c6552b1670ead861e2a385be730ee28402233020c67f9e0f/regex-2026.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:1371c2ccbb744d66ee63631cc9ca12aa233d5749972626b68fe1a649dd98e566", size = 851417, upload-time = "2026-04-03T20:55:39.92Z" }, + { url = "https://files.pythonhosted.org/packages/4c/7e/323c18ce4b5b8f44517a36342961a0306e931e499febbd876bb149d900f0/regex-2026.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:59968142787042db793348a3f5b918cf24ced1f23247328530e063f89c128a95", size = 789056, upload-time = "2026-04-03T20:55:42.303Z" }, + { url = "https://files.pythonhosted.org/packages/c0/af/e7510f9b11b1913b0cd44eddb784b2d650b2af6515bfce4cffcc5bfd1d38/regex-2026.4.4-cp314-cp314-win32.whl", hash = "sha256:59efe72d37fd5a91e373e5146f187f921f365f4abc1249a5ab446a60f30dd5f8", size = 272130, upload-time = "2026-04-03T20:55:44.995Z" }, + { url = "https://files.pythonhosted.org/packages/9a/51/57dae534c915e2d3a21490e88836fa2ae79dde3b66255ecc0c0a155d2c10/regex-2026.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:e0aab3ff447845049d676827d2ff714aab4f73f340e155b7de7458cf53baa5a4", size = 280992, upload-time = "2026-04-03T20:55:47.316Z" }, + { url = "https://files.pythonhosted.org/packages/0a/5e/abaf9f4c3792e34edb1434f06717fae2b07888d85cb5cec29f9204931bf8/regex-2026.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:a7a5bb6aa0cf62208bb4fa079b0c756734f8ad0e333b425732e8609bd51ee22f", size = 273563, upload-time = "2026-04-03T20:55:49.273Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/06/35da85f9f217b9538b99cbb170738993bcc3b23784322decb77619f11502/regex-2026.4.4-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:97850d0638391bdc7d35dc1c1039974dcb921eaafa8cc935ae4d7f272b1d60b3", size = 494191, upload-time = "2026-04-03T20:55:51.258Z" }, + { url = "https://files.pythonhosted.org/packages/54/5b/1bc35f479eef8285c4baf88d8c002023efdeebb7b44a8735b36195486ae7/regex-2026.4.4-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ee7337f88f2a580679f7bbfe69dc86c043954f9f9c541012f49abc554a962f2e", size = 293877, upload-time = "2026-04-03T20:55:53.214Z" }, + { url = "https://files.pythonhosted.org/packages/39/5b/f53b9ad17480b3ddd14c90da04bfb55ac6894b129e5dea87bcaf7d00e336/regex-2026.4.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7429f4e6192c11d659900c0648ba8776243bf396ab95558b8c51a345afeddde6", size = 292410, upload-time = "2026-04-03T20:55:55.736Z" }, + { url = "https://files.pythonhosted.org/packages/bb/56/52377f59f60a7c51aa4161eecf0b6032c20b461805aca051250da435ffc9/regex-2026.4.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4f10fbd5dd13dcf4265b4cc07d69ca70280742870c97ae10093e3d66000359", size = 811831, upload-time = "2026-04-03T20:55:57.802Z" }, + { url = "https://files.pythonhosted.org/packages/dd/63/8026310bf066f702a9c361f83a8c9658f3fe4edb349f9c1e5d5273b7c40c/regex-2026.4.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a152560af4f9742b96f3827090f866eeec5becd4765c8e0d3473d9d280e76a5a", size = 871199, upload-time = "2026-04-03T20:56:00.333Z" }, + { url = "https://files.pythonhosted.org/packages/20/9f/a514bbb00a466dbb506d43f187a04047f7be1505f10a9a15615ead5080ee/regex-2026.4.4-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:54170b3e95339f415d54651f97df3bff7434a663912f9358237941bbf9143f55", size = 917649, upload-time = 
"2026-04-03T20:56:02.445Z" }, + { url = "https://files.pythonhosted.org/packages/cb/6b/8399f68dd41a2030218839b9b18360d79b86d22b9fab5ef477c7f23ca67c/regex-2026.4.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:07f190d65f5a72dcb9cf7106bfc3d21e7a49dd2879eda2207b683f32165e4d99", size = 816388, upload-time = "2026-04-03T20:56:04.595Z" }, + { url = "https://files.pythonhosted.org/packages/1e/9c/103963f47c24339a483b05edd568594c2be486188f688c0170fd504b2948/regex-2026.4.4-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9a2741ce5a29d3c84b0b94261ba630ab459a1b847a0d6beca7d62d188175c790", size = 785746, upload-time = "2026-04-03T20:56:07.13Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ee/7f6054c0dec0cee3463c304405e4ff42e27cff05bf36fcb34be549ab17bd/regex-2026.4.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b26c30df3a28fd9793113dac7385a4deb7294a06c0f760dd2b008bd49a9139bc", size = 801483, upload-time = "2026-04-03T20:56:09.365Z" }, + { url = "https://files.pythonhosted.org/packages/30/c2/51d3d941cf6070dc00c3338ecf138615fc3cce0421c3df6abe97a08af61a/regex-2026.4.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:421439d1bee44b19f4583ccf42670ca464ffb90e9fdc38d37f39d1ddd1e44f1f", size = 866331, upload-time = "2026-04-03T20:56:12.039Z" }, + { url = "https://files.pythonhosted.org/packages/16/e8/76d50dcc122ac33927d939f350eebcfe3dbcbda96913e03433fc36de5e63/regex-2026.4.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:b40379b53ecbc747fd9bdf4a0ea14eb8188ca1bd0f54f78893a39024b28f4863", size = 772673, upload-time = "2026-04-03T20:56:14.558Z" }, + { url = "https://files.pythonhosted.org/packages/a5/6e/5f6bf75e20ea6873d05ba4ec78378c375cbe08cdec571c83fbb01606e563/regex-2026.4.4-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:08c55c13d2eef54f73eeadc33146fb0baaa49e7335eb1aff6ae1324bf0ddbe4a", size = 857146, upload-time = "2026-04-03T20:56:16.663Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/33/3c76d9962949e487ebba353a18e89399f292287204ac8f2f4cfc3a51c233/regex-2026.4.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9776b85f510062f5a75ef112afe5f494ef1635607bf1cc220c1391e9ac2f5e81", size = 803463, upload-time = "2026-04-03T20:56:18.923Z" }, + { url = "https://files.pythonhosted.org/packages/19/eb/ef32dcd2cb69b69bc0c3e55205bce94a7def48d495358946bc42186dcccc/regex-2026.4.4-cp314-cp314t-win32.whl", hash = "sha256:385edaebde5db5be103577afc8699fea73a0e36a734ba24870be7ffa61119d74", size = 275709, upload-time = "2026-04-03T20:56:20.996Z" }, + { url = "https://files.pythonhosted.org/packages/a0/86/c291bf740945acbf35ed7dbebf8e2eea2f3f78041f6bd7cdab80cb274dc0/regex-2026.4.4-cp314-cp314t-win_amd64.whl", hash = "sha256:5d354b18839328927832e2fa5f7c95b7a3ccc39e7a681529e1685898e6436d45", size = 285622, upload-time = "2026-04-03T20:56:23.641Z" }, + { url = "https://files.pythonhosted.org/packages/d5/e7/ec846d560ae6a597115153c02ca6138a7877a1748b2072d9521c10a93e58/regex-2026.4.4-cp314-cp314t-win_arm64.whl", hash = "sha256:af0384cb01a33600c49505c27c6c57ab0b27bf84a74e28524c92ca897ebdac9d", size = 275773, upload-time = "2026-04-03T20:56:26.07Z" }, +] + +[[package]] +name = "requests" +version = "2.33.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5f/a4/98b9c7c6428a668bf7e42ebb7c79d576a1c3c1e3ae2d47e674b468388871/requests-2.33.1.tar.gz", hash = "sha256:18817f8c57c6263968bc123d237e3b8b08ac046f5456bd1e307ee8f4250d3517", size = 134120, upload-time = "2026-03-30T16:09:15.531Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/8e/7540e8a2036f79a125c1d2ebadf69ed7901608859186c856fa0388ef4197/requests-2.33.1-py3-none-any.whl", hash = "sha256:4e6d1ef462f3626a1f0a0a9c42dd93c63bad33f9f1c1937509b8c5c8718ab56a", size = 64947, 
upload-time = "2026-03-30T16:09:13.83Z" }, +] + +[[package]] +name = "rich" +version = "15.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, +] + +[[package]] +name = "rustbpe" +version = "0.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/2e/f16e179ad1e185f0bb5a8fc2376fff05d1eeefcb6d8a77ee04306e8a42ae/rustbpe-0.1.0.tar.gz", hash = "sha256:18765f62ac579a9ff9e89c611f9c9b9e46bd1adde9be3f59c00b6eb4e1f28b3a", size = 29723, upload-time = "2026-01-03T22:24:11.872Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/a3/7fe53c4dcd7d90a777424c61ac8072153ce47941066e0a247c020a4a663e/rustbpe-0.1.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:13e6aeaaf6e2f970ab577f32a6c49c8dd23517279253a37873ddc7f74fd30622", size = 1007207, upload-time = "2026-01-03T22:23:46.336Z" }, + { url = "https://files.pythonhosted.org/packages/a7/41/dee1474cfea594d7a9cebb42f683170f1f2d8af4473541c0a1f96dfaff76/rustbpe-0.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40763a0751ba8a595717f5015d18b0241e1af9930412e42d350380ba4601361b", size = 947913, upload-time = "2026-01-03T22:23:47.458Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/fd/c90bc3a3e823b8cafb85625ed37311987c20317168ea73d0ebaba54f8df2/rustbpe-0.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bfe8d24d0d71c16fb8ba5106e7d2be2c43211195a74ffa7e2c88cb98c07122e4", size = 1030968, upload-time = "2026-01-03T22:23:48.753Z" }, + { url = "https://files.pythonhosted.org/packages/fa/64/e15606774d2f13d1bdbdca4cd6e8fcd14fc0c3fb7ca7b00412c4ed0a8700/rustbpe-0.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:844f7f6c3bd59a9578b87ebc6bb60fe3ee47c8d8040a62488ce8e7eaeeb31319", size = 1075101, upload-time = "2026-01-03T22:23:50.041Z" }, + { url = "https://files.pythonhosted.org/packages/d7/26/8de98d90fd8765a1ea517b01897e05aa9932998e604bb9003e5e9b73be3c/rustbpe-0.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:b79b67d8db6a2fe3928918006569e73aee23e012b3b0b36fd4a2a85cc2c2161f", size = 914924, upload-time = "2026-01-03T22:23:51.31Z" }, + { url = "https://files.pythonhosted.org/packages/c6/63/a0475defd438cd6a4cd28b74ad8dd01bb7de6adafaa411968e758b0a9036/rustbpe-0.1.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6a59c05a8123d3a8e8815106fd1938a9499d4fdaf5cf00351fa7d3b5cc4f8ad6", size = 1007322, upload-time = "2026-01-03T22:23:52.568Z" }, + { url = "https://files.pythonhosted.org/packages/81/72/18e762472a42d68820e2d1244655fd960e200e449136fabe3c32f6f2a1b1/rustbpe-0.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bc0a8dd8b30860e3a4889ab7cf7c04a2614f8fc77c191efde1500aa054484efa", size = 948256, upload-time = "2026-01-03T22:23:53.926Z" }, + { url = "https://files.pythonhosted.org/packages/16/07/3c0948db94fc454b62012ff8b3e74ad13f84bf8fbcfb84b402bfb786e82e/rustbpe-0.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3eeb8efae3d10a3b6640a1e2bdc7f1e55a15f867bdae9efb3d8f0757b01d9d3a", size = 1031258, upload-time = "2026-01-03T22:23:54.961Z" }, + { url = 
"https://files.pythonhosted.org/packages/fb/69/77355ca8baf0c5023994b3f11304822d07116567ea47893f90267c086f87/rustbpe-0.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b0b77591c9e836df41ad0b30be9ec519a708c477cbf82eedaf839e7a9b10101", size = 1075321, upload-time = "2026-01-03T22:23:55.995Z" }, + { url = "https://files.pythonhosted.org/packages/5c/fe/5c529d92988be7df251de718a633054ecca2d5986a17759a6546a9f45c26/rustbpe-0.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:cc5bebc9990071e400bbe304304af3d5757522bb2a1177e2c3517f11ad28f0eb", size = 915136, upload-time = "2026-01-03T22:23:57.56Z" }, + { url = "https://files.pythonhosted.org/packages/af/d7/8f7215233acd67402f8bdf972daa3fbe9184b176348530b84ac40751a806/rustbpe-0.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1229e70c2d091faf8c0a50951e2e734b3b810d1d2b7677cd49d86dc3853c283", size = 1031277, upload-time = "2026-01-03T22:23:58.55Z" }, + { url = "https://files.pythonhosted.org/packages/ff/1a/0b34c02138f28a984bc44fdc0dc10afc9137814b2a56b8cd4e5ae25b8601/rustbpe-0.1.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:02d123e72fe9253c92904bfe2ba35afc816576b2cdbb432a96001e75bafb888e", size = 1007777, upload-time = "2026-01-03T22:23:59.539Z" }, + { url = "https://files.pythonhosted.org/packages/bb/b1/da66ce14f43b23136c07183be03ddbc58654824455cce36c2bad38254aeb/rustbpe-0.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a0df5172a813c982d31673d9de32dd053ddbb64ced2b97709a85d2e3c6a6cd28", size = 948400, upload-time = "2026-01-03T22:24:00.506Z" }, + { url = "https://files.pythonhosted.org/packages/05/d0/551dcfb8d314f4e0b60b86ab616bcaaf3a381f6e72f83f1211246528a7c1/rustbpe-0.1.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c525072e521a5cf729474a0ea6c83b1b16973b877098ee7060eac4bbacd46c7a", size = 1031325, upload-time = "2026-01-03T22:24:01.501Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/36/3f1730a6b8f4435b8cb2ceee2edb3be8357656e35f1f6549b5f387eb056a/rustbpe-0.1.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0c9216f9e38558f0939f6e34cc78d5517d7a02026c1a35b271ca82e9b522539", size = 1075729, upload-time = "2026-01-03T22:24:02.528Z" }, + { url = "https://files.pythonhosted.org/packages/b4/03/aaa994e9a28cb7248c2cfc43a93c779ee7ac0e19cf9eae6717b63bbe6a8d/rustbpe-0.1.0-cp314-cp314-win_amd64.whl", hash = "sha256:b5ceb789bb93a82547c0ed7277ecc01047eaf0eeea6bbc0a21420e65e5fb553a", size = 915650, upload-time = "2026-01-03T22:24:03.71Z" }, + { url = "https://files.pythonhosted.org/packages/8c/68/3ab181ff8b12dcabdb256dffb82de0d8bf30c72ac3d188451ac5fa1cc643/rustbpe-0.1.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88d1482ccadf5e29b524b13740b3a5e1f4e454a048684885d894fd1a9930617a", size = 1030995, upload-time = "2026-01-03T22:24:04.76Z" }, +] + +[[package]] +name = "safetensors" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" }, + { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" }, 
+ { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" }, + { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" }, + { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = 
"2025-11-19T15:18:31.075Z" }, + { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" }, + { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" }, + { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" }, + { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" }, + { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" }, +] + +[[package]] +name = "setuptools" +version = "82.0.1" +source = { registry = "https://pypi.org/simple" } 
+sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316, upload-time = "2026-03-09T12:47:17.221Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223, upload-time = "2026-03-09T12:47:15.026Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "starlette" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/69/17425771797c36cded50b7fe44e850315d039f28b15901ab44839e70b593/starlette-1.0.0.tar.gz", hash = "sha256:6a4beaf1f81bb472fd19ea9b918b50dc3a77a6f2e190a12954b25e6ed5eea149", size = 2655289, upload-time = "2026-03-22T18:29:46.779Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash 
= "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, +] + +[[package]] +name = "sympy" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921, upload-time = "2025-04-27T18:05:01.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, +] + +[[package]] +name = "tiktoken" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "regex" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" }, + { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" }, + { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" }, + { url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" }, + { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" }, + { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" }, + { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" }, + { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" }, + { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" }, + { url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" }, + { url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload-time = "2025-10-06T20:22:08.418Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" }, + { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" }, + { url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" }, + { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" }, + { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" }, + { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" }, + { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" }, + { url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" }, + { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" }, + { url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" }, + { url = 
"https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" }, + { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload-time = "2025-10-06T20:22:26.753Z" }, + { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" }, + { url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" }, + { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" }, + { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" }, + { url = 
"https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" }, + { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" }, + { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" }, +] + +[[package]] +name = "tokenizers" +version = "0.22.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275, upload-time = "2026-01-05T10:41:02.158Z" }, + { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472, upload-time = 
"2026-01-05T10:41:00.276Z" }, + { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736, upload-time = "2026-01-05T10:40:32.165Z" }, + { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835, upload-time = "2026-01-05T10:40:38.847Z" }, + { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673, upload-time = "2026-01-05T10:40:56.614Z" }, + { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818, upload-time = "2026-01-05T10:40:44.507Z" }, + { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195, upload-time = "2026-01-05T10:40:51.139Z" }, + { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 
3274982, upload-time = "2026-01-05T10:40:58.331Z" }, + { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245, upload-time = "2026-01-05T10:41:04.053Z" }, + { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069, upload-time = "2026-01-05T10:45:10.673Z" }, + { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263, upload-time = "2026-01-05T10:45:12.559Z" }, + { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429, upload-time = "2026-01-05T10:45:14.333Z" }, + { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363, upload-time = "2026-01-05T10:45:20.593Z" }, + { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786, upload-time = "2026-01-05T10:45:18.411Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, +] + +[[package]] +name = "torch" +version = "2.9.1" +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ + "sys_platform == 'darwin'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform == 'darwin'" }, + { name = "fsspec", marker = "sys_platform == 'darwin'" }, + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "networkx", marker = "sys_platform == 'darwin'" }, + { name = "setuptools", marker = "sys_platform == 'darwin'" }, + { name = "sympy", marker = "sys_platform == 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, +] +wheels = [ + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:287242dd1f830846098b5eca847f817aa5c6015ea57ab4c1287809efea7b77eb" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8924d10d36eac8fe0652a060a03fc2ae52980841850b9a1a2ddb0f27a4f181cd" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:bcee64ae7aa65876ceeae6dcaebe75109485b213528c74939602208a20706e3f" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:defadbeb055cfcf5def58f70937145aecbd7a4bc295238ded1d0e85ae2cf0e1d" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:886f84b181f766f53265ba0a1d503011e60f53fff9d569563ef94f24160e1072" }, +] + +[[package]] +name = "torch" +version = "2.9.1+cpu" +source = { registry = "https://download.pytorch.org/whl/cpu" } +resolution-markers = [ 
+ "sys_platform != 'darwin'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform != 'darwin'" }, + { name = "fsspec", marker = "sys_platform != 'darwin'" }, + { name = "jinja2", marker = "sys_platform != 'darwin'" }, + { name = "networkx", marker = "sys_platform != 'darwin'" }, + { name = "setuptools", marker = "sys_platform != 'darwin'" }, + { name = "sympy", marker = "sys_platform != 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, +] +wheels = [ + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3bf9b442a51a2948e41216a76d7ab00f0694cfcaaa51b6f9bcab57b7f89843e6" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7417d8c565f219d3455654cb431c6d892a3eb40246055e14d645422de13b9ea1" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp312-cp312-win_amd64.whl", hash = "sha256:a4e06b4f441675d26b462123c8a83e77c55f1ec8ebc081203be2db1ea8054add" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp312-cp312-win_arm64.whl", hash = "sha256:1abe31f14b560c1f062699e966cb08ef5b67518a1cfac2d8547a3dbcd8387b06" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:3e532e553b37ee859205a9b2d1c7977fd6922f53bbb1b9bfdd5bdc00d1a60ed4" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:39b3dff6d8fba240ae0d1bede4ca11c2531ae3b47329206512d99e17907ff74b" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp313-cp313-win_amd64.whl", hash = "sha256:404a7ab2fffaf2ca069e662f331eb46313692b2f1630df2720094284f390ccef" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp313-cp313-win_arm64.whl", hash = "sha256:161decbff26a33f13cb5ba6d2c8f458bbf56193bcc32ecc70be6dd4c7a3ee79d" 
}, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:01b1884f724977a20c7da2f640f1c7b37f4a2c117a7f4a6c1c0424d14cb86322" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:031a597147fa81b1e6d79ccf1ad3ccc7fafa27941d6cf26ff5caaa384fb20e92" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp313-cp313t-win_amd64.whl", hash = "sha256:e586ab1363e3f86aa4cc133b7fdcf98deb1d2c13d43a7a6e5a6a18e9c5364893" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:65010ab4aacce6c9a1ddfc935f986c003ca8638ded04348fd326c3e74346237c" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:88adf5157db5da1d54b1c9fe4a6c1d20ceef00e75d854e206a87dbf69e3037dc" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp314-cp314-win_amd64.whl", hash = "sha256:f60e2565f261542efac07e25208fb3fc55c6fe82314a5a9cbee971edb5f27713" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3ac2b8df2c55430e836dcda31940d47f1f5f94b8731057b6f20300ebea394dd9" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:5b688445f928f13563b7418b17c57e97bf955ab559cf73cd8f2b961f8572dbb3" }, + { url = "https://download-r2.pytorch.org/whl/cpu/torch-2.9.1%2Bcpu-cp314-cp314t-win_amd64.whl", hash = "sha256:cf9c3e50b595721ca6b488bdcc326e0f1af73ed28b9b66eff504a96649bb5c96" }, +] + +[[package]] +name = "tqdm" +version = "4.67.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, +] + +[[package]] +name = "transformers" +version = "5.5.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "regex" }, + { name = "safetensors" }, + { name = "tokenizers" }, + { name = "tqdm" }, + { name = "typer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a5/1e/1e244ab2ab50a863e6b52cc55761910567fa532b69a6740f6e99c5fdbd98/transformers-5.5.4.tar.gz", hash = "sha256:2e67cadba81fc7608cc07c4dd54f524820bc3d95b1cabd0ef3db7733c4f8b82e", size = 8227649, upload-time = "2026-04-13T16:55:55.181Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/fb/162a66789c65e5afa3b051309240c26bf37fbc8fea285b4546ae747995a2/transformers-5.5.4-py3-none-any.whl", hash = "sha256:0bd6281b82966fe5a7a16f553ea517a9db1dee6284d7cb224dfd88fc0dd1c167", size = 10236696, upload-time = "2026-04-13T16:55:51.497Z" }, +] + +[[package]] +name = "typer" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 
118613, upload-time = "2026-02-21T16:54:40.609Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, +] + +[[package]] +name = "uvicorn" +version = "0.44.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/da/6eee1ff8b6cbeed47eeb5229749168e81eb4b7b999a1a15a7176e51410c9/uvicorn-0.44.0.tar.gz", hash = "sha256:6c942071b68f07e178264b9152f1f16dfac5da85880c4ce06366a96d70d4f31e", size = 86947, upload-time = "2026-04-06T09:23:22.826Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/23/a5bbd9600dd607411fa644c06ff4951bec3a4d82c4b852374024359c19c0/uvicorn-0.44.0-py3-none-any.whl", hash = "sha256:ce937c99a2cc70279556967274414c087888e8cec9f9c94644dfca11bd3ced89", size = 69425, upload-time = "2026-04-06T09:23:21.524Z" }, +]