mirror of
https://github.com/karpathy/nanochat.git
synced 2026-03-27 07:05:15 +00:00
adding unit tests, starting point of RTX 5090 optimization with flex_attention
This commit is contained in:
parent
b07604ebaa
commit
0c5b9319aa
|
|
@ -29,6 +29,8 @@ dependencies = [
|
|||
[dependency-groups]
|
||||
dev = [
|
||||
"pytest>=8.0.0",
|
||||
"pytest-cov>=7.0.0",
|
||||
"pytest-mock>=3.15.1",
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ fi
|
|||
# Series name: from arg, env var, or default to today's date (e.g., jan11)
|
||||
SERIES_NAME="${1:-${SERIES_NAME:-$(date +%b%d | tr '[:upper:]' '[:lower:]')}}"
|
||||
# Depths to train (the "miniseries")
|
||||
DEPTHS=(12 14 16 18 20 22 24 26)
|
||||
DEPTHS=(12 14)
|
||||
# Hardware
|
||||
NPROC_PER_NODE="${NPROC_PER_NODE:-8}"
|
||||
# Logging
|
||||
|
|
@ -63,7 +63,7 @@ for d in "${DEPTHS[@]}"; do
|
|||
elif [ $d -ge 20 ]; then
|
||||
DEVICE_BATCH_SIZE_ARG="--device-batch-size=16"
|
||||
else
|
||||
DEVICE_BATCH_SIZE_ARG="--device-batch-size=32"
|
||||
DEVICE_BATCH_SIZE_ARG="--device-batch-size=16"
|
||||
fi
|
||||
|
||||
torchrun --standalone --nproc_per_node=$NPROC_PER_NODE -m scripts.base_train -- \
|
||||
|
|
|
|||
175
tests/test_checkpoint_manager_new.py
Normal file
175
tests/test_checkpoint_manager_new.py
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
import json
|
||||
import os
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
import nanochat.checkpoint_manager as ckpt
|
||||
|
||||
|
||||
def test_log0_respects_rank(monkeypatch):
|
||||
seen = {"n": 0}
|
||||
monkeypatch.setattr(ckpt.logger, "info", lambda _m: seen.__setitem__("n", seen["n"] + 1))
|
||||
monkeypatch.setenv("RANK", "0")
|
||||
ckpt.log0("hi")
|
||||
monkeypatch.setenv("RANK", "1")
|
||||
ckpt.log0("skip")
|
||||
assert seen["n"] == 1
|
||||
|
||||
|
||||
def test_patch_missing_helpers():
|
||||
cfg = {"vocab_size": 32}
|
||||
ckpt._patch_missing_config_keys(cfg)
|
||||
assert cfg["window_pattern"] == "L"
|
||||
ckpt._patch_missing_config_keys(cfg)
|
||||
assert cfg["window_pattern"] == "L"
|
||||
|
||||
model_data = {}
|
||||
model_cfg = SimpleNamespace(n_layer=3)
|
||||
ckpt._patch_missing_keys(model_data, model_cfg)
|
||||
assert torch.all(model_data["resid_lambdas"] == 1)
|
||||
assert torch.all(model_data["x0_lambdas"] == 0)
|
||||
|
||||
ckpt._patch_missing_keys(model_data, model_cfg)
|
||||
assert model_data["resid_lambdas"].numel() == 3
|
||||
|
||||
|
||||
def test_save_and_load_checkpoint(tmp_path):
|
||||
model_data = {"w": torch.tensor([1, 2])}
|
||||
optim_data = {"g": torch.tensor([3, 4])}
|
||||
meta_data = {"model_config": {"vocab_size": 8, "n_layer": 1, "n_head": 1, "n_kv_head": 1, "n_embd": 4, "sequence_len": 4}}
|
||||
ckpt.save_checkpoint(str(tmp_path), 7, model_data, optim_data, meta_data, rank=0)
|
||||
ckpt.save_checkpoint(str(tmp_path), 8, model_data, None, meta_data, rank=0)
|
||||
got_model, got_optim, got_meta = ckpt.load_checkpoint(str(tmp_path), 7, "cpu", load_optimizer=True, rank=0)
|
||||
assert torch.equal(got_model["w"], model_data["w"])
|
||||
assert torch.equal(got_optim["g"], optim_data["g"])
|
||||
assert got_meta["model_config"]["vocab_size"] == 8
|
||||
|
||||
got_model2, got_optim2, got_meta2 = ckpt.load_checkpoint(str(tmp_path), 8, "cpu", load_optimizer=False, rank=0)
|
||||
assert got_optim2 is None
|
||||
assert got_meta2 == meta_data
|
||||
assert torch.equal(got_model2["w"], model_data["w"])
|
||||
|
||||
|
||||
def test_build_model_cpu_and_train_eval(monkeypatch):
|
||||
model_data = {"_orig_mod.w": torch.ones(1, dtype=torch.bfloat16)}
|
||||
meta_data = {"model_config": {"vocab_size": 16, "n_layer": 2, "n_head": 1, "n_kv_head": 1, "n_embd": 4, "sequence_len": 8}}
|
||||
|
||||
class FakeTokenizer:
|
||||
def get_vocab_size(self):
|
||||
return 16
|
||||
|
||||
class FakeModel:
|
||||
def __init__(self, cfg):
|
||||
self.cfg = cfg
|
||||
self.mode = None
|
||||
self.loaded = None
|
||||
|
||||
def to_empty(self, device):
|
||||
self.device = device
|
||||
|
||||
def init_weights(self):
|
||||
self.init_called = True
|
||||
|
||||
def load_state_dict(self, data, strict, assign):
|
||||
self.loaded = (data, strict, assign)
|
||||
|
||||
def eval(self):
|
||||
self.mode = "eval"
|
||||
|
||||
def train(self):
|
||||
self.mode = "train"
|
||||
|
||||
monkeypatch.setattr(ckpt, "load_checkpoint", lambda *a, **k: (model_data.copy(), None, meta_data))
|
||||
monkeypatch.setattr(ckpt, "GPTConfig", lambda **kw: SimpleNamespace(**kw))
|
||||
monkeypatch.setattr(ckpt, "GPT", lambda cfg: FakeModel(cfg))
|
||||
monkeypatch.setattr(ckpt, "get_tokenizer", lambda: FakeTokenizer())
|
||||
|
||||
m, tok, meta = ckpt.build_model("/x", 1, torch.device("cpu"), phase="eval")
|
||||
assert m.mode == "eval"
|
||||
assert tok.get_vocab_size() == 16
|
||||
assert meta == meta_data
|
||||
assert "w" in m.loaded[0]
|
||||
assert m.loaded[0]["w"].dtype == torch.float32
|
||||
|
||||
m2, _, _ = ckpt.build_model("/x", 1, torch.device("cpu"), phase="train")
|
||||
assert m2.mode == "train"
|
||||
|
||||
with pytest.raises(AssertionError):
|
||||
ckpt.build_model("/x", 1, torch.device("cpu"), phase="bad")
|
||||
|
||||
class BadTok(FakeTokenizer):
|
||||
def get_vocab_size(self):
|
||||
return 99
|
||||
|
||||
monkeypatch.setattr(ckpt, "get_tokenizer", lambda: BadTok())
|
||||
with pytest.raises(AssertionError):
|
||||
ckpt.build_model("/x", 1, torch.device("cpu"), phase="eval")
|
||||
|
||||
|
||||
def test_find_largest_model_and_last_step(tmp_path, monkeypatch):
|
||||
with pytest.raises(FileNotFoundError):
|
||||
ckpt.find_largest_model(str(tmp_path))
|
||||
|
||||
(tmp_path / "d3").mkdir()
|
||||
(tmp_path / "d10").mkdir()
|
||||
assert ckpt.find_largest_model(str(tmp_path)) == "d10"
|
||||
|
||||
# Fallback path: no d<number> tags, choose newest by mtime.
|
||||
other = tmp_path / "abc"
|
||||
newest = tmp_path / "zzz"
|
||||
other.mkdir(exist_ok=True)
|
||||
newest.mkdir(exist_ok=True)
|
||||
monkeypatch.setattr(ckpt.re, "match", lambda _pat, _s: None)
|
||||
assert ckpt.find_largest_model(str(tmp_path)) == "zzz"
|
||||
|
||||
step_dir = tmp_path / "steps"
|
||||
step_dir.mkdir()
|
||||
with pytest.raises(FileNotFoundError):
|
||||
ckpt.find_last_step(str(step_dir))
|
||||
(step_dir / "model_000012.pt").write_bytes(b"")
|
||||
(step_dir / "model_000007.pt").write_bytes(b"")
|
||||
assert ckpt.find_last_step(str(step_dir)) == 12
|
||||
|
||||
|
||||
def test_load_model_from_dir_and_load_model(monkeypatch):
|
||||
calls = {}
|
||||
|
||||
monkeypatch.setattr(ckpt, "find_largest_model", lambda d: "d12")
|
||||
monkeypatch.setattr(ckpt, "find_last_step", lambda d: 42)
|
||||
|
||||
def fake_build_model(checkpoint_dir, step, device, phase):
|
||||
calls["args"] = (checkpoint_dir, step, device, phase)
|
||||
return "m", "t", {"k": 1}
|
||||
|
||||
monkeypatch.setattr(ckpt, "build_model", fake_build_model)
|
||||
out = ckpt.load_model_from_dir("/root/checkpoints", device="cpu", phase="eval")
|
||||
assert out == ("m", "t", {"k": 1})
|
||||
assert calls["args"][0].endswith("/root/checkpoints/d12")
|
||||
assert calls["args"][1] == 42
|
||||
|
||||
monkeypatch.setattr(ckpt, "get_base_dir", lambda: "/base")
|
||||
monkeypatch.setattr(ckpt, "load_model_from_dir", lambda checkpoints_dir, *a, **k: ("ok", checkpoints_dir, a, k))
|
||||
got = ckpt.load_model("base", "cpu", phase="eval")
|
||||
assert got[0] == "ok"
|
||||
assert got[1].endswith("/base/base_checkpoints")
|
||||
assert ckpt.load_model("sft", "cpu", phase="eval")[1].endswith("/base/chatsft_checkpoints")
|
||||
assert ckpt.load_model("rl", "cpu", phase="eval")[1].endswith("/base/chatrl_checkpoints")
|
||||
|
||||
|
||||
def test_load_optimizer_state(monkeypatch, tmp_path):
|
||||
model_root = tmp_path / "base_checkpoints" / "d7"
|
||||
model_root.mkdir(parents=True)
|
||||
opt_path = model_root / "optim_000003_rank2.pt"
|
||||
torch.save({"x": torch.tensor([1])}, opt_path)
|
||||
|
||||
monkeypatch.setattr(ckpt, "get_base_dir", lambda: str(tmp_path))
|
||||
monkeypatch.setattr(ckpt, "find_largest_model", lambda _d: "d7")
|
||||
monkeypatch.setattr(ckpt, "find_last_step", lambda _d: 3)
|
||||
|
||||
got = ckpt.load_optimizer_state("base", "cpu", rank=2, model_tag=None, step=None)
|
||||
assert torch.equal(got["x"], torch.tensor([1]))
|
||||
|
||||
missing = ckpt.load_optimizer_state("base", "cpu", rank=99, model_tag="d7", step=3)
|
||||
assert missing is None
|
||||
234
tests/test_common_new.py
Normal file
234
tests/test_common_new.py
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
import io
|
||||
import logging
|
||||
import os
|
||||
import types
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
import nanochat.common as common
|
||||
|
||||
|
||||
def test_colored_formatter_info_and_non_info():
|
||||
fmt = common.ColoredFormatter("%(levelname)s %(message)s")
|
||||
rec = logging.LogRecord("x", logging.INFO, __file__, 1, "Shard 2: 10 MB 5%", (), None)
|
||||
out = fmt.format(rec)
|
||||
assert "Shard 2" in out
|
||||
assert "10 MB" in out
|
||||
assert "5%" in out
|
||||
|
||||
rec2 = logging.LogRecord("x", logging.WARNING, __file__, 1, "warn", (), None)
|
||||
out2 = fmt.format(rec2)
|
||||
assert "warn" in out2
|
||||
|
||||
|
||||
def test_setup_default_logging(monkeypatch):
|
||||
calls = {}
|
||||
|
||||
def fake_basic_config(**kwargs):
|
||||
calls["kwargs"] = kwargs
|
||||
|
||||
monkeypatch.setattr(common.logging, "basicConfig", fake_basic_config)
|
||||
common.setup_default_logging()
|
||||
assert calls["kwargs"]["level"] == logging.INFO
|
||||
assert len(calls["kwargs"]["handlers"]) == 1
|
||||
|
||||
|
||||
def test_get_base_dir_env_and_default(monkeypatch, tmp_path):
|
||||
monkeypatch.setenv("NANOCHAT_BASE_DIR", str(tmp_path / "from_env"))
|
||||
got = common.get_base_dir()
|
||||
assert got.endswith("from_env")
|
||||
assert os.path.isdir(got)
|
||||
|
||||
monkeypatch.delenv("NANOCHAT_BASE_DIR", raising=False)
|
||||
monkeypatch.setattr(common.os.path, "expanduser", lambda _: str(tmp_path))
|
||||
got2 = common.get_base_dir()
|
||||
assert got2.endswith(".cache/nanochat")
|
||||
assert os.path.isdir(got2)
|
||||
|
||||
|
||||
def test_download_file_with_lock_paths(monkeypatch, tmp_path):
|
||||
monkeypatch.setattr(common, "get_base_dir", lambda: str(tmp_path))
|
||||
out = tmp_path / "f.bin"
|
||||
out.write_bytes(b"x")
|
||||
assert common.download_file_with_lock("http://x", "f.bin") == str(out)
|
||||
|
||||
out.unlink()
|
||||
payload = b"hello"
|
||||
marker = {"post": None}
|
||||
|
||||
class Resp:
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def read(self):
|
||||
return payload
|
||||
|
||||
monkeypatch.setattr(common.urllib.request, "urlopen", lambda _url: Resp())
|
||||
got = common.download_file_with_lock(
|
||||
"http://example.test",
|
||||
"f.bin",
|
||||
postprocess_fn=lambda p: marker.__setitem__("post", p),
|
||||
)
|
||||
assert got == str(out)
|
||||
assert out.read_bytes() == payload
|
||||
assert marker["post"] == str(out)
|
||||
|
||||
|
||||
def test_download_file_with_lock_recheck_after_lock(monkeypatch, tmp_path):
|
||||
monkeypatch.setattr(common, "get_base_dir", lambda: str(tmp_path))
|
||||
file_path = str(tmp_path / "race.bin")
|
||||
|
||||
calls = {"n": 0}
|
||||
real_exists = common.os.path.exists
|
||||
|
||||
def fake_exists(path):
|
||||
if path == file_path:
|
||||
calls["n"] += 1
|
||||
# first check (before lock): missing; second check (inside lock): present
|
||||
return calls["n"] >= 2
|
||||
return real_exists(path)
|
||||
|
||||
class Lock:
|
||||
def __init__(self, _p):
|
||||
pass
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
monkeypatch.setattr(common.os.path, "exists", fake_exists)
|
||||
monkeypatch.setattr(common, "FileLock", Lock)
|
||||
got = common.download_file_with_lock("http://unused", "race.bin")
|
||||
assert got == file_path
|
||||
|
||||
|
||||
def test_print0_and_banner(monkeypatch, capsys):
|
||||
monkeypatch.setenv("RANK", "1")
|
||||
common.print0("hidden")
|
||||
assert capsys.readouterr().out == ""
|
||||
|
||||
monkeypatch.setenv("RANK", "0")
|
||||
common.print0("shown")
|
||||
assert "shown" in capsys.readouterr().out
|
||||
|
||||
common.print_banner()
|
||||
assert "█████" in capsys.readouterr().out
|
||||
|
||||
|
||||
def test_ddp_helpers(monkeypatch):
|
||||
for k in ("RANK", "LOCAL_RANK", "WORLD_SIZE"):
|
||||
monkeypatch.delenv(k, raising=False)
|
||||
assert common.is_ddp_requested() is False
|
||||
assert common.get_dist_info() == (False, 0, 0, 1)
|
||||
|
||||
monkeypatch.setenv("RANK", "3")
|
||||
monkeypatch.setenv("LOCAL_RANK", "2")
|
||||
monkeypatch.setenv("WORLD_SIZE", "8")
|
||||
assert common.is_ddp_requested() is True
|
||||
assert common.get_dist_info() == (True, 3, 2, 8)
|
||||
|
||||
monkeypatch.setattr(common.dist, "is_available", lambda: True)
|
||||
monkeypatch.setattr(common.dist, "is_initialized", lambda: True)
|
||||
assert common.is_ddp_initialized() is True
|
||||
|
||||
|
||||
def test_autodetect_device_type(monkeypatch):
|
||||
monkeypatch.setattr(common.torch.cuda, "is_available", lambda: True)
|
||||
assert common.autodetect_device_type() == "cuda"
|
||||
|
||||
monkeypatch.setattr(common.torch.cuda, "is_available", lambda: False)
|
||||
monkeypatch.setattr(common.torch.backends.mps, "is_available", lambda: True)
|
||||
assert common.autodetect_device_type() == "mps"
|
||||
|
||||
monkeypatch.setattr(common.torch.backends.mps, "is_available", lambda: False)
|
||||
assert common.autodetect_device_type() == "cpu"
|
||||
|
||||
|
||||
def test_compute_init_cpu_and_cleanup(monkeypatch):
|
||||
monkeypatch.setattr(common, "get_dist_info", lambda: (False, 0, 0, 1))
|
||||
out = common.compute_init("cpu")
|
||||
assert out[0] is False
|
||||
assert out[-1].type == "cpu"
|
||||
|
||||
called = {"destroy": 0}
|
||||
monkeypatch.setattr(common, "is_ddp_initialized", lambda: True)
|
||||
monkeypatch.setattr(common.dist, "destroy_process_group", lambda: called.__setitem__("destroy", 1))
|
||||
common.compute_cleanup()
|
||||
assert called["destroy"] == 1
|
||||
|
||||
|
||||
def test_compute_init_mps_and_cuda_paths(monkeypatch):
|
||||
monkeypatch.setattr(common, "get_dist_info", lambda: (True, 1, 0, 2))
|
||||
monkeypatch.setattr(common.torch.backends.mps, "is_available", lambda: True)
|
||||
out = common.compute_init("mps")
|
||||
assert out[0] is True
|
||||
assert out[-1].type == "mps"
|
||||
|
||||
monkeypatch.setattr(common.torch.cuda, "is_available", lambda: True)
|
||||
set_device_calls = {}
|
||||
init_calls = {}
|
||||
barrier_calls = {}
|
||||
matmul_calls = {}
|
||||
seed_calls = {}
|
||||
cuda_seed_calls = {}
|
||||
|
||||
monkeypatch.setattr(common.torch.cuda, "set_device", lambda d: set_device_calls.__setitem__("device", d))
|
||||
monkeypatch.setattr(common.dist, "init_process_group", lambda **kw: init_calls.__setitem__("kw", kw))
|
||||
monkeypatch.setattr(common.dist, "barrier", lambda: barrier_calls.__setitem__("n", 1))
|
||||
monkeypatch.setattr(common.torch, "set_float32_matmul_precision", lambda x: matmul_calls.__setitem__("x", x))
|
||||
monkeypatch.setattr(common.torch, "manual_seed", lambda x: seed_calls.__setitem__("x", x))
|
||||
monkeypatch.setattr(common.torch.cuda, "manual_seed", lambda x: cuda_seed_calls.__setitem__("x", x))
|
||||
|
||||
out2 = common.compute_init("cuda")
|
||||
assert out2[0] is True
|
||||
assert out2[-1].type == "cuda"
|
||||
assert matmul_calls["x"] == "high"
|
||||
assert seed_calls["x"] == 42
|
||||
assert cuda_seed_calls["x"] == 42
|
||||
assert "device_id" in init_calls["kw"]
|
||||
assert barrier_calls["n"] == 1
|
||||
assert set_device_calls["device"].type == "cuda"
|
||||
|
||||
|
||||
def test_compute_init_assertions(monkeypatch):
|
||||
monkeypatch.setattr(common.torch.cuda, "is_available", lambda: False)
|
||||
with pytest.raises(AssertionError):
|
||||
common.compute_init("cuda")
|
||||
monkeypatch.setattr(common.torch.backends.mps, "is_available", lambda: False)
|
||||
with pytest.raises(AssertionError):
|
||||
common.compute_init("mps")
|
||||
with pytest.raises(AssertionError):
|
||||
common.compute_init("bad")
|
||||
|
||||
|
||||
def test_dummy_wandb_and_peak_flops(monkeypatch):
|
||||
w = common.DummyWandb()
|
||||
assert w.log() is None
|
||||
assert w.finish() is None
|
||||
|
||||
assert common.get_peak_flops("NVIDIA H100 NVL") == 835e12
|
||||
assert common.get_peak_flops("A100-SXM") == 312e12
|
||||
|
||||
class Props:
|
||||
max_compute_units = 8
|
||||
|
||||
class XPU:
|
||||
@staticmethod
|
||||
def get_device_properties(_name):
|
||||
return Props()
|
||||
|
||||
monkeypatch.setattr(common.torch, "xpu", XPU())
|
||||
pvc = common.get_peak_flops("Data Center GPU Max 1550")
|
||||
assert pvc == 512 * 8 * 1300 * 10**6
|
||||
|
||||
warned = {}
|
||||
monkeypatch.setattr(common.logger, "warning", lambda msg: warned.__setitem__("msg", msg))
|
||||
unknown = common.get_peak_flops("mystery gpu")
|
||||
assert unknown == float("inf")
|
||||
assert "Peak flops undefined" in warned["msg"]
|
||||
231
tests/test_core_eval_new.py
Normal file
231
tests/test_core_eval_new.py
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
import random
|
||||
from dataclasses import dataclass
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
import nanochat.core_eval as core_eval
|
||||
|
||||
|
||||
@dataclass
|
||||
class ItemMC:
|
||||
query: str
|
||||
choices: list
|
||||
gold: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class ItemSchema:
|
||||
context_options: list
|
||||
continuation: str
|
||||
gold: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class ItemLM:
|
||||
context: str
|
||||
continuation: str
|
||||
gold: int = 0
|
||||
|
||||
|
||||
class TinyTokenizer:
|
||||
def __init__(self, bos=99):
|
||||
self._bos = bos
|
||||
|
||||
def get_bos_token_id(self):
|
||||
return self._bos
|
||||
|
||||
def __call__(self, prompts, prepend=None):
|
||||
return self.encode(prompts, prepend=prepend)
|
||||
|
||||
def encode(self, prompts, prepend=None):
|
||||
if isinstance(prompts, str):
|
||||
prompts = [prompts]
|
||||
out = []
|
||||
for p in prompts:
|
||||
ids = [len(x) % 11 + 1 for x in p.split()]
|
||||
if prepend is not None:
|
||||
ids = [prepend] + ids
|
||||
out.append(ids)
|
||||
return out
|
||||
|
||||
|
||||
def test_prompt_renderers():
|
||||
mc_item = {"query": "Q?", "choices": ["A", "B"], "gold": 1}
|
||||
mc_shots = [ItemMC(query="Q0", choices=["x", "y"], gold=0)]
|
||||
prompts_mc = core_eval.render_prompts_mc(mc_item, " -> ", mc_shots)
|
||||
assert len(prompts_mc) == 2
|
||||
assert "Q0 -> x" in prompts_mc[0]
|
||||
|
||||
schema_item = {"context_options": ["ctx1", "ctx2"], "continuation": "cont", "gold": 0}
|
||||
schema_shots = [ItemSchema(context_options=["ca", "cb"], continuation="x", gold=1)]
|
||||
prompts_schema = core_eval.render_prompts_schema(schema_item, " || ", schema_shots)
|
||||
assert len(prompts_schema) == 2
|
||||
assert "cb || x" in prompts_schema[0]
|
||||
|
||||
lm_item = {"context": "c1 ", "continuation": "end", "gold": 0}
|
||||
lm_shots = [ItemLM(context="ctx ", continuation="tail")]
|
||||
prompts_lm = core_eval.render_prompts_lm(lm_item, " ## ", lm_shots)
|
||||
assert len(prompts_lm) == 2
|
||||
assert prompts_lm[0].endswith("##")
|
||||
assert prompts_lm[1].endswith("## end")
|
||||
|
||||
|
||||
def test_sequence_helpers():
|
||||
assert core_eval.find_common_length([[1, 2, 3], [1, 2, 4]], direction="left") == 2
|
||||
assert core_eval.find_common_length([[1, 2, 3], [0, 2, 3]], direction="right") == 2
|
||||
assert core_eval.find_common_length([[7, 8], [7, 8]], direction="left") == 2
|
||||
|
||||
stacked = core_eval.stack_sequences([[1, 2], [3]], pad_token_id=0)
|
||||
assert stacked.tolist() == [[1, 2], [3, 0]]
|
||||
|
||||
|
||||
def test_batch_sequence_helpers():
|
||||
tok = TinyTokenizer(bos=5)
|
||||
prompts = ["aa bb", "aa bb cc"]
|
||||
tokens_mc, s_mc, e_mc = core_eval.batch_sequences_mc(tok, prompts)
|
||||
assert len(tokens_mc) == 2
|
||||
assert len(s_mc) == 2 and len(e_mc) == 2
|
||||
|
||||
tokens_schema, s_schema, e_schema = core_eval.batch_sequences_schema(tok, prompts)
|
||||
assert len(tokens_schema) == 2
|
||||
assert len(s_schema) == 2 and len(e_schema) == 2
|
||||
|
||||
lm_prompts = ["a b", "a b c d"]
|
||||
tokens_lm, s_lm, e_lm = core_eval.batch_sequences_lm(tok, lm_prompts)
|
||||
assert len(tokens_lm) == 1
|
||||
assert s_lm[0] < e_lm[0]
|
||||
|
||||
bad = [[1, 2], [8, 9]]
|
||||
class BadTok:
|
||||
def get_bos_token_id(self):
|
||||
return 1
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
del args, kwargs
|
||||
return bad
|
||||
|
||||
with pytest.raises(AssertionError):
|
||||
core_eval.batch_sequences_lm(BadTok(), ["x", "y"])
|
||||
|
||||
|
||||
def test_forward_model():
|
||||
class M:
|
||||
def __call__(self, input_ids):
|
||||
b, t = input_ids.shape
|
||||
v = 6
|
||||
logits = torch.zeros((b, t, v), dtype=torch.float32)
|
||||
for i in range(b):
|
||||
for j in range(t):
|
||||
logits[i, j, (j + 1) % v] = 10.0
|
||||
return logits
|
||||
|
||||
input_ids = torch.tensor([[1, 2, 3], [2, 1, 0]], dtype=torch.long)
|
||||
losses, preds = core_eval.forward_model(M(), input_ids)
|
||||
assert losses.shape == input_ids.shape
|
||||
assert torch.isnan(losses[:, -1]).all()
|
||||
assert preds.shape == input_ids.shape
|
||||
|
||||
|
||||
def _simple_model_for_eval():
|
||||
class M:
|
||||
max_seq_len = 3
|
||||
|
||||
def __call__(self, input_ids):
|
||||
b, t = input_ids.shape
|
||||
# Prefer token 3 always to create deterministic MC scores.
|
||||
logits = torch.zeros((b, t, 8), dtype=torch.float32)
|
||||
logits[..., 3] = 3.0
|
||||
return logits
|
||||
|
||||
return M()
|
||||
|
||||
|
||||
def _simple_model_no_crop():
|
||||
class M:
|
||||
max_seq_len = None
|
||||
|
||||
def __call__(self, input_ids):
|
||||
b, t = input_ids.shape
|
||||
logits = torch.zeros((b, t, 8), dtype=torch.float32)
|
||||
logits[..., 3] = 3.0
|
||||
return logits
|
||||
|
||||
return M()
|
||||
|
||||
|
||||
def test_evaluate_example_main_paths(monkeypatch):
|
||||
tok = TinyTokenizer(bos=1)
|
||||
data_mc = [
|
||||
{"query": "q0", "choices": ["a", "b"], "gold": 0},
|
||||
{"query": "q1", "choices": ["c", "d"], "gold": 1},
|
||||
{"query": "q2", "choices": ["e", "f"], "gold": 0},
|
||||
]
|
||||
meta_mc = {"task_type": "multiple_choice", "num_fewshot": 1, "continuation_delimiter": " => "}
|
||||
out_mc = core_eval.evaluate_example(0, _simple_model_for_eval(), tok, data_mc, "cpu", meta_mc)
|
||||
assert isinstance(out_mc, bool)
|
||||
|
||||
data_schema = [
|
||||
{"context_options": ["ctx0", "ctx1"], "continuation": "next", "gold": 0},
|
||||
{"context_options": ["ctx2", "ctx3"], "continuation": "next", "gold": 1},
|
||||
]
|
||||
meta_schema = {"task_type": "schema", "num_fewshot": 0, "continuation_delimiter": " -> "}
|
||||
out_schema = core_eval.evaluate_example(0, _simple_model_no_crop(), tok, data_schema, "cpu", meta_schema)
|
||||
assert isinstance(out_schema, bool)
|
||||
|
||||
# LM path with explicit prefix check and correctness.
|
||||
class LMTokenizer(TinyTokenizer):
|
||||
def encode(self, prompts, prepend=None):
|
||||
if prompts == ["p0", "p1"]:
|
||||
return [[1, 2], [1, 2, 3]]
|
||||
return super().encode(prompts, prepend=prepend)
|
||||
|
||||
lm_data = [{"context": "x", "continuation": "y", "gold": 0}]
|
||||
meta_lm = {"task_type": "language_modeling", "num_fewshot": 0, "continuation_delimiter": ""}
|
||||
monkeypatch.setattr(core_eval, "render_prompts_lm", lambda *_a, **_k: ["p0", "p1"])
|
||||
out_lm = core_eval.evaluate_example(0, _simple_model_for_eval(), LMTokenizer(), lm_data, "cpu", meta_lm)
|
||||
assert isinstance(out_lm, bool)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
core_eval.evaluate_example(0, _simple_model_for_eval(), tok, data_mc, "cpu", {"task_type": "bad", "num_fewshot": 0, "continuation_delimiter": ""})
|
||||
|
||||
|
||||
def test_evaluate_example_reaches_final_else(monkeypatch):
|
||||
# Make first task-type comparison pass once, then fail later so final else is executed.
|
||||
class FlakyType:
|
||||
def __init__(self):
|
||||
self.first = True
|
||||
|
||||
def __eq__(self, other):
|
||||
if self.first and other == "multiple_choice":
|
||||
self.first = False
|
||||
return True
|
||||
return False
|
||||
|
||||
tok = TinyTokenizer(bos=1)
|
||||
data_mc = [{"query": "q", "choices": ["a", "b"], "gold": 0}]
|
||||
meta = {"task_type": FlakyType(), "num_fewshot": 0, "continuation_delimiter": " :: "}
|
||||
with pytest.raises(ValueError):
|
||||
core_eval.evaluate_example(0, _simple_model_for_eval(), tok, data_mc, "cpu", meta)
|
||||
|
||||
|
||||
def test_evaluate_task(monkeypatch):
|
||||
data = [{"x": 1}, {"x": 2}, {"x": 3}, {"x": 4}]
|
||||
task_meta = {"task_type": "multiple_choice", "num_fewshot": 0, "continuation_delimiter": ""}
|
||||
|
||||
monkeypatch.setattr(core_eval, "evaluate_example", lambda idx, *_a, **_k: (idx % 2) == 0)
|
||||
monkeypatch.setattr(core_eval.dist, "is_initialized", lambda: False)
|
||||
out = core_eval.evaluate_task(model=None, tokenizer=None, data=data, device="cpu", task_meta=task_meta)
|
||||
assert 0.0 <= out <= 1.0
|
||||
|
||||
# Distributed branch.
|
||||
calls = {"barrier": 0, "reduce": 0}
|
||||
monkeypatch.setattr(core_eval.dist, "is_initialized", lambda: True)
|
||||
monkeypatch.setattr(core_eval.dist, "get_rank", lambda: 0)
|
||||
monkeypatch.setattr(core_eval.dist, "get_world_size", lambda: 2)
|
||||
monkeypatch.setattr(core_eval.dist, "barrier", lambda: calls.__setitem__("barrier", calls["barrier"] + 1))
|
||||
monkeypatch.setattr(core_eval.dist, "all_reduce", lambda *_a, **_k: calls.__setitem__("reduce", calls["reduce"] + 1))
|
||||
out2 = core_eval.evaluate_task(model=None, tokenizer=None, data=data, device="cpu", task_meta=task_meta)
|
||||
assert 0.0 <= out2 <= 1.0
|
||||
assert calls["barrier"] == 1
|
||||
assert calls["reduce"] == 1
|
||||
282
tests/test_dataset_dataloader_new.py
Normal file
282
tests/test_dataset_dataloader_new.py
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
import os
|
||||
import runpy
|
||||
import types
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
import nanochat.dataset as dataset
|
||||
import nanochat.dataloader as dataloader
|
||||
|
||||
|
||||
def test_list_parquet_files(tmp_path):
|
||||
(tmp_path / "a.parquet").write_bytes(b"")
|
||||
(tmp_path / "b.tmp").write_bytes(b"")
|
||||
(tmp_path / "c.parquet.tmp").write_bytes(b"")
|
||||
(tmp_path / "d.parquet").write_bytes(b"")
|
||||
got = dataset.list_parquet_files(str(tmp_path))
|
||||
assert got == [str(tmp_path / "a.parquet"), str(tmp_path / "d.parquet")]
|
||||
|
||||
|
||||
def test_parquets_iter_batched(monkeypatch):
|
||||
files = ["/p0.parquet", "/p1.parquet"]
|
||||
monkeypatch.setattr(dataset, "list_parquet_files", lambda: files)
|
||||
|
||||
class RG:
|
||||
def __init__(self, vals):
|
||||
self._vals = vals
|
||||
|
||||
def column(self, _name):
|
||||
return types.SimpleNamespace(to_pylist=lambda: self._vals)
|
||||
|
||||
class PF:
|
||||
def __init__(self, _path):
|
||||
self.num_row_groups = 3
|
||||
|
||||
def read_row_group(self, idx):
|
||||
return RG([f"row-{idx}"])
|
||||
|
||||
monkeypatch.setattr(dataset.pq, "ParquetFile", PF)
|
||||
|
||||
train = list(dataset.parquets_iter_batched("train", start=0, step=2))
|
||||
val = list(dataset.parquets_iter_batched("val", start=1, step=2))
|
||||
assert train == [["row-0"], ["row-2"]]
|
||||
assert val == [["row-1"]]
|
||||
|
||||
with pytest.raises(AssertionError):
|
||||
list(dataset.parquets_iter_batched("bad"))
|
||||
|
||||
|
||||
def test_download_single_file(monkeypatch, tmp_path):
|
||||
monkeypatch.setattr(dataset, "DATA_DIR", str(tmp_path))
|
||||
monkeypatch.setattr(dataset, "BASE_URL", "http://example.test")
|
||||
|
||||
# Existing file -> skip.
|
||||
pre = tmp_path / dataset.index_to_filename(1)
|
||||
pre.write_bytes(b"x")
|
||||
assert dataset.download_single_file(1) is True
|
||||
|
||||
# Success path.
|
||||
class Resp:
|
||||
def __init__(self):
|
||||
self.ok = True
|
||||
|
||||
def raise_for_status(self):
|
||||
return None
|
||||
|
||||
def iter_content(self, chunk_size):
|
||||
del chunk_size
|
||||
yield b"abc"
|
||||
yield b""
|
||||
yield b"def"
|
||||
|
||||
monkeypatch.setattr(dataset.requests, "get", lambda *a, **k: Resp())
|
||||
assert dataset.download_single_file(2) is True
|
||||
out = tmp_path / dataset.index_to_filename(2)
|
||||
assert out.read_bytes() == b"abcdef"
|
||||
|
||||
# Failure with retries and eventual False.
|
||||
calls = {"n": 0}
|
||||
|
||||
def bad_get(*_a, **_k):
|
||||
calls["n"] += 1
|
||||
raise dataset.requests.RequestException("boom")
|
||||
|
||||
slept = {"n": 0}
|
||||
monkeypatch.setattr(dataset.requests, "get", bad_get)
|
||||
monkeypatch.setattr(dataset.time, "sleep", lambda _s: slept.__setitem__("n", slept["n"] + 1))
|
||||
assert dataset.download_single_file(3) is False
|
||||
assert calls["n"] == 5
|
||||
assert slept["n"] == 4
|
||||
|
||||
# Execute final return False line by skipping attempts loop.
|
||||
monkeypatch.setattr(dataset, "range", lambda *_a, **_k: [], raising=False)
|
||||
assert dataset.download_single_file(4) is False
|
||||
|
||||
|
||||
def test_document_batches(monkeypatch):
|
||||
monkeypatch.setattr(dataloader, "list_parquet_files", lambda: ["/a.parquet", "/b.parquet", "/c.parquet"])
|
||||
monkeypatch.setattr(dataloader, "get_dist_info", lambda: (True, 1, 0, 2))
|
||||
|
||||
class RG:
|
||||
def __init__(self, vals):
|
||||
self._vals = vals
|
||||
|
||||
def column(self, _name):
|
||||
return types.SimpleNamespace(to_pylist=lambda: self._vals)
|
||||
|
||||
class PF:
|
||||
def __init__(self, path):
|
||||
self.path = path
|
||||
# Make first file tiny to trigger continue in resume branch.
|
||||
self.num_row_groups = 1 if "a.parquet" in path else 4
|
||||
|
||||
def read_row_group(self, idx):
|
||||
return RG([f"{self.path}-r{idx}-d0", f"{self.path}-r{idx}-d1"])
|
||||
|
||||
monkeypatch.setattr(dataloader.pq, "ParquetFile", PF)
|
||||
|
||||
# Resume in first file should continue to next file due rg_idx>=num_row_groups.
|
||||
gen = dataloader._document_batches("train", {"pq_idx": 0, "rg_idx": 3, "epoch": 5}, tokenizer_batch_size=1)
|
||||
b1, state1 = next(gen)
|
||||
assert len(b1) == 1
|
||||
assert state1[0] == 1
|
||||
assert state1[2] >= 5
|
||||
|
||||
# Non-resume and val split.
|
||||
gen2 = dataloader._document_batches("val", None, tokenizer_batch_size=2)
|
||||
b2, state2 = next(gen2)
|
||||
assert len(b2) == 2
|
||||
assert state2[0] == 0
|
||||
|
||||
monkeypatch.setattr(dataloader, "list_parquet_files", lambda: [])
|
||||
with pytest.raises(AssertionError):
|
||||
next(dataloader._document_batches("train", None, tokenizer_batch_size=1))
|
||||
|
||||
|
||||
def test_document_batches_resume_cleared_and_epoch_increment(monkeypatch):
|
||||
monkeypatch.setattr(dataloader, "list_parquet_files", lambda: ["/only.parquet", "/val.parquet"])
|
||||
monkeypatch.setattr(dataloader, "get_dist_info", lambda: (True, 0, 0, 2))
|
||||
|
||||
class RG:
|
||||
def __init__(self, vals):
|
||||
self._vals = vals
|
||||
|
||||
def column(self, _name):
|
||||
return types.SimpleNamespace(to_pylist=lambda: self._vals)
|
||||
|
||||
class PF:
|
||||
def __init__(self, _path):
|
||||
self.num_row_groups = 3
|
||||
|
||||
def read_row_group(self, idx):
|
||||
return RG([f"r{idx}"])
|
||||
|
||||
monkeypatch.setattr(dataloader.pq, "ParquetFile", PF)
|
||||
gen = dataloader._document_batches("train", {"pq_idx": 0, "rg_idx": 0, "epoch": 1}, tokenizer_batch_size=1)
|
||||
b1, s1 = next(gen)
|
||||
b2, s2 = next(gen)
|
||||
assert b1 == ["r2"] and s1 == (0, 2, 1)
|
||||
# next epoch yield proves first_pass->False and epoch increment happened
|
||||
assert b2 == ["r0"] and s2[2] >= 2
|
||||
|
||||
|
||||
def test_tokenizing_loader_and_wrapper(monkeypatch):
    """Exercise the stateful best-fit loader, its split validation, and the stateless wrapper."""

    class Tok:
        # Deterministic fake tokenizer: each doc maps to 3 tokens derived
        # from its length, optionally prefixed with the prepend id.
        def get_bos_token_id(self):
            return 7

        def encode(self, doc_batch, prepend=None, num_threads=None):
            del num_threads
            out = []
            for t in doc_batch:
                row = [prepend] if prepend is not None else []
                row.extend([len(t), len(t) + 1, len(t) + 2])
                out.append(row)
            return out

    batches = iter([
        (["a", "bb", "ccc"], (1, 2, 3)),
        (["dddd", "eeeee", "ffffff"], (4, 5, 6)),
    ])
    monkeypatch.setattr(dataloader, "_document_batches", lambda *a, **k: batches)

    gen = dataloader.tokenizing_distributed_data_loader_with_state_bos_bestfit(
        tokenizer=Tok(),
        B=2,
        T=4,
        split="train",
        tokenizer_threads=1,
        tokenizer_batch_size=2,
        device="cpu",
        resume_state_dict=None,
        buffer_size=2,
    )
    x, y, st = next(gen)
    assert x.shape == (2, 4)
    assert y.shape == (2, 4)
    # The state dict must reflect the most recently consumed document batch.
    assert st == {"pq_idx": 4, "rg_idx": 5, "epoch": 6}

    # Invalid split name must be rejected up front.
    with pytest.raises(AssertionError):
        next(
            dataloader.tokenizing_distributed_data_loader_with_state_bos_bestfit(
                tokenizer=Tok(),
                B=1,
                T=2,
                split="bad",
                device="cpu",
            )
        )

    # Stateless wrapper: same tensors, no state dict in the yield.
    batches2 = iter([(["z"], (0, 0, 1))])
    monkeypatch.setattr(dataloader, "_document_batches", lambda *a, **k: batches2)
    gen2 = dataloader.tokenizing_distributed_data_loader_bos_bestfit(
        tokenizer=Tok(),
        B=1,
        T=2,
        split="val",
        device="cpu",
        buffer_size=1,
    )
    x2, y2 = next(gen2)
    assert x2.shape == (1, 2)
    assert y2.shape == (1, 2)
||||
|
||||
|
||||
def test_download_single_file_cleanup_except_and_dataset_main(monkeypatch, tmp_path):
    """Cover download_single_file's cleanup-failure branch and the dataset __main__ block.

    The download is forced to fail (requests.get raises); a stale ``.tmp`` file
    exists and ``os.remove`` is made to raise so the cleanup's own except path
    runs. Then the module's __main__ block is executed with argparse and
    multiprocessing replaced by fakes.
    """
    # Cleanup except path (os.remove raises).
    monkeypatch.setattr(dataset, "DATA_DIR", str(tmp_path))
    monkeypatch.setattr(dataset, "BASE_URL", "http://example.test")
    monkeypatch.setattr(dataset, "range", lambda *_a, **_k: [5], raising=False)
    monkeypatch.setattr(dataset.requests, "get", lambda *_a, **_k: (_ for _ in ()).throw(dataset.requests.RequestException("x")))

    filename = dataset.index_to_filename(9)
    fp = str(tmp_path / filename)
    # FIX: the temp path must mirror the real download target name
    # (previously a corrupted literal "(unknown).tmp" that never matched
    # the path the cleanup code checks).
    tp = str(tmp_path / f"{filename}.tmp")
    with open(tp, "w", encoding="utf-8") as f:
        f.write("y")

    real_exists = dataset.os.path.exists
    calls = {"fp": 0}

    def fake_exists(path):
        # First check of the final path must be False (so the download is
        # attempted); later cleanup checks see True. The tmp file always exists.
        if path == fp:
            calls["fp"] += 1
            return calls["fp"] > 1  # first check in function should be False; cleanup checks True
        if path == tp:
            return True
        return real_exists(path)

    monkeypatch.setattr(dataset.os.path, "exists", fake_exists)
    monkeypatch.setattr(dataset.os, "remove", lambda _p: (_ for _ in ()).throw(OSError("deny")))
    assert dataset.download_single_file(9) is False

    # __main__ block coverage.
    class FakeParser:
        # argparse.ArgumentParser stand-in returning fixed CLI options.
        def __init__(self, *a, **k):
            pass

        def add_argument(self, *a, **k):
            return None

        def parse_args(self):
            return types.SimpleNamespace(num_files=2, num_workers=3)

    class FakePool:
        # multiprocessing.Pool stand-in that "succeeds" for every file id.
        def __init__(self, processes):
            self.processes = processes

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def map(self, fn, ids):
            del fn
            return [True for _ in ids]

    monkeypatch.setattr("argparse.ArgumentParser", FakeParser)
    monkeypatch.setattr("multiprocessing.Pool", FakePool)
    monkeypatch.setenv("NANOCHAT_BASE_DIR", str(tmp_path / "base"))
    runpy.run_module("nanochat.dataset", run_name="__main__")
||||
202
tests/test_engine_extra_new.py
Normal file
202
tests/test_engine_extra_new.py
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
import runpy
|
||||
from contextlib import contextmanager
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
import nanochat.engine as engine
|
||||
|
||||
|
||||
def test_timeout_eval_and_calculator_paths(monkeypatch):
    """Cover timeout/eval_with_timeout success, parse-error and raised-context paths,
    plus the use_calculator input filters (commas ok; powers, symbols, dunders,
    bare strings rejected; simple string methods allowed).
    """
    original_timeout = engine.timeout
    with engine.timeout(1, "1+1"):
        assert 1 + 1 == 2

    assert engine.eval_with_timeout("1+2", max_time=1) == 3
    assert engine.eval_with_timeout("bad +", max_time=1) is None

    @contextmanager
    def boom(_duration, _formula):
        # Force the timeout context itself to raise -> eval returns None.
        raise Exception("t")
        yield  # pragma: no cover

    monkeypatch.setattr(engine, "timeout", boom)
    assert engine.eval_with_timeout("1+1", max_time=1) is None
    monkeypatch.setattr(engine, "timeout", original_timeout)

    assert engine.use_calculator("1,000 + 2") == 1002
    assert engine.use_calculator("2 ** 8") is None
    assert engine.use_calculator("x$y") is None
    assert engine.use_calculator("__import__('os')") is None
    assert engine.use_calculator("'abc'") is None
    assert engine.use_calculator("'ababa'.count('a')") == 3
||||
|
||||
|
||||
def test_timeout_handler_line(monkeypatch):
    """Invoking the SIGALRM handler installed by engine.timeout raises inside the context."""
    captured = {}
    # Intercept the handler registration instead of arming a real alarm.
    monkeypatch.setattr(engine.signal, "signal", lambda _sig, fn: captured.__setitem__("fn", fn))
    monkeypatch.setattr(engine.signal, "alarm", lambda _n: None)
    with pytest.raises(Exception):
        with engine.timeout(1, "slow_expr"):
            captured["fn"](0, None)
|
||||
|
||||
def test_sample_next_token_branches():
    """Greedy, top-k, and full-distribution sampling paths; negative temperature rejected."""
    scores = torch.tensor([[0.1, 0.2, 0.9]], dtype=torch.float32)
    generator = torch.Generator(device="cpu").manual_seed(123)

    # temperature=0 is greedy: always picks the argmax (index 2).
    greedy = engine.sample_next_token(scores, generator, temperature=0.0, top_k=None)
    assert greedy.shape == (1, 1)
    assert greedy.item() == 2

    # top_k=2 restricts sampling to the two highest logits.
    restricted = engine.sample_next_token(scores, generator, temperature=1.0, top_k=2)
    assert restricted.shape == (1, 1)
    assert restricted.item() in {1, 2}

    # Full softmax sampling: only the shape is deterministic.
    unrestricted = engine.sample_next_token(scores, generator, temperature=1.0, top_k=None)
    assert unrestricted.shape == (1, 1)

    # Negative temperature is invalid.
    with pytest.raises(AssertionError):
        engine.sample_next_token(scores, generator, temperature=-1.0)
||||
|
||||
|
||||
def test_kv_cache_prefill_assertions():
    """KVCache.prefill rejects non-empty destinations, head mismatches, and short sequences."""
    # Destination already holds tokens -> prefill must refuse.
    dst = engine.KVCache(1, 1, 4, 2, 1, "cpu", torch.float32)
    src = engine.KVCache(1, 1, 2, 2, 1, "cpu", torch.float32)
    src.advance(1)
    dst.advance(1)
    with pytest.raises(AssertionError):
        dst.prefill(src)  # non-empty destination

    # Head-count mismatch between the two caches.
    wide = engine.KVCache(1, 2, 2, 2, 1, "cpu", torch.float32)
    narrow = engine.KVCache(1, 1, 2, 2, 1, "cpu", torch.float32)
    with pytest.raises(AssertionError):
        wide.prefill(narrow)  # head mismatch

    # Destination sequence length shorter than the source's.
    short = engine.KVCache(1, 1, 1, 2, 1, "cpu", torch.float32)
    longer = engine.KVCache(1, 1, 2, 2, 1, "cpu", torch.float32)
    with pytest.raises(AssertionError):
        short.prefill(longer)  # seq len too small
||||
|
||||
|
||||
class _TinyTok:
    """Minimal tokenizer stub exposing the special tokens the Engine's tool-forcing relies on."""

    def __init__(self):
        # Special-token vocabulary; ids chosen to be distinct from encode() output.
        self.special = {
            "<|python_start|>": 100,
            "<|python_end|>": 101,
            "<|output_start|>": 102,
            "<|output_end|>": 103,
            "<|assistant_end|>": 104,
            "<|bos|>": 0,
        }

    def encode_special(self, s):
        return self.special[s]

    def get_bos_token_id(self):
        return 0

    def encode(self, s, prepend=None):
        # "2" encodes to [9]; everything else to [55]; optional prepend id first.
        out = [9] if s == "2" else [55]
        if prepend is not None:
            return [prepend] + out
        return out

    def decode(self, ids):
        # Any non-empty id list decodes to the fixed expression "1+1".
        return "1+1" if ids else ""
||||
|
||||
|
||||
class _TinyModel:
    """Tiny CPU-only model stub: uniform zero logits over a 200-token vocab."""

    def __init__(self):
        self.config = SimpleNamespace(n_kv_head=1, n_head=1, n_embd=4, n_layer=1, sequence_len=32)
        self._device = torch.device("cpu")
        self.vocab = 200

    def get_device(self):
        return self._device

    def forward(self, ids, kv_cache=None):
        # Mimic the real model's contract: advance the cache by the number of
        # new tokens and return (B, T, vocab) logits — all zeros here.
        b, t = ids.shape
        if kv_cache is not None:
            kv_cache.advance(t)
        return torch.zeros((b, t, self.vocab), dtype=torch.float32)
||||
|
||||
|
||||
def test_engine_generate_tool_forcing(monkeypatch):
    """Python tool use: python_end triggers the calculator and forces the output tokens.

    Sampling is scripted: python_start, an expression token, python_end, then
    assistant_end repeatedly. After python_end the engine must enqueue the
    forced output_start/result/output_end tokens (mask 0) before consuming the
    sampled assistant_end.
    """
    tok = _TinyTok()
    model = _TinyModel()
    eng = engine.Engine(model, tok)

    sampled = [
        [100],  # python_start
        [55],   # expression token
        [101],  # python_end -> enqueue forced output tokens
        [104],  # ignored while forced queue drains
        [104],
        [104],
        [104],  # finally consumed as assistant_end
    ]
    idx = {"i": 0}

    def fake_sample(_logits, _rng, _temperature, _top_k):
        # Replay the scripted token sequence, clamping at the last entry.
        row = sampled[min(idx["i"], len(sampled) - 1)]
        idx["i"] += 1
        return torch.tensor([[row[0]]], dtype=torch.long)

    calls = {"expr": None}
    monkeypatch.setattr(engine, "sample_next_token", fake_sample)
    monkeypatch.setattr(engine, "use_calculator", lambda expr: calls.__setitem__("expr", expr) or 2)

    rows = list(eng.generate([1, 2], num_samples=1, max_tokens=10, temperature=0.0))
    flat_tokens = [r[0][0] for r in rows]
    flat_masks = [r[1][0] for r in rows]
    assert 102 in flat_tokens and 103 in flat_tokens  # output_start/output_end forced
    assert 0 in flat_masks  # forced token mask
    assert calls["expr"] == "1+1"
||||
|
||||
|
||||
def test_engine_generate_batch_and_input_validation(monkeypatch):
    """generate stops on BOS, generate_batch collects rows, non-list input is rejected."""
    tok = _TinyTok()
    model = _TinyModel()
    eng = engine.Engine(model, tok)

    # Stop immediately via BOS; also exercises max_tokens=None kv-length hint path.
    monkeypatch.setattr(
        engine,
        "sample_next_token",
        lambda *_a, **_k: torch.tensor([[tok.get_bos_token_id()]], dtype=torch.long),
    )
    rows = list(eng.generate([7], num_samples=1, max_tokens=None, temperature=0.0))
    assert len(rows) == 1

    results, masks = eng.generate_batch([7], num_samples=1, max_tokens=3, temperature=0.0)
    assert results == [[7]]
    assert masks == [[0]]

    # Token input must be a list of ints, not a string.
    with pytest.raises(AssertionError):
        list(eng.generate("bad", num_samples=1, max_tokens=1))  # type: ignore[arg-type]
||||
|
||||
|
||||
def test_engine_main_block_runs(monkeypatch):
    """Smoke-run nanochat.engine's __main__ block with device/model loading stubbed out."""
    # Patch external entrypoints used by the __main__ block.
    monkeypatch.setattr("nanochat.common.autodetect_device_type", lambda: "cpu")
    monkeypatch.setattr("nanochat.common.compute_init", lambda _d: (False, 0, 0, 1, torch.device("cpu")))

    class MainTok(_TinyTok):
        # Fixed two-token encoding so the demo prompt is deterministic.
        def encode(self, s, prepend=None):
            out = [11, 12]
            return ([prepend] + out) if prepend is not None else out

        def decode(self, ids):
            return "x"

    class MainModel(_TinyModel):
        # Raw-model streaming generate used by the demo loop.
        def generate(self, tokens, max_tokens, temperature=1.0, top_k=None, seed=42):
            del tokens, max_tokens, temperature, top_k, seed
            yield 11
            yield 12

    monkeypatch.setattr("nanochat.checkpoint_manager.load_model", lambda *a, **k: (MainModel(), MainTok(), {}))
    # __main__ calls cuda.synchronize for timing; make it a no-op on CPU.
    monkeypatch.setattr(engine.torch.cuda, "synchronize", lambda: None)
    runpy.run_module("nanochat.engine", run_name="__main__")
||||
235
tests/test_execution_new.py
Normal file
235
tests/test_execution_new.py
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
import builtins
|
||||
import io
|
||||
import os
|
||||
from contextlib import contextmanager
|
||||
|
||||
import pytest
|
||||
|
||||
import nanochat.execution as execution
|
||||
|
||||
|
||||
def test_execution_result_repr_variants():
    """repr(ExecutionResult) surfaces success/stdout plus all failure-related fields."""
    ok = execution.ExecutionResult(success=True, stdout="ok\n", stderr="")
    ok_repr = repr(ok)
    assert "success=True" in ok_repr
    assert "stdout='ok\\n'" in ok_repr

    failed = execution.ExecutionResult(success=False, stdout="", stderr="err", error="bad", timeout=True, memory_exceeded=True)
    failed_repr = repr(failed)
    for fragment in ("timeout=True", "memory_exceeded=True", "error='bad'", "stderr='err'"):
        assert fragment in failed_repr
||||
|
||||
|
||||
def test_write_only_string_io_and_capture_io():
    """WriteOnlyStringIO refuses all read operations; capture_io redirects stdout/stderr."""
    sink = execution.WriteOnlyStringIO()
    assert sink.readable() is False
    # Every read entry point must raise.
    for read_op in (sink.read, sink.readline, sink.readlines):
        with pytest.raises(IOError):
            read_op()

    with execution.capture_io() as (out, err):
        print("hello")
        assert "hello" in out.getvalue()
        assert err.getvalue() == ""
||||
|
||||
|
||||
def test_time_limit_and_chdir_and_tempdir(tmp_path):
    """time_limit enforces its deadline; chdir and create_tempdir restore/clean up state."""
    with execution.time_limit(0.5):
        x = 1 + 1
        assert x == 2

    # Sleeping past the limit must raise TimeoutException.
    with pytest.raises(execution.TimeoutException):
        with execution.time_limit(0.01):
            import time
            time.sleep(0.05)

    cwd = os.getcwd()
    with execution.chdir("."):
        assert os.getcwd() == cwd
    with execution.chdir(str(tmp_path)):
        assert os.getcwd() == str(tmp_path)
    # Original working directory restored on exit.
    assert os.getcwd() == cwd

    with execution.create_tempdir() as d:
        assert os.path.isdir(d)
    # Temp dir removed after the context exits.
    assert not os.path.exists(d)
||||
|
||||
|
||||
def test_reliability_guard(monkeypatch):
    """reliability_guard sets rlimits, disables faulthandler, and neuters dangerous builtins.

    All process-mutating dependencies (platform, resource, os, shutil,
    subprocess) are replaced by fakes so the guard can run inside the test
    process; the few globals it really clobbers are restored at the end.
    """
    # Snapshot a small subset we assert was modified/restored in test cleanup.
    old_exit = builtins.exit
    old_quit = builtins.quit

    class FakePlatform:
        # Pretend to be Linux so the rlimit branch is taken.
        @staticmethod
        def uname():
            class U:
                system = "Linux"
            return U()

    calls = {"rlimit": 0, "disabled": 0}
    monkeypatch.setattr(execution, "platform", FakePlatform())

    class FakeResource:
        # resource module stand-in that just counts setrlimit calls.
        RLIMIT_AS = 1
        RLIMIT_DATA = 2
        RLIMIT_STACK = 3

        @staticmethod
        def setrlimit(_k, _v):
            calls["rlimit"] += 1

    import sys
    keys = ["ipdb", "joblib", "resource", "psutil", "tkinter"]
    old_modules = {k: sys.modules.get(k, None) for k in keys}

    class FakeOS:
        # os stand-in exposing every attribute the guard nulls or replaces.
        def __init__(self):
            self.environ = {}
            self.kill = lambda *a, **k: None
            self.system = lambda *a, **k: None
            self.putenv = lambda *a, **k: None
            self.remove = lambda *a, **k: None
            self.removedirs = lambda *a, **k: None
            self.rmdir = lambda *a, **k: None
            self.fchdir = lambda *a, **k: None
            self.setuid = lambda *a, **k: None
            self.fork = lambda *a, **k: None
            self.forkpty = lambda *a, **k: None
            self.killpg = lambda *a, **k: None
            self.rename = lambda *a, **k: None
            self.renames = lambda *a, **k: None
            self.truncate = lambda *a, **k: None
            self.replace = lambda *a, **k: None
            self.unlink = lambda *a, **k: None
            self.fchmod = lambda *a, **k: None
            self.fchown = lambda *a, **k: None
            self.chmod = lambda *a, **k: None
            self.chown = lambda *a, **k: None
            self.chroot = lambda *a, **k: None
            self.lchflags = lambda *a, **k: None
            self.lchmod = lambda *a, **k: None
            self.lchown = lambda *a, **k: None
            self.getcwd = lambda: "."
            self.chdir = lambda _p: None

    class FakeShutil:
        def __init__(self):
            self.rmtree = lambda *a, **k: None
            self.move = lambda *a, **k: None
            self.chown = lambda *a, **k: None

    class FakeSubprocess:
        def __init__(self):
            self.Popen = object

    monkeypatch.setitem(sys.modules, "resource", FakeResource)
    monkeypatch.setitem(sys.modules, "os", FakeOS())
    monkeypatch.setitem(sys.modules, "shutil", FakeShutil())
    monkeypatch.setitem(sys.modules, "subprocess", FakeSubprocess())
    monkeypatch.setattr(execution.faulthandler, "disable", lambda: calls.__setitem__("disabled", 1))
    execution.reliability_guard(1024)
    assert calls["rlimit"] == 3
    assert calls["disabled"] == 1
    assert builtins.exit is None
    assert builtins.quit is None

    # Restore minimal globals so current test process remains healthy.
    builtins.exit = old_exit
    builtins.quit = old_quit
    if isinstance(__builtins__, dict):
        __builtins__["help"] = help
    else:
        setattr(__builtins__, "help", help)
    for k, v in old_modules.items():
        if v is None:
            sys.modules.pop(k, None)
        else:
            sys.modules[k] = v
||||
|
||||
|
||||
def test_unsafe_execute_success_and_error_paths(monkeypatch):
    """_unsafe_execute result dict: success, MemoryError, generic exception, and timeout."""
    # Avoid mutating process-wide dangerous globals during direct _unsafe_execute calls.
    monkeypatch.setattr(execution, "reliability_guard", lambda maximum_memory_bytes=None: None)

    out = {}
    execution._unsafe_execute("print('hi')", timeout=1.0, maximum_memory_bytes=None, result_dict=out)
    assert out["success"] is True
    assert "hi" in out["stdout"]
    assert out["stderr"] == ""

    out2 = {}
    execution._unsafe_execute("raise MemoryError('oom')", timeout=1.0, maximum_memory_bytes=None, result_dict=out2)
    assert out2["memory_exceeded"] is True
    assert "Memory limit exceeded" in out2["error"]

    out3 = {}
    execution._unsafe_execute("raise ValueError('bad')", timeout=1.0, maximum_memory_bytes=None, result_dict=out3)
    assert out3["success"] is False
    assert "ValueError: bad" in out3["error"]

    @contextmanager
    def boom(_seconds):
        # Simulate the time limit firing before any code runs.
        raise execution.TimeoutException("Timed out!")
        yield  # pragma: no cover

    monkeypatch.setattr(execution, "time_limit", boom)
    out4 = {}
    execution._unsafe_execute("print('x')", timeout=1.0, maximum_memory_bytes=None, result_dict=out4)
    assert out4["timeout"] is True
    assert out4["error"] == "Execution timed out"
||||
|
||||
|
||||
def test_execute_code_paths(monkeypatch):
    """execute_code: real subprocess success, hung-process kill, and empty-result paths."""
    # Normal path with real machinery.
    r = execution.execute_code("print('ok')", timeout=1.0)
    assert r.success is True
    assert "ok" in r.stdout

    # p.is_alive() path.
    class FakeDict(dict):
        pass

    class FakeManager:
        def dict(self):
            return FakeDict()

    class FakeProcessAlive:
        # Process stand-in that never finishes, so execute_code must kill it.
        def __init__(self, target, args):
            self.target = target
            self.args = args
            self.killed = False

        def start(self):
            return None

        def join(self, timeout):
            del timeout
            return None

        def is_alive(self):
            return True

        def kill(self):
            self.killed = True

    monkeypatch.setattr(execution.multiprocessing, "Manager", lambda: FakeManager())
    monkeypatch.setattr(execution.multiprocessing, "Process", FakeProcessAlive)
    r2 = execution.execute_code("print(1)")
    assert r2.timeout is True
    assert "process killed" in r2.error

    # Empty result_dict path.
    class FakeProcessDone(FakeProcessAlive):
        # Finishes without ever writing to the shared result dict.
        def is_alive(self):
            return False

    monkeypatch.setattr(execution.multiprocessing, "Process", FakeProcessDone)
    r3 = execution.execute_code("print(2)")
    assert r3.success is False
    assert "no result returned" in r3.error
||||
110
tests/test_flash_attention_new.py
Normal file
110
tests/test_flash_attention_new.py
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
import sys
|
||||
import types
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
import nanochat.flash_attention as fa
|
||||
|
||||
|
||||
def test_load_flash_attention_3_paths(monkeypatch):
    """_load_flash_attention_3: no CUDA, old capability, missing kernels pkg, and success."""
    monkeypatch.setattr(fa.torch.cuda, "is_available", lambda: False)
    assert fa._load_flash_attention_3() is None

    # CUDA present but compute capability < 9.0 -> FA3 unsupported.
    monkeypatch.setattr(fa.torch.cuda, "is_available", lambda: True)
    monkeypatch.setattr(fa.torch.cuda, "get_device_capability", lambda: (8, 0))
    assert fa._load_flash_attention_3() is None

    # Hopper capability but the "kernels" package import fails.
    monkeypatch.setattr(fa.torch.cuda, "get_device_capability", lambda: (9, 0))
    monkeypatch.setitem(sys.modules, "kernels", None)
    assert fa._load_flash_attention_3() is None

    class K:
        # kernels package stand-in providing a loadable interface object.
        @staticmethod
        def get_kernel(_name):
            return types.SimpleNamespace(flash_attn_interface="iface")

    monkeypatch.setitem(sys.modules, "kernels", K)
    assert fa._load_flash_attention_3() == "iface"
||||
|
||||
|
||||
def test_use_fa3_override(monkeypatch):
    """_use_fa3 honors the override knob and asserts when fa3 is forced but unavailable."""
    monkeypatch.setattr(fa, "HAS_FA3", False)
    monkeypatch.setattr(fa, "_override_impl", "fa3")
    with pytest.raises(AssertionError):
        fa._use_fa3()

    monkeypatch.setattr(fa, "_override_impl", "sdpa")
    assert fa._use_fa3() is False

    # No override -> falls back to availability flag.
    monkeypatch.setattr(fa, "_override_impl", None)
    monkeypatch.setattr(fa, "HAS_FA3", True)
    assert fa._use_fa3() is True
||||
|
||||
|
||||
def test_sdpa_attention_branches():
    """_sdpa_attention preserves query shape across dense, single-step, and windowed cases."""
    # (q_len, kv_len, window_size) triples covering the three masking branches.
    cases = [
        (4, 4, (-1, -1)),  # dense causal attention
        (1, 6, (2, 0)),    # single query step against a longer cache
        (3, 6, (3, 0)),    # multi-query windowed decode
    ]
    for q_len, kv_len, window in cases:
        q = torch.randn(1, 2, q_len, 3)
        k = torch.randn(1, 2, kv_len, 3)
        v = torch.randn(1, 2, kv_len, 3)
        out = fa._sdpa_attention(q, k, v, window_size=window, enable_gqa=False)
        assert out.shape == q.shape
||||
|
||||
|
||||
def test_public_flash_attn_paths(monkeypatch):
    """Public flash_attn wrappers dispatch to FA3 when forced and to SDPA otherwise."""
    q = torch.randn(1, 3, 2, 4)
    k = torch.randn(1, 3, 2, 4)
    v = torch.randn(1, 3, 2, 4)

    # FA3 path.
    class FakeFA3:
        # Distinguishable fake kernels: +1 for the plain func, +2 for kvcache.
        @staticmethod
        def flash_attn_func(q, k, v, causal, window_size):
            del k, v, causal, window_size
            return q + 1

        @staticmethod
        def flash_attn_with_kvcache(q, k_cache, v_cache, k=None, v=None, cache_seqlens=None, causal=False, window_size=(-1, -1)):
            del k_cache, v_cache, k, v, cache_seqlens, causal, window_size
            return q + 2

    monkeypatch.setattr(fa, "_fa3", FakeFA3())
    monkeypatch.setattr(fa, "HAS_FA3", True)
    monkeypatch.setattr(fa, "_override_impl", "fa3")
    out1 = fa.flash_attn_func(q, k, v, causal=True, window_size=(3, 0))
    assert torch.allclose(out1, q + 1)

    k_cache = torch.zeros(1, 8, 2, 4)
    v_cache = torch.zeros(1, 8, 2, 4)
    cache_seqlens = torch.zeros(1, dtype=torch.int32)
    out2 = fa.flash_attn_with_kvcache(q[:, :1], k_cache, v_cache, k=k[:, :1], v=v[:, :1], cache_seqlens=cache_seqlens, causal=True, window_size=(3, 0))
    assert torch.allclose(out2, q[:, :1] + 2)

    # SDPA path with cache insert/update.
    monkeypatch.setattr(fa, "_override_impl", "sdpa")
    out3 = fa.flash_attn_func(q, k, v, causal=True, window_size=(3, 0))
    assert out3.shape == q.shape

    out4 = fa.flash_attn_with_kvcache(
        q[:, :1],
        k_cache,
        v_cache,
        k=k[:, :1],
        v=v[:, :1],
        cache_seqlens=cache_seqlens,
        causal=True,
        window_size=(2, 0),
    )
    assert out4.shape == q[:, :1].shape
||||
|
||||
83
tests/test_fp8_new.py
Normal file
83
tests/test_fp8_new.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
import pytest
|
||||
|
||||
import nanochat.fp8 as fp8
|
||||
|
||||
|
||||
def test_to_fp8_and_col_major():
    """_to_fp8 yields an fp8 tensor plus a scalar inverse scale; _to_col_major keeps values/shape."""
    src = torch.tensor([[0.0, 1.0], [2.0, -3.0]], dtype=torch.float32)
    quantized, inv_scale = fp8._to_fp8(src, torch.float8_e4m3fn)
    assert quantized.dtype == torch.float8_e4m3fn
    assert inv_scale.ndim == 0

    # Column-major conversion must not change logical shape or contents.
    col_major = fp8._to_col_major(torch.arange(6, dtype=torch.float32).view(2, 3))
    assert col_major.shape == (2, 3)
    assert torch.equal(col_major, torch.arange(6, dtype=torch.float32).view(2, 3))
||||
|
||||
|
||||
def test_float8_matmul_forward_backward(monkeypatch):
    """_Float8Matmul forward shape and backward gradients, with _scaled_mm faked in fp32."""

    def fake_scaled_mm(a, b, scale_a=None, scale_b=None, out_dtype=None, use_fast_accum=None):
        # Plain fp32 matmul stand-in for torch._scaled_mm (CPU has no fp8 mm).
        del scale_a, scale_b, use_fast_accum
        out = a.float() @ b.float()
        return out.to(out_dtype if out_dtype is not None else out.dtype)

    monkeypatch.setattr(fp8.torch, "_scaled_mm", fake_scaled_mm)

    x = torch.randn(3, 4, dtype=torch.float32, requires_grad=True)
    w = torch.randn(5, 4, dtype=torch.float32, requires_grad=True)
    y = fp8._Float8Matmul.apply(x, w)
    assert y.shape == (3, 5)
    y.sum().backward()
    assert x.grad is not None
    assert w.grad is not None
||||
|
||||
|
||||
def test_float8_linear_and_config(monkeypatch):
    """Float8Linear forward (with/without bias, autocast), from_float sharing, and config recipes."""
    # Replace the fp8 matmul with a plain fp32 one so the layer runs on CPU.
    monkeypatch.setattr(fp8._Float8Matmul, "apply", lambda input_2d, weight: input_2d.float() @ weight.float().t())
    layer = fp8.Float8Linear(4, 3, bias=True)
    inp = torch.randn(2, 5, 4)
    out = layer(inp)
    assert out.shape == (2, 5, 3)

    # Autocast branch.
    monkeypatch.setattr(fp8.torch, "is_autocast_enabled", lambda: True)
    monkeypatch.setattr(fp8.torch, "get_autocast_gpu_dtype", lambda: torch.float16)
    out2 = layer(inp)
    assert out2.shape == (2, 5, 3)

    layer2 = fp8.Float8Linear(4, 3, bias=False)
    out3 = layer2(inp)
    assert out3.shape == (2, 5, 3)

    # from_float must share (not copy) the source layer's parameters.
    src = nn.Linear(4, 3, bias=True)
    converted = fp8.Float8Linear.from_float(src)
    assert isinstance(converted, fp8.Float8Linear)
    assert converted.weight is src.weight
    assert converted.bias is src.bias

    assert isinstance(fp8.Float8LinearConfig.from_recipe_name("tensorwise"), fp8.Float8LinearConfig)
    with pytest.raises(ValueError):
        fp8.Float8LinearConfig.from_recipe_name("rowwise")
||||
|
||||
|
||||
def test_convert_to_float8_training(monkeypatch):
    """convert_to_float8_training swaps nested Linears and honors module_filter_fn."""
    # from_float stub: build a fresh Float8Linear with matching dimensions.
    monkeypatch.setattr(fp8.Float8Linear, "from_float", classmethod(lambda cls, mod: cls(mod.in_features, mod.out_features, bias=(mod.bias is not None))))

    m = nn.Sequential(
        nn.Linear(4, 4),
        nn.ReLU(),
        nn.Sequential(nn.Linear(4, 2), nn.Linear(2, 1)),
    )
    out = fp8.convert_to_float8_training(m)
    # All Linears converted, including those inside the nested Sequential.
    assert isinstance(out[0], fp8.Float8Linear)
    assert isinstance(out[2][0], fp8.Float8Linear)
    assert isinstance(out[2][1], fp8.Float8Linear)

    m2 = nn.Sequential(nn.Linear(4, 4), nn.Linear(4, 4))
    out2 = fp8.convert_to_float8_training(
        m2,
        module_filter_fn=lambda _mod, fqn: fqn.endswith("0"),
    )
    # Filter selects only the module whose qualified name ends in "0".
    assert isinstance(out2[0], fp8.Float8Linear)
    assert isinstance(out2[1], nn.Linear)
||||
242
tests/test_gpt_new.py
Normal file
242
tests/test_gpt_new.py
Normal file
|
|
@ -0,0 +1,242 @@
|
|||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
import nanochat.gpt as gpt
|
||||
|
||||
|
||||
def _cfg(**overrides):
    """Build a small GPTConfig suited to fast CPU tests, with optional field overrides."""
    params = {
        "sequence_len": 8,
        "vocab_size": 13,
        "n_layer": 2,
        "n_head": 2,
        "n_kv_head": 1,
        "n_embd": 4,
        "window_pattern": "SL",
    }
    params.update(overrides)
    return gpt.GPTConfig(**params)
||||
|
||||
|
||||
def test_norm_has_ve_and_rotary():
    """norm preserves shape; has_ve flags value-embedding layers; rotary emb keeps shape."""
    t = torch.randn(2, 3, 4)
    assert gpt.norm(t).shape == t.shape

    assert gpt.has_ve(1, 4) is True
    assert gpt.has_ve(0, 4) is False

    # Identity rotation (cos=1, sin=0) must still return the input shape.
    heads = torch.randn(1, 2, 2, 4)
    cos_t = torch.ones(1, 2, 1, 2)
    sin_t = torch.zeros(1, 2, 1, 2)
    rotated = gpt.apply_rotary_emb(heads, cos_t, sin_t)
    assert rotated.shape == heads.shape
||||
|
||||
|
||||
def test_attention_forward_paths(monkeypatch):
    """CausalSelfAttention: training path (flash_attn_func), kv-cache decode path with
    value embeddings, and the constructor's divisibility assertions.
    """
    cfg = _cfg(n_layer=1)
    attn = gpt.CausalSelfAttention(cfg, layer_idx=0)

    calls = {"func": 0, "kvcache": 0}

    def fake_func(q, k, v, causal, window_size):
        # Identity attention; counts invocations of the training kernel.
        del k, v, causal, window_size
        calls["func"] += 1
        return q

    def fake_kvcache(q, k_cache, v_cache, k=None, v=None, cache_seqlens=None, causal=False, window_size=(-1, -1)):
        # Identity attention; counts invocations of the decode kernel.
        del k_cache, v_cache, k, v, cache_seqlens, causal, window_size
        calls["kvcache"] += 1
        return q

    monkeypatch.setattr(gpt.flash_attn, "flash_attn_func", fake_func)
    monkeypatch.setattr(gpt.flash_attn, "flash_attn_with_kvcache", fake_kvcache)

    x = torch.randn(2, 3, cfg.n_embd)
    cos = torch.ones(1, 3, 1, cfg.n_embd // cfg.n_head // 2)
    sin = torch.zeros_like(cos)
    y = attn(x, ve=None, cos_sin=(cos, sin), window_size=(cfg.sequence_len, 0), kv_cache=None)
    assert y.shape == x.shape
    assert calls["func"] == 1

    class Cache:
        # Minimal kv-cache stub tracking how far the attention advanced it.
        def __init__(self):
            self.n_layers = 1
            self.cache_seqlens = torch.zeros(2, dtype=torch.int32)
            self.k = torch.zeros(2, 10, cfg.n_kv_head, cfg.n_embd // cfg.n_head)
            self.v = torch.zeros(2, 10, cfg.n_kv_head, cfg.n_embd // cfg.n_head)
            self.advanced = 0

        def get_layer_cache(self, _idx):
            return self.k, self.v

        def advance(self, t):
            self.advanced += t

    ve = torch.randn(2, 3, cfg.n_kv_head * (cfg.n_embd // cfg.n_head))
    # Install a value-embedding gate so the ve branch executes.
    attn.ve_gate_channels = cfg.n_embd
    attn.ve_gate = torch.nn.Linear(cfg.n_embd, cfg.n_kv_head, bias=False)
    cache = Cache()
    y2 = attn(x, ve=ve, cos_sin=(cos, sin), window_size=(cfg.sequence_len, 0), kv_cache=cache)
    assert y2.shape == x.shape
    assert calls["kvcache"] == 1
    assert cache.advanced == 3

    # n_embd must divide by n_head; n_head must divide by n_kv_head.
    with pytest.raises(AssertionError):
        gpt.CausalSelfAttention(_cfg(n_embd=5, n_head=2), layer_idx=0)
    with pytest.raises(AssertionError):
        gpt.CausalSelfAttention(_cfg(n_head=3, n_kv_head=2), layer_idx=0)
||||
|
||||
|
||||
def test_mlp_and_block(monkeypatch):
    """MLP and a full Block both preserve the (B, T, n_embd) activation shape."""
    cfg = _cfg(n_layer=1)
    mlp = gpt.MLP(cfg)
    x = torch.randn(2, 3, cfg.n_embd)
    y = mlp(x)
    assert y.shape == x.shape

    # Identity attention kernel so the Block runs on CPU.
    monkeypatch.setattr(gpt.flash_attn, "flash_attn_func", lambda q, k, v, causal, window_size: q)
    block = gpt.Block(cfg, layer_idx=0)
    cos = torch.ones(1, 3, 1, cfg.n_embd // cfg.n_head // 2)
    sin = torch.zeros_like(cos)
    out = block(x, ve=None, cos_sin=(cos, sin), window_size=(cfg.sequence_len, 0), kv_cache=None)
    assert out.shape == x.shape
||||
|
||||
|
||||
def test_gpt_core_helpers_and_forward(monkeypatch):
    """GPT helpers (window sizes, rotary precompute, flops/param counts) and forward
    in both logits and loss modes, plus the max-sequence-length assertion.
    """
    monkeypatch.setattr(gpt, "print0", lambda *a, **k: None)
    # Identity attention kernels so the model runs on CPU.
    monkeypatch.setattr(gpt.flash_attn, "flash_attn_func", lambda q, k, v, causal, window_size: q)
    monkeypatch.setattr(gpt.flash_attn, "flash_attn_with_kvcache", lambda q, k_cache, v_cache, **kw: q)

    model = gpt.GPT(_cfg(), pad_vocab_size_to=8)
    model.init_weights()
    # Replace any ve gates with CPU-friendly Linear modules.
    for block in model.transformer.h:
        if block.attn.ve_gate is not None:
            block.attn.ve_gate_channels = model.config.n_embd
            block.attn.ve_gate = torch.nn.Linear(model.config.n_embd, model.config.n_kv_head, bias=False)

    ws = model._compute_window_sizes(_cfg(window_pattern="SLS", n_layer=4))
    assert len(ws) == 4
    # Final layer is always long (full sequence_len) regardless of the pattern.
    assert ws[-1][0] == 8
    with pytest.raises(AssertionError):
        model._compute_window_sizes(_cfg(window_pattern="Q"))

    cos, sin = model._precompute_rotary_embeddings(4, 2, base=1000, device=torch.device("cpu"))
    assert cos.shape == sin.shape

    assert model.get_device().type == "cpu"
    assert model.estimate_flops() > 0
    counts = model.num_scaling_params()
    assert counts["total"] == sum(p.numel() for p in model.parameters())

    idx = torch.randint(0, model.config.vocab_size, (2, 4), dtype=torch.long)
    logits = model.forward(idx)
    assert logits.shape == (2, 4, model.config.vocab_size)

    targets = torch.randint(0, model.config.vocab_size, (2, 4), dtype=torch.long)
    loss = model.forward(idx, targets=targets, loss_reduction="mean")
    assert loss.ndim == 0

    # Sequences longer than the precomputed rotary table must be rejected.
    with pytest.raises(AssertionError):
        model.forward(torch.randint(0, model.config.vocab_size, (1, model.cos.size(1) + 1)))
||||
|
||||
|
||||
def test_setup_optimizer_paths(monkeypatch):
    """Cover both the single-process and distributed branches of GPT.setup_optimizer."""
    model = gpt.GPT(_cfg(), pad_vocab_size_to=8)

    # Stand-in optimizer that just records the param groups it was given.
    class FakeOpt:
        def __init__(self, groups):
            self.param_groups = groups

    monkeypatch.setattr(gpt, "print0", lambda *a, **k: None)
    monkeypatch.setattr(gpt, "MuonAdamW", FakeOpt)
    monkeypatch.setattr(gpt, "DistMuonAdamW", FakeOpt)

    # Non-distributed path: (ddp=False, rank=0, local_rank=0, world_size=1).
    monkeypatch.setattr(gpt, "get_dist_info", lambda: (False, 0, 0, 1))
    opt1 = model.setup_optimizer()
    assert isinstance(opt1, FakeOpt)
    # setup_optimizer is expected to stamp every group with its initial LR.
    assert all("initial_lr" in g for g in opt1.param_groups)

    # Distributed path: world_size=2 should pick the Dist optimizer class.
    monkeypatch.setattr(gpt, "get_dist_info", lambda: (True, 0, 0, 2))
    opt2 = model.setup_optimizer()
    assert isinstance(opt2, FakeOpt)
||||
|
||||
def test_generate_paths(monkeypatch):
    """Drive GPT.generate with a rigged forward pass to pin greedy and top-k sampling."""
    model = gpt.GPT(_cfg(), pad_vocab_size_to=8)

    # Force predictable logits: token 2 always dominates, token 3 second best.
    def fake_forward(ids, *args, **kwargs):
        b, t = ids.shape
        v = model.config.vocab_size
        logits = torch.zeros((b, t, v), dtype=torch.float32)
        logits[..., 2] = 2.0
        logits[..., 3] = 1.0
        return logits

    monkeypatch.setattr(model, "forward", fake_forward)
    tokens = [1, 2]
    # temperature=0 -> greedy argmax, so token 2 every step.
    out_temp0 = list(model.generate(tokens, max_tokens=3, temperature=0.0, top_k=None, seed=42))
    assert out_temp0 == [2, 2, 2]

    # top_k=1 restricts sampling to the single best token -> same outcome.
    out_topk = list(model.generate(tokens, max_tokens=2, temperature=1.0, top_k=1, seed=42))
    assert out_topk == [2, 2]

    # Non-list input must be rejected up front.
    with pytest.raises(AssertionError):
        list(model.generate("bad", max_tokens=1))  # type: ignore[arg-type]
||||
|
||||
def test_init_weights_cuda_cast_branch_with_fake_self(monkeypatch):
    # This executes the CUDA-only cast lines in init_weights without requiring a real CUDA device.
    # Tensor whose .device.type claims to be "cuda" so the cast branch triggers.
    class FakeTensor:
        def __init__(self):
            self.device = SimpleNamespace(type="cuda")

    # Parameter-like object that counts .to(dtype) calls instead of casting.
    class FakeParam:
        def __init__(self):
            self.weight = FakeTensor()
            self.to_calls = 0

        def to(self, dtype):
            del dtype
            self.to_calls += 1

    # Attention/MLP stand-ins expose just the .weight attributes init_weights touches.
    class FakeAttn:
        def __init__(self):
            self.c_q = SimpleNamespace(weight=object())
            self.c_k = SimpleNamespace(weight=object())
            self.c_v = SimpleNamespace(weight=object())
            self.c_proj = SimpleNamespace(weight=object())
            self.ve_gate = SimpleNamespace(weight=object())

    class FakeMLP:
        def __init__(self):
            self.c_fc = SimpleNamespace(weight=object())
            self.c_proj = SimpleNamespace(weight=object())

    class FakeBlock:
        def __init__(self):
            self.attn = FakeAttn()
            self.mlp = FakeMLP()

    # Minimal tensor stub for the lambda buffers init_weights fills in-place.
    class Fillable:
        def fill_(self, _v):
            return self

        def numel(self):
            return 1

    # Assemble a fake `self` mirroring just the attributes init_weights reads.
    fake_self = SimpleNamespace()
    fake_self.config = _cfg()
    fake_self.rotary_seq_len = 8
    fake_self.transformer = SimpleNamespace(
        wte=FakeParam(),
        h=[FakeBlock(), FakeBlock()],
    )
    fake_self.lm_head = SimpleNamespace(weight=object())
    fake_self.resid_lambdas = Fillable()
    fake_self.x0_lambdas = Fillable()
    ve0 = FakeParam()
    fake_self.value_embeds = {"0": ve0}
    fake_self._precompute_rotary_embeddings = lambda *a, **k: (torch.zeros(1, 1, 1, 1, dtype=torch.bfloat16), torch.zeros(1, 1, 1, 1, dtype=torch.bfloat16))

    # Neutralize the real initializers — the objects above are not tensors.
    monkeypatch.setattr(gpt.torch.nn.init, "normal_", lambda *a, **k: None)
    monkeypatch.setattr(gpt.torch.nn.init, "uniform_", lambda *a, **k: None)
    monkeypatch.setattr(gpt.torch.nn.init, "zeros_", lambda *a, **k: None)

    # Call the unbound method with the fake instance; both the token embedding
    # and the value embedding should get exactly one dtype cast.
    gpt.GPT.init_weights(fake_self)
    assert fake_self.transformer.wte.to_calls == 1
    assert ve0.to_calls == 1
||||
56
tests/test_loss_eval_new.py
Normal file
56
tests/test_loss_eval_new.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
import math
|
||||
|
||||
import torch
|
||||
|
||||
import nanochat.loss_eval as loss_eval
|
||||
|
||||
|
||||
class FakeModel:
    """Minimal model stand-in: lives on CPU and yields a constant per-token loss."""

    def get_device(self):
        """Report the device the fake model pretends to live on."""
        return torch.device("cpu")

    def __call__(self, x, y, loss_reduction="none"):
        """Ignore the inputs and hand back a loss of 1.0 for every target token."""
        del x, loss_reduction
        # Return a deterministic per-token loss tensor matching y shape.
        per_token = torch.ones_like(y, dtype=torch.float32)
        return per_token
|
||||
|
||||
def test_evaluate_bpb_paths(monkeypatch):
    """Walk evaluate_bpb through its masked, fast, distributed, and degenerate paths."""
    model = FakeModel()
    # Per-token byte lengths indexed by token id.
    token_bytes = torch.tensor([0, 1, 2, 3, 4], dtype=torch.int64)

    # Path with ignored targets (<0).
    batches1 = [
        (
            torch.tensor([[0, 1], [2, 3]], dtype=torch.long),
            torch.tensor([[1, -1], [2, 0]], dtype=torch.long),
        )
    ]
    monkeypatch.setattr(loss_eval.dist, "is_initialized", lambda: False)
    out1 = loss_eval.evaluate_bpb(model, batches1, steps=1, token_bytes=token_bytes)
    assert out1 > 0.0

    # Fast path without ignored targets.
    batches2 = [
        (
            torch.tensor([[0, 1]], dtype=torch.long),
            torch.tensor([[3, 4]], dtype=torch.long),
        )
    ]
    out2 = loss_eval.evaluate_bpb(model, batches2, steps=1, token_bytes=token_bytes)
    assert out2 > 0.0

    # Distributed reduction path: all_reduce should fire twice (loss sum and byte count
    # — presumably; verify against nanochat.loss_eval).
    calls = {"n": 0}
    monkeypatch.setattr(loss_eval.dist, "is_initialized", lambda: True)
    monkeypatch.setattr(loss_eval.dist, "get_world_size", lambda: 2)
    monkeypatch.setattr(loss_eval.dist, "all_reduce", lambda *_a, **_k: calls.__setitem__("n", calls["n"] + 1))
    out3 = loss_eval.evaluate_bpb(model, batches2, steps=1, token_bytes=token_bytes)
    assert out3 > 0.0
    assert calls["n"] == 2

    # No counted bytes -> inf.
    zero_bytes = torch.zeros_like(token_bytes)
    out4 = loss_eval.evaluate_bpb(model, batches2, steps=1, token_bytes=zero_bytes)
    assert out4 == float("inf")
||||
|
||||
229
tests/test_optim_new.py
Normal file
229
tests/test_optim_new.py
Normal file
|
|
@ -0,0 +1,229 @@
|
|||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
import nanochat.optim as optim
|
||||
|
||||
|
||||
def test_fused_step_functions_via_wrapped():
    """Run the fused optimizer kernels eagerly via __wrapped__ (bypassing torch.compile)."""
    # AdamW fused kernel.
    p = torch.ones(2, 2, dtype=torch.float32)
    g = torch.full((2, 2), 0.1, dtype=torch.float32)
    exp_avg = torch.zeros_like(p)
    exp_avg_sq = torch.zeros_like(p)
    # Args: param, grad, exp_avg, exp_avg_sq, step, lr, beta1, beta2, eps, weight_decay.
    optim.adamw_step_fused.__wrapped__(
        p,
        g,
        exp_avg,
        exp_avg_sq,
        torch.tensor(1.0),
        torch.tensor(0.01),
        torch.tensor(0.9),
        torch.tensor(0.99),
        torch.tensor(1e-8),
        torch.tensor(0.01),
    )
    # The in-place update must have moved the parameter away from all-ones.
    assert not torch.equal(p, torch.ones_like(p))

    # Muon fused kernel: tall matrix branch.
    grads_tall = torch.randn(2, 4, 2, dtype=torch.float32)
    params_tall = torch.randn(2, 4, 2, dtype=torch.float32)
    m_tall = torch.zeros_like(grads_tall)
    v_tall = torch.zeros(2, 4, 1, dtype=torch.float32)
    # red_dim=-1 selects the reduction over columns (tall case).
    optim.muon_step_fused.__wrapped__(
        grads_tall,
        params_tall,
        m_tall,
        v_tall,
        torch.tensor(0.9),
        torch.tensor(0.01),
        torch.tensor(0.0),
        torch.tensor(0.95),
        2,
        -1,
    )

    # Muon fused kernel: wide matrix branch.
    grads_wide = torch.randn(2, 2, 4, dtype=torch.float32)
    params_wide = torch.randn(2, 2, 4, dtype=torch.float32)
    m_wide = torch.zeros_like(grads_wide)
    v_wide = torch.zeros(2, 1, 4, dtype=torch.float32)
    # red_dim=-2 selects the reduction over rows (wide case).
    optim.muon_step_fused.__wrapped__(
        grads_wide,
        params_wide,
        m_wide,
        v_wide,
        torch.tensor(0.9),
        torch.tensor(0.01),
        torch.tensor(0.0),
        torch.tensor(0.95),
        2,
        -2,
    )
||||
|
||||
def test_muon_adamw_optimizer_paths(monkeypatch):
    """Cover MuonAdamW.step for both group kinds plus its error/early-return paths."""
    # Replace both fused kernels with simple SGD-like updates so effects are observable.
    monkeypatch.setattr(
        optim,
        "adamw_step_fused",
        lambda p, grad, exp_avg, exp_avg_sq, step_t, lr_t, beta1_t, beta2_t, eps_t, wd_t: p.data.add_(-grad * lr_t.item()),
    )
    monkeypatch.setattr(
        optim,
        "muon_step_fused",
        lambda stacked_grads, stacked_params, momentum_buffer, second_momentum_buffer, momentum_t, lr_t, wd_t, beta2_t, ns_steps, red_dim: stacked_params.add_(-stacked_grads * lr_t.item()),
    )

    p1 = torch.nn.Parameter(torch.ones(4, 4))
    p2 = torch.nn.Parameter(torch.ones(4, 4))
    p1.grad = torch.full_like(p1, 0.1)
    p2.grad = torch.full_like(p2, 0.2)

    # A gradient-less parameter exercises the skip-if-no-grad branch.
    p1_nograd = torch.nn.Parameter(torch.ones(4, 4))
    groups = [
        dict(kind="adamw", params=[p1_nograd, p1], lr=0.01, betas=(0.9, 0.95), eps=1e-8, weight_decay=0.0),
        dict(kind="muon", params=[p2], lr=0.02, momentum=0.9, ns_steps=2, beta2=None, weight_decay=0.0),
    ]
    opt = optim.MuonAdamW(groups)
    opt.step()
    # Both stubbed updates subtract a positive quantity, so the means drop below 1.
    assert p1.data.mean().item() < 1.0
    assert p2.data.mean().item() < 1.0

    # _step_muon early return on empty params.
    opt._step_muon(dict(kind="muon", params=[], lr=0.01, momentum=0.9, ns_steps=1, beta2=0.95, weight_decay=0.0))

    # Unknown group kind must raise.
    bad = optim.MuonAdamW([dict(kind="bad", params=[p1], lr=0.1)])
    with pytest.raises(ValueError):
        bad.step()
||||
|
||||
class _Future:
|
||||
def __init__(self):
|
||||
self.waited = 0
|
||||
|
||||
def wait(self):
|
||||
self.waited += 1
|
||||
return None
|
||||
|
||||
|
||||
class _AsyncOp:
    """Fake async collective handle exposing one shared pre-built future."""

    def __init__(self):
        # Single future instance handed to every get_future() caller.
        self.f = _Future()

    def get_future(self):
        """Return the shared future, mimicking torch's async work handles."""
        return self.f
||||
def test_dist_muon_adamw_components(monkeypatch):
    """Exercise DistMuonAdamW's reduce/compute/gather phases and step() with fake collectives.

    Fix: removed an unused local (`flat = inp.reshape(...)`) from the
    reduce_scatter fake — it was computed and never read.
    """
    # Fake distributed ops.
    def fake_all_reduce(t, op=None, async_op=False):
        del op, async_op
        return _AsyncOp()

    def fake_reduce_scatter_tensor(out, inp, op=None, async_op=False):
        del op, async_op
        # take the leading chunk
        out.copy_(inp[: out.shape[0]])
        return _AsyncOp()

    def fake_all_gather_into_tensor(out, inp, async_op=False):
        del async_op
        # repeat input chunks into output prefix as needed
        n = inp.shape[0]
        out.data[:n].copy_(inp.detach())
        if out.shape[0] > n:
            out.data[n:].zero_()
        return _AsyncOp()

    monkeypatch.setattr(optim.dist, "all_reduce", fake_all_reduce)
    monkeypatch.setattr(optim.dist, "reduce_scatter_tensor", fake_reduce_scatter_tensor)
    monkeypatch.setattr(optim.dist, "all_gather_into_tensor", fake_all_gather_into_tensor)
    monkeypatch.setattr(optim.dist, "get_rank", lambda: 1)
    monkeypatch.setattr(optim.dist, "get_world_size", lambda: 2)
    # Replace the fused kernels with simple observable updates.
    monkeypatch.setattr(
        optim,
        "adamw_step_fused",
        lambda p, grad, exp_avg, exp_avg_sq, step_t, lr_t, beta1_t, beta2_t, eps_t, wd_t: p.data.add_(-grad * lr_t.item()),
    )
    monkeypatch.setattr(
        optim,
        "muon_step_fused",
        lambda stacked_grads, stacked_params, momentum_buffer, second_momentum_buffer, momentum_t, lr_t, wd_t, beta2_t, ns_steps, red_dim: stacked_params.add_(-stacked_grads * lr_t.item()),
    )

    # One small and one large AdamW parameter to hit both sharding paths.
    p_small = torch.nn.Parameter(torch.ones(8), requires_grad=False)
    p_small.grad = torch.full_like(p_small, 0.1)
    p_large = torch.nn.Parameter(torch.ones(1024, 2), requires_grad=False)
    p_large.grad = torch.full_like(p_large, 0.1)

    # Three identically-shaped Muon params so chunking across world_size=2 is uneven.
    p_mu0 = torch.nn.Parameter(torch.ones(2, 2), requires_grad=False)
    p_mu1 = torch.nn.Parameter(torch.ones(2, 2), requires_grad=False)
    p_mu2 = torch.nn.Parameter(torch.ones(2, 2), requires_grad=False)
    for p in (p_mu0, p_mu1, p_mu2):
        p.grad = torch.full_like(p, 0.2)

    groups = [
        dict(kind="adamw", params=[p_small, p_large], lr=0.01, betas=(0.9, 0.95), eps=1e-8, weight_decay=0.0),
        dict(kind="muon", params=[p_mu0, p_mu1, p_mu2], lr=0.02, momentum=0.9, ns_steps=2, beta2=0.95, weight_decay=0.0),
    ]
    opt = optim.DistMuonAdamW(groups)

    # _reduce_adamw small and large paths.
    info_adam = opt._reduce_adamw(groups[0], world_size=2)
    assert len(info_adam["param_infos"]) == 2
    assert info_adam["param_infos"][p_small]["is_small"] is True
    assert info_adam["param_infos"][p_large]["is_small"] is False

    # Assertion on non-divisible shape[0] for large params.
    bad_p = torch.nn.Parameter(torch.ones(1025, 2), requires_grad=False)
    bad_p.grad = torch.ones_like(bad_p)
    with pytest.raises(AssertionError):
        opt._reduce_adamw(dict(kind="adamw", params=[bad_p], lr=0.01, betas=(0.9, 0.95), eps=1e-8, weight_decay=0.0), world_size=2)

    info_mu = opt._reduce_muon(groups[1], world_size=2)
    assert "chunk_size" in info_mu

    gather_list = []
    opt._compute_adamw(groups[0], info_adam, gather_list, rank=1, world_size=2)
    assert len(gather_list) >= 1

    # rank=1 owns a chunk; rank=3 is beyond the chunk count (no-work branch).
    opt._compute_muon(groups[1], info_mu, gather_list, rank=1)
    assert len(gather_list) >= 2
    opt._compute_muon(groups[1], info_mu, gather_list, rank=3)

    # _finish_gathers handles both params=None and params=list branches.
    gather_list.append(dict(future=_Future(), params=None))
    opt._finish_gathers(gather_list)

    # step() happy path.
    opt.step()

    # step() unknown kind.
    bad = optim.DistMuonAdamW([dict(kind="bad", params=[p_small], lr=0.1)])
    with pytest.raises(ValueError):
        bad.step()
|
||||
def test_dist_optimizer_phase2_unknown_kind(monkeypatch):
    """Force an unknown group kind between the reduce and compute phases of step()."""
    p = torch.nn.Parameter(torch.ones(2, 2), requires_grad=False)
    p.grad = torch.ones_like(p)
    group = dict(kind="muon", params=[p], lr=0.01, momentum=0.9, ns_steps=1, beta2=0.95, weight_decay=0.0)
    opt = optim.DistMuonAdamW([group])

    monkeypatch.setattr(optim.dist, "get_rank", lambda: 0)
    monkeypatch.setattr(optim.dist, "get_world_size", lambda: 1)

    # The reduce phase mutates the group's kind so the compute phase sees "bad".
    def mutate_kind(g, world_size):
        del world_size
        g["kind"] = "bad"
        return {}

    monkeypatch.setattr(opt, "_reduce_muon", mutate_kind)
    monkeypatch.setattr(opt, "_compute_adamw", lambda *a, **k: None)
    monkeypatch.setattr(opt, "_compute_muon", lambda *a, **k: None)
    monkeypatch.setattr(opt, "_finish_gathers", lambda *a, **k: None)

    # Phase 2 must reject the now-unknown kind.
    with pytest.raises(ValueError):
        opt.step()
|
||||
206
tests/test_report_new.py
Normal file
206
tests/test_report_new.py
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
import datetime
|
||||
import runpy
|
||||
import os
|
||||
import types
|
||||
|
||||
import pytest
|
||||
|
||||
import nanochat.report as report
|
||||
|
||||
|
||||
def test_run_command_paths(monkeypatch):
    """Pin run_command's handling of output, empty output, failure, and exceptions."""
    # Minimal subprocess.run result stand-in.
    class R:
        def __init__(self, stdout, returncode):
            self.stdout = stdout
            self.returncode = returncode

    # Non-empty stdout is stripped and returned (even with returncode=1 —
    # NOTE(review): looks like run_command keys off stdout, not returncode; confirm).
    monkeypatch.setattr(report.subprocess, "run", lambda *a, **k: R("x\n", 1))
    assert report.run_command("x") == "x"

    # Whitespace-only stdout strips down to the empty string.
    monkeypatch.setattr(report.subprocess, "run", lambda *a, **k: R(" ", 0))
    assert report.run_command("x") == ""

    # Empty stdout with a failing returncode yields None.
    monkeypatch.setattr(report.subprocess, "run", lambda *a, **k: R("", 2))
    assert report.run_command("x") is None

    # Any exception from subprocess.run is swallowed and maps to None.
    def boom(*a, **k):
        raise RuntimeError("fail")

    monkeypatch.setattr(report.subprocess, "run", boom)
    assert report.run_command("x") is None
|
||||
|
||||
|
||||
def test_git_gpu_system_cost_helpers(monkeypatch):
    """Cover get_git_info, get_gpu_info, get_system_info, and estimate_cost."""
    # get_git_info issues four shell commands in order: commit, branch, status, message.
    seq = iter(["abc123", "main", " M x", "subject line\nbody"])
    monkeypatch.setattr(report, "run_command", lambda _cmd: next(seq))
    git = report.get_git_info()
    assert git["commit"] == "abc123"
    assert git["branch"] == "main"
    assert git["dirty"] is True
    assert git["message"] == "subject line"

    # No CUDA -> minimal dict.
    monkeypatch.setattr(report.torch.cuda, "is_available", lambda: False)
    assert report.get_gpu_info() == {"available": False}

    # Device-properties stand-in with just the fields get_gpu_info reads.
    class Props:
        def __init__(self, name, total_memory):
            self.name = name
            self.total_memory = total_memory

    monkeypatch.setattr(report.torch.cuda, "is_available", lambda: True)
    monkeypatch.setattr(report.torch.cuda, "device_count", lambda: 2)
    monkeypatch.setattr(report.torch.cuda, "get_device_properties", lambda i: Props(f"GPU{i}", 8 * 1024**3))
    monkeypatch.setattr(report.torch, "version", types.SimpleNamespace(cuda="12.4"))
    g = report.get_gpu_info()
    assert g["available"] is True
    assert g["count"] == 2
    assert g["cuda_version"] == "12.4"

    # get_system_info pulls from socket/platform/torch/psutil and the environment.
    monkeypatch.setattr(report.socket, "gethostname", lambda: "host")
    monkeypatch.setattr(report.platform, "system", lambda: "Linux")
    monkeypatch.setattr(report.platform, "python_version", lambda: "3.10.0")
    monkeypatch.setattr(report.torch, "__version__", "2.x")
    monkeypatch.setattr(report.psutil, "cpu_count", lambda logical=False: 8 if not logical else 16)
    monkeypatch.setattr(report.psutil, "virtual_memory", lambda: types.SimpleNamespace(total=32 * 1024**3))
    monkeypatch.setenv("USER", "alice")
    monkeypatch.setenv("NANOCHAT_BASE_DIR", "/n")
    monkeypatch.setattr(report.os, "getcwd", lambda: "/cwd")
    s = report.get_system_info()
    assert s["hostname"] == "host"
    assert s["cpu_count_logical"] == 16
    assert s["nanochat_base_dir"] == "/n"

    # Cost: no GPUs -> None; otherwise a flat hourly rate scaled by runtime.
    assert report.estimate_cost({"available": False}) is None
    c1 = report.estimate_cost({"available": True, "count": 2, "names": ["H100"], "memory_gb": [80]}, runtime_hours=3)
    assert c1["hourly_rate"] == 6.0
    assert c1["estimated_total"] == 18.0
    c2 = report.estimate_cost({"available": True, "count": 3, "names": ["Unknown"], "memory_gb": [1]})
    assert c2["hourly_rate"] == 6.0
|
||||
|
||||
def test_generate_header(monkeypatch, tmp_path):
    """Render the report header with stubbed git/GPU/system/cost info, then the no-GPU branch."""
    monkeypatch.chdir(tmp_path)
    # Two-line lockfile so the "Dependencies (uv.lock lines)" figure is 2.
    (tmp_path / "uv.lock").write_text("a\nb\n", encoding="utf-8")
    monkeypatch.setattr(report, "get_git_info", lambda: {"branch": "b", "commit": "c", "dirty": False, "message": "m"})
    monkeypatch.setattr(report, "get_gpu_info", lambda: {"available": True, "count": 1, "names": ["A100"], "memory_gb": [40], "cuda_version": "12"})
    monkeypatch.setattr(report, "get_system_info", lambda: {"platform": "Linux", "cpu_count": 8, "cpu_count_logical": 16, "memory_gb": 31.5, "python_version": "3.10", "torch_version": "2.9"})
    monkeypatch.setattr(report, "estimate_cost", lambda _gpu: {"hourly_rate": 1.23, "gpu_type": "A100", "estimated_total": None})

    # Fake the two git shell pipelines used for the code-bloat stats.
    def fake_run(cmd):
        if "git ls-files" in cmd and "| xargs" not in cmd:
            return "a.py\nb.md"
        if "| xargs wc -lc" in cmd:
            return " 10 100 total"
        return ""

    monkeypatch.setattr(report, "run_command", fake_run)
    h = report.generate_header()
    assert "# nanochat training report" in h
    assert "Branch: b" in h
    assert "Hourly Rate: $1.23/hour" in h
    assert "Dependencies (uv.lock lines): 2" in h

    # No GPU and no uv.lock branch.
    os.remove(tmp_path / "uv.lock")
    monkeypatch.setattr(report, "get_gpu_info", lambda: {"available": False})
    monkeypatch.setattr(report, "estimate_cost", lambda _gpu: None)
    h2 = report.generate_header()
    assert "GPUs: None available" in h2
|
||||
|
||||
def test_slug_extract_timestamp_helpers():
    """Pin the small pure helpers: slugify, extract, and extract_timestamp."""
    assert report.slugify("Hello World") == "hello-world"
    # extract accepts a single key or a list of keys.
    section = "a: 1\nb: 2\nc: 3"
    assert report.extract(section, "b") == {"b": "2"}
    assert report.extract(section, ["a", "c"]) == {"a": "1", "c": "3"}

    # extract_timestamp parses the line after the prefix; bad or missing input -> None.
    content = "x\ntimestamp: 2026-01-02 03:04:05\nz"
    dt = report.extract_timestamp(content, "timestamp:")
    assert dt == datetime.datetime(2026, 1, 2, 3, 4, 5)
    assert report.extract_timestamp("timestamp: not-a-time", "timestamp:") is None
    assert report.extract_timestamp("bad", "timestamp:") is None
|
||||
|
||||
|
||||
def test_report_log_generate_reset_and_get_report(monkeypatch, tmp_path):
    """End-to-end Report lifecycle: log a section, generate, regenerate without header, reset."""
    r = report.Report(str(tmp_path))
    # log() formats ints with thousands separators and floats to 4 decimals.
    p = r.log("My Section", [{"i": 12345, "f": 1.23456, "s": "x"}, "", None, "plain\n"])
    text = (tmp_path / "my-section.md").read_text(encoding="utf-8")
    assert p.endswith("my-section.md")
    assert "- i: 12,345" in text
    assert "- f: 1.2346" in text
    assert "plain" in text

    # Prepare files for generate().
    header = tmp_path / "header.md"
    header.write_text(
        "Run started: 2026-01-01 00:00:00\n\n### Bloat\n- Lines: 10\n\n",
        encoding="utf-8",
    )
    (tmp_path / "base-model-evaluation.md").write_text(
        "timestamp: 2026-01-01 01:00:00\nCORE: 0.5\n",
        encoding="utf-8",
    )
    (tmp_path / "chat-evaluation-sft.md").write_text(
        "timestamp: 2026-01-01 02:00:00\nARC-Easy: 0.7\nChatCORE: 0.8\n",
        encoding="utf-8",
    )
    (tmp_path / "chat-evaluation-rl.md").write_text(
        "timestamp: 2026-01-01 03:00:00\nGSM8K: 0.9\n",
        encoding="utf-8",
    )
    # Run generate() from a different cwd to prove output paths are report-dir relative.
    cwd = tmp_path / "cwd"
    cwd.mkdir()
    monkeypatch.chdir(cwd)
    out = r.generate()
    rep = (tmp_path / "report.md").read_text(encoding="utf-8")
    assert out.endswith("report.md")
    assert "## Summary" in rep
    # Wall clock = last section timestamp (03:00) minus header start... no:
    # 2h0m here, spanning start 00:00 to the sft timestamp 02:00 — TODO confirm which
    # timestamps bound the wall-clock computation in nanochat.report.
    assert "Total wall clock time: 2h0m" in rep
    assert "| CORE" in rep

    # Missing header path + reset.
    (tmp_path / "header.md").unlink()
    out2 = r.generate()
    rep2 = (tmp_path / "report.md").read_text(encoding="utf-8")
    assert out2.endswith("report.md")
    assert "Total wall clock time: unknown" in rep2

    # reset() rewrites header.md using generate_header plus a fresh start line.
    monkeypatch.setattr(report, "generate_header", lambda: "H\n")
    r.reset()
    assert (tmp_path / "header.md").exists()
    assert "Run started:" in (tmp_path / "header.md").read_text(encoding="utf-8")

    # DummyReport is a no-op sink.
    d = report.DummyReport()
    assert d.log() is None
    assert d.reset() is None

    # get_report() rank 0 and non-zero branches.
    monkeypatch.setattr("nanochat.common.get_dist_info", lambda: (True, 0, 0, 2))
    monkeypatch.setattr("nanochat.common.get_base_dir", lambda: str(tmp_path))
    got = report.get_report()
    assert isinstance(got, report.Report)

    monkeypatch.setattr("nanochat.common.get_dist_info", lambda: (True, 1, 1, 2))
    got2 = report.get_report()
    assert isinstance(got2, report.DummyReport)
|
||||
|
||||
|
||||
def test_report_main_block_generate_and_reset(monkeypatch, tmp_path):
    """Execute nanochat.report's __main__ block for both CLI subcommands via runpy."""
    # Argument parser stand-in that returns whatever args we assign to it.
    class FakeParser:
        def __init__(self, *a, **k):
            pass

        def add_argument(self, *a, **k):
            return None

        def parse_args(self):
            return self.args

    parser = FakeParser()
    parser.args = types.SimpleNamespace(command="generate")
    monkeypatch.setattr("argparse.ArgumentParser", lambda *a, **k: parser)
    # Point the report output at a temp dir so the module writes nowhere real.
    monkeypatch.setenv("NANOCHAT_BASE_DIR", str(tmp_path / "base"))
    runpy.run_module("nanochat.report", run_name="__main__")

    # Re-run the module with the other subcommand.
    parser.args = types.SimpleNamespace(command="reset")
    runpy.run_module("nanochat.report", run_name="__main__")
|
||||
281
tests/test_tokenizer_new.py
Normal file
281
tests/test_tokenizer_new.py
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
import os
|
||||
import pickle
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
import nanochat.tokenizer as tokmod
|
||||
|
||||
|
||||
class _FakeHFCore:
|
||||
def __init__(self, *args, **kwargs):
|
||||
del args, kwargs
|
||||
self.saved = None
|
||||
self.normalizer = None
|
||||
self.pre_tokenizer = None
|
||||
self.decoder = None
|
||||
self.post_processor = None
|
||||
self.trained = False
|
||||
self.special = {"<|bos|>": 100, "<|endoftext|>": 101}
|
||||
|
||||
@classmethod
|
||||
def from_pretrained(cls, _path):
|
||||
return cls()
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, _path):
|
||||
return cls()
|
||||
|
||||
def train_from_iterator(self, text_iterator, trainer):
|
||||
del trainer
|
||||
list(text_iterator)
|
||||
self.trained = True
|
||||
|
||||
def get_vocab_size(self):
|
||||
return 123
|
||||
|
||||
def get_added_tokens_decoder(self):
|
||||
return {0: SimpleNamespace(content="<|bos|>"), 1: SimpleNamespace(content="<|assistant_start|>")}
|
||||
|
||||
def id_to_token(self, id):
|
||||
return f"tok-{id}"
|
||||
|
||||
def encode(self, text, add_special_tokens=False):
|
||||
del add_special_tokens
|
||||
return SimpleNamespace(ids=[len(text), len(text) + 1])
|
||||
|
||||
def token_to_id(self, text):
|
||||
return self.special.get(text)
|
||||
|
||||
def decode(self, ids, skip_special_tokens=False):
|
||||
del skip_special_tokens
|
||||
return "|".join(map(str, ids))
|
||||
|
||||
def save(self, path):
|
||||
self.saved = path
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
f.write("{}")
|
||||
|
||||
|
||||
def test_huggingface_tokenizer_wrapper(monkeypatch, tmp_path):
    """Cover HuggingFaceTokenizer construction, training, encoding, BOS fallback, and save."""
    # Swap every `tokenizers`-library symbol the wrapper touches for a fake.
    monkeypatch.setattr(tokmod, "HFTokenizer", _FakeHFCore)
    monkeypatch.setattr(tokmod, "BPE", lambda **kwargs: kwargs)
    monkeypatch.setattr(tokmod, "Regex", lambda x: f"re:{x}")

    class _ByteLevel:
        @staticmethod
        def alphabet():
            return [b"a"]

        def __init__(self, **kwargs):
            self.kw = kwargs

    fake_pre = SimpleNamespace(
        Split=lambda **kwargs: ("split", kwargs),
        ByteLevel=_ByteLevel,
        Sequence=lambda xs: ("seq", xs),
    )
    monkeypatch.setattr(tokmod, "pre_tokenizers", fake_pre)
    monkeypatch.setattr(tokmod, "decoders", SimpleNamespace(ByteLevel=lambda: "decoder"))
    monkeypatch.setattr(tokmod, "BpeTrainer", lambda **kwargs: ("trainer", kwargs))

    # Three construction routes: pretrained, directory, and fresh training.
    h1 = tokmod.HuggingFaceTokenizer.from_pretrained("gpt2")
    assert isinstance(h1, tokmod.HuggingFaceTokenizer)

    d = tmp_path / "hf"
    d.mkdir()
    (d / "tokenizer.json").write_text("{}", encoding="utf-8")
    h2 = tokmod.HuggingFaceTokenizer.from_directory(str(d))
    assert isinstance(h2, tokmod.HuggingFaceTokenizer)

    h3 = tokmod.HuggingFaceTokenizer.train_from_iterator(iter(["hello", "world"]), vocab_size=300)
    assert isinstance(h3, tokmod.HuggingFaceTokenizer)
    assert h3.tokenizer.trained is True

    assert h3.get_vocab_size() == 123
    assert "<|bos|>" in h3.get_special_tokens()
    assert h3.id_to_token(7) == "tok-7"
    assert h3.encode_special("<|bos|>") == 100
    assert h3.get_bos_token_id() == 100

    # BOS fallback chain: <|bos|> -> <|endoftext|> -> assertion error.
    h3.tokenizer.special["<|bos|>"] = None
    assert h3.get_bos_token_id() == 101
    h3.tokenizer.special["<|endoftext|>"] = None
    with pytest.raises(AssertionError):
        h3.get_bos_token_id()
    h3.tokenizer.special["<|bos|>"] = 100

    # Prepend may be a special-token string; append may be a raw id.
    one = h3._encode_one("abc", prepend="<|bos|>", append=999)
    assert one[0] == 100 and one[-1] == 999
    assert isinstance(h3.encode("abc"), list)
    assert isinstance(h3.encode(["a", "bb"]), list)
    with pytest.raises(ValueError):
        h3.encode(123)  # type: ignore[arg-type]

    assert h3("abc") == h3.encode("abc")
    assert h3.decode([1, 2]) == "1|2"
    save_dir = tmp_path / "save_hf"
    h3.save(str(save_dir))
    assert (save_dir / "tokenizer.json").exists()
||||
|
||||
class _FakeRustTokenizerCore:
|
||||
def __init__(self):
|
||||
self.pattern = "pat"
|
||||
self.mergeable = [(b"a", 0), (b"b", 1)]
|
||||
self.trained = None
|
||||
|
||||
def train_from_iterator(self, it, vocab_size_no_special, pattern):
|
||||
self.trained = (list(it), vocab_size_no_special, pattern)
|
||||
|
||||
def get_pattern(self):
|
||||
return self.pattern
|
||||
|
||||
def get_mergeable_ranks(self):
|
||||
return self.mergeable
|
||||
|
||||
|
||||
class _FakeEnc:
    """Imitation of a tiktoken Encoding with small deterministic toy behavior."""

    def __init__(self):
        self.n_vocab = 300
        self.special_tokens_set = set(tokmod.SPECIAL_TOKENS)
        # Each special token gets a distinct id starting at 256;
        # <|endoftext|> is then overridden to a sentinel value.
        self.special = {name: 256 + i for i, name in enumerate(tokmod.SPECIAL_TOKENS)}
        self.special["<|endoftext|>"] = 999

    def encode_single_token(self, text):
        """Resolve a special token to its id (KeyError if unknown, like tiktoken)."""
        return self.special[text]

    def encode_ordinary(self, text):
        """Deterministic per-character toy encoding into ids 0..49."""
        return [ord(ch) % 50 for ch in text]

    def encode_ordinary_batch(self, text, num_threads=8):
        """Encode each string in turn; threading hint is ignored."""
        del num_threads
        return [self.encode_ordinary(item) for item in text]

    def decode(self, ids):
        """Comma-join the ids so output is easy to assert on."""
        return ",".join(map(str, ids))
||||
def test_rust_bpe_wrapper_and_helpers(monkeypatch, tmp_path):
    """Cover RustBPETokenizer training, encode/decode paths, and save/load round trips."""
    # Replace the Rust trainer and tiktoken factory with fakes.
    monkeypatch.setattr(tokmod.rustbpe, "Tokenizer", _FakeRustTokenizerCore)
    monkeypatch.setattr(tokmod.tiktoken, "Encoding", lambda **kwargs: _FakeEnc())
    monkeypatch.setattr(tokmod.tiktoken, "get_encoding", lambda _name: _FakeEnc())

    r1 = tokmod.RustBPETokenizer.train_from_iterator(iter(["hello", "world"]), vocab_size=300)
    assert isinstance(r1, tokmod.RustBPETokenizer)

    # vocab_size too small to fit the special tokens must be rejected.
    with pytest.raises(AssertionError):
        tokmod.RustBPETokenizer.train_from_iterator(iter(["x"]), vocab_size=10)

    assert r1.get_vocab_size() == 300
    assert "<|bos|>" in r1.get_special_tokens()
    assert isinstance(r1.id_to_token(3), str)
    assert r1.get_bos_token_id() == r1.encode_special("<|bos|>")
    assert r1.encode_special("<|bos|>") == r1.encode_special("<|bos|>")  # cache path

    # Single-string encode with special-token prepend/append.
    s = r1.encode("abc", prepend="<|bos|>", append="<|assistant_end|>")
    assert s[0] == r1.encode_special("<|bos|>")
    assert s[-1] == r1.encode_special("<|assistant_end|>")
    # Batch encode with raw integer prepend/append.
    b = r1.encode(["a", "bc"], prepend=1, append=2, num_threads=1)
    assert b[0][0] == 1 and b[0][-1] == 2
    with pytest.raises(ValueError):
        r1.encode(123)  # type: ignore[arg-type]
    assert r1("abc") == r1.encode("abc")
    assert isinstance(r1.decode([1, 2]), str)

    # Save to a pickle then reload via both factory classmethods.
    save_dir = tmp_path / "save_rust"
    r1.save(str(save_dir))
    assert (save_dir / "tokenizer.pkl").exists()
    r2 = tokmod.RustBPETokenizer.from_directory(str(save_dir))
    assert isinstance(r2, tokmod.RustBPETokenizer)
    r3 = tokmod.RustBPETokenizer.from_pretrained("gpt2")
    assert isinstance(r3, tokmod.RustBPETokenizer)
|
||||
|
||||
def test_render_conversation_and_completion(monkeypatch):
    """Cover conversation rendering: structured/plain assistant turns, completion mode, and validation errors."""
    # NOTE(review): the `monkeypatch` fixture is requested but never used here —
    # candidate for removal.
    r = tokmod.RustBPETokenizer(_FakeEnc(), "<|bos|>")

    # Assistant content as a structured list of typed parts (text/python/python_output).
    conv = {
        "messages": [
            {"role": "system", "content": "system msg"},
            {"role": "user", "content": "hello"},
            {
                "role": "assistant",
                "content": [
                    {"type": "text", "text": "hi"},
                    {"type": "python", "text": "1+1"},
                    {"type": "python_output", "text": "2"},
                ],
            },
        ]
    }
    ids, mask = r.render_conversation(conv, max_tokens=128)
    assert len(ids) == len(mask)
    # At least some tokens must be marked as supervised (mask == 1).
    assert any(m == 1 for m in mask)
    vis = r.visualize_tokenization(ids[:5], mask[:5], with_token_id=True)
    assert "|" in vis

    # Assistant content as a plain string.
    conv_str_assistant = {
        "messages": [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "plain answer"},
        ]
    }
    ids2, mask2 = r.render_conversation(conv_str_assistant, max_tokens=64)
    assert len(ids2) == len(mask2)
    assert any(m == 1 for m in mask2)

    # Completion rendering ends right after the assistant-start marker.
    comp_conv = {
        "messages": [
            {"role": "user", "content": "Q"},
            {"role": "assistant", "content": "A"},
        ]
    }
    out = r.render_for_completion(comp_conv)
    assert out[-1] == r.encode_special("<|assistant_start|>")

    # A conversation cannot open with an assistant turn.
    bad_role = {"messages": [{"role": "assistant", "content": "x"}]}
    with pytest.raises(AssertionError):
        r.render_conversation(bad_role)

    # Assistant content must be str or list — anything else is a ValueError.
    bad_content = {
        "messages": [
            {"role": "user", "content": "x"},
            {"role": "assistant", "content": 123},
        ]
    }
    with pytest.raises(ValueError):
        r.render_conversation(bad_content)

    # Unknown structured part types are rejected.
    bad_part = {
        "messages": [
            {"role": "user", "content": "x"},
            {"role": "assistant", "content": [{"type": "bad", "text": "x"}]},
        ]
    }
    with pytest.raises(ValueError):
        r.render_conversation(bad_part)

    # Completion mode requires the last message to be an assistant turn.
    with pytest.raises(AssertionError):
        r.render_for_completion({"messages": [{"role": "user", "content": "x"}]})
||||
|
||||
def test_get_tokenizer_and_token_bytes(monkeypatch, tmp_path):
    """Round-trip tokenizer loading and the token_bytes.pt cache file.

    Covers three things: get_tokenizer() building from the base dir,
    get_token_bytes() loading the saved tensor, and get_token_bytes()
    asserting when the cache file is missing.
    """
    # Lay out a fake base dir containing a pre-saved token-bytes tensor.
    tokenizer_dir = tmp_path / "tokenizer"
    tokenizer_dir.mkdir()
    saved_bytes = torch.tensor([0, 1, 2], dtype=torch.int64)
    torch.save(saved_bytes, tokenizer_dir / "token_bytes.pt")

    # Point the module at the temp base dir and stub out the Rust tokenizer
    # constructor so no real tokenizer assets are required.
    monkeypatch.setattr("nanochat.common.get_base_dir", lambda: str(tmp_path))
    monkeypatch.setattr(
        tokmod.RustBPETokenizer,
        "from_directory",
        classmethod(lambda cls, d: ("tok", d)),
    )

    result = tokmod.get_tokenizer()
    assert result[0] == "tok"
    assert result[1].endswith("tokenizer")

    loaded = tokmod.get_token_bytes(device="cpu")
    assert torch.equal(loaded, saved_bytes)

    # With the cache file removed, get_token_bytes should trip its assertion.
    os.remove(tokenizer_dir / "token_bytes.pt")
    with pytest.raises(AssertionError):
        tokmod.get_token_bytes()
||||
152
uv.lock
152
uv.lock
|
|
@ -556,6 +556,124 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/0c/58/bd257695f39d05594ca4ad60df5bcb7e32247f9951fd09a9b8edb82d1daa/contourpy-1.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3d1a3799d62d45c18bafd41c5fa05120b96a28079f2393af559b843d1a966a77", size = 225315, upload-time = "2025-07-26T12:02:58.801Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "coverage"
|
||||
version = "7.13.4"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/24/56/95b7e30fa389756cb56630faa728da46a27b8c6eb46f9d557c68fff12b65/coverage-7.13.4.tar.gz", hash = "sha256:e5c8f6ed1e61a8b2dcdf31eb0b9bbf0130750ca79c1c49eb898e2ad86f5ccc91", size = 827239, upload-time = "2026-02-09T12:59:03.86Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/44/d4/7827d9ffa34d5d4d752eec907022aa417120936282fc488306f5da08c292/coverage-7.13.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0fc31c787a84f8cd6027eba44010517020e0d18487064cd3d8968941856d1415", size = 219152, upload-time = "2026-02-09T12:56:11.974Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/35/b0/d69df26607c64043292644dbb9dc54b0856fabaa2cbb1eeee3331cc9e280/coverage-7.13.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a32ebc02a1805adf637fc8dec324b5cdacd2e493515424f70ee33799573d661b", size = 219667, upload-time = "2026-02-09T12:56:13.33Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/82/a4/c1523f7c9e47b2271dbf8c2a097e7a1f89ef0d66f5840bb59b7e8814157b/coverage-7.13.4-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e24f9156097ff9dc286f2f913df3a7f63c0e333dcafa3c196f2c18b4175ca09a", size = 246425, upload-time = "2026-02-09T12:56:14.552Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/02/aa7ec01d1a5023c4b680ab7257f9bfde9defe8fdddfe40be096ac19e8177/coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8041b6c5bfdc03257666e9881d33b1abc88daccaf73f7b6340fb7946655cd10f", size = 248229, upload-time = "2026-02-09T12:56:16.31Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/35/98/85aba0aed5126d896162087ef3f0e789a225697245256fc6181b95f47207/coverage-7.13.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2a09cfa6a5862bc2fc6ca7c3def5b2926194a56b8ab78ffcf617d28911123012", size = 250106, upload-time = "2026-02-09T12:56:18.024Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/96/72/1db59bd67494bc162e3e4cd5fbc7edba2c7026b22f7c8ef1496d58c2b94c/coverage-7.13.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:296f8b0af861d3970c2a4d8c91d48eb4dd4771bcef9baedec6a9b515d7de3def", size = 252021, upload-time = "2026-02-09T12:56:19.272Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9d/97/72899c59c7066961de6e3daa142d459d47d104956db43e057e034f015c8a/coverage-7.13.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e101609bcbbfb04605ea1027b10dc3735c094d12d40826a60f897b98b1c30256", size = 247114, upload-time = "2026-02-09T12:56:21.051Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/39/1f/f1885573b5970235e908da4389176936c8933e86cb316b9620aab1585fa2/coverage-7.13.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:aa3feb8db2e87ff5e6d00d7e1480ae241876286691265657b500886c98f38bda", size = 248143, upload-time = "2026-02-09T12:56:22.585Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a8/cf/e80390c5b7480b722fa3e994f8202807799b85bc562aa4f1dde209fbb7be/coverage-7.13.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4fc7fa81bbaf5a02801b65346c8b3e657f1d93763e58c0abdf7c992addd81a92", size = 246152, upload-time = "2026-02-09T12:56:23.748Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/44/bf/f89a8350d85572f95412debb0fb9bb4795b1d5b5232bd652923c759e787b/coverage-7.13.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:33901f604424145c6e9c2398684b92e176c0b12df77d52db81c20abd48c3794c", size = 249959, upload-time = "2026-02-09T12:56:25.209Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f7/6e/612a02aece8178c818df273e8d1642190c4875402ca2ba74514394b27aba/coverage-7.13.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:bb28c0f2cf2782508a40cec377935829d5fcc3ad9a3681375af4e84eb34b6b58", size = 246416, upload-time = "2026-02-09T12:56:26.475Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cb/98/b5afc39af67c2fa6786b03c3a7091fc300947387ce8914b096db8a73d67a/coverage-7.13.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d107aff57a83222ddbd8d9ee705ede2af2cc926608b57abed8ef96b50b7e8f9", size = 247025, upload-time = "2026-02-09T12:56:27.727Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/51/30/2bba8ef0682d5bd210c38fe497e12a06c9f8d663f7025e9f5c2c31ce847d/coverage-7.13.4-cp310-cp310-win32.whl", hash = "sha256:a6f94a7d00eb18f1b6d403c91a88fd58cfc92d4b16080dfdb774afc8294469bf", size = 221758, upload-time = "2026-02-09T12:56:29.051Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/78/13/331f94934cf6c092b8ea59ff868eb587bc8fe0893f02c55bc6c0183a192e/coverage-7.13.4-cp310-cp310-win_amd64.whl", hash = "sha256:2cb0f1e000ebc419632bbe04366a8990b6e32c4e0b51543a6484ffe15eaeda95", size = 222693, upload-time = "2026-02-09T12:56:30.366Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b4/ad/b59e5b451cf7172b8d1043dc0fa718f23aab379bc1521ee13d4bd9bfa960/coverage-7.13.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d490ba50c3f35dd7c17953c68f3270e7ccd1c6642e2d2afe2d8e720b98f5a053", size = 219278, upload-time = "2026-02-09T12:56:31.673Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f1/17/0cb7ca3de72e5f4ef2ec2fa0089beafbcaaaead1844e8b8a63d35173d77d/coverage-7.13.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:19bc3c88078789f8ef36acb014d7241961dbf883fd2533d18cb1e7a5b4e28b11", size = 219783, upload-time = "2026-02-09T12:56:33.104Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ab/63/325d8e5b11e0eaf6d0f6a44fad444ae58820929a9b0de943fa377fe73e85/coverage-7.13.4-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3998e5a32e62fdf410c0dbd3115df86297995d6e3429af80b8798aad894ca7aa", size = 250200, upload-time = "2026-02-09T12:56:34.474Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/76/53/c16972708cbb79f2942922571a687c52bd109a7bd51175aeb7558dff2236/coverage-7.13.4-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8e264226ec98e01a8e1054314af91ee6cde0eacac4f465cc93b03dbe0bce2fd7", size = 252114, upload-time = "2026-02-09T12:56:35.749Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/eb/c2/7ab36d8b8cc412bec9ea2d07c83c48930eb4ba649634ba00cb7e4e0f9017/coverage-7.13.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a3aa4e7b9e416774b21797365b358a6e827ffadaaca81b69ee02946852449f00", size = 254220, upload-time = "2026-02-09T12:56:37.796Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d6/4d/cf52c9a3322c89a0e6febdfbc83bb45c0ed3c64ad14081b9503adee702e7/coverage-7.13.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:71ca20079dd8f27fcf808817e281e90220475cd75115162218d0e27549f95fef", size = 256164, upload-time = "2026-02-09T12:56:39.016Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/78/e9/eb1dd17bd6de8289df3580e967e78294f352a5df8a57ff4671ee5fc3dcd0/coverage-7.13.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e2f25215f1a359ab17320b47bcdaca3e6e6356652e8256f2441e4ef972052903", size = 250325, upload-time = "2026-02-09T12:56:40.668Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/71/07/8c1542aa873728f72267c07278c5cc0ec91356daf974df21335ccdb46368/coverage-7.13.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d65b2d373032411e86960604dc4edac91fdfb5dca539461cf2cbe78327d1e64f", size = 251913, upload-time = "2026-02-09T12:56:41.97Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/74/d7/c62e2c5e4483a748e27868e4c32ad3daa9bdddbba58e1bc7a15e252baa74/coverage-7.13.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:94eb63f9b363180aff17de3e7c8760c3ba94664ea2695c52f10111244d16a299", size = 249974, upload-time = "2026-02-09T12:56:43.323Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/98/9f/4c5c015a6e98ced54efd0f5cf8d31b88e5504ecb6857585fc0161bb1e600/coverage-7.13.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e856bf6616714c3a9fbc270ab54103f4e685ba236fa98c054e8f87f266c93505", size = 253741, upload-time = "2026-02-09T12:56:45.155Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bd/59/0f4eef89b9f0fcd9633b5d350016f54126ab49426a70ff4c4e87446cabdc/coverage-7.13.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:65dfcbe305c3dfe658492df2d85259e0d79ead4177f9ae724b6fb245198f55d6", size = 249695, upload-time = "2026-02-09T12:56:46.636Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b5/2c/b7476f938deb07166f3eb281a385c262675d688ff4659ad56c6c6b8e2e70/coverage-7.13.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b507778ae8a4c915436ed5c2e05b4a6cecfa70f734e19c22a005152a11c7b6a9", size = 250599, upload-time = "2026-02-09T12:56:48.13Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/34/c3420709d9846ee3785b9f2831b4d94f276f38884032dca1457fa83f7476/coverage-7.13.4-cp311-cp311-win32.whl", hash = "sha256:784fc3cf8be001197b652d51d3fd259b1e2262888693a4636e18879f613a62a9", size = 221780, upload-time = "2026-02-09T12:56:50.479Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/61/08/3d9c8613079d2b11c185b865de9a4c1a68850cfda2b357fae365cf609f29/coverage-7.13.4-cp311-cp311-win_amd64.whl", hash = "sha256:2421d591f8ca05b308cf0092807308b2facbefe54af7c02ac22548b88b95c98f", size = 222715, upload-time = "2026-02-09T12:56:51.815Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/18/1a/54c3c80b2f056164cc0a6cdcb040733760c7c4be9d780fe655f356f433e4/coverage-7.13.4-cp311-cp311-win_arm64.whl", hash = "sha256:79e73a76b854d9c6088fe5d8b2ebe745f8681c55f7397c3c0a016192d681045f", size = 221385, upload-time = "2026-02-09T12:56:53.194Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d1/81/4ce2fdd909c5a0ed1f6dedb88aa57ab79b6d1fbd9b588c1ac7ef45659566/coverage-7.13.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02231499b08dabbe2b96612993e5fc34217cdae907a51b906ac7fca8027a4459", size = 219449, upload-time = "2026-02-09T12:56:54.889Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/96/5238b1efc5922ddbdc9b0db9243152c09777804fb7c02ad1741eb18a11c0/coverage-7.13.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40aa8808140e55dc022b15d8aa7f651b6b3d68b365ea0398f1441e0b04d859c3", size = 219810, upload-time = "2026-02-09T12:56:56.33Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/78/72/2f372b726d433c9c35e56377cf1d513b4c16fe51841060d826b95caacec1/coverage-7.13.4-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5b856a8ccf749480024ff3bd7310adaef57bf31fd17e1bfc404b7940b6986634", size = 251308, upload-time = "2026-02-09T12:56:57.858Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5d/a0/2ea570925524ef4e00bb6c82649f5682a77fac5ab910a65c9284de422600/coverage-7.13.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c048ea43875fbf8b45d476ad79f179809c590ec7b79e2035c662e7afa3192e3", size = 254052, upload-time = "2026-02-09T12:56:59.754Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e8/ac/45dc2e19a1939098d783c846e130b8f862fbb50d09e0af663988f2f21973/coverage-7.13.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b7b38448866e83176e28086674fe7368ab8590e4610fb662b44e345b86d63ffa", size = 255165, upload-time = "2026-02-09T12:57:01.287Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2d/4d/26d236ff35abc3b5e63540d3386e4c3b192168c1d96da5cb2f43c640970f/coverage-7.13.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:de6defc1c9badbf8b9e67ae90fd00519186d6ab64e5cc5f3d21359c2a9b2c1d3", size = 257432, upload-time = "2026-02-09T12:57:02.637Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ec/55/14a966c757d1348b2e19caf699415a2a4c4f7feaa4bbc6326a51f5c7dd1b/coverage-7.13.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7eda778067ad7ffccd23ecffce537dface96212576a07924cbf0d8799d2ded5a", size = 251716, upload-time = "2026-02-09T12:57:04.056Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/77/33/50116647905837c66d28b2af1321b845d5f5d19be9655cb84d4a0ea806b4/coverage-7.13.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e87f6c587c3f34356c3759f0420693e35e7eb0e2e41e4c011cb6ec6ecbbf1db7", size = 253089, upload-time = "2026-02-09T12:57:05.503Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c2/b4/8efb11a46e3665d92635a56e4f2d4529de6d33f2cb38afd47d779d15fc99/coverage-7.13.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8248977c2e33aecb2ced42fef99f2d319e9904a36e55a8a68b69207fb7e43edc", size = 251232, upload-time = "2026-02-09T12:57:06.879Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/51/24/8cd73dd399b812cc76bb0ac260e671c4163093441847ffe058ac9fda1e32/coverage-7.13.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:25381386e80ae727608e662474db537d4df1ecd42379b5ba33c84633a2b36d47", size = 255299, upload-time = "2026-02-09T12:57:08.245Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/03/94/0a4b12f1d0e029ce1ccc1c800944a9984cbe7d678e470bb6d3c6bc38a0da/coverage-7.13.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:ee756f00726693e5ba94d6df2bdfd64d4852d23b09bb0bc700e3b30e6f333985", size = 250796, upload-time = "2026-02-09T12:57:10.142Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/73/44/6002fbf88f6698ca034360ce474c406be6d5a985b3fdb3401128031eef6b/coverage-7.13.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fdfc1e28e7c7cdce44985b3043bc13bbd9c747520f94a4d7164af8260b3d91f0", size = 252673, upload-time = "2026-02-09T12:57:12.197Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/de/c6/a0279f7c00e786be75a749a5674e6fa267bcbd8209cd10c9a450c655dfa7/coverage-7.13.4-cp312-cp312-win32.whl", hash = "sha256:01d4cbc3c283a17fc1e42d614a119f7f438eabb593391283adca8dc86eff1246", size = 221990, upload-time = "2026-02-09T12:57:14.085Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/77/4e/c0a25a425fcf5557d9abd18419c95b63922e897bc86c1f327f155ef234a9/coverage-7.13.4-cp312-cp312-win_amd64.whl", hash = "sha256:9401ebc7ef522f01d01d45532c68c5ac40fb27113019b6b7d8b208f6e9baa126", size = 222800, upload-time = "2026-02-09T12:57:15.944Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/47/ac/92da44ad9a6f4e3a7debd178949d6f3769bedca33830ce9b1dcdab589a37/coverage-7.13.4-cp312-cp312-win_arm64.whl", hash = "sha256:b1ec7b6b6e93255f952e27ab58fbc68dcc468844b16ecbee881aeb29b6ab4d8d", size = 221415, upload-time = "2026-02-09T12:57:17.497Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/db/23/aad45061a31677d68e47499197a131eea55da4875d16c1f42021ab963503/coverage-7.13.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b66a2da594b6068b48b2692f043f35d4d3693fb639d5ea8b39533c2ad9ac3ab9", size = 219474, upload-time = "2026-02-09T12:57:19.332Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a5/70/9b8b67a0945f3dfec1fd896c5cefb7c19d5a3a6d74630b99a895170999ae/coverage-7.13.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3599eb3992d814d23b35c536c28df1a882caa950f8f507cef23d1cbf334995ac", size = 219844, upload-time = "2026-02-09T12:57:20.66Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/97/fd/7e859f8fab324cef6c4ad7cff156ca7c489fef9179d5749b0c8d321281c2/coverage-7.13.4-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:93550784d9281e374fb5a12bf1324cc8a963fd63b2d2f223503ef0fd4aa339ea", size = 250832, upload-time = "2026-02-09T12:57:22.007Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e4/dc/b2442d10020c2f52617828862d8b6ee337859cd8f3a1f13d607dddda9cf7/coverage-7.13.4-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b720ce6a88a2755f7c697c23268ddc47a571b88052e6b155224347389fdf6a3b", size = 253434, upload-time = "2026-02-09T12:57:23.339Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/88/6728a7ad17428b18d836540630487231f5470fb82454871149502f5e5aa2/coverage-7.13.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7b322db1284a2ed3aa28ffd8ebe3db91c929b7a333c0820abec3d838ef5b3525", size = 254676, upload-time = "2026-02-09T12:57:24.774Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7c/bc/21244b1b8cedf0dff0a2b53b208015fe798d5f2a8d5348dbfece04224fff/coverage-7.13.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4594c67d8a7c89cf922d9df0438c7c7bb022ad506eddb0fdb2863359ff78242", size = 256807, upload-time = "2026-02-09T12:57:26.125Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/97/a0/ddba7ed3251cff51006737a727d84e05b61517d1784a9988a846ba508877/coverage-7.13.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:53d133df809c743eb8bce33b24bcababb371f4441340578cd406e084d94a6148", size = 251058, upload-time = "2026-02-09T12:57:27.614Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9b/55/e289addf7ff54d3a540526f33751951bf0878f3809b47f6dfb3def69c6f7/coverage-7.13.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76451d1978b95ba6507a039090ba076105c87cc76fc3efd5d35d72093964d49a", size = 252805, upload-time = "2026-02-09T12:57:29.066Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/13/4e/cc276b1fa4a59be56d96f1dabddbdc30f4ba22e3b1cd42504c37b3313255/coverage-7.13.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7f57b33491e281e962021de110b451ab8a24182589be17e12a22c79047935e23", size = 250766, upload-time = "2026-02-09T12:57:30.522Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/94/44/1093b8f93018f8b41a8cf29636c9292502f05e4a113d4d107d14a3acd044/coverage-7.13.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:1731dc33dc276dafc410a885cbf5992f1ff171393e48a21453b78727d090de80", size = 254923, upload-time = "2026-02-09T12:57:31.946Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8b/55/ea2796da2d42257f37dbea1aab239ba9263b31bd91d5527cdd6db5efe174/coverage-7.13.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:bd60d4fe2f6fa7dff9223ca1bbc9f05d2b6697bc5961072e5d3b952d46e1b1ea", size = 250591, upload-time = "2026-02-09T12:57:33.842Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d4/fa/7c4bb72aacf8af5020675aa633e59c1fbe296d22aed191b6a5b711eb2bc7/coverage-7.13.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9181a3ccead280b828fae232df12b16652702b49d41e99d657f46cc7b1f6ec7a", size = 252364, upload-time = "2026-02-09T12:57:35.743Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/5c/38/a8d2ec0146479c20bbaa7181b5b455a0c41101eed57f10dd19a78ab44c80/coverage-7.13.4-cp313-cp313-win32.whl", hash = "sha256:f53d492307962561ac7de4cd1de3e363589b000ab69617c6156a16ba7237998d", size = 222010, upload-time = "2026-02-09T12:57:37.25Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/0c/dbfafbe90a185943dcfbc766fe0e1909f658811492d79b741523a414a6cc/coverage-7.13.4-cp313-cp313-win_amd64.whl", hash = "sha256:e6f70dec1cc557e52df5306d051ef56003f74d56e9c4dd7ddb07e07ef32a84dd", size = 222818, upload-time = "2026-02-09T12:57:38.734Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/d1/934918a138c932c90d78301f45f677fb05c39a3112b96fd2c8e60503cdc7/coverage-7.13.4-cp313-cp313-win_arm64.whl", hash = "sha256:fb07dc5da7e849e2ad31a5d74e9bece81f30ecf5a42909d0a695f8bd1874d6af", size = 221438, upload-time = "2026-02-09T12:57:40.223Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/52/57/ee93ced533bcb3e6df961c0c6e42da2fc6addae53fb95b94a89b1e33ebd7/coverage-7.13.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:40d74da8e6c4b9ac18b15331c4b5ebc35a17069410cad462ad4f40dcd2d50c0d", size = 220165, upload-time = "2026-02-09T12:57:41.639Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/e0/969fc285a6fbdda49d91af278488d904dcd7651b2693872f0ff94e40e84a/coverage-7.13.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4223b4230a376138939a9173f1bdd6521994f2aff8047fae100d6d94d50c5a12", size = 220516, upload-time = "2026-02-09T12:57:44.215Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b1/b8/9531944e16267e2735a30a9641ff49671f07e8138ecf1ca13db9fd2560c7/coverage-7.13.4-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:1d4be36a5114c499f9f1f9195e95ebf979460dbe2d88e6816ea202010ba1c34b", size = 261804, upload-time = "2026-02-09T12:57:45.989Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8a/f3/e63df6d500314a2a60390d1989240d5f27318a7a68fa30ad3806e2a9323e/coverage-7.13.4-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:200dea7d1e8095cc6e98cdabe3fd1d21ab17d3cee6dab00cadbb2fe35d9c15b9", size = 263885, upload-time = "2026-02-09T12:57:47.42Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f3/67/7654810de580e14b37670b60a09c599fa348e48312db5b216d730857ffe6/coverage-7.13.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b8eb931ee8e6d8243e253e5ed7336deea6904369d2fd8ae6e43f68abbf167092", size = 266308, upload-time = "2026-02-09T12:57:49.345Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/37/6f/39d41eca0eab3cc82115953ad41c4e77935286c930e8fad15eaed1389d83/coverage-7.13.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:75eab1ebe4f2f64d9509b984f9314d4aa788540368218b858dad56dc8f3e5eb9", size = 267452, upload-time = "2026-02-09T12:57:50.811Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/50/6d/39c0fbb8fc5cd4d2090811e553c2108cf5112e882f82505ee7495349a6bf/coverage-7.13.4-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c35eb28c1d085eb7d8c9b3296567a1bebe03ce72962e932431b9a61f28facf26", size = 261057, upload-time = "2026-02-09T12:57:52.447Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a4/a2/60010c669df5fa603bb5a97fb75407e191a846510da70ac657eb696b7fce/coverage-7.13.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb88b316ec33760714a4720feb2816a3a59180fd58c1985012054fa7aebee4c2", size = 263875, upload-time = "2026-02-09T12:57:53.938Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3e/d9/63b22a6bdbd17f1f96e9ed58604c2a6b0e72a9133e37d663bef185877cf6/coverage-7.13.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7d41eead3cc673cbd38a4417deb7fd0b4ca26954ff7dc6078e33f6ff97bed940", size = 261500, upload-time = "2026-02-09T12:57:56.012Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/70/bf/69f86ba1ad85bc3ad240e4c0e57a2e620fbc0e1645a47b5c62f0e941ad7f/coverage-7.13.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:fb26a934946a6afe0e326aebe0730cdff393a8bc0bbb65a2f41e30feddca399c", size = 265212, upload-time = "2026-02-09T12:57:57.5Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ae/f2/5f65a278a8c2148731831574c73e42f57204243d33bedaaf18fa79c5958f/coverage-7.13.4-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:dae88bc0fc77edaa65c14be099bd57ee140cf507e6bfdeea7938457ab387efb0", size = 260398, upload-time = "2026-02-09T12:57:59.027Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ef/80/6e8280a350ee9fea92f14b8357448a242dcaa243cb2c72ab0ca591f66c8c/coverage-7.13.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:845f352911777a8e722bfce168958214951e07e47e5d5d9744109fa5fe77f79b", size = 262584, upload-time = "2026-02-09T12:58:01.129Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/22/63/01ff182fc95f260b539590fb12c11ad3e21332c15f9799cb5e2386f71d9f/coverage-7.13.4-cp313-cp313t-win32.whl", hash = "sha256:2fa8d5f8de70688a28240de9e139fa16b153cc3cbb01c5f16d88d6505ebdadf9", size = 222688, upload-time = "2026-02-09T12:58:02.736Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a9/43/89de4ef5d3cd53b886afa114065f7e9d3707bdb3e5efae13535b46ae483d/coverage-7.13.4-cp313-cp313t-win_amd64.whl", hash = "sha256:9351229c8c8407645840edcc277f4a2d44814d1bc34a2128c11c2a031d45a5dd", size = 223746, upload-time = "2026-02-09T12:58:05.362Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/35/39/7cf0aa9a10d470a5309b38b289b9bb07ddeac5d61af9b664fe9775a4cb3e/coverage-7.13.4-cp313-cp313t-win_arm64.whl", hash = "sha256:30b8d0512f2dc8c8747557e8fb459d6176a2c9e5731e2b74d311c03b78451997", size = 222003, upload-time = "2026-02-09T12:58:06.952Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/92/11/a9cf762bb83386467737d32187756a42094927150c3e107df4cb078e8590/coverage-7.13.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:300deaee342f90696ed186e3a00c71b5b3d27bffe9e827677954f4ee56969601", size = 219522, upload-time = "2026-02-09T12:58:08.623Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d3/28/56e6d892b7b052236d67c95f1936b6a7cf7c3e2634bf27610b8cbd7f9c60/coverage-7.13.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29e3220258d682b6226a9b0925bc563ed9a1ebcff3cad30f043eceea7eaf2689", size = 219855, upload-time = "2026-02-09T12:58:10.176Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e5/69/233459ee9eb0c0d10fcc2fe425a029b3fa5ce0f040c966ebce851d030c70/coverage-7.13.4-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:391ee8f19bef69210978363ca930f7328081c6a0152f1166c91f0b5fdd2a773c", size = 250887, upload-time = "2026-02-09T12:58:12.503Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/06/90/2cdab0974b9b5bbc1623f7876b73603aecac11b8d95b85b5b86b32de5eab/coverage-7.13.4-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0dd7ab8278f0d58a0128ba2fca25824321f05d059c1441800e934ff2efa52129", size = 253396, upload-time = "2026-02-09T12:58:14.615Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ac/15/ea4da0f85bf7d7b27635039e649e99deb8173fe551096ea15017f7053537/coverage-7.13.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78cdf0d578b15148b009ccf18c686aa4f719d887e76e6b40c38ffb61d264a552", size = 254745, upload-time = "2026-02-09T12:58:16.162Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/99/11/bb356e86920c655ca4d61daee4e2bbc7258f0a37de0be32d233b561134ff/coverage-7.13.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:48685fee12c2eb3b27c62f2658e7ea21e9c3239cba5a8a242801a0a3f6a8c62a", size = 257055, upload-time = "2026-02-09T12:58:17.892Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c9/0f/9ae1f8cb17029e09da06ca4e28c9e1d5c1c0a511c7074592e37e0836c915/coverage-7.13.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:4e83efc079eb39480e6346a15a1bcb3e9b04759c5202d157e1dd4303cd619356", size = 250911, upload-time = "2026-02-09T12:58:19.495Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/89/3a/adfb68558fa815cbc29747b553bc833d2150228f251b127f1ce97e48547c/coverage-7.13.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ecae9737b72408d6a950f7e525f30aca12d4bd8dd95e37342e5beb3a2a8c4f71", size = 252754, upload-time = "2026-02-09T12:58:21.064Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/32/b1/540d0c27c4e748bd3cd0bd001076ee416eda993c2bae47a73b7cc9357931/coverage-7.13.4-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ae4578f8528569d3cf303fef2ea569c7f4c4059a38c8667ccef15c6e1f118aa5", size = 250720, upload-time = "2026-02-09T12:58:22.622Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c7/95/383609462b3ffb1fe133014a7c84fc0dd01ed55ac6140fa1093b5af7ebb1/coverage-7.13.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:6fdef321fdfbb30a197efa02d48fcd9981f0d8ad2ae8903ac318adc653f5df98", size = 254994, upload-time = "2026-02-09T12:58:24.548Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f7/ba/1761138e86c81680bfc3c49579d66312865457f9fe405b033184e5793cb3/coverage-7.13.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b0f6ccf3dbe577170bebfce1318707d0e8c3650003cb4b3a9dd744575daa8b5", size = 250531, upload-time = "2026-02-09T12:58:26.271Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f8/8e/05900df797a9c11837ab59c4d6fe94094e029582aab75c3309a93e6fb4e3/coverage-7.13.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75fcd519f2a5765db3f0e391eb3b7d150cce1a771bf4c9f861aeab86c767a3c0", size = 252189, upload-time = "2026-02-09T12:58:27.807Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/00/bd/29c9f2db9ea4ed2738b8a9508c35626eb205d51af4ab7bf56a21a2e49926/coverage-7.13.4-cp314-cp314-win32.whl", hash = "sha256:8e798c266c378da2bd819b0677df41ab46d78065fb2a399558f3f6cae78b2fbb", size = 222258, upload-time = "2026-02-09T12:58:29.441Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a7/4d/1f8e723f6829977410efeb88f73673d794075091c8c7c18848d273dc9d73/coverage-7.13.4-cp314-cp314-win_amd64.whl", hash = "sha256:245e37f664d89861cf2329c9afa2c1fe9e6d4e1a09d872c947e70718aeeac505", size = 223073, upload-time = "2026-02-09T12:58:31.026Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/51/5b/84100025be913b44e082ea32abcf1afbf4e872f5120b7a1cab1d331b1e13/coverage-7.13.4-cp314-cp314-win_arm64.whl", hash = "sha256:ad27098a189e5838900ce4c2a99f2fe42a0bf0c2093c17c69b45a71579e8d4a2", size = 221638, upload-time = "2026-02-09T12:58:32.599Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a7/e4/c884a405d6ead1370433dad1e3720216b4f9fd8ef5b64bfd984a2a60a11a/coverage-7.13.4-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:85480adfb35ffc32d40918aad81b89c69c9cc5661a9b8a81476d3e645321a056", size = 220246, upload-time = "2026-02-09T12:58:34.181Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/81/5c/4d7ed8b23b233b0fffbc9dfec53c232be2e695468523242ea9fd30f97ad2/coverage-7.13.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:79be69cf7f3bf9b0deeeb062eab7ac7f36cd4cc4c4dd694bd28921ba4d8596cc", size = 220514, upload-time = "2026-02-09T12:58:35.704Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/6f/3284d4203fd2f28edd73034968398cd2d4cb04ab192abc8cff007ea35679/coverage-7.13.4-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:caa421e2684e382c5d8973ac55e4f36bed6821a9bad5c953494de960c74595c9", size = 261877, upload-time = "2026-02-09T12:58:37.864Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/09/aa/b672a647bbe1556a85337dc95bfd40d146e9965ead9cc2fe81bde1e5cbce/coverage-7.13.4-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14375934243ee05f56c45393fe2ce81fe5cc503c07cee2bdf1725fb8bef3ffaf", size = 264004, upload-time = "2026-02-09T12:58:39.492Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/79/a1/aa384dbe9181f98bba87dd23dda436f0c6cf2e148aecbb4e50fc51c1a656/coverage-7.13.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25a41c3104d08edb094d9db0d905ca54d0cd41c928bb6be3c4c799a54753af55", size = 266408, upload-time = "2026-02-09T12:58:41.852Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/53/5e/5150bf17b4019bc600799f376bb9606941e55bd5a775dc1e096b6ffea952/coverage-7.13.4-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6f01afcff62bf9a08fb32b2c1d6e924236c0383c02c790732b6537269e466a72", size = 267544, upload-time = "2026-02-09T12:58:44.093Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e0/ed/f1de5c675987a4a7a672250d2c5c9d73d289dbf13410f00ed7181d8017dd/coverage-7.13.4-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:eb9078108fbf0bcdde37c3f4779303673c2fa1fe8f7956e68d447d0dd426d38a", size = 260980, upload-time = "2026-02-09T12:58:45.721Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b3/e3/fe758d01850aa172419a6743fe76ba8b92c29d181d4f676ffe2dae2ba631/coverage-7.13.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0e086334e8537ddd17e5f16a344777c1ab8194986ec533711cbe6c41cde841b6", size = 263871, upload-time = "2026-02-09T12:58:47.334Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b6/76/b829869d464115e22499541def9796b25312b8cf235d3bb00b39f1675395/coverage-7.13.4-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:725d985c5ab621268b2edb8e50dfe57633dc69bda071abc470fed55a14935fd3", size = 261472, upload-time = "2026-02-09T12:58:48.995Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/9e/caedb1679e73e2f6ad240173f55218488bfe043e38da577c4ec977489915/coverage-7.13.4-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:3c06f0f1337c667b971ca2f975523347e63ec5e500b9aa5882d91931cd3ef750", size = 265210, upload-time = "2026-02-09T12:58:51.178Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3a/10/0dd02cb009b16ede425b49ec344aba13a6ae1dc39600840ea6abcb085ac4/coverage-7.13.4-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:590c0ed4bf8e85f745e6b805b2e1c457b2e33d5255dd9729743165253bc9ad39", size = 260319, upload-time = "2026-02-09T12:58:53.081Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/92/8e/234d2c927af27c6d7a5ffad5bd2cf31634c46a477b4c7adfbfa66baf7ebb/coverage-7.13.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:eb30bf180de3f632cd043322dad5751390e5385108b2807368997d1a92a509d0", size = 262638, upload-time = "2026-02-09T12:58:55.258Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2f/64/e5547c8ff6964e5965c35a480855911b61509cce544f4d442caa759a0702/coverage-7.13.4-cp314-cp314t-win32.whl", hash = "sha256:c4240e7eded42d131a2d2c4dec70374b781b043ddc79a9de4d55ca71f8e98aea", size = 223040, upload-time = "2026-02-09T12:58:56.936Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c7/96/38086d58a181aac86d503dfa9c47eb20715a79c3e3acbdf786e92e5c09a8/coverage-7.13.4-cp314-cp314t-win_amd64.whl", hash = "sha256:4c7d3cc01e7350f2f0f6f7036caaf5673fb56b6998889ccfe9e1c1fe75a9c932", size = 224148, upload-time = "2026-02-09T12:58:58.645Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ce/72/8d10abd3740a0beb98c305e0c3faf454366221c0f37a8bcf8f60020bb65a/coverage-7.13.4-cp314-cp314t-win_arm64.whl", hash = "sha256:23e3f687cf945070d1c90f85db66d11e3025665d8dafa831301a0e0038f3db9b", size = 222172, upload-time = "2026-02-09T12:59:00.396Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/0d/4a/331fe2caf6799d591109bb9c08083080f6de90a823695d412a935622abb2/coverage-7.13.4-py3-none-any.whl", hash = "sha256:1af1641e57cf7ba1bd67d677c9abdbcd6cc2ab7da3bca7fa1e2b7e50e65f2ad0", size = 211242, upload-time = "2026-02-09T12:59:02.032Z" },
|
||||
]
|
||||
|
||||
[package.optional-dependencies]
|
||||
toml = [
|
||||
{ name = "tomli", marker = "python_full_version <= '3.11' or (extra == 'extra-8-nanochat-cpu' and extra == 'extra-8-nanochat-gpu')" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cycler"
|
||||
version = "0.12.1"
|
||||
|
|
@ -1527,6 +1645,8 @@ gpu = [
|
|||
[package.dev-dependencies]
|
||||
dev = [
|
||||
{ name = "pytest" },
|
||||
{ name = "pytest-cov" },
|
||||
{ name = "pytest-mock" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
|
|
@ -1556,7 +1676,11 @@ requires-dist = [
|
|||
provides-extras = ["cpu", "gpu"]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
dev = [{ name = "pytest", specifier = ">=8.0.0" }]
|
||||
dev = [
|
||||
{ name = "pytest", specifier = ">=8.0.0" },
|
||||
{ name = "pytest-cov", specifier = ">=7.0.0" },
|
||||
{ name = "pytest-mock", specifier = ">=3.15.1" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nest-asyncio"
|
||||
|
|
@ -2336,6 +2460,32 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-cov"
|
||||
version = "7.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "coverage", extra = ["toml"] },
|
||||
{ name = "pluggy" },
|
||||
{ name = "pytest" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-mock"
|
||||
version = "3.15.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pytest" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-dateutil"
|
||||
version = "2.9.0.post0"
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user