Merge pull request #23 from LokiMetaSmith/fix-amd-triton-reinstall

Explicitly enable allow_tf32 in nanochat/common.py
2025-12-06 04:12:13 +00:00 · 2025-11-23 02:28:00 -06:00 · 2025-11-23 02:28:00 -06:00 · e14d7ba6bf
commit e14d7ba6bf
parent 40ef6e81a9 41ba458c3b
1 changed file with 3 additions and 0 deletions
--- a/nanochat/common.py
+++ b/nanochat/common.py
@@ -166,6 +166,9 @@ def compute_init(device_type="cuda"): # cuda|cpu|mps
    # Precision
    if device_type == "cuda":
        torch.set_float32_matmul_precision("high") # uses tf32 instead of fp32 for matmuls
+        # Explicitly enable allow_tf32 to ensure it's on, helping silence warnings on some platforms
+        torch.backends.cuda.matmul.allow_tf32 = True
+        # print0(f"Precision set: float32_matmul_precision=high, allow_tf32={torch.backends.cuda.matmul.allow_tf32}")

    # Distributed setup: Distributed Data Parallel (DDP), optional, and requires CUDA
    ddp, ddp_rank, ddp_local_rank, ddp_world_size = get_dist_info()