From 41ba458c3bc83568a7574b9ffac7cab69919cddd Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sun, 23 Nov 2025 08:27:13 +0000
Subject: [PATCH] Explicitly enable allow_tf32 in nanochat/common.py

Even when calling `torch.set_float32_matmul_precision('high')`, the
`torch.compile` (Inductor) backend on some ROCm versions may still warn
that TensorFloat32 is available but not enabled. This change explicitly
sets `torch.backends.cuda.matmul.allow_tf32 = True` to ensure the
setting is active and to silence the warning.
---
 nanochat/common.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/nanochat/common.py b/nanochat/common.py
index 4b7ac92..84e4600 100644
--- a/nanochat/common.py
+++ b/nanochat/common.py
@@ -166,6 +166,9 @@ def compute_init(device_type="cuda"): # cuda|cpu|mps
     # Precision
     if device_type == "cuda":
         torch.set_float32_matmul_precision("high") # uses tf32 instead of fp32 for matmuls
+        # Explicitly enable allow_tf32 to ensure it's on, helping silence warnings on some platforms
+        torch.backends.cuda.matmul.allow_tf32 = True
+        # print0(f"Precision set: float32_matmul_precision=high, allow_tf32={torch.backends.cuda.matmul.allow_tf32}")
 
     # Distributed setup: Distributed Data Parallel (DDP), optional, and requires CUDA
     ddp, ddp_rank, ddp_local_rank, ddp_world_size = get_dist_info()
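
A minimal standalone sketch (not part of the patch itself) of the resulting
precision setup, useful for verifying the two knobs agree outside of
nanochat; it assumes a CUDA- or ROCm-enabled PyTorch build:

    import torch

    # High-level knob: permit TensorFloat32 for float32 matmuls.
    torch.set_float32_matmul_precision("high")
    # Backend flag set explicitly, mirroring the patched compute_init().
    torch.backends.cuda.matmul.allow_tf32 = True

    # Both views of the setting should now agree.
    print(torch.get_float32_matmul_precision())   # "high"
    print(torch.backends.cuda.matmul.allow_tf32)  # True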