Explicitly enable allow_tf32 in nanochat/common.py

Even after `torch.set_float32_matmul_precision('high')` has been called, the `torch.compile` (Inductor) backend on some ROCm versions may still warn that TensorFloat32 is available but not enabled. This change explicitly sets `torch.backends.cuda.matmul.allow_tf32 = True` so that the setting is guaranteed to be active and the warning is silenced.
This commit is contained in:
google-labs-jules[bot] 2025-11-23 08:27:13 +00:00
parent 68148b1bf3
commit 41ba458c3b

View File

@ -166,6 +166,9 @@ def compute_init(device_type="cuda"): # cuda|cpu|mps
# Precision
if device_type == "cuda":
torch.set_float32_matmul_precision("high") # uses tf32 instead of fp32 for matmuls
# Also set allow_tf32 explicitly: on some ROCm versions torch.compile (Inductor) may still warn that TF32 is available but not enabled
torch.backends.cuda.matmul.allow_tf32 = True
# print0(f"Precision set: float32_matmul_precision=high, allow_tf32={torch.backends.cuda.matmul.allow_tf32}")
# Distributed setup: Distributed Data Parallel (DDP), optional, and requires CUDA
ddp, ddp_rank, ddp_local_rank, ddp_world_size = get_dist_info()