From 3e5fccdfa43b376be8d30e254472046ca42cf9cf Mon Sep 17 00:00:00 2001
From: hasso <110124569+hasso5703@users.noreply.github.com>
Date: Fri, 16 Jan 2026 11:18:12 +0100
Subject: [PATCH] feat: attempt fa3 load on sm < 9.0 (ampere/ada)

Allow FA3 on non-Hopper GPUs when a compatible wheel is installed manually.

Co-authored-by: Sofie Van Landeghem
---
 nanochat/gpt.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/nanochat/gpt.py b/nanochat/gpt.py
index d214054..5f00bc2 100644
--- a/nanochat/gpt.py
+++ b/nanochat/gpt.py
@@ -35,8 +35,12 @@ try:
     # Flash Attention 3 uses NVIDIA Hopper-specific features like TMA (Tensor Memory Accelerator).
     # These are only physically available on GPUs with Compute Capability >= 9.0 (e.g. H100).
     # We explicitly check for this to prevent "No kernel image available" crashes on Ampere/Ada GPUs (RTX 30xx/40xx) etc.
-    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 9:
-        flash_attn = get_kernel('varunneal/flash-attention-3').flash_attn_interface
+    if torch.cuda.is_available():
+        if torch.cuda.get_device_capability()[0] >= 9:
+            flash_attn = get_kernel('varunneal/flash-attention-3').flash_attn_interface
+        else:
+            # Pre-Hopper GPU: try an FA3 build installed manually, e.g. from https://windreamer.github.io/flash-attention3-wheels/; if the import fails, we fall back to SDPA below.
+            import flash_attn_interface as flash_attn
 except Exception:
     # Fallback to PyTorch SDPA on non-Hopper NVIDIA GPUs, Mac (MPS), or CPU.
     pass
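
Note (not part of the patch): a minimal smoke test, assuming the manually installed wheel exposes flash_attn_interface.flash_attn_func, to check that the new pre-Hopper fallback path actually runs on a given GPU before relying on it. The (batch, seqlen, nheads, headdim) bf16 layout follows the usual flash-attention convention; the tuple handling is defensive since some FA3 builds also return the softmax LSE.

# Hypothetical smoke test -- not part of nanochat/gpt.py.
import torch

try:
    import flash_attn_interface as flash_attn  # provided by a manually installed FA3 wheel
except ImportError:
    flash_attn = None

if flash_attn is None or not torch.cuda.is_available():
    print("FA3 wheel not importable or no CUDA device; nanochat will use the SDPA fallback.")
else:
    major, minor = torch.cuda.get_device_capability()
    print(f"Running on sm_{major}{minor}")
    # Tiny forward pass: (batch, seqlen, nheads, headdim), bf16, on the GPU.
    q = torch.randn(1, 128, 4, 64, device="cuda", dtype=torch.bfloat16)
    k = torch.randn_like(q)
    v = torch.randn_like(q)
    try:
        out = flash_attn.flash_attn_func(q, k, v, causal=True)
        out = out[0] if isinstance(out, tuple) else out  # some builds also return the softmax LSE
        print("FA3 kernel ran, output shape:", tuple(out.shape))
    except Exception as exc:  # e.g. "no kernel image is available" on unsupported GPUs
        print("FA3 wheel installed but not usable here:", exc)

If the test prints an output shape, the else-branch import in this patch will give working FA3 attention on that machine; otherwise the existing except-clause keeps the PyTorch SDPA fallback, so behaviour on unsupported setups is unchanged.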