From 68e66be05c698dce4d6c48a60fc94ab8bf516466 Mon Sep 17 00:00:00 2001 From: hasan Date: Wed, 14 Jan 2026 15:23:55 +0100 Subject: [PATCH] fix: wrap FA3 import in try-except block to support both CUDA and MPS --- nanochat/gpt.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nanochat/gpt.py b/nanochat/gpt.py index 38ba153..cd88e46 100644 --- a/nanochat/gpt.py +++ b/nanochat/gpt.py @@ -29,7 +29,13 @@ os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1" # Official docs of FA3 label it as "beta" and want you to install FA3 from source, which is a pain. # Wishing for official FA3 wheels soon, for now this seems to be a fast way to get them (ty varunneal) -flash_attn = None +from kernels import get_kernel + +try: + flash_attn = get_kernel('varunneal/flash-attention-3').flash_attn_interface +except Exception: + # Kernel loading failed (e.g. on Mac/MPS or CPU), fall back to SDPA + flash_attn = None @dataclass class GPTConfig: