From 68e66be05c698dce4d6c48a60fc94ab8bf516466 Mon Sep 17 00:00:00 2001 From: hasan Date: Wed, 14 Jan 2026 15:23:55 +0100 Subject: [PATCH] fix: wrap FA3 import in try-except block to support both CUDA and MPS --- nanochat/gpt.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/nanochat/gpt.py b/nanochat/gpt.py index 38ba153..cd88e46 100644 --- a/nanochat/gpt.py +++ b/nanochat/gpt.py @@ -29,7 +29,13 @@ os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1" # Official docs of FA3 label it as "beta" and want you to install FA3 from source, which is a pain. # Wishing for official FA3 wheels soon, for now this seems to be a fast way to get them (ty varunneal) -flash_attn = None +from kernels import get_kernel + +try: + flash_attn = get_kernel('varunneal/flash-attention-3').flash_attn_interface +except Exception: + # Kernel loading failed (e.g. on Mac/MPS or CPU), fall back to SDPA + flash_attn = None @dataclass class GPTConfig: