From 345935c5f30ffe45a507a5a59b97f8cece1ae6e0 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Wed, 18 Feb 2026 21:38:48 +0100
Subject: [PATCH] Add back comment for clarification

---
 nanochat/flash_attention.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nanochat/flash_attention.py b/nanochat/flash_attention.py
index c79d124..67917b1 100644
--- a/nanochat/flash_attention.py
+++ b/nanochat/flash_attention.py
@@ -26,6 +26,8 @@ def _load_flash_attention_3():
     if not torch.cuda.is_available():
         return None
     try:
+        # FA3 kernels are currently compiled for Hopper (sm90) and Ampere (sm80/sm86).
+        # Ada (sm89) and Blackwell (sm100) need the SDPA fallback until FA3 is recompiled.
         from kernels import get_kernel, has_kernel
         supported = has_kernel(hf_kernel)
         if not supported: