From fdaebf22cc3f014aa0074eb34fb1846e72429a45 Mon Sep 17 00:00:00 2001
From: Daniel Dudek
Date: Fri, 6 Feb 2026 12:34:21 +0100
Subject: [PATCH] Change default sliding window pattern to L when FA3 is not available

---
 scripts/base_train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/base_train.py b/scripts/base_train.py
index a3774e6..3f526be 100644
--- a/scripts/base_train.py
+++ b/scripts/base_train.py
@@ -50,7 +50,7 @@ parser.add_argument("--depth", type=int, default=20, help="depth of the Transfor
 parser.add_argument("--aspect-ratio", type=int, default=64, help="model_dim = depth * aspect_ratio")
 parser.add_argument("--head-dim", type=int, default=128, help="target head dimension for attention")
 parser.add_argument("--max-seq-len", type=int, default=2048, help="max context length")
-parser.add_argument("--window-pattern", type=str, default="SSSL", help="sliding window pattern tiled across layers: L=full, S=half context (e.g. 'SSL')")
+parser.add_argument("--window-pattern", type=str, default="SSSL" if HAS_FA3 else "L", help="sliding window pattern tiled across layers: L=full, S=half context (e.g. 'SSL')")
 # Training horizon (only one used, in order of precedence)
 parser.add_argument("--num-iterations", type=int, default=-1, help="explicit number of optimization steps (-1 = disable)")
 parser.add_argument("--target-flops", type=float, default=-1.0, help="calculate num_iterations to reach target_flops (-1 = disable)")