Merge 7640a1781f into 1b1cc3c599

2026-06-15 18:49:10 +00:00 · 2026-03-15 02:57:40 +08:00 · 2026-03-15 02:57:40 +08:00 · 71d90265f5
commit 71d90265f5
parent 1b1cc3c599 7640a1781f
1 changed file with 2 additions and 1 deletion
--- a/scripts/base_train.py
+++ b/scripts/base_train.py
@@ -218,12 +218,13 @@ def disable_fp8(model):
        return

    # Swap Float8Linear -> Linear (our custom class that casts weights to match input dtype)
+    # Use device="meta" to avoid VRAM spike - the weight tensor will be swapped in afterwards
    for parent, attr_name, fp8_module in fp8_locations:
        linear = Linear(
            fp8_module.in_features,
            fp8_module.out_features,
            bias=fp8_module.bias is not None,
-            device=fp8_module.weight.device,
+            device="meta",  # Use meta device to avoid unnecessary VRAM allocation
            dtype=fp8_module.weight.dtype,
        )
        linear.weight = fp8_module.weight  # share, don't copy