
Commit 7f5c209

logic to select tf32 API as per Pytorch version (#42428)
* logic to select tf32 API as per Pytorch version
* new method added into `__all__`
* make style and quality ran
* added global setting for tf32
* added support for MUSA as well
* make style and quality run
* cleared >= 2.9.0 torch version logic
1 parent 01823d7 commit 7f5c209

File tree

5 files changed: +33, −21 lines

conftest.py

Lines changed: 2 additions & 3 deletions

```diff
@@ -31,6 +31,7 @@
     patch_testing_methods_to_collect_info,
     patch_torch_compile_force_graph,
 )
+from transformers.utils import enable_tf32
 
 
 NOT_DEVICE_TESTS = {
@@ -137,11 +138,9 @@ def check_output(self, want, got, optionflags):
 doctest.DocTestParser = HfDocTestParser
 
 if is_torch_available():
-    import torch
-
     # The flag below controls whether to allow TF32 on cuDNN. This flag defaults to True.
     # We set it to `False` for CI. See https://github.com/pytorch/pytorch/issues/157274#issuecomment-3090791615
-    torch.backends.cudnn.allow_tf32 = False
+    enable_tf32(False)
 
     # patch `torch.compile`: if `TORCH_COMPILE_FORCE_FULLGRAPH=1` (or values considered as true, e.g. yes, y, etc.),
    # the patched version will always run with `fullgraph=True`.
```
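
Previously the CI flipped `torch.backends.cudnn.allow_tf32` directly; the new helper picks whichever API the installed PyTorch supports. A minimal sketch of what the CI call now does (assuming a build of transformers that includes this commit):

```python
from transformers.utils import enable_tf32

# Disable TF32 for CI, regardless of PyTorch version: on torch >= 2.9 this
# sets torch.backends.fp32_precision = "ieee"; on older versions it clears
# the legacy allow_tf32 flags (mudnn on MUSA, cuda.matmul/cudnn elsewhere).
enable_tf32(False)
```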

src/transformers/training_args.py

Lines changed: 5 additions & 17 deletions

```diff
@@ -53,7 +53,7 @@
     requires_backends,
 )
 from .utils.generic import strtobool
-from .utils.import_utils import is_optimum_neuron_available
+from .utils.import_utils import enable_tf32, is_optimum_neuron_available
 
 
 logger = logging.get_logger(__name__)
@@ -379,7 +379,7 @@ class TrainingArguments:
             metric values.
         tf32 (`bool`, *optional*):
             Whether to enable the TF32 mode, available in Ampere and newer GPU architectures. The default value depends
-            on PyTorch's version default of `torch.backends.cuda.matmul.allow_tf32`. For more details please refer to
+            on PyTorch's version default of `torch.backends.cuda.matmul.allow_tf32` (for PyTorch 2.9+, `torch.backends.cuda.matmul.fp32_precision`). For more details please refer to
             the [TF32](https://huggingface.co/docs/transformers/perf_train_gpu_one#tf32) documentation. This is an
             experimental API and it may change.
         ddp_backend (`str`, *optional*):
@@ -1601,32 +1601,20 @@ def __post_init__(self):
                     f"Setting TF32 in {device_str} backends to speedup torch compile, you won't see any improvement"
                     " otherwise."
                 )
-                if is_torch_musa_available():
-                    torch.backends.mudnn.allow_tf32 = True
-                else:
-                    torch.backends.cuda.matmul.allow_tf32 = True
-                    torch.backends.cudnn.allow_tf32 = True
+                enable_tf32(True)
             else:
                 logger.warning(
                     "The speedups for torchdynamo mostly come with GPU Ampere or higher and which is not detected here."
                 )
         if is_torch_available() and self.tf32 is not None:
             if self.tf32:
                 if is_torch_tf32_available():
-                    if is_torch_musa_available():
-                        torch.backends.mudnn.allow_tf32 = True
-                    else:
-                        torch.backends.cuda.matmul.allow_tf32 = True
-                        torch.backends.cudnn.allow_tf32 = True
+                    enable_tf32(True)
                 else:
                     raise ValueError("--tf32 requires Ampere or a newer GPU arch, cuda>=11 and torch>=1.7")
             else:
                 if is_torch_tf32_available():
-                    if is_torch_musa_available():
-                        torch.backends.mudnn.allow_tf32 = False
-                    else:
-                        torch.backends.cuda.matmul.allow_tf32 = False
-                        torch.backends.cudnn.allow_tf32 = False
+                    enable_tf32(False)
                 # no need to assert on else
 
         if self.report_to == "all" or self.report_to == ["all"]:
```
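
As a usage sketch (the output directory name here is hypothetical), requesting TF32 through `TrainingArguments` now funnels into the shared helper during `__post_init__`:

```python
from transformers import TrainingArguments

# tf32=True triggers enable_tf32(True) in __post_init__; on unsupported
# setups (pre-Ampere GPU, cuda < 11, or torch < 1.7) it raises ValueError
# instead of silently setting backend flags.
args = TrainingArguments(output_dir="out", tf32=True)
```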

src/transformers/utils/__init__.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -108,6 +108,7 @@
     _LazyModule,
     check_torch_load_is_safe,
     direct_transformers_import,
+    enable_tf32,
     get_torch_version,
     is_accelerate_available,
     is_apex_available,
```

src/transformers/utils/import_utils.py

Lines changed: 23 additions & 0 deletions

```diff
@@ -508,6 +508,29 @@ def is_torch_tf32_available() -> bool:
     return True
 
 
+@lru_cache
+def enable_tf32(enable: bool) -> None:
+    """
+    Set TF32 mode using the appropriate PyTorch API.
+    For PyTorch 2.9+, uses the new fp32_precision API.
+    For older versions, uses the legacy allow_tf32 flags.
+    Args:
+        enable: Whether to enable TF32 mode
+    """
+    import torch
+
+    pytorch_version = version.parse(get_torch_version())
+    if pytorch_version >= version.parse("2.9.0"):
+        precision_mode = "tf32" if enable else "ieee"
+        torch.backends.fp32_precision = precision_mode
+    else:
+        if is_torch_musa_available():
+            torch.backends.mudnn.allow_tf32 = enable
+        else:
+            torch.backends.cuda.matmul.allow_tf32 = enable
+            torch.backends.cudnn.allow_tf32 = enable
+
+
 @lru_cache
 def is_torch_flex_attn_available() -> bool:
     return is_torch_available() and version.parse(get_torch_version()) >= version.parse("2.5.0")
```
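
A quick way to check which API the helper used, mirroring the version branch above (a sketch that assumes a CUDA build of PyTorch):

```python
import torch
from packaging import version

from transformers.utils import enable_tf32

enable_tf32(True)

# Mirror the branch inside enable_tf32 to inspect the effective setting.
if version.parse(torch.__version__) >= version.parse("2.9.0"):
    print(torch.backends.fp32_precision)          # "tf32"
else:
    print(torch.backends.cuda.matmul.allow_tf32)  # True
    print(torch.backends.cudnn.allow_tf32)        # True
```

Note that `enable_tf32` is wrapped in `@lru_cache`, so a repeated call with the same argument is a cached no-op; if other code resets the backend flags in between, calling the helper again with the same value will not re-apply them.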

utils/modular_model_detector.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -117,6 +117,7 @@
 
 import transformers
 from transformers import AutoModel, AutoTokenizer
+from transformers.utils import enable_tf32
 from transformers.utils import logging as transformers_logging
 
 
@@ -247,7 +248,7 @@ def __init__(self, hub_dataset: str):
         logging.getLogger(name).setLevel(logging.ERROR)
         huggingface_hub_logging.set_verbosity_error()
         transformers_logging.set_verbosity_error()
-        torch.backends.cuda.matmul.allow_tf32 = True
+        enable_tf32(True)
         torch.set_grad_enabled(False)
 
         self.models_root = MODELS_ROOT
```
