Commit f83b74c

after rebase
1 parent a213c68 commit f83b74c

File tree: 5 files changed (+51 additions, -61 deletions)

src/transformers/core_model_loading.py (30 additions, 12 deletions)
```diff
@@ -69,7 +69,6 @@ def build_glob_alternation(
     """
     src_group_to_glob: dict[str, str] = {}
     tgt_group_to_glob: dict[str, str] = {}
-    group_to_pattern: dict[str, re.Pattern] = {}
    branches: list[str] = []
    i = 0
    for glob in globs:
```
```diff
@@ -78,12 +77,11 @@ def build_glob_alternation(
             group_name = f"g{i}"
             src_group_to_glob[group_name] = src
             i += 1
-            # Convert the glob pattern to a regex with capture groups for wildcards
-            pattern_with_captures = src.replace("*", r"(.*?)")
-            group_to_pattern[group_name] = re.compile(f"^{pattern_with_captures}$")
             body = src.replace("*", r".*")
             branches.append(f"(?P<{group_name}>{body})")
-            tgt_group_to_glob[group_name] = glob.target_keys[0] if isinstance(glob.target_keys, list) else glob.target_keys
+            tgt_group_to_glob[group_name] = (
+                glob.target_patterns[0] if isinstance(glob.target_patterns, list) else glob.target_patterns
+            )
         else:
             group_name = f"g{i}"
             src_group_to_glob[group_name] = glob
```
```diff
@@ -94,7 +92,7 @@ def build_glob_alternation(
             tgt_group_to_glob[group_name] = glob

     alternation = re.compile("|".join(branches))
-    return alternation, src_group_to_glob, tgt_group_to_glob, group_to_pattern
+    return alternation, src_group_to_glob, tgt_group_to_glob


 class ConversionOps:
```
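For reference, a minimal sketch of the alternation technique this function implements, with the `WeightConverter` handling and target bookkeeping stripped out (function name and the example key are illustrative, not from the library):

```python
import re


def build_alternation(globs: list[str]) -> tuple[re.Pattern, dict[str, str]]:
    """Compile many glob patterns into one regex with named groups.

    A single match then reveals *which* glob matched via the group name,
    instead of looping over every pattern per tensor key.
    """
    group_to_glob: dict[str, str] = {}
    branches: list[str] = []
    for i, glob in enumerate(globs):
        group_name = f"g{i}"
        group_to_glob[group_name] = glob
        body = glob.replace("*", r".*")  # glob wildcard -> regex wildcard
        branches.append(f"(?P<{group_name}>{body})")
    return re.compile("|".join(branches)), group_to_glob


alternation, groups = build_alternation(["*.block_sparse_moe.*", "*.mlp.*"])
m = alternation.match("model.layers.0.block_sparse_moe.gate.weight")
assert m is not None and groups[m.lastgroup] == "*.block_sparse_moe.*"
```

Compiling one alternation up front keeps the per-key cost to a single regex match, which is what the `tp_plan` and `dtype_plan` call sites further down rely on.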
```diff
@@ -336,7 +334,11 @@ def __post_init__(self):
         branches = []
         for i, source_pattern in enumerate(self.source_patterns):
             group_name = f"g{i}"
-            pattern = source_pattern.replace(".*.", r"\..*\.")
+            # support both glob-style (*) and regex-style (.*) wildcards
+            if "*" in source_pattern and ".*" not in source_pattern:
+                pattern = source_pattern.replace("*", r".*")
+            else:
+                pattern = source_pattern.replace(".*.", r"\..*\.")
             branches.append(f"(?P<{group_name}>{pattern})")
         self.compiled_sources = re.compile("|".join(branches))
```
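As a standalone illustration of the new branch, a sketch of the two wildcard styles it accepts (example patterns and keys are hypothetical):

```python
import re


def normalize_pattern(source_pattern: str) -> str:
    # Glob-style pattern ("*_weight_*"): every "*" becomes a regex wildcard.
    if "*" in source_pattern and ".*" not in source_pattern:
        return source_pattern.replace("*", r".*")
    # Regex-style pattern ("layers.*.mlp"): escape the dots around the wildcard.
    return source_pattern.replace(".*.", r"\..*\.")


assert re.fullmatch(normalize_pattern("*_weight_*"), "layer.0._weight_scale")
assert re.fullmatch(normalize_pattern("layers.*.mlp"), "layers.0.mlp")
```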

```diff
@@ -364,12 +366,20 @@ def rename_source_key(self, source_key: str) -> tuple[str, str | None]:
         source_pattern_that_matched = self.source_patterns[int(matching_group_name[1:])]
         # If we matched, we always replace with the first target pattern, in case we have several (one to many transform)
         replacement = self.target_patterns[0]
-        # # Allow capturing groups in patterns, i.e. to add a prefix to all keys (e.g. timm_wrapper, sam3)
-        if r"\1" in replacement:
+
+        if "*" in replacement and "*" in source_pattern_that_matched:
+            pattern_with_captures = source_pattern_that_matched.replace("*", r"(.*?)")
+            pattern_regex = re.compile(f"^{pattern_with_captures}$")
+            match = pattern_regex.match(source_key)
+            if match:
+                groups = match.groups()
+                replacement = replacement.replace("*", groups[0], 1)
+        elif r"\1" in replacement:
             # The index of the internal group we need to replace is the index of the matched named group as it comes
             # inside that matched named group
             replaced_group_idx = self.compiled_sources.groupindex[matching_group_name] + 1
             replacement = replacement.replace(r"\1", match_object.group(replaced_group_idx))
+
         renamed_key = source_key.replace(match_object.group(0), replacement)

         return renamed_key, source_pattern_that_matched
```
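A standalone sketch of the wildcard substitution introduced here, assuming a single `*` on each side (the example key is hypothetical):

```python
import re


def substitute_wildcard(source_key: str, source_pattern: str, target_pattern: str) -> str:
    """Carry the text matched by '*' in the source pattern over to the target."""
    pattern_with_captures = source_pattern.replace("*", r"(.*?)")
    match = re.match(f"^{pattern_with_captures}$", source_key)
    if match is None:
        return target_pattern
    # Only the first "*" in the target is filled in, mirroring replace(..., 1).
    return target_pattern.replace("*", match.groups()[0], 1)


# "model.layers.0" is captured by "*" and re-inserted into the target key.
assert substitute_wildcard("model.layers.0._weight_", "*._weight_", "*.weight") == "model.layers.0.weight"
```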
```diff
@@ -805,9 +815,9 @@ def convert_and_load_state_dict_in_model(
     # build '(?P<g0>.*.*\\.block_sparse_moe\\..*)' and group to source {'g0': '*.block_sparse_moe.'}
     # and target to source {'g0': '*.mlp.'}. This allows us to quickly find which pattern matched.
     if tp_plan != {}:
-        tp_plan_alt, tp_plan_by_group_name, _, _ = build_glob_alternation(list(tp_plan.keys()))
+        tp_plan_alt, tp_plan_by_group_name, _ = build_glob_alternation(list(tp_plan.keys()))
     if dtype_plan != {}:
-        dtype_policy_alt, dtype_policy_by_group_name, _, _ = build_glob_alternation(list(dtype_plan.keys()))
+        dtype_policy_alt, dtype_policy_by_group_name, _ = build_glob_alternation(list(dtype_plan.keys()))

     pattern_to_converter = {k: converter for converter in converters for k in converter.source_patterns}
```
```diff
@@ -877,7 +887,15 @@ def convert_and_load_state_dict_in_model(
             param_device = "cpu" if param_device == "disk" else param_device
             future = spawn_materialize(thread_pool, tensor, param_device, _dtype)

-            mapping.add_tensor(renamed_key, original_key, source_pattern, future)
+            concrete_source_pattern = source_pattern
+            if isinstance(mapping, WeightConverter) and source_pattern is not None and "*" in source_pattern:
+                pattern_with_captures = source_pattern.replace("*", r"(.*?)")
+                pattern_regex = re.compile(f"^{pattern_with_captures}$")
+                concrete_source_pattern = extract_concrete_key_from_regex_pattern(
+                    original_key, source_pattern, pattern_regex
+                )
+
+            mapping.add_tensor(renamed_key, original_key, concrete_source_pattern, future)
         elif source_pattern is not None:  # add all target keys as unexpected
             mapping = pattern_to_converter[source_pattern]
             for k in mapping.target_patterns:
```
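`extract_concrete_key_from_regex_pattern` itself is not shown in this diff; below is a hypothetical stand-in consistent with how it is called here, assuming it pins each `*` in the pattern to the text it actually matched for this tensor (the real helper may differ):

```python
import re


def extract_concrete_key(original_key: str, source_pattern: str, pattern_regex: re.Pattern) -> str:
    """Hypothetical stand-in: replace each '*' in the pattern with the text it
    matched in original_key, yielding a concrete per-tensor key."""
    match = pattern_regex.match(original_key)
    if match is None:
        return source_pattern
    concrete = source_pattern
    for captured in match.groups():
        concrete = concrete.replace("*", captured, 1)
    return concrete


pattern = "*.block_sparse_moe.*"
regex = re.compile("^" + pattern.replace("*", r"(.*?)") + "$")
# Each wildcard is pinned to what it matched for this specific tensor.
assert extract_concrete_key("layers.0.block_sparse_moe.gate", pattern, regex) == "layers.0.block_sparse_moe.gate"
```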

src/transformers/integrations/torchao.py (9 additions, 8 deletions)
```diff
@@ -32,7 +32,7 @@

 if is_torchao_available():
     TORCHAO_VERSION = version.parse(importlib.metadata.version("torchao"))
-    if version.parse(importlib.metadata.version("torchao")) >= version.parse("0.14.0"):
+    if version.parse(importlib.metadata.version("torchao")) >= version.parse("0.15.0"):
         from torchao.prototype.safetensors.safetensors_support import (
             unflatten_tensor_state_dict,
         )
```
```diff
@@ -218,27 +218,28 @@ def convert(
         is_unsafe_serialization = "_weight_" not in list(input_dict.keys())[0]

         param_data = {}
-        layer_name = '.'.join(full_layer_name.split(".")[:-1])
+        layer_name = ".".join(full_layer_name.split(".")[:-1])
         if is_unsafe_serialization:
             if isinstance(input_dict["weight"], list):
                 weight = input_dict["weight"][0]
             else:
                 weight = input_dict["weight"]
         else:
             for suffix in input_dict.keys():
-                if isinstance(input_dict[suffix], list):
-                    param_data[f"{layer_name}.{suffix}"] = input_dict[suffix][0]
-                else:
-                    param_data[f"{layer_name}.{suffix}"] = input_dict[suffix]
+                assert len(input_dict[suffix]) == 1
+                param_data[f"{layer_name}.{suffix}"] = input_dict[suffix][0]

         # If it's unsafe-serialized (i.e. not safetensors), no need for anything
         if is_unsafe_serialization:
             return {full_layer_name: weight}
         # Sanity check for the new serialization format
         elif not (TORCHAO_VERSION >= version.parse("0.15.0") and is_metadata_torchao(self.hf_quantizer.metadata)):
-            raise ValueError("To use `safetensors` serialization, you should have `torchao>=0.14.0` installed")
+            raise ValueError("To use `safetensors` serialization, you should have `torchao>=0.15.0` installed")

-        unflattened_state_dict, _ = unflatten_tensor_state_dict(param_data, self.hf_quantizer.metadata)
+        unflattened_state_dict, leftover_state_dict = unflatten_tensor_state_dict(
+            param_data, self.hf_quantizer.metadata
+        )
+        assert not leftover_state_dict  # there should be no unprocessed tensors
         new_param = unflattened_state_dict[full_layer_name]

         module, _ = get_module_from_name(model, full_layer_name)
```
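To make the key bookkeeping above concrete, a small sketch of the suffix handling (the example key is hypothetical):

```python
# The suffix logic mirrors `convert` above.
full_layer_name = "model.layers.0.self_attn.q_proj._weight_qdata"

# Strip the last dotted component to recover the owning layer's name.
layer_name = ".".join(full_layer_name.split(".")[:-1])
assert layer_name == "model.layers.0.self_attn.q_proj"

# Keys without a "_weight_" component signal the old, non-safetensors path.
is_unsafe_serialization = "_weight_" not in full_layer_name
assert not is_unsafe_serialization
```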

src/transformers/modeling_utils.py (0 additions, 13 deletions)
```diff
@@ -3946,19 +3946,6 @@ def from_pretrained(

         is_quantized = hf_quantizer is not None

-        weight_conversions: Optional[list[WeightConverter | WeightRenaming]] = None
-        model_type = getattr(config, "model_type", None)
-        if model_type is not None:
-            weight_conversions = get_checkpoint_conversion_mapping(model_type)
-        if weight_conversions is None:
-            weight_conversions = get_checkpoint_conversion_mapping("legacy")
-        if key_mapping is not None:
-            weight_conversions.extend(
-                [WeightRenaming(source_keys=k, target_keys=v) for k, v in key_mapping.items()]
-            )
-        if hf_quantizer is not None:
-            weight_conversions.extend(hf_quantizer.get_weight_conversions())
-
         if gguf_file:
             from .modeling_gguf_pytorch_utils import load_gguf_checkpoint
```
src/transformers/quantizers/quantizer_torchao.py (11 additions, 27 deletions)
```diff
@@ -40,14 +40,11 @@
 import torch.nn as nn

 if is_torchao_available():
-    import torchao
-
-    if version.parse(importlib.metadata.version("torchao")) >= version.parse("0.14.0"):
+    if version.parse(importlib.metadata.version("torchao")) >= version.parse("0.15.0"):
         from torchao.prototype.safetensors.safetensors_support import (
             flatten_tensor_state_dict,
             unflatten_tensor_state_dict,
         )
-        from torchao.prototype.awq import AWQConfig
         from torchao.prototype.safetensors.safetensors_utils import is_metadata_torchao


```
```diff
@@ -89,16 +86,6 @@ def _linear_extra_repr(self):


 if is_torchao_available():
-    SUPPORTED_SAFE_SERIALIZATION_CONFIGS = [
-        torchao.quantization.Float8WeightOnlyConfig,
-        torchao.quantization.Float8DynamicActivationFloat8WeightConfig,
-        torchao.quantization.Int4WeightOnlyConfig,
-        torchao.quantization.IntxWeightOnlyConfig,
-        torchao.quantization.Int8DynamicActivationIntxWeightConfig,
-        torchao.quantization.ModuleFqnToConfig,
-        AWQConfig,
-    ]
-
     TORCHAO_VERSION = version.parse(importlib.metadata.version("torchao"))


```
```diff
@@ -177,12 +164,12 @@ def get_state_dict_and_metadata(self, model, safe_serialization: bool | None = F
         If the model is safe serializable, we flatten the state dict of tensor subclasses so that it is compatible with
         the safetensors format.
         """
-        if type(self.quantization_config.quant_type) in SUPPORTED_SAFE_SERIALIZATION_CONFIGS and safe_serialization:
-            if TORCHAO_VERSION >= version.parse("0.14.0"):
+        if safe_serialization:
+            if TORCHAO_VERSION >= version.parse("0.15.0"):
                 return flatten_tensor_state_dict(model.state_dict())
             else:
                 raise RuntimeError(
-                    f"In order to use safetensors with torchao, please use torchao version >= 0.14.0. Current version: {TORCHAO_VERSION}"
+                    f"In order to use safetensors with torchao, please use torchao version >= 0.15.0. Current version: {TORCHAO_VERSION}"
                 )
         else:
             return None, {}
```
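The save/load contract these two code paths assume, sketched from the call sites in this diff (the return shapes are inferred from how the results are unpacked, not from torchao documentation):

```python
from torchao.prototype.safetensors.safetensors_support import (
    flatten_tensor_state_dict,
    unflatten_tensor_state_dict,
)


def roundtrip(model):
    # Save path: tensor subclasses are flattened into plain "_weight_*" tensors
    # plus metadata, which is all the safetensors format can represent.
    flat_state_dict, metadata = flatten_tensor_state_dict(model.state_dict())

    # Load path: the metadata drives reconstruction; leftover entries would be
    # tensors the metadata did not account for, hence the assert in `convert`.
    unflattened, leftover = unflatten_tensor_state_dict(flat_state_dict, metadata)
    assert not leftover
    return unflattened
```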
```diff
@@ -314,8 +301,8 @@ def create_quantized_param(
             )
             return
         # Sanity check for the new serialization format
-        elif not (TORCHAO_VERSION >= version.parse("0.14.0") and is_metadata_torchao(self.metadata)):
-            raise ValueError("To use `safetensors` serialization, you should have `torchao>=0.14.0` installed")
+        elif not (TORCHAO_VERSION >= version.parse("0.15.0") and is_metadata_torchao(self.metadata)):
+            raise ValueError("To use `safetensors` serialization, you should have `torchao>=0.15.0` installed")

         # Save the states for later quantization when they are all gathered
         if not hasattr(self, "ao_params"):
```
```diff
@@ -460,13 +447,10 @@ def _process_model_after_weight_loading(self, model, **kwargs):

     def is_serializable(self, safe_serialization=None) -> bool:
         if safe_serialization:
-            _is_torchao_serializable = type(
-                self.quantization_config.quant_type
-            ) in SUPPORTED_SAFE_SERIALIZATION_CONFIGS and TORCHAO_VERSION >= version.parse("0.14.0")
-            if not _is_torchao_serializable:
+            _is_torchao_serializable = TORCHAO_VERSION >= version.parse("0.15.0")
+            if not TORCHAO_VERSION >= version.parse("0.15.0"):
                 logger.warning(
-                    f"torchao quantized model only supports safe serialization for {SUPPORTED_SAFE_SERIALIZATION_CONFIGS}, \
-                    and torchao version >= 0.14.0, please set `safe_serialization` to False for \
+                    f"torchao quantized model only supports safe serialization for torchao version >= 0.15.0, please set `safe_serialization` to False for \
                     {type(self.quantization_config.quant_type)} and {TORCHAO_VERSION}."
                 )
             return _is_torchao_serializable
```
```diff
@@ -556,8 +540,8 @@ def get_weight_conversions(self):
         if self.pre_quantized:
             return [
                 WeightConverter(
-                    source_keys=["*_weight_*"],
-                    target_keys="*weight",
+                    source_patterns=["*_weight_*"],
+                    target_patterns="*weight",
                     operations=[TorchAoDeserialize(self)],
                 ),
             ]
```
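A small sketch of what this source/target pattern pair does to a flattened key, reusing the wildcard-capture mechanics from `rename_source_key` above (the example key is hypothetical):

```python
import re

# Every flattened "_weight_" component of a layer is routed back to that
# layer's "weight" parameter; "*" carries the layer prefix across.
source_pattern, target_pattern = "*_weight_*", "*weight"

regex = re.compile("^" + source_pattern.replace("*", "(.*?)") + "$")
m = regex.match("model.layers.0.self_attn.q_proj._weight_qdata")
renamed = target_pattern.replace("*", m.group(1), 1)
assert renamed == "model.layers.0.self_attn.q_proj.weight"
```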

tests/quantization/torchao_integration/test_torchao.py (1 addition, 1 deletion)
```diff
@@ -738,7 +738,7 @@ def test_serialization_expected_output(self):


 @require_torchao
-@require_torchao_version_greater_or_equal("0.14.0")
+@require_torchao_version_greater_or_equal("0.15.0")
 class TorchAoSafeSerializationTest(TorchAoSerializationTest):
     # called only once for all test in this class
     @classmethod
```
