Skip to content

Commit 32df8ec

Browse files
committed
after rebase
1 parent 661e342 commit 32df8ec

File tree

5 files changed

+32
-24
lines changed

5 files changed

+32
-24
lines changed

src/transformers/core_model_loading.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,13 @@ def build_glob_alternation(
7878
group_name = f"g{i}"
7979
src_group_to_glob[group_name] = src
8080
i += 1
81-
# Convert the glob pattern to a regex with capture groups for wildcards
8281
pattern_with_captures = src.replace("*", r"(.*?)")
8382
group_to_pattern[group_name] = re.compile(f"^{pattern_with_captures}$")
8483
body = src.replace("*", r".*")
8584
branches.append(f"(?P<{group_name}>{body})")
86-
tgt_group_to_glob[group_name] = glob.target_keys[0] if isinstance(glob.target_keys, list) else glob.target_keys
85+
tgt_group_to_glob[group_name] = (
86+
glob.target_patterns[0] if isinstance(glob.target_patterns, list) else glob.target_patterns
87+
)
8788
else:
8889
group_name = f"g{i}"
8990
src_group_to_glob[group_name] = glob
@@ -336,7 +337,11 @@ def __post_init__(self):
336337
branches = []
337338
for i, source_pattern in enumerate(self.source_patterns):
338339
group_name = f"g{i}"
339-
pattern = source_pattern.replace(".*.", r"\..*\.")
340+
# support both glob-style (*) and regex-style (.*) wildcards
341+
if "*" in source_pattern and ".*" not in source_pattern:
342+
pattern = source_pattern.replace("*", r".*")
343+
else:
344+
pattern = source_pattern.replace(".*.", r"\..*\.")
340345
branches.append(f"(?P<{group_name}>{pattern})")
341346
self.compiled_sources = re.compile("|".join(branches))
342347

@@ -364,12 +369,20 @@ def rename_source_key(self, source_key: str) -> tuple[str, str | None]:
364369
source_pattern_that_matched = self.source_patterns[int(matching_group_name[1:])]
365370
# If we matched, we always replace with the first target pattern, in case we have several (one to many transform)
366371
replacement = self.target_patterns[0]
367-
# # Allow capturing groups in patterns, i.e. to add a prefix to all keys (e.g. timm_wrapper, sam3)
368-
if r"\1" in replacement:
372+
373+
if "*" in replacement and "*" in source_pattern_that_matched:
374+
pattern_with_captures = source_pattern_that_matched.replace("*", r"(.*?)")
375+
pattern_regex = re.compile(f"^{pattern_with_captures}$")
376+
match = pattern_regex.match(source_key)
377+
if match:
378+
groups = match.groups()
379+
replacement = replacement.replace("*", groups[0], 1)
380+
elif r"\1" in replacement:
369381
# The index of the internal group we need to replace is the index of the matched named group as it comes
370382
# inside that matched named group
371383
replaced_group_idx = self.compiled_sources.groupindex[matching_group_name] + 1
372384
replacement = replacement.replace(r"\1", match_object.group(replaced_group_idx))
385+
373386
renamed_key = source_key.replace(match_object.group(0), replacement)
374387

375388
return renamed_key, source_pattern_that_matched
@@ -877,7 +890,15 @@ def convert_and_load_state_dict_in_model(
877890
param_device = "cpu" if param_device == "disk" else param_device
878891
future = spawn_materialize(thread_pool, tensor, param_device, _dtype)
879892

880-
mapping.add_tensor(renamed_key, original_key, source_pattern, future)
893+
concrete_source_pattern = source_pattern
894+
if isinstance(mapping, WeightConverter) and source_pattern is not None and "*" in source_pattern:
895+
pattern_with_captures = source_pattern.replace("*", r"(.*?)")
896+
pattern_regex = re.compile(f"^{pattern_with_captures}$")
897+
concrete_source_pattern = extract_concrete_key_from_regex_pattern(
898+
original_key, source_pattern, pattern_regex
899+
)
900+
901+
mapping.add_tensor(renamed_key, original_key, concrete_source_pattern, future)
881902
elif source_pattern is not None: # add all target keys as unexpected
882903
mapping = pattern_to_converter[source_pattern]
883904
for k in mapping.target_patterns:

src/transformers/integrations/torchao.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ def convert(
218218
is_unsafe_serialization = "_weight_" not in list(input_dict.keys())[0]
219219

220220
param_data = {}
221-
layer_name = '.'.join(full_layer_name.split(".")[:-1])
221+
layer_name = ".".join(full_layer_name.split(".")[:-1])
222222
if is_unsafe_serialization:
223223
if isinstance(input_dict["weight"], list):
224224
weight = input_dict["weight"][0]

src/transformers/modeling_utils.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3946,19 +3946,6 @@ def from_pretrained(
39463946

39473947
is_quantized = hf_quantizer is not None
39483948

3949-
weight_conversions: Optional[list[WeightConverter | WeightRenaming]] = None
3950-
model_type = getattr(config, "model_type", None)
3951-
if model_type is not None:
3952-
weight_conversions = get_checkpoint_conversion_mapping(model_type)
3953-
if weight_conversions is None:
3954-
weight_conversions = get_checkpoint_conversion_mapping("legacy")
3955-
if key_mapping is not None:
3956-
weight_conversions.extend(
3957-
[WeightRenaming(source_keys=k, target_keys=v) for k, v in key_mapping.items()]
3958-
)
3959-
if hf_quantizer is not None:
3960-
weight_conversions.extend(hf_quantizer.get_weight_conversions())
3961-
39623949
if gguf_file:
39633950
from .modeling_gguf_pytorch_utils import load_gguf_checkpoint
39643951

src/transformers/quantizers/quantizer_torchao.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,11 @@
4343
import torchao
4444

4545
if version.parse(importlib.metadata.version("torchao")) >= version.parse("0.14.0"):
46+
from torchao.prototype.awq import AWQConfig
4647
from torchao.prototype.safetensors.safetensors_support import (
4748
flatten_tensor_state_dict,
4849
unflatten_tensor_state_dict,
4950
)
50-
from torchao.prototype.awq import AWQConfig
5151
from torchao.prototype.safetensors.safetensors_utils import is_metadata_torchao
5252

5353

@@ -556,8 +556,8 @@ def get_weight_conversions(self):
556556
if self.pre_quantized:
557557
return [
558558
WeightConverter(
559-
source_keys=["*_weight_*"],
560-
target_keys="*weight",
559+
source_patterns=["*_weight_*"],
560+
target_patterns="*weight",
561561
operations=[TorchAoDeserialize(self)],
562562
),
563563
]

tests/quantization/torchao_integration/test_torchao.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,7 @@ def test_serialization_expected_output(self):
738738

739739

740740
@require_torchao
741-
@require_torchao_version_greater_or_equal("0.14.0")
741+
@require_torchao_version_greater_or_equal("0.15.0")
742742
class TorchAoSafeSerializationTest(TorchAoSerializationTest):
743743
# called only once for all test in this class
744744
@classmethod

0 commit comments

Comments (0)