Commit 3aa6696 — "modify unflatten for vllm"
1 parent: 6c78c4d

File tree

4 files changed: +22 lines added, −1 line removed

benchmarks/benchmark_uintx.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -6,11 +6,11 @@
 from copy import deepcopy
 
 import torch
-
 from torchao.prototype.uintx import (
     uintx_affine_weight_only,
     unpack_cpu,
 )
+
 from torchao.quantization.quant_api import quantize_
```

test/prototype/safetensors/test_safetensors_support.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -77,6 +77,7 @@ def test_safetensors(self, config, act_pre_scale=False):
         reconstructed_dict = unflatten_tensor_state_dict(
             tensors_data_dict, metadata
         )
+        assert not tensors_data_dict
 
         model = torch.nn.Sequential(
             torch.nn.Linear(128, 256, dtype=torch.bfloat16, device="cuda")
```

test/test_low_bit_optim.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -30,6 +30,7 @@
 common_utils.SEED = 1234
 
 from packaging.version import Version
+
 from torchao import optim
 from torchao.optim.quant_utils import (
     _fp32_to_bf16_sr,
```

torchao/prototype/safetensors/safetensors_support.py

Lines changed: 19 additions & 0 deletions
```diff
@@ -68,14 +68,21 @@ def unflatten_tensor_state_dict(
     result = {}
 
     for tensor_name in tensor_names:
+        to_be_deleted = []
+
         module_fqn, weight_name = tensor_name.rsplit(".", 1)
 
         prefix = f"{module_fqn}._{weight_name}_"
         tensor_tensors = {}
+
         for key, value in combined_data.items():
             if key.startswith(prefix):
                 # Remove the prefix
                 tensor_tensors[key[len(prefix) :]] = value
+                full_tensor_name_in_state_dict = key
+                to_be_deleted.append(
+                    full_tensor_name_in_state_dict
+                )  # for tensor subclass
 
         tensor_metadata = json.loads(metadata.get(tensor_name))
         tensor_type = tensor_metadata.get("_type")
@@ -89,9 +96,21 @@
             tensor_metadata["_data"].update(tensor_tensors)
             result[tensor_name] = object_from_dict(tensor_metadata)
         elif tensor_type == torch.Tensor.__name__:
+            if tensor_name not in tensors_data_dict.keys():
+                # we allow the option of loading in state_dict info for a single tensor
+                # if tensor state dict info is not loaded in yet, we wait for it to be provided
+                # in a future call
+                continue
             result[tensor_name] = tensors_data_dict[tensor_name]
+            to_be_deleted.append(
+                tensor_name
+            )  # add here because key for torch.Tensor has no prefix
         else:
             raise ValueError(f"Unsupported tensor type: {tensor_type}")
+
+        for tensor_name in to_be_deleted:
+            del tensors_data_dict[tensor_name]
+
     return result
```
(NOTE(review): the indentation of the added `for tensor_name in to_be_deleted:` cleanup loop cannot be recovered from the scraped page; it is shown one level inside the outer loop because `to_be_deleted` is re-initialized on every outer iteration — confirm against the upstream commit.)
96115

97116

0 commit comments