@@ -133,7 +133,6 @@ def scratch_space_fixed(self, max_input_len=2048, max_batch_size=8):
133133 def forward (self , x ):
134134 assert self .q_handle is not None , (
135135 "module.post_init() must be called before module.forward(). "
136- "Use exllamav2_post_init() on the whole model."
137136 )
138137 if exlv2_ext is None :
139138 raise ModuleNotFoundError ("External ExLlamaV2 kernels are not properly installed." + msg )
@@ -160,47 +159,3 @@ def forward(self, x):
160159 out .add_ (self .bias )
161160
162161 return out .view (out_shape )
163-
164-
class ScratchSpace:
    """Pre-allocated fp16 workspace shared by ExLlamaV2 kernels on one device.

    The buffer is sized in bytes but backed by a half-precision tensor,
    so it stores ``scratch_bytes // 2`` fp16 elements.
    """

    def __init__(self, scratch_bytes, dev):
        self.scratch_bytes = scratch_bytes
        # Each fp16 element occupies two bytes.
        half_count = self.scratch_bytes // 2
        self.scratch = torch.empty(half_count, dtype=torch.float16, device=dev)

    def get_slice(self, size_bytes):
        """Return a zero-offset view over the first ``size_bytes`` bytes.

        The requested size is rounded up to a multiple of 128 bytes before
        being converted to an fp16 element count; no copy is made.
        """
        size_halfs = next_multiple(size_bytes, 128) // 2
        return self.scratch.narrow(0, 0, size_halfs)
179-
180-
def exllamav2_post_init(model, max_input_len: int = 2048, max_batch_size: int = 8):
    """Allocate per-device scratch space and finish initializing every
    ``AwqExllamaV2QuantLinear`` submodule of *model*.

    A single persistent ``ScratchSpace`` is created per device, sized to the
    largest requirement reported by any quantized layer on that device, then
    handed to each layer via ``post_init``. Returns *model* for chaining.
    """
    # First pass: find the maximum scratch requirement (in bytes) per device.
    fixed_bytes = {}
    for _, submodule in model.named_modules():
        if not isinstance(submodule, AwqExllamaV2QuantLinear):
            continue
        dev = submodule.qweight.device
        needed = submodule.scratch_space_fixed(
            max_input_len=max_input_len, max_batch_size=max_batch_size
        )
        fixed_bytes[dev] = max(fixed_bytes.get(dev, 0), needed)

    # One model-persistent ScratchSpace per device.
    model.scratch_spaces = {
        dev: ScratchSpace(nbytes, dev) for dev, nbytes in fixed_bytes.items()
    }

    # Second pass: hand each quantized layer the scratch space for its device.
    for _, submodule in model.named_modules():
        if isinstance(submodule, AwqExllamaV2QuantLinear):
            dev = submodule.qweight.device
            submodule.post_init(scratch_space=model.scratch_spaces[dev])

    return model
203-
204-
def next_multiple(x, multiple):
    """Round *x* up to the nearest multiple of *multiple*."""
    quotient = (x + multiple - 1) // multiple
    return quotient * multiple
0 commit comments