
Commit eedd1cc

Merge branch 'main' into kylesayrs/deprecate
2 parents 5dda500 + 91d8d9d

File tree: 17 files changed (+298, -248 lines)


examples/awq/qwen3-vl-30b-a3b-Instruct-example.py

Lines changed: 0 additions & 4 deletions
@@ -3,20 +3,16 @@
 from transformers import AutoProcessor, Qwen3VLMoeForConditionalGeneration
 
 from llmcompressor import oneshot
-from llmcompressor.modeling import replace_modules_for_calibration
 from llmcompressor.modifiers.awq import AWQModifier
 from llmcompressor.utils import dispatch_for_generation
 
-# NOTE: Requires a minimum of transformers 4.57.0
-
 MODEL_ID = "Qwen/Qwen3-VL-30B-A3B-Instruct"
 
 # Load model.
 model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
     MODEL_ID, torch_dtype=torch.bfloat16, device_map=None, trust_remote_code=True
 )
 processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
-model = replace_modules_for_calibration(model)
 
 DATASET_ID = "neuralmagic/calibration"
 NUM_CALIBRATION_SAMPLES = 256
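
Note: the same migration applies to all three example scripts in this commit; the explicit replace_modules_for_calibration(model) step is dropped because MoE module replacement now happens inside oneshot itself (see the oneshot.py hunk below). A minimal sketch of the updated flow, assuming oneshot's usual keyword arguments; the AWQ recipe details are illustrative, not taken from this diff.

# Minimal sketch of the post-change calibration flow (hedged).
import torch
from transformers import AutoProcessor, Qwen3VLMoeForConditionalGeneration

from llmcompressor import oneshot
from llmcompressor.modifiers.awq import AWQModifier

MODEL_ID = "Qwen/Qwen3-VL-30B-A3B-Instruct"

model = Qwen3VLMoeForConditionalGeneration.from_pretrained(
    MODEL_ID, torch_dtype=torch.bfloat16, device_map=None, trust_remote_code=True
)
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

# No replace_modules_for_calibration(model) call anymore; MoE modules are
# swapped for calibration-friendly versions inside oneshot.
oneshot(
    model=model,
    dataset="neuralmagic/calibration",
    recipe=[AWQModifier(targets=["Linear"], scheme="W4A16", ignore=["lm_head"])],
    num_calibration_samples=256,
)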

examples/quantization_w4a4_fp4/qwen3_vl_moe_w4a4_fp4.py

Lines changed: 0 additions & 4 deletions
@@ -3,19 +3,15 @@
 from transformers import AutoProcessor, Qwen3VLMoeForConditionalGeneration
 
 from llmcompressor import oneshot
-from llmcompressor.modeling import replace_modules_for_calibration
 from llmcompressor.modifiers.quantization import QuantizationModifier
 from llmcompressor.utils import dispatch_for_generation
 
-# NOTE: Requires a minimum of transformers 4.57.0
-
 MODEL_ID = "Qwen/Qwen3-VL-235B-A22B-Instruct"
 
 
 # Load model.
 model = Qwen3VLMoeForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype="auto")
 processor = AutoProcessor.from_pretrained(MODEL_ID)
-model = replace_modules_for_calibration(model)
 
 DATASET_ID = "neuralmagic/calibration"
 NUM_CALIBRATION_SAMPLES = 20

examples/quantization_w8a8_fp8/qwen3_vl_moe_fp8_example.py

Lines changed: 0 additions & 2 deletions
@@ -1,7 +1,6 @@
 from transformers import AutoProcessor, Qwen3VLMoeForConditionalGeneration
 
 from llmcompressor import oneshot
-from llmcompressor.modeling import replace_modules_for_calibration
 from llmcompressor.modifiers.quantization import QuantizationModifier
 
 # NOTE: Requires a minimum of transformers 4.57.0
@@ -11,7 +10,6 @@
 # Load model.
 model = Qwen3VLMoeForConditionalGeneration.from_pretrained(MODEL_ID, torch_dtype="auto")
 processor = AutoProcessor.from_pretrained(MODEL_ID)
-model = replace_modules_for_calibration(model)
 
 # Configure the quantization algorithm and scheme.
 # In this case, we:

setup.py

Lines changed: 14 additions & 9 deletions
@@ -114,22 +114,28 @@ def localversion_func(version: ScmVersion) -> str:
         ("pyyaml>=6.0.1,<=6.0.3" if BUILD_TYPE == "release" else "pyyaml>=6.0.1"),
         # librosa dependency numba is currently not compatible with numpy>=2.3
         # https://numba.readthedocs.io/en/stable/user/installing.html#version-support-information
-        ("numpy>=2.0.0,<=2.3.3" if BUILD_TYPE == "release" else "numpy>=2.0.0"),
+        ("numpy>=2.0.0,<=2.3.5" if BUILD_TYPE == "release" else "numpy>=2.0.0"),
         (
             "requests>=2.32.2,<=2.32.5"
             if BUILD_TYPE == "release"
             else "requests>=2.32.2"
         ),
         ("tqdm>=4.66.3,<=4.67.1" if BUILD_TYPE == "release" else "tqdm>=4.66.3"),
-        ("torch>=2.7.0,<=2.8.0" if BUILD_TYPE == "release" else "torch>=2.7.0"),
+        ("torch>=2.7.0,<=2.9.1" if BUILD_TYPE == "release" else "torch>=2.7.0"),
         (
-            "transformers>=4.54.0,<=4.56.2"
+            "transformers>=4.54.0,<=4.57.3"
             if BUILD_TYPE == "release"
-            else "transformers>=4.54.0"
+            else "transformers>=4.54.0,<=4.57.3"
         ),
-        ("datasets>=4.0.0,<=4.1.1" if BUILD_TYPE == "release" else "datasets>=4.0.0"),
+        ("datasets>=4.0.0,<=4.4.1" if BUILD_TYPE == "release" else "datasets>=4.0.0"),
         (
-            "accelerate>=1.6.0,<=1.10.1"
+            # auto-round 0.9.1 cannot work with accelerate <1.10.0
+            "auto-round>=0.9.2,<=0.9.2"
+            if BUILD_TYPE == "release"
+            else "auto-round>=0.9.2"
+        ),
+        (
+            "accelerate>=1.6.0,<=1.12.0"
             if BUILD_TYPE == "release"
             else "accelerate>=1.6.0"
         ),
@@ -138,13 +144,12 @@ def localversion_func(version: ScmVersion) -> str:
             if BUILD_TYPE == "release"
             else "nvidia-ml-py>=12.560.30"
         ),
-        ("pillow>=10.4.0,<=11.3.0" if BUILD_TYPE == "release" else "pillow>=10.4.0"),
+        ("pillow>=10.4.0,<=12.0.0" if BUILD_TYPE == "release" else "pillow>=10.4.0"),
         (
             "compressed-tensors==0.12.2"
             if BUILD_TYPE == "release"
             else "compressed-tensors>=0.12.3a2"
         ),
-        ("auto-round==0.9.1"),
     ],
     extras_require={
         "dev": [
@@ -178,7 +183,7 @@ def localversion_func(version: ScmVersion) -> str:
             "mkdocstrings-python",
             "mkdocs-gen-files",
             "mkdocs-nav-weight",
-        ]
+        ],
     },
     entry_points={
         "console_scripts": [

src/llmcompressor/entrypoints/oneshot.py

Lines changed: 13 additions & 11 deletions
@@ -201,22 +201,24 @@ def apply_recipe_modifiers(
         session.reset()
 
         # (Helen INFERENG-661): validate recipe modifiers before initialization
-        session.initialize(
-            model=self.model,
-            start=-1,
-            recipe=self.recipe,
-            recipe_stage=recipe_stage,
-            recipe_args=self.recipe_args.recipe_args,
-            calib_data=calibration_dataloader,
-        )
-        user_pipeline = self.dataset_args.pipeline
-        modifiers = session.lifecycle.recipe.modifiers
-        pipeline = CalibrationPipeline.from_modifiers(modifiers, user=user_pipeline)
         # Apply MoE calibration context for the entire calibration process
         with moe_calibration_context(
             self.model,
             calibrate_all_experts=self.dataset_args.moe_calibrate_all_experts,
         ):
+            session.initialize(
+                model=self.model,
+                start=-1,
+                recipe=self.recipe,
+                recipe_stage=recipe_stage,
+                recipe_args=self.recipe_args.recipe_args,
+                calib_data=calibration_dataloader,
+            )
+            user_pipeline = self.dataset_args.pipeline
+            pipeline = CalibrationPipeline.from_modifiers(
+                session.lifecycle.recipe.modifiers, user=user_pipeline
+            )
+
             pipeline(
                 self.model,
                 calibration_dataloader,
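
Note: the point of this reordering is that session.initialize, and therefore recipe modifier resolution, now runs while the MoE calibration replacements are in place, so modifiers bind to the swapped modules rather than the originals. A toy illustration of the ordering hazard, deliberately not using the llmcompressor API:

# Toy context manager standing in for moe_calibration_context: it swaps a
# module for a calibration-friendly version and restores it on exit.
from contextlib import contextmanager

@contextmanager
def swap_modules(model: dict):
    original = dict(model)
    model["moe"] = "CalibrationMoE"  # stand-in for the replacement module
    try:
        yield model
    finally:
        model.clear()
        model.update(original)

model = {"moe": "OriginalMoE"}

# Initializing inside the context (as the new code does) sees the swapped
# module; initializing before the `with` (as the old code did) would not.
with swap_modules(model):
    assert model["moe"] == "CalibrationMoE"
assert model["moe"] == "OriginalMoE"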

src/llmcompressor/modeling/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -18,4 +18,3 @@
 # TODO: add granite4, Qwen3Next
 
 from .fuse import *
-from .prepare import *

src/llmcompressor/modeling/deepseek_v3.py

Lines changed: 0 additions & 17 deletions
@@ -68,20 +68,3 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         hidden_states = final_hidden_states.type(hidden_states.dtype).view(*orig_shape)
         hidden_states = hidden_states + self.shared_experts(residuals)
         return hidden_states
-
-
-# Legacy function for backward compatibility
-def replace(
-    config: DeepseekV3Config,
-    module: OriginalDeepseekV3MoE,
-    calibrate_all_experts: bool,
-):
-    """
-    Legacy replacement function.
-    Use CalibrationDeepseekV3MoE instead.
-    """
-    return CalibrationDeepseekV3MoE(
-        module,
-        config,
-        calibrate_all_experts=calibrate_all_experts,
-    )
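
Note: callers of the deleted legacy helper migrate by constructing the calibration wrapper directly; the same substitution applies to the llama4.py (SequentialLlama4TextMoe) and qwen3_moe.py (CalibrationQwen3MoeSparseMoeBlock) hunks below. A sketch, with config and module assumed to be the original DeepseekV3 config and MoE module:

# Migration sketch for the removed helper; the argument order mirrors the
# deleted function body above.
from llmcompressor.modeling.deepseek_v3 import CalibrationDeepseekV3MoE

def migrate(config, module):
    # Before (removed): replace(config, module, calibrate_all_experts=True)
    return CalibrationDeepseekV3MoE(
        module,
        config,
        calibrate_all_experts=True,
    )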

src/llmcompressor/modeling/llama4.py

Lines changed: 0 additions & 13 deletions
@@ -87,16 +87,3 @@ def __init__(self, config: Llama4TextConfig, original: Llama4TextExperts):
             self[i].gate_proj.weight.data = gate_proj.t().contiguous()
             self[i].up_proj.weight.data = up_proj.t().contiguous()
             self[i].down_proj.weight.data = down.t().contiguous()
-
-
-# Legacy function for backward compatibility
-def replace(config: Llama4Config, module: Llama4TextMoe, calibrate_all_experts: bool):
-    """
-    Legacy replacement function.
-    Use SequentialLlama4TextMoe instead.
-    """
-    return SequentialLlama4TextMoe(
-        module,
-        config,
-        calibrate_all_experts=calibrate_all_experts,
-    )

src/llmcompressor/modeling/prepare.py

Lines changed: 0 additions & 62 deletions
This file was deleted.

src/llmcompressor/modeling/qwen3_moe.py

Lines changed: 0 additions & 17 deletions
@@ -97,20 +97,3 @@ def forward(self, hidden_states: torch.Tensor):
 
     def restore(self, original: torch.nn.Module) -> torch.nn.Module:
         return original
-
-
-# Legacy function for backward compatibility
-def replace(
-    config: Qwen3MoeConfig,
-    module: OriginalQwen3MoeSparseMoeBlock,
-    calibrate_all_experts: bool,
-):
-    """
-    Legacy replacement function.
-    Use CalibrationQwen3MoeSparseMoeBlock instead.
-    """
-    return CalibrationQwen3MoeSparseMoeBlock(
-        module,
-        config,
-        calibrate_all_experts=calibrate_all_experts,
-    )
