
Commit 350999c

Authored by offline893, offline0806, and gemini-code-assist[bot]
[Bugfix]Fix eplb enable when using mtp float weights. (#4576)
### What this PR does / why we need it?
Fix EPLB enablement when using MTP float weights. This workaround will be removed once EPLB supports MTP and float weights.

### How was this patch tested?
Tested with Deepseek-V3 + MTP + EPLB on A3.

---------

Signed-off-by: offline0806 <[email protected]>
Signed-off-by: offline893 <[email protected]>
Co-authored-by: offline0806 <[email protected]>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent c4a11a7 commit 350999c

File tree

1 file changed (+9 -4 lines)

vllm_ascend/ops/common_fused_moe.py

Lines changed: 9 additions & 4 deletions
@@ -169,11 +169,15 @@ def __init__(self, *args, **kwargs):
         self.expert_map_path = ascend_config.expert_map_path
         self.global_redundant_expert_num = ascend_config.init_redundancy_expert
         self.global_num_experts = num_experts + self.global_redundant_expert_num
+        # TODO: Flag for static expert placement. This is a temporary workaround
+        # to allow dynamic EPLB with float weights by skipping quantization checks.
+        self.static_eplb_enabled = False
         if self.custom_routing_function is None and self.e_score_correction_bias is not None:
             vllm_config = get_current_vllm_config()
             self.e_score_correction_bias.data = self.e_score_correction_bias.data.to(
                 dtype=vllm_config.model_config.dtype)
         # static eplb initializing with expert_map_path
+        init_eplb_enable = False
         if self.expert_map_path and os.path.exists(
                 self.expert_map_path) and os.access(self.expert_map_path,
                                                     os.R_OK):
@@ -189,6 +193,7 @@ def __init__(self, *args, **kwargs):
                         self.moe_instance_id, self.ep_rank))
                 self.log2phy = self.expert_load_balancer.get_rank_log2phy_map(
                     self.moe_instance_id, self.ep_rank).npu()
+                init_eplb_enable = True
             except Exception as e:
                 logger.warning(
                     f"Init expert map of mtp/eagle when using sample.{e}")
@@ -219,10 +224,10 @@ def __init__(self, *args, **kwargs):
             self.moe_load = torch.zeros(local_num_experts,
                                         dtype=torch.int64).npu()
 
-        eplb_enable = self.dynamic_eplb or (self.expert_map_path is not None)
-        if eplb_enable and (not hasattr(self.quant_method, "quant_method") or
-                            not isinstance(self.quant_method.quant_method,
-                                           AscendW8A8DynamicFusedMoEMethod)):
+        if init_eplb_enable and (
+                not hasattr(self.quant_method, "quant_method")
+                or not isinstance(self.quant_method.quant_method,
+                                  AscendW8A8DynamicFusedMoEMethod)):
             raise ValueError("Eplb supports only w8a8_dynamic quantization.")
 
         self.moe_config.num_experts = self.global_num_experts
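
In short, the behavioural change is that the w8a8_dynamic quantization check now fires only when a static expert map was actually loaded (init_eplb_enable), so dynamic EPLB over unquantized MTP float weights no longer raises. Below is a minimal, self-contained sketch of that gating logic; the helper function and the stand-in classes (QuantMethodWrapper, FloatWeights) are illustrative assumptions, not vllm-ascend code. Only the names init_eplb_enable and AscendW8A8DynamicFusedMoEMethod come from the diff above.

# Illustrative sketch of the gating introduced by this commit: the
# quantization check applies only when a static expert map was loaded.


class AscendW8A8DynamicFusedMoEMethod:
    """Stand-in for the real Ascend w8a8_dynamic fused-MoE quant method."""


class QuantMethodWrapper:
    """Stand-in for self.quant_method; wraps an inner quant_method object."""

    def __init__(self, inner):
        self.quant_method = inner


class FloatWeights:
    """Stand-in for an unquantized (float) MTP quant method."""


def check_eplb_quantization(init_eplb_enable: bool, quant_method) -> None:
    # Mirrors the new condition: raise only if static EPLB is active and the
    # layer is not quantized with w8a8_dynamic.
    if init_eplb_enable and (
            not hasattr(quant_method, "quant_method")
            or not isinstance(quant_method.quant_method,
                              AscendW8A8DynamicFusedMoEMethod)):
        raise ValueError("Eplb supports only w8a8_dynamic quantization.")


# Dynamic EPLB with float MTP weights: init_eplb_enable stays False, so the
# check is skipped (before this fix, this path raised).
check_eplb_quantization(False, QuantMethodWrapper(FloatWeights()))

# Static expert map loaded but weights are float: still rejected.
try:
    check_eplb_quantization(True, QuantMethodWrapper(FloatWeights()))
except ValueError as err:
    print(err)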
