Skip to content

Commit ce3074d

Browse files
[Bugfix] dynamic eplb doesn't use fused_alltoall
Signed-off-by: shenchuxiaofugui <[email protected]>
1 parent 18221c0 commit ce3074d

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2407,10 +2407,12 @@ def _select_moe_comm_method(self,
24072407

24082408
elif soc_version in {AscendDeviceType._910_93}:
24092409
# TODO: drop the EP-size guard when dispatch_ffn_combine supports larger EP sizes
2410-
moe_comm_type = (
2411-
MoECommType.MC2 if num_tokens <= self.mc2_tokens_capacity else
2412-
MoECommType.FUSED_ALLTOALL if quant_type == "w8a8_dynamic"
2413-
and get_ep_group().world_size <= 16 else MoECommType.ALLTOALL)
2410+
fused_all2all_enable = quant_type == "w8a8_dynamic" and get_ep_group(
2411+
).world_size <= 16 and (not self.dynamic_eplb)
2412+
moe_comm_type = (MoECommType.MC2
2413+
if num_tokens <= self.mc2_tokens_capacity else
2414+
MoECommType.FUSED_ALLTOALL
2415+
if fused_all2all_enable else MoECommType.ALLTOALL)
24142416
else:
24152417
raise ValueError(f"Unsupported soc_version: {soc_version}")
24162418

0 commit comments

Comments
 (0)