Skip to content

Commit 38be5c3

Browse files
[Bugfix] dynamic eplb doesn't use fused_alltoall
Signed-off-by: shenchuxiaofugui <[email protected]>
1 parent 3581946 commit 38be5c3

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,10 +1391,13 @@ def _select_moe_comm_method(self,
13911391
moe_comm_type = MoECommType.ALLGATHER
13921392

13931393
elif soc_version in {AscendDeviceType._910_93}:
1394-
moe_comm_type = (
1395-
MoECommType.MC2 if num_tokens <= mc2_tokens_capacity else
1396-
MoECommType.FUSED_ALLTOALL if quant_type == "w8a8_dynamic"
1397-
and get_ep_group().world_size <= 16 else MoECommType.ALLTOALL)
1394+
# TODO: drop the EP-size guard when dispatch_ffn_combine supports larger EP sizes
1395+
fused_all2all_enable = quant_type == "w8a8_dynamic" and get_ep_group(
1396+
).world_size <= 16 and (not self.dynamic_eplb)
1397+
moe_comm_type = (MoECommType.MC2
1398+
if num_tokens <= self.mc2_tokens_capacity else
1399+
MoECommType.FUSED_ALLTOALL
1400+
if fused_all2all_enable else MoECommType.ALLTOALL)
13981401
else:
13991402
raise ValueError(f"Unsupported soc_version: {soc_version}")
14001403

0 commit comments

Comments
 (0)