Skip to content

Commit 59c9b0e

Browse files
committed
Correctly use multi-stream in ACL graph
Signed-off-by: daishixun <[email protected]>
1 parent b2a0571 commit 59c9b0e

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

vllm_ascend/ops/fused_moe/fused_moe.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -392,11 +392,14 @@ def forward_impl(self, hidden_states: torch.Tensor,
392392
if isinstance(final_hidden_states, tuple):
393393
final_hidden_states, group_list_type, expert_tokens = final_hidden_states
394394
if self.dynamic_eplb:
395-
with npu_stream_switch(moe_load_async_stream()):
396-
moe_load_async_stream().wait_stream(
397-
torch.npu.current_stream(device=expert_tokens.device))
395+
moe_load_stream = moe_load_async_stream()
396+
cur_stream = torch.npu.current_stream()
397+
398+
moe_load_stream.wait_stream(cur_stream)
399+
with npu_stream_switch(moe_load_stream):
398400
self.moe_load += expert_tokens if group_list_type == 1 else \
399401
torch.cat([expert_tokens[:1], expert_tokens[1:] - expert_tokens[:-1]])
402+
cur_stream.wait_stream(moe_load_stream)
400403

401404
final_hidden_states = forward_context.moe_comm_method.finalize(
402405
hidden_states=final_hidden_states,

0 commit comments

Comments (0)