
Commit 776e14e

fix typo and delete a part of mc2_mask
Signed-off-by: linfeng-yuan <[email protected]>
1 parent 54198aa commit 776e14e

2 files changed: +1 −3 lines changed


vllm_ascend/ops/fused_moe/token_dispatcher.py

Lines changed: 0 additions & 2 deletions
@@ -227,7 +227,6 @@ def token_dispatch(self,
         context_metadata = {
             "topk_ids": topk_ids,
             "topk_weights": topk_weights,
-            "mc2_mask": mc2_mask,
             "expert_map": expert_map,
             "ep_recv_counts": ep_recv_counts,
             "tp_recv_counts": tp_recv_counts,
@@ -256,7 +255,6 @@ def get_combine_mc_kwargs(self, hidden_states: torch.Tensor,
         ep_recv_counts = context_metadata["ep_recv_counts"]
         tp_recv_counts = context_metadata["tp_recv_counts"]
         assist_info_for_combine = context_metadata["assist_info_for_combine"]
-        mc2_mask = context_metadata["mc2_mask"]
         expand_scales = context_metadata["expand_scales"]

         assert expert_map is not None
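For readers unfamiliar with the dispatcher flow: `token_dispatch` stores per-batch metadata in `context_metadata`, and `get_combine_mc_kwargs` reads it back when building the combine call. After this change, `mc2_mask` is no longer round-tripped through that dict. The sketch below is a minimal, hypothetical illustration of that handoff pattern; the class name `SimpleDispatcher` and its reduced argument list are not from the real `token_dispatcher.py`, it only shows a metadata dict being produced at dispatch time and consumed at combine time without an `"mc2_mask"` entry.

```python
import torch


class SimpleDispatcher:
    """Hypothetical stand-in for the MC2 dispatcher's metadata handoff."""

    def token_dispatch(self, topk_ids: torch.Tensor,
                       topk_weights: torch.Tensor,
                       expert_map: torch.Tensor) -> dict:
        # Stash only what the combine phase still needs; after this commit
        # "mc2_mask" is no longer part of the stored metadata.
        return {
            "topk_ids": topk_ids,
            "topk_weights": topk_weights,
            "expert_map": expert_map,
        }

    def get_combine_mc_kwargs(self, hidden_states: torch.Tensor,
                              context_metadata: dict) -> dict:
        # Read back the dispatch-time metadata; note there is no
        # context_metadata["mc2_mask"] lookup any more.
        expert_map = context_metadata["expert_map"]
        assert expert_map is not None
        return {
            "hidden_states": hidden_states,
            "topk_ids": context_metadata["topk_ids"],
            "topk_weights": context_metadata["topk_weights"],
            "expert_map": expert_map,
        }


# Usage sketch with toy tensors.
dispatcher = SimpleDispatcher()
meta = dispatcher.token_dispatch(topk_ids=torch.zeros(4, 2, dtype=torch.int64),
                                 topk_weights=torch.ones(4, 2),
                                 expert_map=torch.arange(8))
kwargs = dispatcher.get_combine_mc_kwargs(torch.randn(4, 16), meta)
print(sorted(kwargs.keys()))
```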

vllm_ascend/worker/model_runner_v1.py

Lines changed: 1 addition & 1 deletion
@@ -968,7 +968,7 @@ def _sync_metadata_across_dp(
         # NOTE: Here we can skip the all_reduce operation and avoid paading tokens
         # to max_tokens_acrodd_dp in D nodes. In MoE models, we must ensure that
         # num_tokens DOES NOT exceed mc2_tokens_capacity which means that moe_comm_method
-        # of each rank is MC2. It is recommended to enable recompute scheduler for D Noes.
+        # of each rank is MC2. It is recommended to enable recompute scheduler for D Nodes.
         if self.is_kv_consumer and not self.in_profile_run:
             num_tokens_after_padding = torch.tensor([num_tokens] *
                                                     self.dp_size,
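The NOTE in the hunk above says that KV-consumer (decode, "D") nodes can skip the cross-DP all_reduce and simply assume every rank runs `num_tokens` tokens, since MC2 requires `num_tokens` to stay within `mc2_tokens_capacity`. Below is a minimal sketch of that shortcut, assuming a hypothetical free function `sync_num_tokens_across_dp` rather than the runner's actual method; only the `is_kv_consumer and not in_profile_run` branch mirrors the diff, and the general path is a generic `all_gather` stand-in for the collective the runner would otherwise perform.

```python
import torch
import torch.distributed as dist


def sync_num_tokens_across_dp(num_tokens: int,
                              dp_size: int,
                              is_kv_consumer: bool,
                              in_profile_run: bool,
                              dp_group=None) -> torch.Tensor:
    # Shortcut from the NOTE: decode (D) nodes that only consume KV caches can
    # skip the collective, because each rank is assumed to stay within
    # mc2_tokens_capacity and therefore needs no cross-rank padding info.
    if is_kv_consumer and not in_profile_run:
        return torch.tensor([num_tokens] * dp_size, dtype=torch.int32)

    # General path: gather every DP rank's token count with a collective
    # (requires an initialized torch.distributed process group).
    local = torch.tensor([num_tokens], dtype=torch.int32)
    gathered = [torch.zeros_like(local) for _ in range(dp_size)]
    dist.all_gather(gathered, local, group=dp_group)
    return torch.cat(gathered)


# D-node path, no process group required:
print(sync_num_tokens_across_dp(num_tokens=7, dp_size=4,
                                is_kv_consumer=True, in_profile_run=False))
```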
