Commit adfce69

Author: 刘哲续

Commit message: modify nz in bf16

Merge commit adfce69 (2 parents: 7027439 + 791020e)

File tree

2 files changed (+4, -18 lines)

vllm_ascend/ops/common_fused_moe.py

Lines changed: 2 additions & 15 deletions
@@ -19,7 +19,7 @@
 
 import torch
 import torch_npu
-from vllm.config import CompilationLevel, get_current_vllm_config
+from vllm.config import get_current_vllm_config
 from vllm.distributed import (get_dp_group, get_ep_group, get_tp_group,
                               tensor_model_parallel_all_reduce)
 from vllm.forward_context import get_forward_context
@@ -51,20 +51,7 @@ class AscendUnquantizedFusedMoEMethod(UnquantizedFusedMoEMethod):
     def __init__(self, moe: FusedMoEConfig = None):
 
         super().__init__(moe=moe)
-
-        # NOTE: Currently, this self.use_aclgraph is only used in
-        # UnquantizedFusedMoEMethod.forward_oot to decide whether to use in
-        # ops/fused_moe.py:568 to circumvent torch.randint_like not supported issue.
-        # Once torch.randint_like is supported or removed, this flag can be removed.
-        vllm_config = get_current_vllm_config()
-        ascend_config = get_ascend_config()
         self.dynamic_eplb = get_ascend_config().dynamic_eplb
-        if ascend_config.torchair_graph_config.enabled:
-            self.use_aclgraph = False
-        else:
-            self.use_aclgraph = (vllm_config.compilation_config.level
-                                 == CompilationLevel.PIECEWISE and
-                                 not vllm_config.model_config.enforce_eager)
         self.transpose = True
 
     def process_weights_after_loading(self, layer):
@@ -133,7 +120,7 @@ def apply(self,
         # this is a naive implementation for experts load balance so as
         # to avoid accumulating too much tokens on a single rank.
         # currently it is only activated when doing profile runs.
-        if enable_force_load_balance and not self.use_aclgraph:
+        if enable_force_load_balance:
             topk_ids = torch.randint_like(topk_ids, 0, global_num_experts)
 
         moe_comm_method = get_forward_context().moe_comm_method
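
For context on the functional change: with the `use_aclgraph` guard gone, the forced load balancing now runs unconditionally during profile runs, which matches the removed NOTE in `__init__` (the flag existed only to dodge a `torch.randint_like` limitation under graph mode). Below is a minimal standalone sketch of the balancing trick itself; the helper name and shapes are illustrative, not the module's actual API:

```python
import torch

def force_balance_topk_ids(topk_ids: torch.Tensor,
                           global_num_experts: int) -> torch.Tensor:
    """Overwrite routed expert ids with uniform random ids (illustrative helper).

    During profile/dummy runs the real router output is irrelevant; random
    assignment spreads tokens roughly evenly (~num_tokens * top_k /
    global_num_experts per expert), so no single rank accumulates too many
    tokens.
    """
    # randint_like preserves the shape, dtype, and device of the input.
    return torch.randint_like(topk_ids, 0, global_num_experts)

# Illustrative usage: 8 tokens routed top-2 over 64 experts.
topk_ids = torch.zeros(8, 2, dtype=torch.int64)
balanced = force_balance_topk_ids(topk_ids, global_num_experts=64)
assert balanced.shape == topk_ids.shape and balanced.max() < 64
```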

vllm_ascend/worker/model_runner_v1.py

Lines changed: 2 additions & 3 deletions
@@ -2322,11 +2322,10 @@ def _generate_dummy_run_hidden_states(self, with_prefill,
         if self.vllm_config.model_config.use_mla:
             # FIXME: Try using `auto_dispatch_capture=True`
             update_mla_attn_params(self.update_stream, forward_context,
-                                   positions.shape[0],
-                                   self.speculative_config)
+                                   num_tokens, self.speculative_config)
         else:
             update_attn_params(self.update_stream, forward_context,
-                               positions.shape[0])
+                               num_tokens)
 
         if self.drafter and self.drafter.name == SpecDcodeType.EAGLE3:
             hidden_states, _ = hidden_states
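
The runner change is narrower: the dummy run already knows the padded token count it chose (`num_tokens`), so the attention-param updates now receive that value directly instead of re-deriving it from `positions.shape[0]`. A sketch of the pattern under assumed names; `dummy_run`, the hidden size, and the lambda are hypothetical stand-ins for the runner's real machinery:

```python
import torch

def dummy_run(num_tokens: int, update_attn_params) -> None:
    # Hypothetical stand-in for _generate_dummy_run_hidden_states: the runner
    # picks a padded bucket size first, then builds tensors to match it.
    positions = torch.zeros(num_tokens, dtype=torch.long)
    hidden_states = torch.zeros(num_tokens, 128)  # assumed hidden size
    # ... model forward on (positions, hidden_states) would go here ...
    # Passing num_tokens keeps the update tied to the chosen bucket size
    # rather than to whatever shape `positions` happens to have.
    update_attn_params(num_tokens)  # was: update_attn_params(positions.shape[0])

dummy_run(512, lambda n: print(f"updating attn params for {n} tokens"))
```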
