Skip to content

Commit f3bf192

Browse files
dragondream-chenyiz-liu
authored and committed
[Bugfix] add force_attention comment
Signed-off-by: chenmenglong <[email protected]>
1 parent eda3cab commit f3bf192

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

vllm_ascend/spec_decode/mtp_proposer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -288,7 +288,7 @@ def dummy_run(self,
288288
positions = self.positions[:num_tokens]
289289
previous_hidden_states = self.hidden_states[:num_tokens]
290290
for i in range(self.num_speculative_tokens):
291-
if i > 0 and not in_graph_capturing and aclgraph_runtime_mode == CUDAGraphMode.FULL:
291+
if i > 0 and in_graph_capturing and aclgraph_runtime_mode == CUDAGraphMode.FULL:
292292
aclgraph_runtime_mode = CUDAGraphMode.NONE
293293
with set_ascend_forward_context(
294294
attn_metadata,

vllm_ascend/worker/model_runner_v1.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2148,6 +2148,8 @@ def dummy_drafter_compute_logits(hidden_states):
21482148
dummy_compute_logits(hidden_states)
21492149

21502150
if self.drafter:
2151+
# `in_graph_capturing` indicates whether the main model is in graph capturing.
2152+
# The value is only used in `mtp_proposer.py` currently and defaults to False.
21512153
self.drafter.dummy_run(
21522154
num_tokens=num_tokens_padded,
21532155
with_prefill=with_prefill,
@@ -2156,7 +2158,7 @@ def dummy_drafter_compute_logits(hidden_states):
21562158
aclgraph_runtime_mode=aclgraph_runtime_mode,
21572159
batch_descriptor=batch_descriptor,
21582160
dummy_compute_logits=dummy_drafter_compute_logits,
2159-
in_graph_capturing=not force_attention)
2161+
in_graph_capturing=force_attention)
21602162
if self.in_profile_run and self.dynamic_eplb:
21612163
self.model.clear_all_moe_loads()
21622164
if not self.in_profile_run and self.dynamic_eplb:

0 commit comments

Comments
 (0)