File tree Expand file tree Collapse file tree 2 files changed +4
-2
lines changed
Expand file tree Collapse file tree 2 files changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -288,7 +288,7 @@ def dummy_run(self,
288288 positions = self .positions [:num_tokens ]
289289 previous_hidden_states = self .hidden_states [:num_tokens ]
290290 for i in range (self .num_speculative_tokens ):
291- if i > 0 and not in_graph_capturing and aclgraph_runtime_mode == CUDAGraphMode .FULL :
291+ if i > 0 and in_graph_capturing and aclgraph_runtime_mode == CUDAGraphMode .FULL :
292292 aclgraph_runtime_mode = CUDAGraphMode .NONE
293293 with set_ascend_forward_context (
294294 attn_metadata ,
Original file line number Diff line number Diff line change @@ -2148,6 +2148,8 @@ def dummy_drafter_compute_logits(hidden_states):
21482148 dummy_compute_logits (hidden_states )
21492149
21502150 if self .drafter :
2151+ # `in_graph_capturing` indicates whether the main model is currently being graph-captured.
2152+ # The flag is currently consumed only by `mtp_proposer.py`; it defaults to False.
21512153 self .drafter .dummy_run (
21522154 num_tokens = num_tokens_padded ,
21532155 with_prefill = with_prefill ,
@@ -2156,7 +2158,7 @@ def dummy_drafter_compute_logits(hidden_states):
21562158 aclgraph_runtime_mode = aclgraph_runtime_mode ,
21572159 batch_descriptor = batch_descriptor ,
21582160 dummy_compute_logits = dummy_drafter_compute_logits ,
2159- in_graph_capturing = not force_attention )
2161+ in_graph_capturing = force_attention )
21602162 if self .in_profile_run and self .dynamic_eplb :
21612163 self .model .clear_all_moe_loads ()
21622164 if not self .in_profile_run and self .dynamic_eplb :
You can’t perform that action at this time.
0 commit comments