Skip to content

Commit 94596bb

Browse files
committed
fix prefill
Signed-off-by: liziyu <[email protected]>
1 parent e48ca0b commit 94596bb

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2370,6 +2370,10 @@ def _dummy_run(
23702370
tp_size = self.vllm_config.parallel_config.tensor_parallel_size
23712371
num_tokens = math.ceil(num_tokens / tp_size) * tp_size
23722372

2373+
# Force dummy run on prefill stage when this node is deemed as kv producer.
2374+
if self.is_kv_producer and not self.is_kv_consumer:
2375+
with_prefill = True
2376+
23732377
# Padding for DP
23742378
(num_tokens, num_tokens_across_dp, with_prefill,
23752379
_) = self._sync_metadata_across_dp(num_tokens, with_prefill, False)
@@ -2417,10 +2421,6 @@ def _dummy_run(
24172421
num_scheduled_tokens = np.array(num_scheduled_tokens_list,
24182422
dtype=np.int32)
24192423

2420-
# Force dummy run on prefill stage when this node is deemed as kv producer.
2421-
if self.is_kv_producer and not self.is_kv_consumer:
2422-
with_prefill = True
2423-
24242424
if not self.in_profile_run and self.dynamic_eplb:
24252425
self.eplb_updator.forward_before()
24262426

0 commit comments

Comments (0)