Skip to content

Commit 1ceb857

Browse files
committed
force with_prefill true after allreduce in kv producer
Signed-off-by: liziyu <[email protected]>
1 parent afc5818 commit 1ceb857

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2574,6 +2574,10 @@ def _dummy_run(
25742574
tp_size = self.vllm_config.parallel_config.tensor_parallel_size
25752575
num_tokens = math.ceil(num_tokens / tp_size) * tp_size
25762576

2577+
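# Force the dummy run onto the prefill path when this node is a KV producer only (not also a KV consumer); set before the DP metadata sync below, presumably so the forced value is the one that gets synchronized.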
2578+
if self.is_kv_producer and not self.is_kv_consumer:
2579+
with_prefill = True
2580+
25772581
# Padding for DP
25782582
(num_tokens, num_tokens_across_dp,
25792583
with_prefill) = self._sync_metadata_across_dp(num_tokens,
@@ -2622,10 +2626,6 @@ def _dummy_run(
26222626
num_scheduled_tokens = np.array(num_scheduled_tokens_list,
26232627
dtype=np.int32)
26242628

2625-
# Force dummy run on prefill stage when this node is deemed as kv producer.
2626-
if self.is_kv_producer and not self.is_kv_consumer:
2627-
with_prefill = True
2628-
26292629
if not self.in_profile_run and self.dynamic_eplb:
26302630
self.eplb_updator.forward_before()
26312631

0 commit comments

Comments
 (0)