Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions vllm_ascend/worker/model_runner_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -2574,6 +2574,10 @@ def _dummy_run(
tp_size = self.vllm_config.parallel_config.tensor_parallel_size
num_tokens = math.ceil(num_tokens / tp_size) * tp_size

# Force the dummy run onto the prefill stage when this node is a KV producer only (i.e. not also a KV consumer).
if self.is_kv_producer and not self.is_kv_consumer:
with_prefill = True

# Padding for DP
(num_tokens, num_tokens_across_dp,
with_prefill) = self._sync_metadata_across_dp(num_tokens,
Expand Down Expand Up @@ -2622,10 +2626,6 @@ def _dummy_run(
num_scheduled_tokens = np.array(num_scheduled_tokens_list,
dtype=np.int32)

# Force the dummy run onto the prefill stage when this node is a KV producer only (i.e. not also a KV consumer).
if self.is_kv_producer and not self.is_kv_consumer:
with_prefill = True

if not self.in_profile_run and self.dynamic_eplb:
self.eplb_updator.forward_before()

Expand Down
Loading