Skip to content

Commit 4d3c812

Browse files
authored
[v0.11.0] [P/D] force with_prefill true after allreduce in kv producer (vllm-project#3835)
### What this PR does / why we need it?

Force `with_prefill` to true after the all-reduce in the KV producer. This is a backport of vllm-project#3768 and vllm-project#3849.

---------

Signed-off-by: liziyu <[email protected]>
1 parent 11b1963 commit 4d3c812

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

vllm_ascend/distributed/mooncake_layerwise_connector.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,8 @@ def save_kv_layer(self, layer_name: str, kv_layer: Tuple[torch.Tensor,
11451145
connector_metadata: MooncakeLayerwiseConnectorMetadata,
11461146
**kwargs) -> None:
11471147
"""MooncakeLayerwiseConnector does not save explicitly."""
1148-
if self.kv_role == 'kv_producer':
1148+
if self.kv_role == 'kv_producer' and connector_metadata.requests.keys(
1149+
):
11491150
if self.pd_head_ratio != 1:
11501151
if self.current_layer != 0:
11511152
self.completion_event.wait()

vllm_ascend/worker/model_runner_v1.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2364,6 +2364,10 @@ def _dummy_run(
23642364
tp_size = self.vllm_config.parallel_config.tensor_parallel_size
23652365
num_tokens = math.ceil(num_tokens / tp_size) * tp_size
23662366

2367+
# Force dummy run on prefill stage when this node is deemed as kv producer.
2368+
if self.is_kv_producer and not self.is_kv_consumer:
2369+
with_prefill = True
2370+
23672371
# Padding for DP
23682372
(num_tokens, num_tokens_across_dp, with_prefill,
23692373
_) = self._sync_metadata_across_dp(num_tokens, with_prefill, False)
@@ -2411,10 +2415,6 @@ def _dummy_run(
24112415
num_scheduled_tokens = np.array(num_scheduled_tokens_list,
24122416
dtype=np.int32)
24132417

2414-
# Force dummy run on prefill stage when this node is deemed as kv producer.
2415-
if self.is_kv_producer and not self.is_kv_consumer:
2416-
with_prefill = True
2417-
24182418
if not self.in_profile_run and self.dynamic_eplb:
24192419
self.eplb_updator.forward_before()
24202420

0 commit comments

Comments
 (0)