1 file changed: +4 -4 lines changed
Original file line number | Diff line number | Diff line change
@@ -2370,6 +2370,10 @@ def _dummy_run(
23702370 tp_size = self .vllm_config .parallel_config .tensor_parallel_size
23712371 num_tokens = math .ceil (num_tokens / tp_size ) * tp_size
23722372
2373+ # Force dummy run on prefill stage when this node is deemed as kv producer.
2374+ if self .is_kv_producer and not self .is_kv_consumer :
2375+ with_prefill = True
2376+
23732377 # Padding for DP
23742378 (num_tokens , num_tokens_across_dp , with_prefill ,
23752379 _ ) = self ._sync_metadata_across_dp (num_tokens , with_prefill , False )
@@ -2417,10 +2421,6 @@ def _dummy_run(
24172421 num_scheduled_tokens = np .array (num_scheduled_tokens_list ,
24182422 dtype = np .int32 )
24192423
2420- # Force dummy run on prefill stage when this node is deemed as kv producer.
2421- if self .is_kv_producer and not self .is_kv_consumer :
2422- with_prefill = True
2423-
24242424 if not self .in_profile_run and self .dynamic_eplb :
24252425 self .eplb_updator .forward_before ()
24262426
You can’t perform that action at this time.
0 commit comments