File tree Expand file tree Collapse file tree 1 file changed +4
-4
lines changed
Expand file tree Collapse file tree 1 file changed +4
-4
lines changed Original file line number Diff line number Diff line change @@ -2574,6 +2574,10 @@ def _dummy_run(
25742574 tp_size = self .vllm_config .parallel_config .tensor_parallel_size
25752575 num_tokens = math .ceil (num_tokens / tp_size ) * tp_size
25762576
2577+ # Force the dummy run onto the prefill path when this node is a KV producer only (not also a KV consumer).
2578+ if self .is_kv_producer and not self .is_kv_consumer :
2579+ with_prefill = True
2580+
25772581 # Padding for DP
25782582 (num_tokens , num_tokens_across_dp ,
25792583 with_prefill ) = self ._sync_metadata_across_dp (num_tokens ,
@@ -2622,10 +2626,6 @@ def _dummy_run(
26222626 num_scheduled_tokens = np .array (num_scheduled_tokens_list ,
26232627 dtype = np .int32 )
26242628
2625- # Force dummy run on prefill stage when this node is deemed as kv producer.
2626- if self .is_kv_producer and not self .is_kv_consumer :
2627- with_prefill = True
2628-
26292629 if not self .in_profile_run and self .dynamic_eplb :
26302630 self .eplb_updator .forward_before ()
26312631
You can’t perform that action at this time.
0 commit comments