@@ -216,17 +216,18 @@ def get_encoder_dummy_data(
         # Encoder-decoder multimodal models only support v0
         if total_len > seq_len:
             # `max_num_batched_tokens` is defined by `SchedulerConfig`
-            logger.warning(
+            logger.warning_once(
                 "The encoder sequence length used for profiling ("
-                "max_num_batched_tokens / max_num_seqs = %d) is too short "
+                f"max_num_batched_tokens / max_num_seqs = {seq_len}) "
+                "is too short "
                 "to hold the multi-modal embeddings in the worst case "
-                "(%d tokens in total, out of which %s are reserved for "
+                f"({total_len} tokens in total, out of which "
+                f"{total_placeholders_by_modality} are reserved for "
                 "multi-modal embeddings). This may cause certain "
                 "multi-modal inputs to fail during inference, even when "
                 "the input text is short. To avoid this, you should "
                 "increase `max_model_len`, reduce `max_num_seqs`, "
-                "and/or reduce `mm_counts`.", seq_len, total_len,
-                total_placeholders_by_modality)
+                "and/or reduce `mm_counts`.")

         processor = cast(EncDecMultiModalProcessor, self.processor)
         if processor.pad_dummy_encoder_prompt:
@@ -251,17 +252,18 @@ def get_decoder_dummy_data(
         # V0 does not support chunked prefill.
         if total_len > seq_len and not envs.VLLM_USE_V1:
             # `max_num_batched_tokens` is defined by `SchedulerConfig`
-            logger.warning(
+            logger.warning_once(
                 "The sequence length used for profiling ("
-                "max_num_batched_tokens / max_num_seqs = %d) is too short "
+                f"max_num_batched_tokens / max_num_seqs = {seq_len}) "
+                "is too short "
                 "to hold the multi-modal embeddings in the worst case "
-                "(%d tokens in total, out of which %s are reserved for "
+                f"({total_len} tokens in total, out of which "
+                f"{total_placeholders_by_modality} are reserved for "
                 "multi-modal embeddings). This may cause certain "
                 "multi-modal inputs to fail during inference, even when "
                 "the input text is short. To avoid this, you should "
                 "increase `max_model_len`, reduce `max_num_seqs`, "
-                "and/or reduce `mm_counts`.", seq_len, total_len,
-                total_placeholders_by_modality)
+                "and/or reduce `mm_counts`.")

         if total_len < seq_len:
             prompt_token_ids.extend([0] * (seq_len - total_len))
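
Both hunks make the same two changes: `logger.warning` becomes `logger.warning_once`, so the profiling warning is emitted at most once per distinct message, and the %-style lazy format arguments become f-strings, so the message is rendered eagerly before it reaches the logger. As a rough sketch of the deduplication idea (a hypothetical helper, not vLLM's actual implementation, which may cache differently):

    import functools
    import logging

    logger = logging.getLogger("vllm")

    @functools.lru_cache(maxsize=None)
    def warning_once(msg: str) -> None:
        # Cache keyed on the fully rendered message: the first call logs,
        # identical later calls hit the cache and are silently dropped.
        logger.warning(msg)

One consequence of the eager f-string formatting is that deduplication keys on the rendered text, so warnings that interpolate different `seq_len` or `total_len` values are each emitted once.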