33from typing import TYPE_CHECKING
44
55from vllm .logger import init_logger
6- from vllm .multimodal import MULTIMODAL_REGISTRY
6+ from vllm .multimodal import MultiModalRegistry
77from vllm .v1 .request import Request
88
99if TYPE_CHECKING :
@@ -67,13 +67,15 @@ def get_freed_ids(self) -> list[tuple[str, int]]:
6767def compute_encoder_budget (
6868 model_config : "ModelConfig" ,
6969 scheduler_config : "SchedulerConfig" ,
70+ mm_registry : MultiModalRegistry ,
7071) -> tuple [int , int ]:
7172 """Compute the encoder cache budget based on the model and scheduler
7273 configurations.
7374
7475 Args:
7576 model_config: Model configuration.
7677 scheduler_config: Scheduler configuration.
78+ mm_registry: Provides information about the token cost.
7779
7880 Returns:
7981 - Compute budget for encoder execution, in unit of number of tokens
@@ -89,21 +91,27 @@ def compute_encoder_budget(
8991 (
9092 encoder_compute_budget ,
9193 encoder_cache_size ,
92- ) = _compute_encoder_budget_multimodal (model_config , scheduler_config )
94+ ) = _compute_encoder_budget_multimodal (
95+ model_config ,
96+ scheduler_config ,
97+ mm_registry ,
98+ )
9399
94100 return encoder_compute_budget , encoder_cache_size
95101
96102
97103def _compute_encoder_budget_multimodal (
98104 model_config : "ModelConfig" ,
99105 scheduler_config : "SchedulerConfig" ,
106+ mm_registry : MultiModalRegistry ,
100107) -> tuple [int , int ]:
101108 """Compute the encoder cache budget based on the model and scheduler
102109 configurations for a multimodal model.
103110
104111 Args:
105112 model_config: Model configuration.
106113 scheduler_config: Scheduler configuration.
114+ mm_registry: Provides information about the token cost.
107115
108116 Returns:
109117 - Compute budget for encoder execution, in unit of number of tokens
@@ -112,8 +120,8 @@ def _compute_encoder_budget_multimodal(
112120 in the input sequence.
113121 """
114122
115- max_tokens_by_modality_dict = MULTIMODAL_REGISTRY . get_max_tokens_per_item_by_nonzero_modality ( # noqa: E501
116- model_config )
123+ max_tokens_by_modality_dict = mm_registry \
124+ . get_max_tokens_per_item_by_nonzero_modality ( model_config )
117125
118126 if not max_tokens_by_modality_dict :
119127 logger .warning (
0 commit comments