@@ -977,6 +977,8 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
977977 instantiated.
978978 """
979979
980+ supports_encoder_tp_data = True
981+
980982 @classmethod
981983 def get_placeholder_str (cls , modality : str , i : int ) -> Optional [str ]:
982984 if modality .startswith ("image" ):
@@ -990,6 +992,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
990992 config = vllm_config .model_config .hf_config
991993 multimodal_config = vllm_config .model_config .multimodal_config
992994 quant_config = vllm_config .quant_config
995+ self .use_data_parallel = multimodal_config .mm_encoder_tp_mode == "data"
993996 super ().__init__ ()
994997 # All MiniCPM-V models disable `tie_word_embeddings` but
995998 # `PretrainedConfig.tie_word_embeddings` defaults to True; we cannot
@@ -1237,6 +1240,8 @@ def get_vision_hidden_states(
12371240
12381241class MiniCPMV2_0 (MiniCPMVBaseModel ):
12391242
1243+ supports_encoder_tp_data = False
1244+
12401245 def __init__ (self , * , vllm_config : VllmConfig , prefix : str = "" ):
12411246 super ().__init__ (vllm_config = vllm_config , prefix = prefix )
12421247 assert self .version == (2 , 0 )
@@ -1351,9 +1356,12 @@ def init_vision_module(
13511356 quant_config : Optional [QuantizationConfig ],
13521357 prefix : str = "" ,
13531358 ) -> nn .Module :
1354- model = Idefics2VisionTransformer (config .vision_config ,
1355- quant_config = quant_config ,
1356- prefix = prefix )
1359+ model = Idefics2VisionTransformer (
1360+ config .vision_config ,
1361+ quant_config = quant_config ,
1362+ prefix = prefix ,
1363+ use_data_parallel = self .use_data_parallel ,
1364+ )
13571365 if self .config .drop_vision_last_layer :
13581366 model .encoder .layers = model .encoder .layers [:- 1 ]
13591367 return model
@@ -1441,9 +1449,12 @@ def init_vision_module(
14411449 quant_config : Optional [QuantizationConfig ] = None ,
14421450 prefix : str = "" ,
14431451 ) -> nn .Module :
1444- model = Idefics2VisionTransformer (config .vision_config ,
1445- quant_config = quant_config ,
1446- prefix = prefix )
1452+ model = Idefics2VisionTransformer (
1453+ config .vision_config ,
1454+ quant_config = quant_config ,
1455+ prefix = prefix ,
1456+ use_data_parallel = self .use_data_parallel ,
1457+ )
14471458 if self .config .drop_vision_last_layer :
14481459 model .encoder .layers = model .encoder .layers [:- 1 ]
14491460 return model
@@ -1521,8 +1532,6 @@ class MiniCPMV4_0(MiniCPMVBaseModel, SupportsLoRA):
15211532 ],
15221533 }
15231534
1524- supports_encoder_tp_data = True
1525-
15261535 def __init__ (self , * , vllm_config : VllmConfig , prefix : str = "" ):
15271536 super ().__init__ (vllm_config = vllm_config , prefix = prefix )
15281537 assert self .version == (4 , 0 )
@@ -1546,9 +1555,12 @@ def init_vision_module(
15461555 prefix : str = "" ,
15471556 ) -> nn .Module :
15481557 quant_config = self ._maybe_ignore_quant_config (quant_config )
1549- model = Idefics2VisionTransformer (config .vision_config ,
1550- quant_config = quant_config ,
1551- prefix = prefix )
1558+ model = Idefics2VisionTransformer (
1559+ config .vision_config ,
1560+ quant_config = quant_config ,
1561+ prefix = prefix ,
1562+ use_data_parallel = self .use_data_parallel ,
1563+ )
15521564 if self .config .drop_vision_last_layer :
15531565 model .encoder .layers = model .encoder .layers [:- 1 ]
15541566 return model
@@ -1652,9 +1664,12 @@ def init_vision_module(
16521664 prefix : str = "" ,
16531665 ) -> nn .Module :
16541666 quant_config = self ._maybe_ignore_quant_config (quant_config )
1655- model = Idefics2VisionTransformer (config .vision_config ,
1656- quant_config = quant_config ,
1657- prefix = prefix )
1667+ model = Idefics2VisionTransformer (
1668+ config .vision_config ,
1669+ quant_config = quant_config ,
1670+ prefix = prefix ,
1671+ use_data_parallel = self .use_data_parallel ,
1672+ )
16581673 if self .config .drop_vision_last_layer :
16591674 model .encoder .layers = model .encoder .layers [:- 1 ]
16601675 return model
0 commit comments