Commit a5c061d

[CI] Fix copies (#42571)
* fix
* fix circular condition
1 parent 675e876 commit a5c061d

File tree

2 files changed: +15 -21 lines changed


src/transformers/models/fast_vlm/modeling_fast_vlm.py

Lines changed: 8 additions & 11 deletions
@@ -59,7 +59,7 @@ def forward(self, image_features):
 class FastVlmPreTrainedModel(PreTrainedModel):
     config: FastVlmConfig
     base_model_prefix = "model"
-    input_modalities = ["image", "text"]
+    input_modalities = ("image", "text")
     supports_gradient_checkpointing = True
     _skip_keys_device_placement = "past_key_values"

@@ -195,12 +195,11 @@ def forward(
         **kwargs: Unpack[TransformersKwargs],
     ) -> Union[tuple, FastVlmModelOutputWithPast]:
         r"""
-        vision_feature_select_strategy (`str`, *optional*):
-            The feature selection strategy used to select the vision feature from the vision backbone. Only "full" supported.
-
         vision_feature_layer (`Union[int, list[int], NoneType]`, *optional*):
             The index of the layer to select the vision feature. If multiple indices are provided, the vision feature of the
             corresponding indices will be concatenated to form the vision features. Only -1 supported.
+        vision_feature_select_strategy (`str`, *optional*):
+            The feature selection strategy used to select the vision feature from the vision backbone. Only "full" supported.
         """
         vision_feature_layer = (
             vision_feature_layer if vision_feature_layer is not None else self.config.vision_feature_layer

@@ -335,18 +334,16 @@ def forward(
         **kwargs: Unpack[TransformersKwargs],
     ) -> Union[tuple, FastVlmCausalLMOutputWithPast]:
         r"""
+        vision_feature_layer (`Union[int, list[int], NoneType]`, *optional*):
+            The index of the layer to select the vision feature. If multiple indices are provided, the vision feature of the
+            corresponding indices will be concatenated to form the vision features. Only -1 supported.
+        vision_feature_select_strategy (`str`, *optional*):
+            The feature selection strategy used to select the vision feature from the vision backbone. Only "full" supported.
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
             config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
             (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

-        vision_feature_select_strategy (`str`, *optional*):
-            The feature selection strategy used to select the vision feature from the vision backbone. Only "full" supported.
-
-        vision_feature_layer (`Union[int, list[int], NoneType]`, *optional*):
-            The index of the layer to select the vision feature. If multiple indices are provided, the vision feature of the
-            corresponding indices will be concatenated to form the vision features. Only -1 supported.
-
         Example:

         ```python
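For context, the only code-level change in the first hunk is the container type of the class-level `input_modalities` attribute. A minimal sketch of the resulting attribute is below; it is not the full transformers class (the real one subclasses `PreTrainedModel` and defines more than is shown here), just an illustration of why a tuple is preferable for a shared class default.

```python
# Minimal sketch of the attribute change in the first hunk above.
# A tuple is immutable, so the shared class-level default cannot be
# mutated in place the way a list could.
class FastVlmPreTrainedModel:  # real class subclasses PreTrainedModel
    base_model_prefix = "model"
    input_modalities = ("image", "text")  # was ["image", "text"] before this commit
    supports_gradient_checkpointing = True


print(FastVlmPreTrainedModel.input_modalities)  # ('image', 'text')
```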

src/transformers/models/fast_vlm/modular_fast_vlm.py

Lines changed: 7 additions & 10 deletions
@@ -204,12 +204,11 @@ def get_image_features(

     def forward(self, **super_kwargs):
         r"""
-        vision_feature_select_strategy (`str`, *optional*):
-            The feature selection strategy used to select the vision feature from the vision backbone. Only "full" supported.
-
         vision_feature_layer (`Union[int, list[int], NoneType]`, *optional*):
             The index of the layer to select the vision feature. If multiple indices are provided, the vision feature of the
             corresponding indices will be concatenated to form the vision features. Only -1 supported.
+        vision_feature_select_strategy (`str`, *optional*):
+            The feature selection strategy used to select the vision feature from the vision backbone. Only "full" supported.
         """
         super().forward(**super_kwargs)

@@ -224,18 +223,16 @@ class FastVlmForConditionalGeneration(LlavaForConditionalGeneration):

     def forward(self, **super_kwargs):
         r"""
+        vision_feature_layer (`Union[int, list[int], NoneType]`, *optional*):
+            The index of the layer to select the vision feature. If multiple indices are provided, the vision feature of the
+            corresponding indices will be concatenated to form the vision features. Only -1 supported.
+        vision_feature_select_strategy (`str`, *optional*):
+            The feature selection strategy used to select the vision feature from the vision backbone. Only "full" supported.
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
             config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
             (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

-        vision_feature_select_strategy (`str`, *optional*):
-            The feature selection strategy used to select the vision feature from the vision backbone. Only "full" supported.
-
-        vision_feature_layer (`Union[int, list[int], NoneType]`, *optional*):
-            The index of the layer to select the vision feature. If multiple indices are provided, the vision feature of the
-            corresponding indices will be concatenated to form the vision features. Only -1 supported.
-
         Example:

         ```python
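In the transformers modular-model setup, `modeling_fast_vlm.py` is generated from `modular_fast_vlm.py`, and the "fix copies" / repository-consistency CI step flags any divergence between the two, which is why the same docstring reordering appears in both files. The snippet below is only a toy illustration of that idea; the constants and the `documented_args` helper are made up for the example and are not the actual check logic used in the transformers utils scripts.

```python
# Illustrative only: a toy version of the consistency idea behind the
# "fix copies" CI check -- the generated modeling file must carry the same
# docstring block as the modular source, including the argument order.
import re

MODULAR_DOC = """\
vision_feature_layer (`Union[int, list[int], NoneType]`, *optional*):
    The index of the layer to select the vision feature.
vision_feature_select_strategy (`str`, *optional*):
    The feature selection strategy used to select the vision feature.
"""

# After this commit, the generated file carries the same block verbatim.
MODELING_DOC = MODULAR_DOC


def documented_args(doc: str) -> list[str]:
    # Argument names appear at the start of a line, followed by " (".
    return re.findall(r"^(\w+) \(", doc, flags=re.MULTILINE)


assert documented_args(MODULAR_DOC) == documented_args(MODELING_DOC)
print(documented_args(MODELING_DOC))
# ['vision_feature_layer', 'vision_feature_select_strategy']
```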
