Skip to content

Commit 4bdd305

Browse files
Set patch_size at runtime instead of modifying class defaults in LightOnOCR processor
1 parent 9246e4d commit 4bdd305

File tree

2 files changed: 4 additions (+4) and 12 deletions (-12)

2 files changed: 4 additions (+4) and 12 deletions (-12)

src/transformers/models/lightonocr/modular_lightonocr.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -157,9 +157,6 @@ class LightOnOCRProcessorKwargs(ProcessingKwargs, total=False):
157157
"padding": False,
158158
"return_mm_token_type_ids": False,
159159
},
160-
"images_kwargs": {
161-
"patch_size": 14,
162-
},
163160
"common_kwargs": {
164161
"return_tensors": "pt",
165162
},
@@ -209,9 +206,6 @@ def __init__(
209206

210207
self.image_ids = [self.image_token_id, self.image_break_token_id, self.image_end_token_id]
211208

212-
# Set the default patch_size for images_kwargs
213-
LightOnOCRProcessorKwargs._defaults["images_kwargs"]["patch_size"] = self.effective_patch_size
214-
215209
super().__init__(image_processor, tokenizer, chat_template=chat_template)
216210

217211
def __call__(
@@ -229,6 +223,8 @@ def __call__(
229223
)
230224

231225
if images is not None:
226+
# Like pixtral
227+
output_kwargs["images_kwargs"]["patch_size"] = self.effective_patch_size
232228
image_inputs = self.image_processor(images, **output_kwargs["images_kwargs"])
233229
else:
234230
image_inputs = {}

src/transformers/models/lightonocr/processing_lightonocr.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,6 @@ class LightOnOCRProcessorKwargs(ProcessingKwargs, total=False):
2626
"padding": False,
2727
"return_mm_token_type_ids": False,
2828
},
29-
"images_kwargs": {
30-
"patch_size": 14,
31-
},
3229
"common_kwargs": {
3330
"return_tensors": "pt",
3431
},
@@ -138,9 +135,6 @@ def __init__(
138135

139136
self.image_ids = [self.image_token_id, self.image_break_token_id, self.image_end_token_id]
140137

141-
# Set the default patch_size for images_kwargs
142-
LightOnOCRProcessorKwargs._defaults["images_kwargs"]["patch_size"] = self.effective_patch_size
143-
144138
super().__init__(image_processor, tokenizer, chat_template=chat_template)
145139

146140
def __call__(
@@ -158,6 +152,8 @@ def __call__(
158152
)
159153

160154
if images is not None:
155+
# Like pixtral
156+
output_kwargs["images_kwargs"]["patch_size"] = self.effective_patch_size
161157
image_inputs = self.image_processor(images, **output_kwargs["images_kwargs"])
162158
else:
163159
image_inputs = {}

0 commit comments

Comments (0)