Set patch_size at runtime instead of modifying class defaults in LightOnOCR processor

baptiste-aubertin · baptiste-aubertin · commit 4bdd3054924b · 2025-10-30T21:19:48.000+01:00
diff --git a/src/transformers/models/lightonocr/modular_lightonocr.py b/src/transformers/models/lightonocr/modular_lightonocr.py
@@ -157,9 +157,6 @@ class LightOnOCRProcessorKwargs(ProcessingKwargs, total=False):
             "padding": False,
             "return_mm_token_type_ids": False,
         },
-        "images_kwargs": {
-            "patch_size": 14,
-        },
         "common_kwargs": {
             "return_tensors": "pt",
         },
@@ -209,9 +206,6 @@ def __init__(
 
         self.image_ids = [self.image_token_id, self.image_break_token_id, self.image_end_token_id]
 
-        # Set the default patch_size for images_kwargs
-        LightOnOCRProcessorKwargs._defaults["images_kwargs"]["patch_size"] = self.effective_patch_size
-
         super().__init__(image_processor, tokenizer, chat_template=chat_template)
 
     def __call__(
@@ -229,6 +223,8 @@ def __call__(
         )
 
         if images is not None:
+            # Like Pixtral: set patch_size per call rather than mutating the shared class-level defaults
+            output_kwargs["images_kwargs"]["patch_size"] = self.effective_patch_size
             image_inputs = self.image_processor(images, **output_kwargs["images_kwargs"])
         else:
             image_inputs = {}
diff --git a/src/transformers/models/lightonocr/processing_lightonocr.py b/src/transformers/models/lightonocr/processing_lightonocr.py
@@ -26,9 +26,6 @@ class LightOnOCRProcessorKwargs(ProcessingKwargs, total=False):
             "padding": False,
             "return_mm_token_type_ids": False,
         },
-        "images_kwargs": {
-            "patch_size": 14,
-        },
         "common_kwargs": {
             "return_tensors": "pt",
         },
@@ -138,9 +135,6 @@ def __init__(
 
         self.image_ids = [self.image_token_id, self.image_break_token_id, self.image_end_token_id]
 
-        # Set the default patch_size for images_kwargs
-        LightOnOCRProcessorKwargs._defaults["images_kwargs"]["patch_size"] = self.effective_patch_size
-
         super().__init__(image_processor, tokenizer, chat_template=chat_template)
 
     def __call__(
@@ -158,6 +152,8 @@ def __call__(
         )
 
         if images is not None:
+            # Like Pixtral: set patch_size per call rather than mutating the shared class-level defaults
+            output_kwargs["images_kwargs"]["patch_size"] = self.effective_patch_size
             image_inputs = self.image_processor(images, **output_kwargs["images_kwargs"])
         else:
             image_inputs = {}