onnx · chapman73 · Oct 14, 2025
diff --git a/skl2onnx/operator_converters/ordinal_encoder.py b/skl2onnx/operator_converters/ordinal_encoder.py
@@ -42,7 +42,12 @@ def convert_sklearn_ordinal_encoder(
         )
     )
 
-    for categories in ordinal_op.categories_:
+    ordinal_categories = ordinal_op.categories_
+    if hasattr(ordinal_op, "infrequent_categories_") and ordinal_op.infrequent_categories_:
+        # exclude infrequent categories if present
+        ordinal_categories = [categories for categories in ordinal_categories if categories not in ordinal_op.infrequent_categories_]
+
+    for categories in ordinal_categories:
         if len(categories) == 0:
             continue
 

diff --git a/tests/test_sklearn_ordinal_encoder.py b/tests/test_sklearn_ordinal_encoder.py
@@ -504,6 +504,75 @@ def test_model_ordinal_encoder_unknown_value_nan(self):
 
         assert_almost_equal(expected.reshape(-1), got[0].reshape(-1))
 
+    @unittest.skipIf(
+        not max_categories_support(),
+        reason="OrdinalEncoder supports max_categories and min_frequency since 1.3",
+    )
+    def test_model_ordinal_encoder_min_frequency_multi_column(self):
+        """Check ONNX conversion of OrdinalEncoder with min_frequency on two columns.
+
+        The converted model is run with onnxruntime on the training data and on
+        a smaller array of already-seen values; each output is compared with
+        the encoder's own ``fit_transform`` / ``transform`` result.
+        """
+        from onnxruntime import InferenceSession
+
+        model = OrdinalEncoder(
+            min_frequency=3,
+            handle_unknown="use_encoded_value",
+            unknown_value=-1,
+        )
+        # In each column one value occurs 4 times (frequent) while the other
+        # two occur 2 times and 1 time, i.e. below min_frequency=3 (infrequent).
+        data = np.array(
+            [
+                ["a", "x"],
+                ["a", "x"],
+                ["a", "x"],
+                ["a", "x"],
+                ["b", "y"],
+                ["b", "y"],
+                ["c", "z"],
+            ],
+            dtype=np.object_,
+        )
+        test_data = np.array(
+            [
+                ["a", "x"],  # frequent in both columns
+                ["b", "y"],  # infrequent in both columns
+                ["c", "z"],  # infrequent in both columns
+            ],
+            dtype=np.object_,
+        )
+
+        expected = model.fit_transform(data)
+        expected_test = model.transform(test_data)
+
+        model_onnx = convert_sklearn(
+            model,
+            "scikit-learn ordinal encoder",
+            [("input", StringTensorType([None, 2]))],
+            target_opset=TARGET_OPSET,
+        )
+        self.assertIsNotNone(model_onnx)
+        dump_data_and_model(
+            data,
+            model,
+            model_onnx,
+            basename="SklearnOrdinalEncoderMinFrequencyMultiCol",
+        )
+
+        sess = InferenceSession(
+            model_onnx.SerializeToString(), providers=["CPUExecutionProvider"]
+        )
+        got = sess.run(None, {"input": data})
+        assert_almost_equal(expected.reshape(-1), got[0].reshape(-1))
+
+        # Same comparison on the smaller array that was only passed to transform().
+        got_test = sess.run(None, {"input": test_data})
+        assert_almost_equal(expected_test.reshape(-1), got_test[0].reshape(-1))
+
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)