test: Explicitly test flattening of combined (oneOf, anyOf, allOf) schemas (#3413)

edgarrmondragon · web-flow · commit 10897062b81c · 2025-12-15T18:39:55.000Z
## Summary by Sourcery

Tests:
- Add a regression test ensuring flatten_schema correctly flattens
schemas using oneOf, anyOf, and allOf combinations.

---------

Signed-off-by: Edgar Ramírez Mondragón <edgarrm358@gmail.com>
diff --git a/singer_sdk/helpers/_flattening.py b/singer_sdk/helpers/_flattening.py
@@ -17,6 +17,12 @@
 DEFAULT_FLATTENING_SEPARATOR = "__"
 DEFAULT_MAX_KEY_LENGTH = 255
 
+_T = t.TypeVar("_T")
+
+
+def _first(iterable: t.Iterable[_T]) -> _T | None:
+    return next(iter(iterable), None)
+
 
 class PluginFlatteningConfig(t.TypedDict):
     """Plugin flattening configuration."""
@@ -380,18 +386,22 @@ def _flatten_schema(  # noqa: C901, PLR0912
                 items.append((new_key, {"type": types}))
             else:
                 items.append((new_key, field_schema))
-        # TODO: Figure out what this really does, try breaking it.
-        # If it's not needed, remove it.
-        elif len(field_schema.values()) > 0:
-            if next(iter(field_schema.values()))[0]["type"] == "string":
-                next(iter(field_schema.values()))[0]["type"] = ["null", "string"]
-                items.append((new_key, next(iter(field_schema.values()))[0]))
-            elif next(iter(field_schema.values()))[0]["type"] == "array":
-                next(iter(field_schema.values()))[0]["type"] = ["null", "array"]
-                items.append((new_key, next(iter(field_schema.values()))[0]))
-            elif next(iter(field_schema.values()))[0]["type"] == "object":
-                next(iter(field_schema.values()))[0]["type"] = ["null", "object"]
-                items.append((new_key, next(iter(field_schema.values()))[0]))
+        # Handle oneOf, anyOf, etc.
+        elif (
+            (composite := _first(field_schema.values()))
+            and isinstance(composite, list)
+            and len(composite) > 0
+            and (first_element := _first(composite))
+        ):
+            if first_element["type"] == "string":
+                first_element["type"] = ["null", "string"]
+                items.append((new_key, first_element))
+            elif first_element["type"] == "array":
+                first_element["type"] = ["null", "array"]
+                items.append((new_key, first_element))
+            elif first_element["type"] == "object":
+                first_element["type"] = ["null", "object"]
+                items.append((new_key, first_element))
         else:
             # Handle typeless properties (e.g., "PropertyName": {})
             # Treat them as string type to allow JSON serialization
diff --git a/tests/core/test_flattening.py b/tests/core/test_flattening.py
@@ -200,6 +200,94 @@ def test_flatten_record_with_typeless_property_values():
     assert flattened_record["changes__NewValue"] == "simple string"
 
 
+def test_flatten_combined_schemas():
+    """Test that combined schemas are flattened correctly.
+
+    Examples of combined schemas:
+    - oneOf
+    - anyOf
+    - allOf
+
+    https://json-schema.org/understanding-json-schema/reference/combining
+    """
+    schema = {
+        "type": "object",
+        "properties": {
+            "name": {
+                "oneOf": [
+                    {"type": "string"},
+                    {
+                        "type": "object",
+                        "properties": {
+                            "first_name": {"type": "string"},
+                            "last_name": {"type": "string"},
+                        },
+                    },
+                ],
+            },
+            "address": {
+                "anyOf": [
+                    {
+                        "type": "object",
+                        "properties": {
+                            "street": {"type": "string"},
+                            "city": {"type": "string"},
+                            "state": {"type": "string"},
+                            "zip": {"type": "string"},
+                        },
+                    },
+                ],
+            },
+            "phones": {
+                "allOf": [
+                    {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "type": {"type": "string"},
+                                "number": {"type": "string"},
+                            },
+                        },
+                    },
+                ],
+            },
+            "id": {
+                "oneOf": [
+                    {"type": "integer"},
+                    {"type": "string", "format": "uuid"},
+                ],
+            },
+        },
+    }
+    flattened = flatten_schema(schema, max_level=1)
+    assert flattened == {
+        "type": "object",
+        "properties": {
+            "name": {"type": ["null", "string"]},
+            "address": {
+                "type": ["null", "object"],
+                "properties": {
+                    "street": {"type": "string"},
+                    "city": {"type": "string"},
+                    "state": {"type": "string"},
+                    "zip": {"type": "string"},
+                },
+            },
+            "phones": {
+                "type": ["null", "array"],
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "type": {"type": "string"},
+                        "number": {"type": "string"},
+                    },
+                },
+            },
+        },
+    }
+
+
 def test_flatten_key_with_long_names(subtests: pytest.Subtests):
     """Test that flatten_key abbreviates long key names to stay under 255 chars.