Skip to content

Commit 919ca77

Browse files
author
naddeoa
authored
Fix bug that prevents prefixes from being omitted in udfs (#1465)
The types for a UDF schema allow the prefix to be None, but our code doesn't work in that scenario. In addition, the annotation interface for UDFs doesn't even let you have a None prefix. This change fixes both of those issues.
1 parent 20e62d6 commit 919ca77

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

python/tests/experimental/core/test_udf_schema.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,14 @@ def f2(x: Union[Dict[str, List], pd.DataFrame]) -> Union[Dict[str, List], pd.Dat
7171
return pd.DataFrame({"foo": x["xx1"], "bar": x["xx2"]})
7272

7373

74+
@register_multioutput_udf(["xx1", "xx2"], no_prefix=True)
75+
def no_prefix_udf(x: Union[Dict[str, List], pd.DataFrame]) -> Union[Dict[str, List], pd.DataFrame]:
76+
if isinstance(x, dict):
77+
return {"foo": [x["xx1"][0]], "bar": [x["xx2"][0]]}
78+
else:
79+
return pd.DataFrame({"foo": x["xx1"], "bar": x["xx2"]})
80+
81+
7482
def test_multioutput_udf_row() -> None:
7583
schema = udf_schema()
7684
row = {"xx1": 42, "xx2": 3.14}
@@ -79,6 +87,8 @@ def test_multioutput_udf_row() -> None:
7987
assert results.get_column("f1.bar") is not None
8088
assert results.get_column("blah.foo") is not None
8189
assert results.get_column("blah.bar") is not None
90+
assert results.get_column("foo") is not None
91+
assert results.get_column("bar") is not None
8292

8393

8494
def test_multioutput_udf_dataframe() -> None:
@@ -89,6 +99,8 @@ def test_multioutput_udf_dataframe() -> None:
8999
assert results.get_column("f1.bar") is not None
90100
assert results.get_column("blah.foo") is not None
91101
assert results.get_column("blah.bar") is not None
102+
assert results.get_column("foo") is not None
103+
assert results.get_column("bar") is not None
92104

93105

94106
@register_dataset_udf(["col1"], schema_name="unit-tests")

python/whylogs/experimental/core/udf_schema.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -135,10 +135,13 @@ def _apply_udf_on_dataframe(
135135
udf(Union[Dict[str, List], pd.DataFrame]) -> Union[Dict[str, List], pd.DataFrame]
136136
"""
137137

138+
def add_prefix(col):
139+
return prefix + "." + col if prefix else col
140+
138141
try:
139142
# TODO: I think it's OKAY if udf returns a dictionary
140143
udf_output = pd.DataFrame(udf(pandas))
141-
udf_output = udf_output.rename(columns={old: prefix + "." + old for old in udf_output.keys()}) # type: ignore
144+
udf_output = udf_output.rename(columns={old: add_prefix(old) for old in udf_output.keys()})
142145
for new_col in udf_output.keys():
143146
new_df[new_col] = udf_output[new_col]
144147
except Exception as e: # noqa
@@ -271,6 +274,7 @@ def register_multioutput_udf(
271274
prefix: Optional[str] = None,
272275
namespace: Optional[str] = None,
273276
schema_name: str = "",
277+
no_prefix: bool = False,
274278
) -> Callable[[Any], Any]:
275279
"""
276280
Decorator to easily configure UDFs for your data set. Decorate your UDF
@@ -294,7 +298,10 @@ def decorator_register(func):
294298
global _multicolumn_udfs
295299
name = udf_name or func.__name__
296300
name = f"{namespace}.{name}" if namespace else name
297-
output_prefix = prefix if prefix else name
301+
if no_prefix:
302+
output_prefix = None
303+
else:
304+
output_prefix = prefix if prefix else name
298305
_multicolumn_udfs[schema_name].append(UdfSpec(col_names, prefix=output_prefix, udf=func, name=name))
299306
return func
300307

0 commit comments

Comments
 (0)