Skip to content

Commit b626744

Browse files
FelipeAdachi, bernease, felipe207
authored
ranking ndcg (#1461)
## Description

Add the ranking NDCG (normalized discounted cumulative gain) metric to `log_batch_ranking_metrics`.

- [x] I have reviewed the [Guidelines for Contributing](CONTRIBUTING.md) and the [Code of Conduct](CODE_OF_CONDUCT.md).

---------

Co-authored-by: Bernease Herman <[email protected]>
Co-authored-by: felipe207 <[email protected]>
1 parent df90f72 commit b626744

File tree

2 files changed

+138
-27
lines changed

2 files changed

+138
-27
lines changed

python/tests/experimental/api/test_logger.py

Lines changed: 93 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from math import isclose
2+
13
from whylogs.core.stubs import pd
24
from whylogs.experimental.api.logger import log_batch_ranking_metrics
35

@@ -15,37 +17,42 @@ def test_log_batch_ranking_metrics_single_simple():
1517
}
1618
)
1719
result = log_batch_ranking_metrics(
18-
k=1, data=single_df, prediction_column="raw_predictions", target_column="raw_targets"
20+
data=single_df, prediction_column="raw_predictions", target_column="raw_targets", convert_non_numeric=True
1921
)
2022
pandas_summary = result.view().to_pandas()
2123

22-
k = 1
2324
column_names = [
24-
"mean_average_precision_k_" + str(k),
25-
"accuracy_k_" + str(k),
25+
"mean_average_precision",
26+
"accuracy",
2627
"mean_reciprocal_rank",
27-
"precision_k_" + str(k),
28-
"recall_k_" + str(k),
28+
"precision",
29+
"recall",
2930
"top_rank",
30-
"average_precision_k_" + str(k),
31+
"average_precision",
32+
"norm_dis_cumul_gain",
3133
]
3234
for col in column_names:
3335
assert col in pandas_summary.index
34-
assert pandas_summary.loc["mean_average_precision_k_" + str(k), "counts/n"] == 1
35-
assert pandas_summary.loc["accuracy_k_" + str(k), "counts/n"] == 1
36+
assert pandas_summary.loc["mean_average_precision", "counts/n"] == 1
37+
assert pandas_summary.loc["accuracy", "counts/n"] == 1
3638
assert pandas_summary.loc["mean_reciprocal_rank", "counts/n"] == 1
37-
assert pandas_summary.loc["precision_k_" + str(k), "counts/n"] == 4
38-
assert pandas_summary.loc["recall_k_" + str(k), "counts/n"] == 4
39+
assert pandas_summary.loc["precision", "counts/n"] == 4
40+
assert pandas_summary.loc["recall", "counts/n"] == 4
3941
assert pandas_summary.loc["top_rank", "counts/n"] == 4
40-
assert pandas_summary.loc["average_precision_k_" + str(k), "counts/n"] == 4
42+
assert pandas_summary.loc["average_precision", "counts/n"] == 4
43+
assert pandas_summary.loc["norm_dis_cumul_gain", "counts/n"] == 1
44+
assert pandas_summary.loc["average_precision", "counts/n"] == 4
45+
assert pandas_summary.loc["norm_dis_cumul_gain", "counts/n"] == 1
4146

4247

4348
def test_log_batch_ranking_metrics_binary_simple():
4449
binary_df = pd.DataFrame(
4550
{"raw_predictions": [[True, False, True], [False, False, False], [True, True, False], [False, True, False]]}
4651
)
4752

48-
result = log_batch_ranking_metrics(k=2, data=binary_df, prediction_column="raw_predictions")
53+
result = log_batch_ranking_metrics(
54+
data=binary_df, prediction_column="raw_predictions", k=2, convert_non_numeric=True
55+
)
4956
pandas_summary = result.view().to_pandas()
5057

5158
k = 2
@@ -57,6 +64,7 @@ def test_log_batch_ranking_metrics_binary_simple():
5764
"recall_k_" + str(k),
5865
"top_rank",
5966
"average_precision_k_" + str(k),
67+
"norm_dis_cumul_gain_k_" + str(k),
6068
]
6169
for col in column_names:
6270
assert col in pandas_summary.index
@@ -67,6 +75,7 @@ def test_log_batch_ranking_metrics_binary_simple():
6775
assert pandas_summary.loc["recall_k_" + str(k), "counts/n"] == 4
6876
assert pandas_summary.loc["top_rank", "counts/n"] == 4
6977
assert pandas_summary.loc["average_precision_k_" + str(k), "counts/n"] == 4
78+
assert pandas_summary.loc["norm_dis_cumul_gain_k_" + str(k), "counts/n"] == 1
7079

7180

7281
def test_log_batch_ranking_metrics_multiple_simple():
@@ -81,13 +90,17 @@ def test_log_batch_ranking_metrics_multiple_simple():
8190
],
8291
}
8392
)
93+
k = 4
8494

8595
result = log_batch_ranking_metrics(
86-
k=3, data=multiple_df, prediction_column="raw_predictions", target_column="raw_targets"
96+
data=multiple_df,
97+
prediction_column="raw_predictions",
98+
target_column="raw_targets",
99+
k=k,
100+
convert_non_numeric=True,
87101
)
88102
pandas_summary = result.view().to_pandas()
89103

90-
k = 3
91104
column_names = [
92105
"mean_average_precision_k_" + str(k),
93106
"accuracy_k_" + str(k),
@@ -96,6 +109,7 @@ def test_log_batch_ranking_metrics_multiple_simple():
96109
"recall_k_" + str(k),
97110
"top_rank",
98111
"average_precision_k_" + str(k),
112+
"norm_dis_cumul_gain_k_" + str(k),
99113
]
100114
for col in column_names:
101115
assert col in pandas_summary.index
@@ -106,3 +120,67 @@ def test_log_batch_ranking_metrics_multiple_simple():
106120
assert pandas_summary.loc["recall_k_" + str(k), "counts/n"] == 4
107121
assert pandas_summary.loc["top_rank", "counts/n"] == 4
108122
assert pandas_summary.loc["average_precision_k_" + str(k), "counts/n"] == 4
123+
assert pandas_summary.loc["norm_dis_cumul_gain_k_" + str(k), "counts/n"] == 1
124+
125+
assert isclose(pandas_summary.loc[f"norm_dis_cumul_gain_k_{k}", "distribution/median"], 0.76244, abs_tol=0.00001)
126+
127+
128+
def test_log_batch_ranking_metrics_default_target():
    """Check that each ranking metric is logged once when no target column is passed."""
    k = 3
    predictions_only = pd.DataFrame({"raw_predictions": [[3, 2, 3, 0, 1, 2, 3, 2]]})

    profile = log_batch_ranking_metrics(
        data=predictions_only, prediction_column="raw_predictions", k=k, convert_non_numeric=True
    )
    summary = profile.view().to_pandas()

    expected_metrics = (
        f"mean_average_precision_k_{k}",
        f"accuracy_k_{k}",
        "mean_reciprocal_rank",
        f"precision_k_{k}",
        f"recall_k_{k}",
        "top_rank",
        f"average_precision_k_{k}",
        f"norm_dis_cumul_gain_k_{k}",
    )

    # First make sure every metric column exists in the profile summary ...
    for metric in expected_metrics:
        assert metric in summary.index

    # ... then that the single input row produced exactly one observation per metric.
    for metric in expected_metrics:
        assert summary.loc[metric, "counts/n"] == 1
157+
158+
159+
def test_log_batch_ranking_metrics_ranking_ndcg_wikipedia():
    """Check NDCG at k=6 against the worked example in Wikipedia's DCG article."""
    # From https://en.wikipedia.org/wiki/Discounted_cumulative_gain#Example
    relevance = [3, 2, 3, 0, 1, 2, 3, 2]
    scores = [7, 6, 5, 4, 3, 2, 1, 0]
    ranking_df = pd.DataFrame({"targets": [relevance], "predictions": [scores]})

    profile = log_batch_ranking_metrics(
        data=ranking_df, prediction_column="predictions", target_column="targets", k=6
    )
    summary = profile.view().to_pandas()
    ndcg_at_6 = summary.loc["norm_dis_cumul_gain_k_6", "distribution/median"]

    # The reference value is only quoted to three decimals, hence the loose tolerance.
    assert isclose(ndcg_at_6, 0.785, abs_tol=0.01)
167+
168+
169+
def test_log_batch_ranking_metrics_ranking_ndcg_sklearn():
    """Check NDCG without a cutoff against the value in scikit-learn's ndcg_score docs."""
    # From https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ndcg_score.html
    scores = [0.1, 0.2, 0.3, 4, 70]
    relevance = [10, 0, 0, 1, 5]
    ranking_df = pd.DataFrame({"predictions": [scores], "targets": [relevance]})

    profile = log_batch_ranking_metrics(
        data=ranking_df, prediction_column="predictions", target_column="targets"
    )
    summary = profile.view().to_pandas()
    ndcg = summary.loc["norm_dis_cumul_gain", "distribution/median"]

    assert isclose(ndcg, 0.69569, abs_tol=0.00001)
177+
178+
179+
def test_log_batch_ranking_metrics_ranking_ndcg_withk_sklearn():
    """Check NDCG at k=4 against the value in scikit-learn's ndcg_score docs."""
    # From https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ndcg_score.html
    scores = [0.05, 1.1, 1.0, 0.5, 0.0]
    relevance = [10, 0, 0, 1, 5]
    ranking_df = pd.DataFrame({"predictions": [scores], "targets": [relevance]})

    profile = log_batch_ranking_metrics(
        data=ranking_df, prediction_column="predictions", target_column="targets", k=4
    )
    summary = profile.view().to_pandas()
    ndcg_at_4 = summary.loc["norm_dis_cumul_gain_k_4", "distribution/median"]

    assert isclose(ndcg_at_4, 0.35202, abs_tol=0.00001)

python/whylogs/experimental/api/logger/__init__.py

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import math
23
from typing import Optional, Union
34

45
from whylogs.api.logger import log
@@ -10,11 +11,12 @@
1011

1112

1213
def log_batch_ranking_metrics(
13-
k: int,
1414
data: pd.core.frame.DataFrame,
1515
prediction_column: str,
1616
target_column: Optional[str] = None,
1717
score_column: Optional[str] = None,
18+
k: Optional[int] = None,
19+
convert_non_numeric=False,
1820
schema: Union[DatasetSchema, None] = None,
1921
log_full_data: bool = False,
2022
) -> ViewResultSet:
@@ -23,8 +25,7 @@ def log_batch_ranking_metrics(
2325
relevant_cols = [prediction_column]
2426
if target_column is None:
2527
target_column = "__targets"
26-
formatted_data[target_column] = True
27-
formatted_data[target_column].apply(lambda x: [x])
28+
formatted_data[target_column] = formatted_data[prediction_column].apply(lambda x: list(range(len(x)))[::-1])
2829
relevant_cols.append(target_column)
2930
if score_column is not None:
3031
relevant_cols.append(score_column)
@@ -35,6 +36,8 @@ def log_batch_ranking_metrics(
3536
# TODO: more error checking
3637
formatted_data[col] = formatted_data[col].apply(lambda x: [x])
3738

39+
_max_k = formatted_data[prediction_column].apply(len).max()
40+
3841
formatted_data["count_at_k"] = formatted_data[relevant_cols].apply(
3942
lambda row: sum([1 if pred_val in row[target_column] else 0 for pred_val in row[prediction_column][:k]]), axis=1
4043
)
@@ -52,13 +55,13 @@ def get_top_rank(row):
5255

5356
formatted_data["top_rank"] = formatted_data[relevant_cols].apply(get_top_rank, axis=1)
5457

55-
output_data = (formatted_data["count_at_k"] / k).to_frame()
56-
output_data.columns = ["precision_k_" + str(k)]
57-
output_data["recall_k_" + str(k)] = formatted_data["count_at_k"] / formatted_data["count_all"]
58+
output_data = (formatted_data["count_at_k"] / (k if k else 1)).to_frame()
59+
output_data.columns = ["precision" + ("_k_" + str(k) if k else "")]
60+
output_data["recall" + ("_k_" + str(k) if k else "")] = formatted_data["count_at_k"] / formatted_data["count_all"]
5861
output_data["top_rank"] = formatted_data["top_rank"]
5962

6063
ki_dict: pd.DataFrame = None
61-
for ki in range(1, k + 1):
64+
for ki in range(1, (k if k else _max_k) + 1):
6265
ki_result = (
6366
formatted_data[relevant_cols].apply(
6467
lambda row: sum(
@@ -74,18 +77,48 @@ def get_top_rank(row):
7477
else:
7578
ki_dict["p@" + str(ki)] = ki_result
7679

77-
output_data["average_precision_k_" + str(k)] = ki_dict.mean(axis=1)
78-
mAP_at_k = output_data["average_precision_k_" + str(k)].mean(axis=0)
80+
output_data["average_precision" + ("_k_" + str(k) if k else "")] = ki_dict.mean(axis=1)
81+
82+
def _convert_non_numeric(row_dict):
    """Build numeric replacements for one row's prediction and target lists.

    Returns a 2-tuple:
      * for every predicted item, its index within the row's target list,
        or -1 when the item does not appear there;
      * the integers ``len(predictions) - 1`` down to ``0``.

    The caller assigns the two elements back to the prediction and target
    columns, in that order.
    """
    predictions = row_dict[prediction_column]
    targets = row_dict[target_column]

    positions = []
    for item in predictions:
        # -1 marks a predicted item that is absent from the targets.
        positions.append(targets.index(item) if item in targets else -1)

    descending = list(range(len(predictions) - 1, -1, -1))
    return positions, descending
90+
91+
if convert_non_numeric:
92+
formatted_data[[prediction_column, target_column]] = formatted_data.apply(
93+
_convert_non_numeric, result_type="expand", axis=1
94+
)
95+
96+
def _calculate_row_ndcg(row_dict, k):
    """Return DCG / ideal DCG for one row, truncated to the first ``k`` positions.

    The target column supplies the relevance values. DCG visits them in
    descending order of the prediction values; the ideal DCG visits them in
    descending order of the relevance values themselves. Each value is
    discounted by ``log2(position + 2)`` with a zero-based position.
    ``k=None`` leaves the lists untruncated.
    """
    by_prediction = np.array(row_dict[prediction_column]).argsort()[::-1]
    relevance = np.array(row_dict[target_column])
    by_relevance = relevance.argsort()[::-1]

    def _discounted_sum(ordering):
        # Sum of relevance / log2(rank + 1) over the top-k entries of ``ordering``.
        return sum(rel / math.log(pos + 2, 2) for pos, rel in enumerate(relevance[ordering][:k]))

    # NOTE(review): there is no guard for an ideal DCG of zero (e.g. all-zero
    # relevance); the division is left exactly as in the original.
    return _discounted_sum(by_prediction) / _discounted_sum(by_relevance)
106+
107+
formatted_data["norm_dis_cumul_gain_k_" + str(k)] = formatted_data.apply(_calculate_row_ndcg, args=(k,), axis=1)
108+
109+
mAP_at_k = ki_dict.mean()
79110
hit_ratio = formatted_data["count_at_k"].apply(lambda x: bool(x)).sum() / len(formatted_data)
80-
mrr = (1 / output_data["top_rank"]).replace([np.inf], np.nan).mean()
111+
mrr = (1 / formatted_data["top_rank"]).replace([np.inf], np.nan).mean()
112+
ndcg = formatted_data["norm_dis_cumul_gain_k_" + str(k)].mean()
81113

82114
result = log(pandas=output_data, schema=schema)
83115
result = result.merge(
84116
log(
85117
row={
86-
"mean_average_precision_k_" + str(k): mAP_at_k,
87-
"accuracy_k_" + str(k): hit_ratio,
118+
"mean_average_precision" + ("_k_" + str(k) if k else ""): mAP_at_k,
119+
"accuracy" + ("_k_" + str(k) if k else ""): hit_ratio,
88120
"mean_reciprocal_rank": mrr,
121+
"norm_dis_cumul_gain" + ("_k_" + str(k) if k else ""): ndcg,
89122
},
90123
schema=schema,
91124
)

0 commit comments

Comments
 (0)