Skip to content

Commit 3cde352

Browse files
authored
Change ECI prefix from uppercase to lowercase (#3771)
# Description This PR changes the ECI metric prefix in evaluator result keys from upper case to lower case (for example, `ECI_label` becomes `eci_label`) to match the casing used by other evaluator types and to integrate better with UI evaluation flows. # All Promptflow Contribution checklist: - [ ] **The pull request does not introduce [breaking changes].** - [ ] **CHANGELOG is updated for new features, bug fixes or other significant changes.** - [ ] **I have read the [contribution guidelines](https://github.com/microsoft/promptflow/blob/main/CONTRIBUTING.md).** - [ ] **I confirm that all new dependencies are compatible with the MIT license.** - [ ] **Create an issue and link to the pull request to get a dedicated review from the promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).** ## General Guidelines and Best Practices - [ ] Title of the pull request is clear and informative. - [ ] There are a small number of commits, each of which has an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). ### Testing Guidelines - [ ] Pull request includes test coverage for the included changes.
1 parent e79da2e commit 3cde352

File tree

4 files changed

+13
-27
lines changed

4 files changed

+13
-27
lines changed

src/promptflow-evals/promptflow/evals/_common/rai_service.py

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -220,42 +220,28 @@ def parse_response( # pylint: disable=too-many-branches,too-many-statements
220220
response = response.replace("true", "True")
221221
parsed_response = literal_eval(response)
222222
result = {}
223-
metric_prefix = _get_metric_prefix(metric_name)
224223
# Use label instead of score since these are assumed to be boolean results.
225224
# Use np.nan as null value since it's ignored by aggregations rather than treated as 0.
226-
result[metric_prefix + "_label"] = parsed_response["label"] if "label" in parsed_response else np.nan
227-
result[metric_prefix + "_reason"] = parsed_response["reasoning"] if "reasoning" in parsed_response else ""
225+
result[metric_name + "_label"] = parsed_response["label"] if "label" in parsed_response else np.nan
226+
result[metric_name + "_reason"] = parsed_response["reasoning"] if "reasoning" in parsed_response else ""
228227

229228
if metric_name == EvaluationMetrics.XPIA:
230229
# Add "manipulated_content", "intrusion" and "information_gathering" to the result
231230
# if present else set them to np.nan
232-
result[metric_prefix + "_manipulated_content"] = (
231+
result[metric_name + "_manipulated_content"] = (
233232
parsed_response["manipulated_content"] if "manipulated_content" in parsed_response else np.nan
234233
)
235-
result[metric_prefix + "_intrusion"] = (
234+
result[metric_name + "_intrusion"] = (
236235
parsed_response["intrusion"] if "intrusion" in parsed_response else np.nan
237236
)
238-
result[metric_prefix + "_information_gathering"] = (
237+
result[metric_name + "_information_gathering"] = (
239238
parsed_response["information_gathering"] if "information_gathering" in parsed_response else np.nan
240239
)
241240

242241
return result
243242
return _parse_content_harm_response(batch_response, metric_name)
244243

245244

246-
def _get_metric_prefix(metric_name: str) -> str:
247-
"""Get the prefix for the evaluation metric. This is usually the metric name.
248-
249-
:param metric_name: The evaluation metric to use.
250-
:type metric_name: str
251-
:return: The prefix for the evaluation metric.
252-
:rtype: str
253-
"""
254-
if metric_name == _InternalEvaluationMetrics.ECI:
255-
return "ECI"
256-
return metric_name
257-
258-
259245
def _parse_content_harm_response(batch_response: List[Dict], metric_name: str) -> Dict:
260246
"""Parse the annotation response from Responsible AI service for a content harm evaluation.
261247

src/promptflow-evals/promptflow/evals/evaluators/_eci/_eci.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ class ECIEvaluator:
6565
.. code-block:: python
6666
6767
{
68-
"ECI_label": "False",
69-
"ECI_reason": "Some reason."
68+
"eci_label": "False",
69+
"eci_reason": "Some reason."
7070
}
7171
"""
7272

src/promptflow-evals/tests/evals/e2etests/test_builtin_evaluators.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -470,8 +470,8 @@ def test_eci_evaluator(self, project_scope, azure_cred):
470470
answer="Rhombus",
471471
)
472472
assert unrelated_result is not None
473-
assert not unrelated_result["ECI_label"]
474-
assert "geometry question" in unrelated_result["ECI_reason"]
473+
assert not unrelated_result["eci_label"]
474+
assert "geometry question" in unrelated_result["eci_reason"]
475475

476476
# @pytest.mark.skipif(
477477
# not is_replay(), reason="API not fully released yet. Don't run in live mode unless connected to INT."

src/promptflow-evals/tests/evals/unittests/test_evaluate.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -463,8 +463,8 @@ def test_content_safety_aggregation(self):
463463

464464
def test_label_based_aggregation(self):
465465
data = {
466-
"eci.ECI_label": [True, False, True, False, True],
467-
"eci.ECI_reasoning": ["a", "b", "c", "d", "e"],
466+
"eci.eci_label": [True, False, True, False, True],
467+
"eci.eci_reasoning": ["a", "b", "c", "d", "e"],
468468
"protected_material.protected_material_label": [False, False, False, False, True],
469469
"protected_material.protected_material_reasoning": ["f", "g", "h", "i", "j"],
470470
"unknown.unaccounted_label": [True, False, False, False, True],
@@ -478,11 +478,11 @@ def test_label_based_aggregation(self):
478478
aggregation = _aggregate_metrics(data_df, evaluators)
479479
# ECI and PM labels should be replaced with defect rates, unaccounted should not
480480
assert len(aggregation) == 3
481-
assert "eci.ECI_label" not in aggregation
481+
assert "eci.eci_label" not in aggregation
482482
assert "protected_material.protected_material_label" not in aggregation
483483
assert aggregation["unknown.unaccounted_label"] == 0.4
484484

485-
assert aggregation["eci.ECI_defect_rate"] == 0.6
485+
assert aggregation["eci.eci_defect_rate"] == 0.6
486486
assert aggregation["protected_material.protected_material_defect_rate"] == 0.2
487487
assert "unaccounted_defect_rate" not in aggregation
488488

0 commit comments

Comments
 (0)