Skip to content

Commit bc1206d

Browse files
salma-elshafey and Salma Elshafey authored
Ensure query param is not None for tool-related evaluators (#4596)
* Ensure query param is not None for tool-related evaluators * Remove trailing whitespace, bump versions. --------- Co-authored-by: Salma Elshafey <[email protected]>
1 parent 75fdb98 commit bc1206d

File tree

6 files changed

+46
-5
lines changed

6 files changed

+46
-5
lines changed

assets/evaluators/builtin/tool_call_accuracy/evaluator/_tool_call_accuracy.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,19 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]: # t
253253
:return: The evaluation result.
254254
:rtype: Dict
255255
"""
256+
if "query" not in eval_input:
257+
raise EvaluationException(
258+
message=(
259+
"Query is a required input to the Tool Call Accuracy evaluator."
260+
),
261+
internal_message=(
262+
"Query is a required input to the Tool Call Accuracy evaluator."
263+
),
264+
blame=ErrorBlame.USER_ERROR,
265+
category=ErrorCategory.INVALID_VALUE,
266+
target=ErrorTarget.TOOL_CALL_ACCURACY_EVALUATOR,
267+
)
268+
256269
# Single LLM call for all tool calls
257270
prompty_output_dict = await self._flow(timeout=self._LLM_CALL_TIMEOUT, **eval_input)
258271
llm_output = prompty_output_dict.get("llm_output", {})

assets/evaluators/builtin/tool_call_accuracy/spec.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
type: "evaluator"
22
name: "builtin.tool_call_accuracy"
3-
version: 4
3+
version: 5
44
displayName: "Tool-Call-Accuracy-Evaluator"
55
description: "Measures whether the agent selects the correct tool calls, applies the correct parameters, and tracks inefficient or missing tool calls, in order to resolve a user's request. This is an umbrella evaluator that assesses overall tool call quality. Use this metric in agent-based systems and AI assistants that rely on tool integration."
66
evaluatorType: "builtin"

assets/evaluators/builtin/tool_input_accuracy/evaluator/_tool_input_accuracy.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -485,8 +485,23 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:
485485
:return: A dictionary containing the result of the evaluation.
486486
:rtype: Dict[str, Union[str, float]]
487487
"""
488+
if "query" not in eval_input:
489+
raise EvaluationException(
490+
message=(
491+
"Query is a required input to "
492+
"the Tool Input Accuracy evaluator."
493+
),
494+
internal_message=(
495+
"Query is a required input "
496+
"to the Tool Input Accuracy evaluator."
497+
),
498+
blame=ErrorBlame.USER_ERROR,
499+
category=ErrorCategory.INVALID_VALUE,
500+
target=ErrorTarget.TOOL_INPUT_ACCURACY_EVALUATOR,
501+
)
502+
488503
# Format conversation history for cleaner evaluation
489-
if "query" in eval_input:
504+
else:
490505
eval_input["query"] = reformat_conversation_history(
491506
eval_input["query"], logger, include_system_messages=True, include_tool_calls=True
492507
)

assets/evaluators/builtin/tool_input_accuracy/spec.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
type: "evaluator"
22
name: "builtin.tool_input_accuracy"
3-
version: 4
3+
version: 5
44
displayName: "Tool-Input-Accuracy-Evaluator"
55
description: "A binary evaluator (0 or 1) that checks whether all parameters in an agent’s tool call are correct, validating grounding, type, format, completeness, and contextual appropriateness using LLM-based analysis. Use it to verify agent tool usage, API integration tests, or to ensure tool call parameters are fully correct in AI workflows."
66
evaluatorType: "builtin"

assets/evaluators/builtin/tool_selection/evaluator/_tool_selection.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,8 +488,21 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:
488488
:return: A dictionary containing the result of the evaluation.
489489
:rtype: Dict[str, Union[str, float]]
490490
"""
491+
if "query" not in eval_input:
492+
raise EvaluationException(
493+
message=(
494+
"Query is a required input to the Tool Selection evaluator."
495+
),
496+
internal_message=(
497+
"Query is a required input to the Tool Selection evaluator."
498+
),
499+
blame=ErrorBlame.USER_ERROR,
500+
category=ErrorCategory.INVALID_VALUE,
501+
target=ErrorTarget.TOOL_SELECTION_EVALUATOR,
502+
)
503+
491504
# Format conversation history for cleaner evaluation
492-
if "query" in eval_input:
505+
else:
493506
eval_input["query"] = reformat_conversation_history(
494507
eval_input["query"], logger, include_system_messages=True, include_tool_calls=True
495508
)

assets/evaluators/builtin/tool_selection/spec.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
type: "evaluator"
22
name: "builtin.tool_selection"
3-
version: 4
3+
version: 5
44
displayName: "Tool-Selection-Evaluator"
55
description: "Evaluates whether an AI agent selected the most appropriate and efficient tools for a given task, avoiding redundancy or missing essentials. Use it to assess tool choice quality in agent-based systems, orchestration platforms, and AI assistants that must pick the right tools from available options."
66
evaluatorType: "builtin"

0 commit comments

Comments
 (0)