Ensure query param is not None for tool-related evaluators (#4596)

salma-elshafey · Salma Elshafey · web-flow · commit bc1206dfa81f · 2025-11-10T14:29:29.000+02:00
* Ensure query param is not None for tool-related evaluators

* Remove trailing whitespace, bump versions.

---------

Co-authored-by: Salma Elshafey <selshafey@microsoft.com>
diff --git a/assets/evaluators/builtin/tool_call_accuracy/evaluator/_tool_call_accuracy.py b/assets/evaluators/builtin/tool_call_accuracy/evaluator/_tool_call_accuracy.py
@@ -253,6 +253,19 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:  # t
         :return: The evaluation result.
         :rtype: Dict
         """
+        if "query" not in eval_input:
+            raise EvaluationException(
+                message=(
+                    "Query is a required input to the Tool Call Accuracy evaluator."
+                ),
+                internal_message=(
+                    "Query is a required input to the Tool Call Accuracy evaluator."
+                ),
+                blame=ErrorBlame.USER_ERROR,
+                category=ErrorCategory.INVALID_VALUE,
+                target=ErrorTarget.TOOL_CALL_ACCURACY_EVALUATOR,
+            )
+
         # Single LLM call for all tool calls
         prompty_output_dict = await self._flow(timeout=self._LLM_CALL_TIMEOUT, **eval_input)
         llm_output = prompty_output_dict.get("llm_output", {})
diff --git a/assets/evaluators/builtin/tool_call_accuracy/spec.yaml b/assets/evaluators/builtin/tool_call_accuracy/spec.yaml
@@ -1,6 +1,6 @@
 type: "evaluator"
 name: "builtin.tool_call_accuracy"
-version: 4
+version: 5
 displayName: "Tool-Call-Accuracy-Evaluator"
 description: "Measures whether the agent selects the correct tool calls, applies the correct parameters, and tracks inefficient or missing too calls, in order to resolve a user's request. This is an umbrella evaluators that assessing overall tool call quality. Use this metric in agent-based systems, and AI assistants that rely on tool integration."
 evaluatorType: "builtin"
diff --git a/assets/evaluators/builtin/tool_input_accuracy/evaluator/_tool_input_accuracy.py b/assets/evaluators/builtin/tool_input_accuracy/evaluator/_tool_input_accuracy.py
@@ -485,8 +485,23 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:
         :return: A dictionary containing the result of the evaluation.
         :rtype: Dict[str, Union[str, float]]
         """
+        if "query" not in eval_input:
+            raise EvaluationException(
+                message=(
+                    "Query is a required input to "
+                    "the Tool Input Accuracy evaluator."
+                ),
+                internal_message=(
+                    "Query is a required input "
+                    "to the Tool Input Accuracy evaluator."
+                ),
+                blame=ErrorBlame.USER_ERROR,
+                category=ErrorCategory.INVALID_VALUE,
+                target=ErrorTarget.TOOL_INPUT_ACCURACY_EVALUATOR,
+            )
+
         # Format conversation history for cleaner evaluation
-        if "query" in eval_input:
+        else:
             eval_input["query"] = reformat_conversation_history(
                 eval_input["query"], logger, include_system_messages=True, include_tool_calls=True
             )
diff --git a/assets/evaluators/builtin/tool_input_accuracy/spec.yaml b/assets/evaluators/builtin/tool_input_accuracy/spec.yaml
@@ -1,6 +1,6 @@
 type: "evaluator"
 name: "builtin.tool_input_accuracy"
-version: 4
+version: 5
 displayName: "Tool-Input-Accuracy-Evaluator"
 description: "A binary evaluator (0 or 1) that checks whether all parameters in an agent’s tool call are correct, validating grounding, type, format, completeness, and contextual appropriateness using LLM-based analysis. Use it to verify agent tool usage, API integration tests, or to ensure tool call parameters are fully correct in AI workflows."
 evaluatorType: "builtin"
diff --git a/assets/evaluators/builtin/tool_selection/evaluator/_tool_selection.py b/assets/evaluators/builtin/tool_selection/evaluator/_tool_selection.py
@@ -488,8 +488,21 @@ async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:
         :return: A dictionary containing the result of the evaluation.
         :rtype: Dict[str, Union[str, float]]
         """
+        if "query" not in eval_input:
+            raise EvaluationException(
+                message=(
+                    "Query is a required input to the Tool Selection evaluator."
+                ),
+                internal_message=(
+                    "Query is a required input to the Tool Selection evaluator."
+                ),
+                blame=ErrorBlame.USER_ERROR,
+                category=ErrorCategory.INVALID_VALUE,
+                target=ErrorTarget.TOOL_SELECTION_EVALUATOR,
+            )
+
         # Format conversation history for cleaner evaluation
-        if "query" in eval_input:
+        else:
             eval_input["query"] = reformat_conversation_history(
                 eval_input["query"], logger, include_system_messages=True, include_tool_calls=True
             )
diff --git a/assets/evaluators/builtin/tool_selection/spec.yaml b/assets/evaluators/builtin/tool_selection/spec.yaml
@@ -1,6 +1,6 @@
 type: "evaluator"
 name: "builtin.tool_selection"
-version: 4
+version: 5
 displayName: "Tool-Selection-Evaluator"
 description: "Evaluates whether an AI agent selected the most appropriate and efficient tools for a given task, avoiding redundancy or missing essentials. Use it to assess tool choice quality in agent-based systems, orchestration platforms, and AI assistants that must pick the right tools from available options."
 evaluatorType: "builtin"