diff --git a/assets/evaluators/builtin/tool_input_accuracy/asset.yaml b/assets/evaluators/builtin/tool_input_accuracy/asset.yaml
new file mode 100644
index 0000000000..845a698b96
--- /dev/null
+++ b/assets/evaluators/builtin/tool_input_accuracy/asset.yaml
@@ -0,0 +1,4 @@
+type: evaluator
+spec: spec.yaml
+categories:
+- Evaluator
\ No newline at end of file
diff --git a/assets/evaluators/builtin/tool_input_accuracy/evaluator/_tool_input_accuracy.py b/assets/evaluators/builtin/tool_input_accuracy/evaluator/_tool_input_accuracy.py
new file mode 100644
index 0000000000..900eeaa5a4
--- /dev/null
+++ b/assets/evaluators/builtin/tool_input_accuracy/evaluator/_tool_input_accuracy.py
@@ -0,0 +1,608 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+import os
+import logging
+from itertools import chain
+from typing import Dict, List, Union, TypeVar, cast
+from typing_extensions import override
+from azure.ai.evaluation._evaluators._common import PromptyEvaluatorBase
+from azure.ai.evaluation._exceptions import (
+    ErrorMessage,
+    ErrorBlame,
+    ErrorCategory,
+    ErrorTarget,
+    EvaluationException,
+)
+from azure.ai.evaluation._common._experimental import experimental
+from enum import Enum
+
+logger = logging.getLogger(__name__)
+
+T_EvalValue = TypeVar("T_EvalValue")
+
+
+# Create extended ErrorTarget enum with the new member
+def _create_extended_error_target():
+    """Create an extended ErrorTarget enum that includes TOOL_INPUT_ACCURACY_EVALUATOR."""
+    existing_members = {member.name: member.value for member in ErrorTarget}
+    existing_members['TOOL_INPUT_ACCURACY_EVALUATOR'] = 'ToolInputAccuracyEvaluator'
+
+    ExtendedErrorTarget = Enum('ExtendedErrorTarget', existing_members)
+    return ExtendedErrorTarget
+
+
+ExtendedErrorTarget = _create_extended_error_target()
+
+
+def _get_built_in_tool_definition(tool_name: str):
+    """Get the definition for the built-in tool."""
+    try:
+        from azure.ai.evaluation._converters._models import _BUILT_IN_DESCRIPTIONS, _BUILT_IN_PARAMS
+
+        if tool_name in _BUILT_IN_DESCRIPTIONS:
+            return {
+                "type": tool_name,
+                "description": _BUILT_IN_DESCRIPTIONS[tool_name],
+                "name": tool_name,
+                "parameters": _BUILT_IN_PARAMS.get(tool_name, {}),
+            }
+    except ImportError:
+        pass
+    return None
+
+
+def _get_needed_built_in_tool_definitions(tool_calls: List[Dict]) -> List[Dict]:
+    """Extract tool definitions needed for the given built-in tool calls."""
+    needed_definitions = []
+    for tool_call in tool_calls:
+        if isinstance(tool_call, dict):
+            tool_type = tool_call.get("type")
+
+            # Only support converter format: {type: "tool_call", name: "bing_custom_search", arguments: {...}}
+            if tool_type == "tool_call":
+                tool_name = tool_call.get("name")
+                if tool_name:
+                    definition = _get_built_in_tool_definition(tool_name)
+                    if definition and definition not in needed_definitions:
+                        needed_definitions.append(definition)
+
+    return needed_definitions
+
+
+def _extract_needed_tool_definitions(
+    tool_calls: List[Dict], tool_definitions: List[Dict], error_target: ErrorTarget
+) -> List[Dict]:
+    """Extract the tool definitions that are needed for the provided tool calls.
+
+    :param tool_calls: The tool calls that need definitions
+    :type tool_calls: List[Dict]
+    :param tool_definitions: User-provided tool definitions
+    :type tool_definitions: List[Dict]
+    :param error_target: The error target to attribute to validation failures
+    :type error_target: ErrorTarget
+    :return: List of needed tool definitions
+    :rtype: List[Dict]
+    :raises EvaluationException: If validation fails
+    """
+    needed_tool_definitions = []
+
+    # Add all user-provided tool definitions
+    needed_tool_definitions.extend(tool_definitions)
+
+    # Add the needed built-in tool definitions (if they are called)
+    built_in_definitions = _get_needed_built_in_tool_definitions(tool_calls)
+    needed_tool_definitions.extend(built_in_definitions)
+
+    # OpenAPI tool is a collection of functions, so we need to expand it
+    tool_definitions_expanded = list(
+        chain.from_iterable(
+            tool.get("functions", []) if tool.get("type") == "openapi" else [tool]
+            for tool in needed_tool_definitions
+        )
+    )
+
+    # Validate that all tool calls have corresponding definitions
+    for tool_call in tool_calls:
+        if isinstance(tool_call, dict):
+            tool_type = tool_call.get("type")
+
+            if tool_type == "tool_call":
+                tool_name = tool_call.get("name")
+                if tool_name and _get_built_in_tool_definition(tool_name):
+                    # This is a built-in tool from converter, already handled above
+                    continue
+                elif tool_name:
+                    # This is a regular function tool from converter
+                    tool_definition_exists = any(
+                        tool.get("name") == tool_name and tool.get("type", "function") == "function"
+                        for tool in tool_definitions_expanded
+                    )
+                    if not tool_definition_exists:
+                        raise EvaluationException(
+                            message=f"Tool definition for {tool_name} not found",
+                            blame=ErrorBlame.USER_ERROR,
+                            category=ErrorCategory.INVALID_VALUE,
+                            target=error_target,
+                        )
+                else:
+                    raise EvaluationException(
+                        message=f"Tool call missing name: {tool_call}",
+                        blame=ErrorBlame.USER_ERROR,
+                        category=ErrorCategory.INVALID_VALUE,
+                        target=error_target,
+                    )
+            else:
+                # Unsupported tool format - only converter format is supported
+                raise EvaluationException(
+                    message=f"Unsupported tool call format. Only converter format is supported: {tool_call}",
+                    blame=ErrorBlame.USER_ERROR,
+                    category=ErrorCategory.INVALID_VALUE,
+                    target=error_target,
+                )
+        else:
+            # Tool call is not a dictionary
+            raise EvaluationException(
+                message=f"Tool call is not a dictionary: {tool_call}",
+                blame=ErrorBlame.USER_ERROR,
+                category=ErrorCategory.INVALID_VALUE,
+                target=error_target,
+            )
+
+    return needed_tool_definitions
+
+
+def _extract_text_from_content(content):
+    text = []
+    for msg in content:
+        if "text" in msg:
+            text.append(msg["text"])
+    return text
+
+
+def _get_conversation_history(query, include_system_messages=False, include_tool_calls=False):
+    all_user_queries = []
+    cur_user_query = []
+    all_agent_responses = []
+    cur_agent_response = []
+    system_message = None
+
+    # Track tool calls and results for grouping with assistant messages
+    tool_results = {}
+
+    # First pass: collect all tool results if include_tool_calls is True
+    if include_tool_calls:
+        for msg in query:
+            if msg.get("role") == "tool" and "tool_call_id" in msg:
+                tool_call_id = msg["tool_call_id"]
+                for content in msg.get("content", []):
+                    if content.get("type") == "tool_result":
+                        result = content.get("tool_result")
+                        tool_results[tool_call_id] = f"[TOOL_RESULT] {result}"
+
+    # Second pass: process messages and build conversation history
+    for msg in query:
+        if "role" not in msg:
+            continue
+
+        if include_system_messages and msg["role"] == "system" and "content" in msg:
+            system_message = msg.get("content", "")
+
+        if msg["role"] == "user" and "content" in msg:
+            # Start new user turn, close previous agent response if exists
+            if cur_agent_response != []:
+                all_agent_responses.append(cur_agent_response)
+                cur_agent_response = []
+            text_in_msg = _extract_text_from_content(msg["content"])
+            if text_in_msg:
+                cur_user_query.append(text_in_msg)
+
+        if msg["role"] == "assistant" and "content" in msg:
+            # Start new agent response, close previous user query if exists
+            if cur_user_query != []:
+                all_user_queries.append(cur_user_query)
+                cur_user_query = []
+
+            # Add text content
+            text_in_msg = _extract_text_from_content(msg["content"])
+            if text_in_msg:
+                cur_agent_response.append(text_in_msg)
+
+            # Handle tool calls in assistant messages
+            if include_tool_calls:
+                for content in msg.get("content", []):
+                    if content.get("type") == "tool_call":
+                        # Handle the flat converter format (name/arguments at the top level of the content item)
+                        tool_call_id = content.get("tool_call_id")
+                        func_name = content.get("name", "")
+                        args = content.get("arguments", {})
+
+                        # Also handle the nested tool_call format
+                        if "tool_call" in content and "function" in content.get("tool_call", {}):
+                            tc = content.get("tool_call", {})
+                            func_name = tc.get("function", {}).get("name", "")
+                            args = tc.get("function", {}).get("arguments", {})
+                            tool_call_id = tc.get("id")
+
+                        args_str = ", ".join(f'{k}="{v}"' for k, v in args.items())
+                        tool_call_text = f"[TOOL_CALL] {func_name}({args_str})"
+                        cur_agent_response.append(tool_call_text)
+
+                        # Immediately add tool result if available
+                        if tool_call_id and tool_call_id in tool_results:
+                            cur_agent_response.append(tool_results[tool_call_id])
+
+    # Close any remaining open queries/responses
+    if cur_user_query != []:
+        all_user_queries.append(cur_user_query)
+    if cur_agent_response != []:
+        all_agent_responses.append(cur_agent_response)
+
+    if len(all_user_queries) != len(all_agent_responses) + 1:
+        raise EvaluationException(
+            message=ErrorMessage.MALFORMED_CONVERSATION_HISTORY,
+            internal_message=ErrorMessage.MALFORMED_CONVERSATION_HISTORY,
+            target=ErrorTarget.CONVERSATION_HISTORY_PARSING,
+            category=ErrorCategory.INVALID_VALUE,
+            blame=ErrorBlame.USER_ERROR,
+        )
+    result = {"user_queries": all_user_queries, "agent_responses": all_agent_responses}
+    if include_system_messages:
+        result["system_message"] = system_message
+    return result
+
+
+def _pretty_format_conversation_history(conversation_history):
+    """Format the conversation history for better readability."""
+    formatted_history = ""
+    if "system_message" in conversation_history and conversation_history["system_message"] is not None:
+        formatted_history += "SYSTEM_PROMPT:\n"
+        formatted_history += " " + conversation_history["system_message"] + "\n\n"
+    for i, (user_query, agent_response) in enumerate(
+        zip(conversation_history["user_queries"], conversation_history["agent_responses"] + [None])
+    ):
+        formatted_history += f"User turn {i+1}:\n"
+        for msg in user_query:
+            if isinstance(msg, list):
+                for submsg in msg:
+                    formatted_history += " " + "\n ".join(submsg.split("\n")) + "\n"
+            else:
+                formatted_history += " " + "\n ".join(msg.split("\n")) + "\n"
+        formatted_history += "\n"
+        if agent_response:
+            formatted_history += f"Agent turn {i+1}:\n"
+            for msg in agent_response:
+                if isinstance(msg, list):
+                    for submsg in msg:
+                        formatted_history += " " + "\n ".join(submsg.split("\n")) + "\n"
+                else:
+                    formatted_history += " " + "\n ".join(msg.split("\n")) + "\n"
+            formatted_history += "\n"
+    return formatted_history
+
+
+def reformat_conversation_history(query, logger=None, include_system_messages=False, include_tool_calls=False):
+    """Reformats the conversation history to a more compact representation."""
+    try:
+        conversation_history = _get_conversation_history(
+            query, include_system_messages=include_system_messages, include_tool_calls=include_tool_calls
+        )
+        return _pretty_format_conversation_history(conversation_history)
+    except Exception:
+        # If the conversation history cannot be parsed for whatever reason (e.g. the converter format changed),
+        # the original query is returned.
+        # This is a fallback to ensure that the evaluation can still proceed.
+        # However, the accuracy of the evaluation will be affected.
+        # From our tests the negative impact on IntentResolution is:
+        # Higher intra model variance (0.142 vs 0.046)
+        # Higher inter model variance (0.345 vs 0.607)
+        # Lower percentage of mode in Likert scale (73.4% vs 75.4%)
+        # Lower pairwise agreement between LLMs (85% vs 90% at the pass/fail level with threshold of 3)
+        if logger:
+            logger.warning(f"Conversation history could not be parsed, falling back to original query: {query}")
+        return query
+
+
+def _get_agent_response(agent_response_msgs, include_tool_messages=False):
+    """Extract the formatted agent response, including text and optionally tool calls/results."""
+    agent_response_text = []
+    tool_results = {}
+
+    # First pass: collect tool results
+    if include_tool_messages:
+        for msg in agent_response_msgs:
+            if msg.get("role") == "tool" and "tool_call_id" in msg:
+                for content in msg.get("content", []):
+                    if content.get("type") == "tool_result":
+                        result = content.get("tool_result")
+                        tool_results[msg["tool_call_id"]] = f"[TOOL_RESULT] {result}"
+
+    # Second pass: parse assistant messages and tool calls
+    for msg in agent_response_msgs:
+        if "role" in msg and msg.get("role") == "assistant" and "content" in msg:
+            text = _extract_text_from_content(msg["content"])
+            if text:
+                agent_response_text.extend(text)
+            if include_tool_messages:
+                for content in msg.get("content", []):
+                    # TODO: Verify that this covers all tool call formats emitted by the converter
+                    if content.get("type") == "tool_call":
+                        if "tool_call" in content and "function" in content.get("tool_call", {}):
+                            tc = content.get("tool_call", {})
+                            func_name = tc.get("function", {}).get("name", "")
+                            args = tc.get("function", {}).get("arguments", {})
+                            tool_call_id = tc.get("id")
+                        else:
+                            tool_call_id = content.get("tool_call_id")
+                            func_name = content.get("name", "")
+                            args = content.get("arguments", {})
+                        args_str = ", ".join(f'{k}="{v}"' for k, v in args.items())
+                        call_line = f"[TOOL_CALL] {func_name}({args_str})"
+                        agent_response_text.append(call_line)
+                        if tool_call_id in tool_results:
+                            agent_response_text.append(tool_results[tool_call_id])
+
+    return agent_response_text
+
+
+@experimental
+class ToolInputAccuracyEvaluator(PromptyEvaluatorBase[Union[str, float]]):
+    """The Tool Input Accuracy evaluator performs an evaluation of the parameters passed to tool calls.
+
+    The evaluation criteria are as follows:
+    - Parameter grounding: All parameters must be derived from conversation history/query
+    - Type compliance: All parameters must match exact types specified in tool definitions
+    - Format compliance: All parameters must follow exact format and structure requirements
+    - Completeness: All required parameters must be provided
+    - No unexpected parameters: Only defined parameters are allowed
+
+    The evaluator uses strict binary evaluation:
+    - PASS: Only when ALL criteria are satisfied perfectly for ALL parameters
+    - FAIL: When ANY criterion fails for ANY parameter
+
+    This evaluation focuses on ensuring tool call parameters are completely correct without any tolerance
+    for partial correctness.
+
+    :param model_config: Configuration for the Azure OpenAI model.
+    :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
+        ~azure.ai.evaluation.OpenAIModelConfiguration]
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/evaluation_samples_evaluate.py
+            :start-after: [START tool_input_accuracy_evaluator]
+            :end-before: [END tool_input_accuracy_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call a ToolInputAccuracyEvaluator.
+
+    .. admonition:: Example using Azure AI Project URL:
+
+        .. literalinclude:: ../samples/evaluation_samples_evaluate_fdp.py
+            :start-after: [START tool_input_accuracy_evaluator]
+            :end-before: [END tool_input_accuracy_evaluator]
+            :language: python
+            :dedent: 8
+            :caption: Initialize and call ToolInputAccuracyEvaluator using Azure AI Project URL in the following format
+                https://{resource_name}.services.ai.azure.com/api/projects/{project_name}
+
+    .. note::
+
+        To align with our support of a diverse set of models, an output key without the `gpt_` prefix has been added.
+        To maintain backwards compatibility, the old key with the `gpt_` prefix will still be present in the output;
+        however, it is recommended to use the new key moving forward as the old key will be deprecated in the future.
+    """
+
+    _PROMPTY_FILE = "tool_input_accuracy.prompty"
+    _RESULT_KEY = "tool_input_accuracy"
+
+    _NO_TOOL_CALLS_MESSAGE = "No tool calls found in response or provided tool_calls."
+    _NO_TOOL_DEFINITIONS_MESSAGE = "Tool definitions must be provided."
+    _TOOL_DEFINITIONS_MISSING_MESSAGE = "Tool definitions for all tool calls must be provided."
+
+    def __init__(
+        self,
+        model_config,
+        *,
+        credential=None,
+        **kwargs,
+    ):
+        """Initialize the Tool Input Accuracy evaluator.
+
+        :param model_config: Configuration for the Azure OpenAI model.
+        :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration,
+            ~azure.ai.evaluation.OpenAIModelConfiguration]
+        :param credential: The credential for authentication.
+        :type credential: Optional[Any]
+        :param kwargs: Additional keyword arguments.
+        :type kwargs: Any
+        """
+        current_dir = os.path.dirname(__file__)
+        prompty_path = os.path.join(current_dir, self._PROMPTY_FILE)
+        super().__init__(
+            model_config=model_config,
+            prompty_file=prompty_path,
+            result_key=self._RESULT_KEY,
+            credential=credential,
+            **kwargs,
+        )
+
+    def _convert_kwargs_to_eval_input(self, **kwargs):
+        """Convert kwargs to evaluation input format.
+
+        :keyword kwargs: The inputs to convert.
+        :type kwargs: Dict
+        :return: The formatted evaluation input.
+        :rtype: Dict
+        """
+        # Collect inputs
+        tool_definitions = kwargs.get("tool_definitions", [])  # Default to empty list
+        query = kwargs.get("query")
+        response = kwargs.get("response")
+
+        # Extract tool calls from response
+        if not response:
+            return {"error_message": "Response parameter is required to extract tool calls."}
+
+        tool_calls = self._parse_tools_from_response(response)
+        if not tool_calls:
+            return {"error_message": self._NO_TOOL_CALLS_MESSAGE}
+
+        if not isinstance(tool_calls, list):
+            tool_calls = [tool_calls]
+        if not isinstance(tool_definitions, list):
+            tool_definitions = [tool_definitions] if tool_definitions else []
+
+        try:
+            # Type cast to satisfy static type checker
+            tool_calls_typed = cast(List[Dict], tool_calls)
+            needed_tool_definitions = _extract_needed_tool_definitions(
+                tool_calls_typed, tool_definitions, ExtendedErrorTarget.TOOL_INPUT_ACCURACY_EVALUATOR
+            )
+        except EvaluationException:
+            # Check if this is because no tool definitions were provided at all
+            if len(tool_definitions) == 0:
+                return {"error_message": self._NO_TOOL_DEFINITIONS_MESSAGE}
+            else:
+                return {"error_message": self._TOOL_DEFINITIONS_MISSING_MESSAGE}
+
+        if len(needed_tool_definitions) == 0:
+            return {"error_message": self._NO_TOOL_DEFINITIONS_MESSAGE}
+
+        # Get agent response with tool calls and results using _get_agent_response
+        agent_response_with_tools = _get_agent_response(response, include_tool_messages=True)
+
+        return {
+            "query": query,
+            "tool_calls": agent_response_with_tools,
+            "tool_definitions": needed_tool_definitions,
+        }
+
+    @override
+    async def _do_eval(self, eval_input: Dict) -> Dict[str, Union[float, str]]:
+        """Do Tool Input Accuracy evaluation.
+
+        :param eval_input: The input to the evaluator.
+        :type eval_input: Dict
+        :return: A dictionary containing the result of the evaluation.
+        :rtype: Dict[str, Union[str, float]]
+        """
+        # Format conversation history for cleaner evaluation
+        if "query" in eval_input:
+            eval_input["query"] = reformat_conversation_history(
+                eval_input["query"], logger, include_system_messages=True, include_tool_calls=True
+            )
+
+        # Call the LLM to evaluate
+        llm_output = await self._flow(timeout=self._LLM_CALL_TIMEOUT, **eval_input)
+
+        if isinstance(llm_output, dict):
+            result = llm_output.get("result", None)
+            if result not in [0, 1]:
+                raise EvaluationException(
+                    message=f"Invalid result value: {result}. Expected 0 or 1.",
+                    internal_message="Invalid result value.",
+                    category=ErrorCategory.FAILED_EXECUTION,
+                    blame=ErrorBlame.SYSTEM_ERROR,
+                )
+
+            # Add parameter extraction accuracy post-processing
+            details = llm_output.get("details", {})
+            if details:
+                parameter_extraction_accuracy = self._calculate_parameter_extraction_accuracy(details)
+                details["parameter_extraction_accuracy"] = parameter_extraction_accuracy
+
+            # Format the output
+            explanation = llm_output.get("chain_of_thought", "")
+            score_result = "pass" if result == 1 else "fail"
+            response_dict = {
+                self._result_key: result,
+                f"{self._result_key}_result": score_result,
+                f"{self._result_key}_reason": explanation,
+                "details": details,
+            }
+            return response_dict
+
+        else:
+            raise EvaluationException(
+                message="Tool input accuracy evaluator returned invalid output.",
+                blame=ErrorBlame.SYSTEM_ERROR,
+                category=ErrorCategory.FAILED_EXECUTION,
+                target=ExtendedErrorTarget.TOOL_INPUT_ACCURACY_EVALUATOR,
+            )
+
+    async def _real_call(self, **kwargs):
+        """Perform the asynchronous call for the real end-to-end evaluation logic.
+
+        :keyword kwargs: The inputs to evaluate.
+        :type kwargs: Dict
+        :return: The evaluation result.
+        :rtype: Union[DoEvalResult[T_EvalValue], AggregateResult[T_EvalValue]]
+        """
+        # Convert the inputs into the evaluation input format
+        eval_input = self._convert_kwargs_to_eval_input(**kwargs)
+        if isinstance(eval_input, dict) and eval_input.get("error_message"):
+            # If there is an error message, return not applicable result
+            error_message = eval_input.get("error_message", "Unknown error")
+            return self._not_applicable_result(error_message, 1)
+        # Do the evaluation
+        result = await self._do_eval(eval_input)
+        # Return the result
+        return result
+
+    def _calculate_parameter_extraction_accuracy(self, details):
+        """Calculate parameter extraction accuracy from the evaluation details.
+
+        :param details: The details dictionary from the LLM evaluation output
+        :type details: Dict
+        :return: Parameter extraction accuracy as a percentage
+        :rtype: float
+        """
+        total_parameters = details.get("total_parameters_passed", 0)
+        correct_parameters = details.get("correct_parameters_passed", 0)
+
+        if total_parameters == 0:
+            return 100.0  # If no parameters were passed, accuracy is 100%
+
+        accuracy = (correct_parameters / total_parameters) * 100
+        return round(accuracy, 2)
+
+    def _not_applicable_result(
+        self, error_message: str, threshold: Union[int, float]
+    ) -> Dict[str, Union[str, float, Dict]]:
+        """Return a result indicating that the evaluation is not applicable.
+
+        :param error_message: The error message explaining why evaluation is not applicable.
+        :type error_message: str
+        :param threshold: The threshold value for the evaluator.
+        :type threshold: Union[int, float]
+        :return: A dictionary containing the result of the evaluation.
+        :rtype: Dict[str, Union[str, float, Dict]]
+        """
+        # If no tool calls were made or tool call type is not supported, return not applicable result
+        return {
+            self._result_key: self._NOT_APPLICABLE_RESULT,
+            f"{self._result_key}_result": "pass",
+            f"{self._result_key}_threshold": threshold,
+            f"{self._result_key}_reason": error_message,
+            "details": {},
+        }
+
+    @override
+    def __call__(  # pylint: disable=docstring-missing-param
+        self,
+        *args,
+        **kwargs,
+    ):
+        """
+        Evaluate parameter correctness of tool calls.
+
+        :keyword query: Query or Chat history up to the message that has the tool call being evaluated.
+        :paramtype query: Union[str, List[dict]]
+        :keyword tool_definitions: List of tool definitions whose calls are being evaluated.
+        :paramtype tool_definitions: Union[dict, List[dict]]
+        :keyword response: Response containing tool calls to be evaluated.
+        :paramtype response: Union[str, List[dict]]
+        :return: The tool input accuracy evaluation results.
+        :rtype: Dict[str, Union[str, float]]
+        """
+        return super().__call__(*args, **kwargs)
diff --git a/assets/evaluators/builtin/tool_input_accuracy/evaluator/tool_input_accuracy.prompty b/assets/evaluators/builtin/tool_input_accuracy/evaluator/tool_input_accuracy.prompty
new file mode 100644
index 0000000000..0930bb5a57
--- /dev/null
+++ b/assets/evaluators/builtin/tool_input_accuracy/evaluator/tool_input_accuracy.prompty
@@ -0,0 +1,76 @@
+---
+name: Tool Input Accuracy
+description: Evaluates the accuracy of all inputs/parameters passed to the tools by the agent
+model:
+  api: chat
+  parameters:
+    temperature: 0.0
+    max_tokens: 1000
+    top_p: 1.0
+    presence_penalty: 0
+    frequency_penalty: 0
+    response_format:
+      type: json_object
+
+inputs:
+  query:
+    type: List
+  tool_calls:
+    type: List
+  tool_definitions:
+    type: Dict
+---
+
+# system:
+You are an AI system designed to evaluate the correctness of parameters passed to tool calls. Your task is to perform a strict binary evaluation (PASS/FAIL) based on whether ALL parameters are correct.
+
+The evaluation must check ALL of the following criteria. If ANY criterion fails, the overall result is FAIL:
+1. **Parameter Groundedness**: ALL parameters must be derived from or supported by information in the conversation history/query. NO fabricated or unsupported values.
+2. **Type Compliance**: ALL parameters must match the exact type specified in the tool definitions (string, number, boolean, array, object, etc.).
+3. **Format Compliance**: ALL parameters must follow the exact format, structure, and constraints specified in the tool definitions.
+4. **Required Parameters**: ALL required parameters must be provided. Missing any required parameter results in FAIL.
+5. **Unexpected Parameters**: NO parameters should be provided that are not defined in the tool definition. Any extra/unexpected parameters result in FAIL.
+6. **Value Appropriateness**: ALL parameter values must be contextually appropriate and meaningful for the tool's purpose.
+
+## Evaluation Rules
+
+**PASS**: Only when ALL criteria above are satisfied perfectly. Every single parameter must be:
+- Properly grounded in conversation history/query
+- Correct type according to tool definition
+- Proper format and structure
+- Required parameters all present
+- No unexpected/undefined parameters
+- Contextually appropriate values
+
+**FAIL**: When ANY of the above criteria fails, including:
+- Any parameter lacks grounding in conversation history
+- Any parameter has wrong type
+- Any parameter has wrong format/structure
+- Any required parameter is missing
+- Any unexpected parameter is present
+- Any parameter value is inappropriate for the context
+
+## Task
+Analyze each tool call and its parameters against the provided tool definitions and conversation context. Provide your evaluation in the following JSON format:
+
+{
+    "chain_of_thought": "Step-by-step analysis for all parameters passed to all the tools to check for the criteria mentioned above",
+    "details": {
+        "total_parameters_passed": <total number of parameters passed across all tool calls>,
+        "correct_parameters_passed": <number of parameters that satisfy all of the criteria above>,
+        "incorrect_parameters": ["list of incorrect parameters passed with reasons"]
+    },
+    "result": <0 for FAIL, 1 for PASS>
+}
+
+
+## Conversation History/Query:
+{{query}}
+
+## Tool Calls Made:
+{{tool_calls}}
+
+## Tool Definitions:
+{{tool_definitions}}
+
+# Output
diff --git a/assets/evaluators/builtin/tool_input_accuracy/spec.yaml b/assets/evaluators/builtin/tool_input_accuracy/spec.yaml
new file mode 100644
index 0000000000..9f7922a2b4
--- /dev/null
+++ b/assets/evaluators/builtin/tool_input_accuracy/spec.yaml
@@ -0,0 +1,45 @@
+type: "evaluator"
+name: "test.tool_input_accuracy"
+version: 1
+displayName: "Tool-Input-Accuracy-Evaluator"
+description: "| | |\n| -- | -- |\n| Score range | Binary [0-1]: 0 means incorrect parameters, 1 means all parameters correct. |\n| What is this metric? | Tool Input Accuracy evaluates the correctness of all parameters passed to tool calls by the agent, performing strict binary validation (PASS/FAIL) based on parameter grounding, type compliance, format compliance, completeness, and appropriateness. |\n| How does it work? | This metric uses LLM-based validation to check that ALL parameters meet ALL criteria: derived from conversation history, correct types, proper format, all required parameters present, no unexpected parameters, and contextually appropriate values. |\n| When to use it? | Useful for validating agent tool usage, API integration testing, and ensuring tool call parameter correctness in AI agent workflows. |\n"
+evaluatorType: "builtin"
+evaluatorSubType: "code"
+categories: ["agents"]
+initParameterSchema:
+  type: "object"
+  properties:
+    deployment_name:
+      type: "string"
+    credential:
+      type: "object"
+  required: ["deployment_name"]
+dataMappingSchema:
+  type: "object"
+  properties:
+    query:
+      anyOf:
+        - type: "string"
+        - type: "array"
+          items:
+            type: "object"
+    response:
+      anyOf:
+        - type: "string"
+        - type: "array"
+          items:
+            type: "object"
+    tool_definitions:
+      anyOf:
+        - type: "object"
+        - type: "array"
+          items:
+            type: "object"
+  required: ["query", "response", "tool_definitions"]
+outputSchema:
+  tool_input_accuracy:
+    type: "binary"
+    desirable_direction: "increase"
+    min_value: 0
+    max_value: 1
+path: ./evaluator
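
For reviewers, a minimal usage sketch of the new evaluator (not part of the diff). It assumes the converter-style message shapes that _get_conversation_history and _get_agent_response parse above; the endpoint, deployment, API key, tool name, and import path are illustrative placeholders rather than values from this change.

# usage_sketch.py - illustrative only; adjust the import to match how the
# evaluator module is packaged where it is deployed.
from azure.ai.evaluation import AzureOpenAIModelConfiguration
from _tool_input_accuracy import ToolInputAccuracyEvaluator  # hypothetical import path

model_config = AzureOpenAIModelConfiguration(
    azure_endpoint="https://<resource>.openai.azure.com",  # placeholder
    api_key="<api-key>",  # placeholder
    azure_deployment="<deployment>",  # placeholder
)
evaluator = ToolInputAccuracyEvaluator(model_config=model_config)

# System content is a plain string; user content is a list of text items,
# matching what _extract_text_from_content expects.
query = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": [{"type": "text", "text": "What is the weather in Seattle?"}]},
]

# Converter format: the assistant message carries a tool_call content item,
# and a separate tool message carries the matching tool_result.
response = [
    {
        "role": "assistant",
        "content": [
            {
                "type": "tool_call",
                "tool_call_id": "call_1",
                "name": "fetch_weather",  # hypothetical tool
                "arguments": {"location": "Seattle"},
            }
        ],
    },
    {
        "role": "tool",
        "tool_call_id": "call_1",
        "content": [{"type": "tool_result", "tool_result": "Sunny, 21 C"}],
    },
]

tool_definitions = [
    {
        "name": "fetch_weather",
        "type": "function",
        "description": "Fetch weather information for a location.",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "The location to fetch weather for."}
            },
            "required": ["location"],
        },
    }
]

result = evaluator(query=query, response=response, tool_definitions=tool_definitions)
# Expected keys per _do_eval/_real_call: tool_input_accuracy (0 or 1),
# tool_input_accuracy_result ("pass"/"fail"), tool_input_accuracy_reason, and
# details, which includes the post-processed parameter_extraction_accuracy
# percentage computed by _calculate_parameter_extraction_accuracy.
print(result)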