Skip to content

Commit 8fb22ee

Browse files
mathislucka and sjrl authored
feat: Agent can stream ChatGenerator responses (#233)
* feat: Agent can stream ChatGenerator responses * fix: unused import * Update haystack_experimental/components/agents/agent.py Co-authored-by: Sebastian Husch Lee <[email protected]> * add serde test --------- Co-authored-by: Sebastian Husch Lee <[email protected]>
1 parent 2c9b2c1 commit 8fb22ee

File tree

3 files changed

+182
-8
lines changed

3 files changed

+182
-8
lines changed

haystack_experimental/components/agents/agent.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
from haystack.core.pipeline.base import PipelineError
1414
from haystack.core.serialization import component_from_dict
1515
from haystack.dataclasses import ChatMessage
16+
from haystack.dataclasses.streaming_chunk import SyncStreamingCallbackT
17+
from haystack.utils.callable_serialization import deserialize_callable, serialize_callable
1618

1719
from haystack_experimental.components.tools import ToolInvoker
1820
from haystack_experimental.dataclasses.state import State, _schema_from_dict, _schema_to_dict, _validate_schema
@@ -63,6 +65,7 @@ def __init__( # pylint: disable=too-many-positional-arguments
6365
state_schema: Optional[Dict[str, Any]] = None,
6466
max_runs_per_component: int = 100,
6567
raise_on_tool_invocation_failure: bool = False,
68+
streaming_callback: Optional[SyncStreamingCallbackT] = None,
6669
):
6770
"""
6871
Initialize the agent component.
@@ -77,6 +80,7 @@ def __init__( # pylint: disable=too-many-positional-arguments
7780
component exceeds the maximum number of runs per component.
7881
:param raise_on_tool_invocation_failure: Should the agent raise an exception when a tool invocation fails?
7982
If set to False, the exception will be turned into a chat message and passed to the LLM.
83+
:param streaming_callback: A callback that will be invoked when a response is streamed from the LLM.
8084
"""
8185
valid_exits = ["text"] + [tool.name for tool in tools or []]
8286
if exit_condition not in valid_exits:
@@ -92,6 +96,7 @@ def __init__( # pylint: disable=too-many-positional-arguments
9296
self.exit_condition = exit_condition
9397
self.max_runs_per_component = max_runs_per_component
9498
self.raise_on_tool_invocation_failure = raise_on_tool_invocation_failure
99+
self.streaming_callback = streaming_callback
95100

96101
output_types = {"messages": List[ChatMessage]}
97102
for param, config in self.state_schema.items():
@@ -178,6 +183,11 @@ def to_dict(self) -> Dict[str, Any]:
178183
179184
:return: Dictionary with serialized data
180185
"""
186+
if self.streaming_callback is not None:
187+
streaming_callback = serialize_callable(self.streaming_callback)
188+
else:
189+
streaming_callback = None
190+
181191
return default_to_dict(
182192
self,
183193
chat_generator=self.chat_generator.to_dict(),
@@ -187,6 +197,7 @@ def to_dict(self) -> Dict[str, Any]:
187197
state_schema=_schema_to_dict(self.state_schema),
188198
max_runs_per_component=self.max_runs_per_component,
189199
raise_on_tool_invocation_failure=self.raise_on_tool_invocation_failure,
200+
streaming_callback=streaming_callback
190201
)
191202

192203
@classmethod
@@ -201,10 +212,13 @@ def from_dict(cls, data: Dict[str, Any]) -> "Agent":
201212

202213
init_params["chat_generator"] = Agent._load_component(init_params["chat_generator"])
203214

204-
# Deserialize type annotations
205215
if "state_schema" in init_params:
206216
init_params["state_schema"] = _schema_from_dict(init_params["state_schema"])
207217

218+
if init_params.get("streaming_callback") is not None:
219+
init_params["streaming_callback"] = deserialize_callable(init_params["streaming_callback"])
220+
221+
208222
deserialize_tools_inplace(init_params, key="tools")
209223

210224
return default_from_dict(cls, data)
@@ -232,11 +246,17 @@ def _load_component(component_data: Dict[str, Any]) -> Component:
232246

233247
return instance
234248

235-
def run(self, messages: List[ChatMessage], **kwargs) -> Dict[str, Any]:
249+
def run(
250+
self,
251+
messages: List[ChatMessage],
252+
streaming_callback: Optional[SyncStreamingCallbackT] = None,
253+
**kwargs
254+
) -> Dict[str, Any]:
236255
"""
237256
Process messages and execute tools until the exit condition is met.
238257
239258
:param messages: List of chat messages to process
259+
:param streaming_callback: A callback that will be invoked when a response is streamed from the LLM.
240260
:param kwargs: Additional keyword arguments matching the defined input types
241261
:return: Dictionary containing messages and outputs matching the defined output types
242262
"""
@@ -245,11 +265,17 @@ def run(self, messages: List[ChatMessage], **kwargs) -> Dict[str, Any]:
245265
if self.system_prompt is not None:
246266
messages = [ChatMessage.from_system(self.system_prompt)] + messages
247267

268+
generator_inputs = {"tools": self.tools}
269+
270+
selected_callback = streaming_callback or self.streaming_callback
271+
if selected_callback is not None:
272+
generator_inputs["streaming_callback"] = selected_callback
273+
248274
result = self.pipeline.run(
249275
data={
250276
"joiner": {"value": messages},
251277
"context_joiner": {"value": state},
252-
"generator": {"tools": self.tools},
278+
"generator": generator_inputs,
253279
},
254280
include_outputs_from={"context_joiner"},
255281
)

haystack_experimental/components/writers/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,4 @@
44

55
from haystack_experimental.components.writers.chat_message_writer import ChatMessageWriter
66

7-
87
_all_ = ["ChatMessageWriter"]

test/components/agents/test_agent.py

Lines changed: 153 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,30 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
from datetime import datetime
6+
from typing import Iterator
7+
8+
from unittest.mock import MagicMock, patch
59
import pytest
610

11+
from openai import Stream
12+
from openai.types.chat import ChatCompletionChunk, chat_completion_chunk
13+
714
from haystack.components.builders.prompt_builder import PromptBuilder
815
from haystack.components.generators.chat.openai import OpenAIChatGenerator
9-
from haystack.utils import serialize_callable
16+
from haystack.dataclasses import ChatMessage
17+
from haystack.dataclasses.streaming_chunk import StreamingChunk
18+
from haystack.utils import serialize_callable, Secret
1019

1120
from haystack_experimental.components.agents import Agent
1221
from haystack_experimental.tools import Tool, ComponentTool
1322

1423
import os
1524

25+
26+
def streaming_callback_for_serde(chunk: StreamingChunk):
27+
pass
28+
1629
def weather_function(location):
1730
weather_info = {
1831
"Berlin": {"weather": "mostly sunny", "temperature": 7, "unit": "celsius"},
@@ -24,7 +37,6 @@ def weather_function(location):
2437

2538
weather_parameters = {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}
2639

27-
2840
@pytest.fixture
2941
def weather_tool():
3042
return Tool(
@@ -42,11 +54,47 @@ def component_tool():
4254
component=PromptBuilder(template="{{parrot}}")
4355
)
4456

57+
class OpenAIMockStream(Stream[ChatCompletionChunk]):
58+
def __init__(self, mock_chunk: ChatCompletionChunk, client=None, *args, **kwargs):
59+
client = client or MagicMock()
60+
super().__init__(client=client, *args, **kwargs)
61+
self.mock_chunk = mock_chunk
62+
63+
def __stream__(self) -> Iterator[ChatCompletionChunk]:
64+
yield self.mock_chunk
65+
66+
@pytest.fixture
67+
def openai_mock_chat_completion_chunk():
68+
"""
69+
Mock the OpenAI API completion chunk response and reuse it for tests
70+
"""
71+
72+
with patch("openai.resources.chat.completions.Completions.create") as mock_chat_completion_create:
73+
completion = ChatCompletionChunk(
74+
id="foo",
75+
model="gpt-4",
76+
object="chat.completion.chunk",
77+
choices=[
78+
chat_completion_chunk.Choice(
79+
finish_reason="stop",
80+
logprobs=None,
81+
index=0,
82+
delta=chat_completion_chunk.ChoiceDelta(content="Hello", role="assistant"),
83+
)
84+
],
85+
created=int(datetime.now().timestamp()),
86+
usage=None,
87+
)
88+
mock_chat_completion_create.return_value = OpenAIMockStream(
89+
completion, cast_to=None, response=None, client=None
90+
)
91+
yield mock_chat_completion_create
92+
4593

4694
class TestAgent:
4795
def test_serde(self, weather_tool, component_tool):
48-
os.environ["OPENAI_API_KEY"] = "fake-key"
49-
generator = OpenAIChatGenerator()
96+
os.environ["FAKE_OPENAI_KEY"] = "fake-key"
97+
generator = OpenAIChatGenerator(api_key=Secret.from_env_var("FAKE_OPENAI_KEY"))
5098
agent = Agent(
5199
chat_generator=generator,
52100
tools=[weather_tool, component_tool],
@@ -58,6 +106,7 @@ def test_serde(self, weather_tool, component_tool):
58106

59107
assert serialized_agent["type"] == "haystack_experimental.components.agents.agent.Agent"
60108
assert init_parameters["chat_generator"]["type"] == "haystack.components.generators.chat.openai.OpenAIChatGenerator"
109+
assert init_parameters["streaming_callback"] == None
61110
assert init_parameters["tools"][0]["data"]["function"] == serialize_callable(weather_function)
62111
assert init_parameters["tools"][1]["data"]["component"]["type"] == "haystack.components.builders.prompt_builder.PromptBuilder"
63112

@@ -68,4 +117,104 @@ def test_serde(self, weather_tool, component_tool):
68117
assert deserialized_agent.tools[0].function is weather_function
69118
assert isinstance(deserialized_agent.tools[1]._component, PromptBuilder)
70119

120+
def test_serde_with_streaming_callback(self, weather_tool, component_tool):
121+
os.environ["FAKE_OPENAI_KEY"] = "fake-key"
122+
generator = OpenAIChatGenerator(api_key=Secret.from_env_var("FAKE_OPENAI_KEY"))
123+
agent = Agent(
124+
chat_generator=generator,
125+
tools=[weather_tool, component_tool],
126+
streaming_callback=streaming_callback_for_serde,
127+
)
128+
129+
serialized_agent = agent.to_dict()
130+
131+
init_parameters = serialized_agent["init_parameters"]
132+
assert init_parameters["streaming_callback"] == "test.components.agents.test_agent.streaming_callback_for_serde"
133+
134+
deserialized_agent = Agent.from_dict(serialized_agent)
135+
assert deserialized_agent.streaming_callback is streaming_callback_for_serde
136+
137+
def test_run_with_params_streaming(self, openai_mock_chat_completion_chunk, weather_tool):
138+
chat_generator = OpenAIChatGenerator(
139+
api_key=Secret.from_token("test-api-key")
140+
)
141+
142+
streaming_callback_called = False
143+
144+
def streaming_callback(chunk: StreamingChunk) -> None:
145+
nonlocal streaming_callback_called
146+
streaming_callback_called = True
147+
148+
149+
agent = Agent(chat_generator=chat_generator, streaming_callback=streaming_callback, tools=[weather_tool])
150+
agent.warm_up()
151+
response = agent.run([ChatMessage.from_user("Hello")])
152+
153+
# check we called the streaming callback
154+
assert streaming_callback_called is True
155+
156+
# check that the component still returns the correct response
157+
assert isinstance(response, dict)
158+
assert "messages" in response
159+
assert isinstance(response["messages"], list)
160+
assert len(response["messages"]) == 2
161+
assert [isinstance(reply, ChatMessage) for reply in response["messages"]]
162+
assert "Hello" in response["messages"][1].text # see openai_mock_chat_completion_chunk
163+
164+
165+
def test_run_with_run_streaming(self, openai_mock_chat_completion_chunk, weather_tool):
166+
chat_generator = OpenAIChatGenerator(
167+
api_key=Secret.from_token("test-api-key")
168+
)
169+
170+
streaming_callback_called = False
171+
172+
def streaming_callback(chunk: StreamingChunk) -> None:
173+
nonlocal streaming_callback_called
174+
streaming_callback_called = True
175+
176+
177+
agent = Agent(chat_generator=chat_generator, tools=[weather_tool])
178+
agent.warm_up()
179+
response = agent.run([ChatMessage.from_user("Hello")], streaming_callback=streaming_callback)
180+
181+
# check we called the streaming callback
182+
assert streaming_callback_called is True
183+
184+
# check that the component still returns the correct response
185+
assert isinstance(response, dict)
186+
assert "messages" in response
187+
assert isinstance(response["messages"], list)
188+
assert len(response["messages"]) == 2
189+
assert [isinstance(reply, ChatMessage) for reply in response["messages"]]
190+
assert "Hello" in response["messages"][1].text # see openai_mock_chat_completion_chunk
191+
192+
193+
def test_keep_generator_streaming(self, openai_mock_chat_completion_chunk, weather_tool):
194+
streaming_callback_called = False
195+
196+
def streaming_callback(chunk: StreamingChunk) -> None:
197+
nonlocal streaming_callback_called
198+
streaming_callback_called = True
199+
200+
chat_generator = OpenAIChatGenerator(
201+
api_key=Secret.from_token("test-api-key"),
202+
streaming_callback=streaming_callback,
203+
)
204+
205+
agent = Agent(chat_generator=chat_generator, tools=[weather_tool])
206+
agent.warm_up()
207+
response = agent.run([ChatMessage.from_user("Hello")])
208+
209+
# check we called the streaming callback
210+
assert streaming_callback_called is True
211+
212+
# check that the component still returns the correct response
213+
assert isinstance(response, dict)
214+
assert "messages" in response
215+
assert isinstance(response["messages"], list)
216+
assert len(response["messages"]) == 2
217+
assert [isinstance(reply, ChatMessage) for reply in response["messages"]]
218+
assert "Hello" in response["messages"][1].text # see openai_mock_chat_completion_chunk
219+
71220

0 commit comments

Comments (0)