Commit 0d0c929

[responsesAPI][8] input/output messages for ResponsesParser (#30158)

Authored by Andrew Xia (qandrew) and Chauncey (chaunceyjiang)

Signed-off-by: Andrew Xia <[email protected]>
Co-authored-by: Andrew Xia <[email protected]>
Co-authored-by: Chauncey <[email protected]>

1 parent e94384b, commit 0d0c929

File tree

5 files changed: +74 additions, -44 deletions

- tests/entrypoints/openai/test_response_api_parsable_context.py
- vllm/entrypoints/context.py
- vllm/entrypoints/openai/parser/responses_parser.py
- vllm/entrypoints/openai/serving_responses.py
- vllm/entrypoints/responses_utils.py

tests/entrypoints/openai/test_response_api_parsable_context.py
Lines changed: 6 additions & 0 deletions

```diff
@@ -165,6 +165,7 @@ async def test_mcp_tool_call(client: OpenAI, model_name: str):
         model=model_name,
         input="What is 13 * 24? Use python to calculate the result.",
         tools=[{"type": "code_interpreter", "container": {"type": "auto"}}],
+        extra_body={"enable_response_messages": True},
         temperature=0.0,
     )
 
@@ -178,3 +179,8 @@ async def test_mcp_tool_call(client: OpenAI, model_name: str):
     # make sure the correct math is in the final output
     assert response.output[3].type == "message"
     assert "312" in response.output[3].content[0].text
+
+    # test raw input_messages / output_messages
+    assert len(response.input_messages) == 1
+    assert len(response.output_messages) == 3
+    assert "312" in response.output_messages[2]["message"]
```

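Because `enable_response_messages` is a vLLM-specific extension rather than an upstream OpenAI parameter, callers pass it through `extra_body`, exactly as the test does. Below is a minimal usage sketch; the server URL, API key, and model name are placeholder assumptions, not part of this commit:

```python
# Sketch: requesting raw input/output messages from a vLLM server that
# exposes the Responses API. The base_url, api_key, and model name are
# placeholders; adjust them to your deployment.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.responses.create(
    model="openai/gpt-oss-20b",
    input="What is 13 * 24? Use python to calculate the result.",
    tools=[{"type": "code_interpreter", "container": {"type": "auto"}}],
    # vLLM extension: the upstream SDK does not model this flag, so it
    # has to travel in extra_body.
    extra_body={"enable_response_messages": True},
    temperature=0.0,
)

# Each raw message pairs the rendered text with its token ids, mirroring
# ResponseRawMessageAndToken(message=..., tokens=...) in context.py.
for msg in response.output_messages:
    print(msg["message"][:80], "|", len(msg["tokens"]), "tokens")
```
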
vllm/entrypoints/context.py
Lines changed: 28 additions & 0 deletions

```diff
@@ -297,12 +297,40 @@ def __init__(
         self.chat_template = chat_template
         self.chat_template_content_format = chat_template_content_format
 
+        self.input_messages: list[ResponseRawMessageAndToken] = []
+        self.output_messages: list[ResponseRawMessageAndToken] = []
+
     def append_output(self, output: RequestOutput) -> None:
         self.num_prompt_tokens = len(output.prompt_token_ids or [])
         self.num_cached_tokens = output.num_cached_tokens or 0
         self.num_output_tokens += len(output.outputs[0].token_ids or [])
         self.parser.process(output.outputs[0])
 
+        # only store if enable_response_messages is True, save memory
+        if self.request.enable_response_messages:
+            output_prompt = output.prompt or ""
+            output_prompt_token_ids = output.prompt_token_ids or []
+            if len(self.input_messages) == 0:
+                self.input_messages.append(
+                    ResponseRawMessageAndToken(
+                        message=output_prompt,
+                        tokens=output_prompt_token_ids,
+                    )
+                )
+            else:
+                self.output_messages.append(
+                    ResponseRawMessageAndToken(
+                        message=output_prompt,
+                        tokens=output_prompt_token_ids,
+                    )
+                )
+            self.output_messages.append(
+                ResponseRawMessageAndToken(
+                    message=output.outputs[0].text,
+                    tokens=output.outputs[0].token_ids,
+                )
+            )
+
     def append_tool_output(self, output: list[ResponseInputOutputItem]) -> None:
         self.parser.response_messages.extend(output)
 
```

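This bookkeeping is what the test above asserts: each engine step re-renders the full conversation as its prompt, so the first prompt is captured once as the input message, while every later prompt (which now embeds the tool results) and every generated completion are appended to `output_messages`. One tool round-trip therefore yields 1 input message and 3 output messages. A self-contained sketch of the accounting, with a plain dataclass standing in for vLLM's `ResponseRawMessageAndToken` and invented placeholder strings instead of real chat-template output:

```python
# Simplified stand-alone model of the accounting in append_output above.
from dataclasses import dataclass, field


@dataclass
class RawMessage:  # stand-in for ResponseRawMessageAndToken
    message: str
    tokens: list[int] = field(default_factory=list)


input_messages: list[RawMessage] = []
output_messages: list[RawMessage] = []


def append_output(prompt: str, completion: str) -> None:
    if not input_messages:
        # First engine step: the prompt is the user's rendered input.
        input_messages.append(RawMessage(prompt))
    else:
        # Later steps: the prompt re-renders the whole conversation,
        # including tool results, so it belongs on the output side.
        output_messages.append(RawMessage(prompt))
    output_messages.append(RawMessage(completion))


append_output("user: What is 13 * 24?", "tool call: python(13 * 24)")
append_output("user: ... tool output: 312", "13 * 24 = 312")

assert len(input_messages) == 1
assert len(output_messages) == 3  # completion 1, prompt 2, completion 2
assert "312" in output_messages[2].message
```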
vllm/entrypoints/openai/parser/responses_parser.py
Lines changed: 37 additions & 1 deletion

```diff
@@ -3,14 +3,19 @@
 import logging
 from collections.abc import Callable
 
-from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
+from openai.types.responses import ResponseFunctionToolCall, ResponseOutputItem
+from openai.types.responses.response_function_tool_call_output_item import (
+    ResponseFunctionToolCallOutputItem,
+)
+from openai.types.responses.response_output_item import McpCall
 from openai.types.responses.response_output_message import ResponseOutputMessage
 from openai.types.responses.response_output_text import ResponseOutputText
 from openai.types.responses.response_reasoning_item import (
     Content,
     ResponseReasoningItem,
 )
 
+from vllm.entrypoints.constants import MCP_PREFIX
 from vllm.entrypoints.openai.protocol import ResponseInputOutputItem, ResponsesRequest
 from vllm.outputs import CompletionOutput
 from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
@@ -111,6 +116,37 @@ def process(self, output: CompletionOutput) -> "ResponsesParser":
 
         return self
 
+    def make_response_output_items_from_parsable_context(
+        self,
+    ) -> list[ResponseOutputItem]:
+        """Given a list of sentences, construct ResponseOutput Items."""
+        response_messages = self.response_messages[self.num_init_messages :]
+        output_messages: list[ResponseOutputItem] = []
+        for message in response_messages:
+            if not isinstance(message, ResponseFunctionToolCallOutputItem):
+                output_messages.append(message)
+            else:
+                if len(output_messages) == 0:
+                    raise ValueError(
+                        "Cannot have a FunctionToolCallOutput before FunctionToolCall."
+                    )
+                if isinstance(output_messages[-1], ResponseFunctionToolCall):
+                    mcp_message = McpCall(
+                        id=f"{MCP_PREFIX}{random_uuid()}",
+                        arguments=output_messages[-1].arguments,
+                        name=output_messages[-1].name,
+                        server_label=output_messages[
+                            -1
+                        ].name,  # TODO: store the server label
+                        type="mcp_call",
+                        status="completed",
+                        output=message.output,
+                        # TODO: support error output
+                    )
+                    output_messages[-1] = mcp_message
+
+        return output_messages
+
 
 def get_responses_parser_for_simple_context(
     *,
```

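The folding pass pairs each tool-call output with the `ResponseFunctionToolCall` immediately before it and replaces that pair with one completed `mcp_call` item, so callers never see a dangling tool output. A simplified, runnable sketch of the same pass, with plain dataclasses standing in for the openai-python types:

```python
# Simplified model of make_response_output_items_from_parsable_context.
# The dataclasses are stand-ins for ResponseFunctionToolCall,
# ResponseFunctionToolCallOutputItem, and McpCall.
from dataclasses import dataclass


@dataclass
class ToolCall:
    name: str
    arguments: str


@dataclass
class ToolCallOutput:
    output: str


@dataclass
class McpCallItem:
    name: str
    arguments: str
    output: str
    status: str = "completed"


def fold(messages: list) -> list:
    out: list = []
    for msg in messages:
        if not isinstance(msg, ToolCallOutput):
            out.append(msg)
            continue
        if not out:
            raise ValueError(
                "Cannot have a FunctionToolCallOutput before FunctionToolCall."
            )
        if isinstance(out[-1], ToolCall):
            # Collapse the call/output pair into one completed item.
            out[-1] = McpCallItem(out[-1].name, out[-1].arguments, msg.output)
    return out


items = [ToolCall("python", "13 * 24"), ToolCallOutput("312"), "The answer is 312."]
print(fold(items))
# [McpCallItem(name='python', arguments='13 * 24', output='312',
#  status='completed'), 'The answer is 312.']
```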
vllm/entrypoints/openai/serving_responses.py
Lines changed: 3 additions & 10 deletions

```diff
@@ -104,7 +104,6 @@
     construct_input_messages,
     construct_tool_dicts,
     extract_tool_types,
-    make_response_output_items_from_parsable_context,
 )
 from vllm.entrypoints.tool_server import ToolServer
 from vllm.inputs.data import TokensPrompt
@@ -658,17 +657,11 @@ async def responses_full_generator(
         else:
             status = "incomplete"
     elif isinstance(context, ParsableContext):
-        response_messages = context.parser.response_messages[
-            context.parser.num_init_messages :
-        ]
-        output = make_response_output_items_from_parsable_context(response_messages)
+        output = context.parser.make_response_output_items_from_parsable_context()
 
-        # TODO: context for non-gptoss models doesn't use messages
-        # so we can't get them out yet
         if request.enable_response_messages:
-            raise NotImplementedError(
-                "enable_response_messages is currently only supported for gpt-oss"
-            )
+            input_messages = context.input_messages
+            output_messages = context.output_messages
 
     # TODO: Calculate usage.
     # assert final_res.prompt_token_ids is not None
```

vllm/entrypoints/responses_utils.py
Lines changed: 0 additions & 33 deletions

```diff
@@ -16,7 +16,6 @@
 from openai.types.responses.response_function_tool_call_output_item import (
     ResponseFunctionToolCallOutputItem,
 )
-from openai.types.responses.response_output_item import McpCall
 from openai.types.responses.response_output_message import ResponseOutputMessage
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 from openai.types.responses.tool import Tool
@@ -27,38 +26,6 @@
     ChatCompletionMessageParam,
     ResponseInputOutputItem,
 )
-from vllm.utils import random_uuid
-
-
-def make_response_output_items_from_parsable_context(
-    response_messages: list[ResponseInputOutputItem],
-) -> list[ResponseOutputItem]:
-    """Given a list of sentences, construct ResponseOutput Items."""
-    output_messages: list[ResponseOutputItem] = []
-    for message in response_messages:
-        if not isinstance(message, ResponseFunctionToolCallOutputItem):
-            output_messages.append(message)
-        else:
-            if len(output_messages) == 0:
-                raise ValueError(
-                    "Cannot have a FunctionToolCallOutput before FunctionToolCall."
-                )
-            if isinstance(output_messages[-1], ResponseFunctionToolCall):
-                mcp_message = McpCall(
-                    id=f"{MCP_PREFIX}{random_uuid()}",
-                    arguments=output_messages[-1].arguments,
-                    name=output_messages[-1].name,
-                    server_label=output_messages[
-                        -1
-                    ].name,  # TODO: store the server label
-                    type=f"{MCP_PREFIX}call",
-                    status="completed",
-                    output=message.output,
-                    # TODO: support error output
-                )
-                output_messages[-1] = mcp_message
-
-    return output_messages
 
 
 def construct_input_messages(
```
