
Commit b9563c1

fix: correct test cases
Signed-off-by: Aaron Pham <[email protected]>
1 parent 3b00a70 commit b9563c1

File tree: 1 file changed (+42, -17 lines)


tests/entrypoints/openai/test_serving_chat.py

Lines changed: 42 additions & 17 deletions
@@ -37,22 +37,34 @@ def monkeypatch_module():
 
 
 @pytest.fixture(scope="module")
-def gptoss_server(monkeypatch_module: pytest.MonkeyPatch):
-    with monkeypatch_module.context() as m:
-        m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
-        args = [
-            "--enforce-eager",
-            "--max-model-len",
-            "4096",
+@pytest.mark.parametrize("with_tool_parser", [True, False])
+def default_server_args(with_tool_parser):
+    args = [
+        # use half precision for speed and memory savings in CI environment
+        "--enforce-eager",
+        "--max-model-len",
+        "4096",
+        "--reasoning-parser",
+        "openai_gptoss",
+        "--gpu-memory-utilization",
+        "0.5",
+    ]
+    if with_tool_parser:
+        args.extend([
             "--tool-call-parser",
             "openai",
-            "--reasoning-parser",
-            "openai_gptoss",
             "--enable-auto-tool-choice",
-            "--gpu-memory-utilization",
-            "0.5",
-        ]
-        with RemoteOpenAIServer(GPT_OSS_MODEL_NAME, args) as remote_server:
+        ])
+    return args
+
+
+@pytest.fixture(scope="module")
+def gptoss_server(monkeypatch_module: pytest.MonkeyPatch,
+                  default_server_args: list[str]):
+    with monkeypatch_module.context() as m:
+        m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
+        with RemoteOpenAIServer(GPT_OSS_MODEL_NAME,
+                                default_server_args) as remote_server:
             yield remote_server
 

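The hunk above splits the server CLI flags into a shared base list plus tool-parser flags that are appended only when with_tool_parser is true. A minimal standalone sketch of that construction, assuming nothing beyond the flags shown in the diff (build_args is a hypothetical helper, not part of the test suite, and runs without vLLM or pytest):

def build_args(with_tool_parser: bool) -> list[str]:
    # Base flags shared by both parametrized cases.
    args = [
        "--enforce-eager",
        "--max-model-len", "4096",
        "--reasoning-parser", "openai_gptoss",
        "--gpu-memory-utilization", "0.5",
    ]
    if with_tool_parser:
        # Tool-call parsing is opt-in; these flags appear only in the True case.
        args.extend(["--tool-call-parser", "openai", "--enable-auto-tool-choice"])
    return args

# The False case is a strict prefix of the True case:
assert build_args(False) == build_args(True)[:8]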
@@ -63,7 +75,9 @@ async def gptoss_client(gptoss_server):
 
 
 @pytest.mark.asyncio
-async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI):
+@pytest.mark.parametrize("with_tool_parser", [True, False])
+async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI,
+                                                with_tool_parser: bool):
     tools = [{
         "type": "function",
         "function": {
@@ -96,10 +110,14 @@ async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI):
     ]
 
     stream = await gptoss_client.chat.completions.create(
-        model=GPT_OSS_MODEL_NAME, messages=messages, tools=tools, stream=True)
+        model=GPT_OSS_MODEL_NAME,
+        messages=messages,
+        tools=tools if with_tool_parser else None,
+        stream=True)
 
     name = None
     args_buf = ""
+    content_buf = ""
     async for chunk in stream:
         delta = chunk.choices[0].delta
         if delta.tool_calls:
@@ -108,8 +126,15 @@ async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI):
                     name = tc.function.name
                 if tc.function and tc.function.arguments:
                     args_buf += tc.function.arguments
-    assert name is not None
-    assert len(args_buf) > 0
+        if getattr(delta, "content", None):
+            content_buf += delta.content
+    if with_tool_parser:
+        assert name is not None
+        assert len(args_buf) > 0
+    else:
+        assert name is None
+        assert len(args_buf) == 0
+        assert len(content_buf) > 0
 
 
 @pytest.mark.asyncio
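
The reworked test accumulates the tool-call name, the argument fragments, and now the plain content across the stream, then branches its assertions on with_tool_parser: with the parser enabled it expects a named tool call with non-empty arguments, and without it it expects ordinary content and no tool calls. A minimal sketch of that accumulation loop against a stubbed stream, assuming simplified stand-ins for the OpenAI chunk objects (Function, ToolCall, Delta, and fake_stream below are hypothetical):

import asyncio
from dataclasses import dataclass, field
from typing import Optional

# Hypothetical stand-ins for the OpenAI streaming delta objects.
@dataclass
class Function:
    name: Optional[str] = None
    arguments: Optional[str] = None

@dataclass
class ToolCall:
    function: Optional[Function] = None

@dataclass
class Delta:
    content: Optional[str] = None
    tool_calls: list = field(default_factory=list)

async def fake_stream(with_tool_parser: bool):
    # Yields deltas directly rather than chunk.choices[0].delta, for brevity.
    if with_tool_parser:
        yield Delta(tool_calls=[ToolCall(Function(name="get_weather"))])
        yield Delta(tool_calls=[ToolCall(Function(arguments='{"city": "SF"}'))])
    else:
        yield Delta(content="It is sunny in SF.")

async def consume(with_tool_parser: bool):
    # Same accumulation pattern as the updated test body.
    name, args_buf, content_buf = None, "", ""
    async for delta in fake_stream(with_tool_parser):
        if delta.tool_calls:
            tc = delta.tool_calls[0]
            if tc.function and tc.function.name:
                name = tc.function.name
            if tc.function and tc.function.arguments:
                args_buf += tc.function.arguments
        if getattr(delta, "content", None):
            content_buf += delta.content
    return name, args_buf, content_buf

print(asyncio.run(consume(True)))   # ('get_weather', '{"city": "SF"}', '')
print(asyncio.run(consume(False)))  # (None, '', 'It is sunny in SF.')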
