@@ -36,21 +36,41 @@ def monkeypatch_module():
     mpatch.undo()


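+# Run every test in this module twice: once with the tool parser enabled, once without.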
+@pytest.fixture(scope="module",
+                params=[True, False],
+                ids=["with_tool_parser", "without_tool_parser"])
+def with_tool_parser(request) -> bool:
+    return request.param
+
+
 @pytest.fixture(scope="module")
-def gptoss_server(monkeypatch_module: pytest.MonkeyPatch):
-    with monkeypatch_module.context() as m:
-        m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
-        args = [
-            "--enforce-eager",
-            "--max-model-len",
-            "8192",
+def default_server_args(with_tool_parser: bool):
+    args = [
+        # conservative settings for speed and memory savings in the CI environment
50+ "--enforce-eager" ,
51+ "--max-model-len" ,
52+ "4096" ,
53+ "--reasoning-parser" ,
54+ "openai_gptoss" ,
55+ "--gpu-memory-utilization" ,
56+ "0.8" ,
57+ ]
+    if with_tool_parser:
+        args.extend([
             "--tool-call-parser",
             "openai",
-            "--reasoning-parser",
-            "openai_gptoss",
             "--enable-auto-tool-choice",
-        ]
-        with RemoteOpenAIServer(GPT_OSS_MODEL_NAME, args) as remote_server:
+        ])
+    return args
+
+
+@pytest.fixture(scope="module")
+def gptoss_server(monkeypatch_module: pytest.MonkeyPatch,
+                  default_server_args: list[str]):
+    with monkeypatch_module.context() as m:
+        m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
+        with RemoteOpenAIServer(GPT_OSS_MODEL_NAME,
+                                default_server_args) as remote_server:
             yield remote_server

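Note: `with_tool_parser` is a module-scoped, parametrized fixture, so pytest builds the whole fixture chain (server included) once per parameter, and every test in the module runs twice. A minimal sketch of that mechanism, with hypothetical names:

```python
import pytest

@pytest.fixture(scope="module", params=[True, False], ids=["on", "off"])
def flag(request) -> bool:
    # one module-scoped fixture chain is built per param value,
    # so every dependent fixture and test executes once per value
    return request.param

def test_uses_flag(flag: bool):
    assert isinstance(flag, bool)
```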
@@ -61,7 +81,8 @@ async def gptoss_client(gptoss_server):


 @pytest.mark.asyncio
-async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI):
+async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI,
+                                                with_tool_parser: bool):
     tools = [{
         "type": "function",
         "function": {
@@ -94,10 +115,14 @@ async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI):
     ]

     stream = await gptoss_client.chat.completions.create(
-        model=GPT_OSS_MODEL_NAME, messages=messages, tools=tools, stream=True)
+        model=GPT_OSS_MODEL_NAME,
+        messages=messages,
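+        # advertise tools only when the server was started with a tool parser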
+        tools=tools if with_tool_parser else None,
+        stream=True)

     name = None
     args_buf = ""
+    content_buf = ""
     async for chunk in stream:
         delta = chunk.choices[0].delta
         if delta.tool_calls:
@@ -106,13 +131,22 @@ async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI):
                     name = tc.function.name
                 if tc.function and tc.function.arguments:
                     args_buf += tc.function.arguments
-
-    assert name is not None
-    assert len(args_buf) > 0
+        if getattr(delta, "content", None):
+            content_buf += delta.content
+    if with_tool_parser:
+        assert name is not None
+        assert len(args_buf) > 0
+    else:
+        assert name is None
+        assert len(args_buf) == 0
+        assert len(content_buf) > 0


 @pytest.mark.asyncio
-async def test_gpt_oss_multi_turn_chat(gptoss_client: OpenAI):
+async def test_gpt_oss_multi_turn_chat(gptoss_client: OpenAI,
+                                       with_tool_parser: bool):
+    if not with_tool_parser:
+        pytest.skip("multi-turn test requires the tool parser")
     tools = [{
         "type": "function",
         "function": {
@@ -175,7 +209,7 @@ async def test_gpt_oss_multi_turn_chat(gptoss_client: OpenAI):
     )
     second_msg = second.choices[0].message
     assert (second_msg.content is not None and len(second_msg.content) > 0) or \
-        (second_msg.tool_calls is not None and len(second_msg.tool_calls) > 0)  # noqa: E501
+        (second_msg.tool_calls is not None and len(second_msg.tool_calls) > 0)


 MODEL_NAME = "openai-community/gpt2"
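Since the fixture ids are `with_tool_parser` and `without_tool_parser`, either configuration can be run on its own via pytest's `-k` substring filter, e.g. `pytest -k without_tool_parser` against this file.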