@@ -37,22 +37,38 @@ def monkeypatch_module():
 
 
+@pytest.fixture(scope="module", params=[True, False])
+def with_tool_parser(request: pytest.FixtureRequest) -> bool:
+    return request.param
+
+
 @pytest.fixture(scope="module")
-def gptoss_server(monkeypatch_module: pytest.MonkeyPatch):
-    with monkeypatch_module.context() as m:
-        m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
-        args = [
-            "--enforce-eager",
-            "--max-model-len",
-            "4096",
+def default_server_args(with_tool_parser: bool) -> list[str]:
+    args = [
+        # lightweight settings for speed and memory savings in CI environment
+        "--enforce-eager",
+        "--max-model-len",
+        "4096",
+        "--reasoning-parser",
+        "openai_gptoss",
+        "--gpu-memory-utilization",
+        "0.5",
+    ]
+    if with_tool_parser:
+        args.extend([
             "--tool-call-parser",
             "openai",
-            "--reasoning-parser",
-            "openai_gptoss",
             "--enable-auto-tool-choice",
-            "--gpu-memory-utilization",
-            "0.5",
-        ]
-        with RemoteOpenAIServer(GPT_OSS_MODEL_NAME, args) as remote_server:
+        ])
+    return args
+
+
+@pytest.fixture(scope="module")
+def gptoss_server(monkeypatch_module: pytest.MonkeyPatch,
+                  default_server_args: list[str]):
+    with monkeypatch_module.context() as m:
+        m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
+        with RemoteOpenAIServer(GPT_OSS_MODEL_NAME,
+                                default_server_args) as remote_server:
             yield remote_server
 
 
@@ -63,7 +79,8 @@ async def gptoss_client(gptoss_server):
 
 
 @pytest.mark.asyncio
-async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI):
+async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI,
+                                                with_tool_parser: bool):
     tools = [{
         "type": "function",
         "function": {
@@ -96,10 +113,14 @@ async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI):
     ]
 
     stream = await gptoss_client.chat.completions.create(
-        model=GPT_OSS_MODEL_NAME, messages=messages, tools=tools, stream=True)
+        model=GPT_OSS_MODEL_NAME,
+        messages=messages,
+        tools=tools if with_tool_parser else None,
+        stream=True)
 
     name = None
     args_buf = ""
+    content_buf = ""
     async for chunk in stream:
         delta = chunk.choices[0].delta
         if delta.tool_calls:
@@ -108,8 +129,15 @@ async def test_gpt_oss_chat_tool_call_streaming(gptoss_client: OpenAI):
                 name = tc.function.name
             if tc.function and tc.function.arguments:
                 args_buf += tc.function.arguments
-    assert name is not None
-    assert len(args_buf) > 0
+        if getattr(delta, "content", None):
+            content_buf += delta.content
+    if with_tool_parser:
+        assert name is not None
+        assert len(args_buf) > 0
+    else:
+        assert name is None
+        assert len(args_buf) == 0
+        assert len(content_buf) > 0
 
 
 @pytest.mark.asyncio
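For reference, the fixture wiring above relies on pytest's `params=` mechanism: a module-scoped fixture parametrized via `params=[True, False]` (rather than `@pytest.mark.parametrize`, which has no effect on fixtures) makes pytest build one server per parameter value and run every dependent test against each. Below is a minimal self-contained sketch of that pattern; the names (`with_feature`, `server_args`, the flags) are hypothetical stand-ins, not the test suite's own identifiers.

import pytest


@pytest.fixture(scope="module", params=[True, False])
def with_feature(request: pytest.FixtureRequest) -> bool:
    # One module-scoped instance per param value; dependent
    # fixtures and tests run once for each.
    return request.param


@pytest.fixture(scope="module")
def server_args(with_feature: bool) -> list[str]:
    # Hypothetical flags, standing in for the real server args.
    args = ["--base-flag"]
    if with_feature:
        args.append("--feature-flag")
    return args


def test_server_args(server_args: list[str], with_feature: bool):
    # Collected twice: test_server_args[True] and test_server_args[False].
    assert ("--feature-flag" in server_args) == with_feature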