@@ -234,6 +234,60 @@ async def patched_fetch_response(self, *args, **kwargs):
234234 assert resp .output [1 ].content [0 ].text == "The answer is 42"
235235
236236
@pytest.mark.allow_call_model_methods
@pytest.mark.asyncio
async def test_stream_response_preserves_usage_from_earlier_chunk(monkeypatch) -> None:
    """
    Verify that usage data reported in an early chunk survives into the final
    response when later chunks carry no usage at all. Some providers (e.g.
    LiteLLM) omit usage from most chunks, so the last chunk's ``usage=None``
    must not clobber a value seen earlier in the stream.
    """
    # First chunk carries usage; the trailing empty-delta chunk has usage=None.
    streamed_chunks = [
        create_chunk(create_content_delta("Hello"), include_usage=True),
        create_chunk(create_content_delta("")),
    ]

    async def fake_fetch_response(self, *args, **kwargs):
        # Minimal Response shell — the data under test comes from the stream.
        return (
            Response(
                id="resp-id",
                created_at=0,
                model="fake-model",
                object="response",
                output=[],
                tool_choice="none",
                tools=[],
                parallel_tool_calls=False,
            ),
            create_fake_stream(streamed_chunks),
        )

    monkeypatch.setattr(OpenAIChatCompletionsModel, "_fetch_response", fake_fetch_response)
    model = OpenAIProvider(use_responses=False).get_model("gpt-4")

    events = [
        event
        async for event in model.stream_response(
            system_instructions=None,
            input="",
            model_settings=ModelSettings(),
            tools=[],
            output_schema=None,
            handoffs=[],
            tracing=ModelTracing.DISABLED,
            previous_response_id=None,
            conversation_id=None,
            prompt=None,
        )
    ]

    # The completed event must expose the usage captured from the first chunk.
    completed = events[-1]
    assert completed.type == "response.completed"
    usage = completed.response.usage
    assert usage is not None
    assert usage.input_tokens == 2
    assert usage.output_tokens == 4
    assert usage.total_tokens == 6
290+
237291@pytest .mark .allow_call_model_methods
238292@pytest .mark .asyncio
239293async def test_stream_response_with_empty_reasoning_content (monkeypatch ) -> None :
0 commit comments