From 0ece2e931af7529e159a73a257e5b7eea39c4041 Mon Sep 17 00:00:00 2001
From: Sameer Kankute
Date: Thu, 30 Oct 2025 17:02:01 +0530
Subject: [PATCH 1/2] Add llm headers to responses api

---
 .../llms/openai/responses/transformation.py | 32 +++++++--
 .../test_openai_responses_api.py            | 69 +++++++++++++++++++
 2 files changed, 95 insertions(+), 6 deletions(-)

diff --git a/litellm/llms/openai/responses/transformation.py b/litellm/llms/openai/responses/transformation.py
index c3abd5155db1..f75213b06889 100644
--- a/litellm/llms/openai/responses/transformation.py
+++ b/litellm/llms/openai/responses/transformation.py
@@ -15,7 +15,7 @@
 from litellm.types.responses.main import *
 from litellm.types.router import GenericLiteLLMParams
 from litellm.types.utils import LlmProviders
-
+from litellm.litellm_core_utils.core_helpers import process_response_headers
 from ..common_utils import OpenAIError
 
 if TYPE_CHECKING:
@@ -171,13 +171,19 @@ def transform_response_api_response(
             raise OpenAIError(
                 message=raw_response.text, status_code=raw_response.status_code
             )
+        raw_response_headers = dict(raw_response.headers)
+        processed_headers = process_response_headers(raw_response_headers)
         try:
-            return ResponsesAPIResponse(**raw_response_json)
+            response = ResponsesAPIResponse(**raw_response_json)
         except Exception:
             verbose_logger.debug(
                 f"Error constructing ResponsesAPIResponse: {raw_response_json}, using model_construct"
             )
-            return ResponsesAPIResponse.model_construct(**raw_response_json)
+            response = ResponsesAPIResponse.model_construct(**raw_response_json)
+
+        response._hidden_params["additional_headers"] = processed_headers
+        response._hidden_params["headers"] = raw_response_headers
+        return response
 
     def validate_environment(
         self, headers: dict, model: str, litellm_params: Optional[GenericLiteLLMParams]
@@ -376,14 +382,21 @@ def transform_get_response_api_response(
     ) -> ResponsesAPIResponse:
         """
         Transform the get response API response into a ResponsesAPIResponse
-        """
+        """
         try:
             raw_response_json = raw_response.json()
         except Exception:
             raise OpenAIError(
                 message=raw_response.text, status_code=raw_response.status_code
             )
-        return ResponsesAPIResponse(**raw_response_json)
+        raw_response_headers = dict(raw_response.headers)
+        processed_headers = process_response_headers(raw_response_headers)
+
+        response = ResponsesAPIResponse(**raw_response_json)
+        response._hidden_params["additional_headers"] = processed_headers
+        response._hidden_params["headers"] = raw_response_headers
+
+        return response
 
 #########################################################
 ########## LIST INPUT ITEMS TRANSFORMATION #############
@@ -460,4 +473,11 @@ def transform_cancel_response_api_response(
             raise OpenAIError(
                 message=raw_response.text, status_code=raw_response.status_code
             )
-        return ResponsesAPIResponse(**raw_response_json)
+        raw_response_headers = dict(raw_response.headers)
+        processed_headers = process_response_headers(raw_response_headers)
+
+        response = ResponsesAPIResponse(**raw_response_json)
+        response._hidden_params["additional_headers"] = processed_headers
+        response._hidden_params["headers"] = raw_response_headers
+
+        return response
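For context on what the transformation change buys callers: after this patch, reading the forwarded headers from a Responses API call mirrors the existing chat-completions behavior. A minimal sketch follows (the `gpt-4o` model name and the exact `x-ratelimit-*` keys are illustrative assumptions; which headers are actually present depends on the provider):

    import litellm

    # After this patch, the Responses API attaches the provider's HTTP
    # response headers to _hidden_params, like chat completions already does.
    response = litellm.responses(
        model="gpt-4o",            # assumed model; any OpenAI model works
        input="Say hello",
        max_output_tokens=20,
    )

    # "headers" carries the raw headers exactly as returned by the provider.
    raw_headers = response._hidden_params["headers"]

    # "additional_headers" carries the copy produced by
    # process_response_headers(); rate-limit keys may appear bare or under
    # an llm_provider- prefix, so check both.
    processed = response._hidden_params["additional_headers"]
    remaining = processed.get(
        "x-ratelimit-remaining-tokens",
        processed.get("llm_provider-x-ratelimit-remaining-tokens"),
    )
    print(f"Remaining tokens: {remaining}")

Storing both mappings lets the proxy forward the provider's headers verbatim while clients that only know the processed view keep working.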
diff --git a/tests/llm_responses_api_testing/test_openai_responses_api.py b/tests/llm_responses_api_testing/test_openai_responses_api.py
index 70624b02d143..21d2f82c9846 100644
--- a/tests/llm_responses_api_testing/test_openai_responses_api.py
+++ b/tests/llm_responses_api_testing/test_openai_responses_api.py
@@ -189,6 +189,75 @@ async def test_basic_openai_responses_api_non_streaming_with_logging():
     )
 
 
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_openai_responses_api_returns_headers(sync_mode):
+    """
+    Test that OpenAI responses API returns OpenAI headers in _hidden_params.
+    This ensures the proxy can forward these headers to clients.
+
+    Related issue: LiteLLM responses API should return OpenAI headers like chat completions does
+    """
+    litellm._turn_on_debug()
+    litellm.set_verbose = True
+
+    if sync_mode:
+        response = litellm.responses(
+            model="gpt-4o",
+            input="Say hello",
+            max_output_tokens=20,
+        )
+    else:
+        response = await litellm.aresponses(
+            model="gpt-4o",
+            input="Say hello",
+            max_output_tokens=20,
+        )
+
+    # Verify response is valid
+    assert response is not None
+    assert isinstance(response, ResponsesAPIResponse)
+
+    # Verify _hidden_params exists
+    assert hasattr(response, "_hidden_params"), "Response should have _hidden_params attribute"
+    assert response._hidden_params is not None, "_hidden_params should not be None"
+
+    # Verify additional_headers exists in _hidden_params
+    assert "additional_headers" in response._hidden_params, \
+        "_hidden_params should contain 'additional_headers' key"
+
+    additional_headers = response._hidden_params["additional_headers"]
+    assert isinstance(additional_headers, dict), "additional_headers should be a dictionary"
+    assert len(additional_headers) > 0, "additional_headers should not be empty"
+
+    # Check for expected OpenAI rate limit headers
+    # These can be either direct (x-ratelimit-*) or prefixed (llm_provider-x-ratelimit-*)
+    rate_limit_headers = [
+        "x-ratelimit-remaining-tokens",
+        "x-ratelimit-limit-tokens",
+        "x-ratelimit-remaining-requests",
+        "x-ratelimit-limit-requests",
+    ]
+
+    found_headers = []
+    for header_name in rate_limit_headers:
+        if header_name in additional_headers:
+            found_headers.append(header_name)
+        elif f"llm_provider-{header_name}" in additional_headers:
+            found_headers.append(f"llm_provider-{header_name}")
+
+    assert len(found_headers) > 0, \
+        f"Should find at least one OpenAI rate limit header. Headers found: {list(additional_headers.keys())}"
+
+    # Verify headers key also exists (raw headers)
+    assert "headers" in response._hidden_params, \
+        "_hidden_params should contain 'headers' key with raw response headers"
+
+    print(f"✓ Successfully validated OpenAI headers in {'sync' if sync_mode else 'async'} mode")
+    print(f"  Found {len(additional_headers)} headers total")
+    print(f"  Rate limit headers found: {found_headers}")
+
+
 def validate_stream_event(event):
     """
     Validate that a streaming event from litellm.responses() or litellm.aresponses()
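The test accepts both bare and `llm_provider-`-prefixed rate-limit keys because `process_response_headers` normalizes the raw mapping before it lands in `_hidden_params["additional_headers"]`. The helper below is a hypothetical stand-in that sketches only the shape the test relies on; litellm's real implementation lives in `litellm.litellm_core_utils.core_helpers` and may differ in detail:

    # Hypothetical sketch, not litellm's actual helper: pass through the
    # rate-limit keys the test checks for, and namespace every other header
    # under an llm_provider- prefix.
    RATE_LIMIT_KEYS = {
        "x-ratelimit-limit-requests",
        "x-ratelimit-remaining-requests",
        "x-ratelimit-limit-tokens",
        "x-ratelimit-remaining-tokens",
    }

    def process_response_headers_sketch(raw_headers: dict) -> dict:
        processed = {}
        for key, value in raw_headers.items():
            if key.lower() in RATE_LIMIT_KEYS:
                processed[key] = value                    # keep as-is
            else:
                processed[f"llm_provider-{key}"] = value  # namespace the rest
        return processed

Either outcome satisfies the test's lookup, which is why it probes each key both ways before asserting.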
From 81c2f8339f1fecc8b074299d97cafa3dab289371 Mon Sep 17 00:00:00 2001
From: Sameer Kankute
Date: Fri, 31 Oct 2025 18:18:56 +0530
Subject: [PATCH 2/2] fix mock test

---
 tests/test_litellm/responses/test_text_format_conversion.py | 1 +
 .../pre_call_checks/test_responses_api_deployment_check.py  | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/tests/test_litellm/responses/test_text_format_conversion.py b/tests/test_litellm/responses/test_text_format_conversion.py
index 20a87a4abbbb..645f0f2e1480 100644
--- a/tests/test_litellm/responses/test_text_format_conversion.py
+++ b/tests/test_litellm/responses/test_text_format_conversion.py
@@ -49,6 +49,7 @@ def __init__(self, json_data, status_code):
         self._json_data = json_data
         self.status_code = status_code
         self.text = json.dumps(json_data)
+        self.headers = {}
 
     def json(self):
         return self._json_data
diff --git a/tests/test_litellm/router_utils/pre_call_checks/test_responses_api_deployment_check.py b/tests/test_litellm/router_utils/pre_call_checks/test_responses_api_deployment_check.py
index e2abed2d7f01..3c6a05e77861 100644
--- a/tests/test_litellm/router_utils/pre_call_checks/test_responses_api_deployment_check.py
+++ b/tests/test_litellm/router_utils/pre_call_checks/test_responses_api_deployment_check.py
@@ -78,6 +78,7 @@ def __init__(self, json_data, status_code):
         self._json_data = json_data
         self.status_code = status_code
         self.text = json.dumps(json_data)
+        self.headers = {}
 
     def json(self):
         return self._json_data
@@ -198,6 +199,7 @@ def __init__(self, json_data, status_code):
         self._json_data = json_data
         self.status_code = status_code
         self.text = json.dumps(json_data)
+        self.headers = {}
 
     def json(self):
         return self._json_data
@@ -326,6 +328,7 @@ def __init__(self, json_data, status_code):
         self._json_data = json_data
         self.status_code = status_code
         self.text = json.dumps(json_data)
+        self.headers = {}
 
     def json(self):
         return self._json_data
@@ -479,6 +482,7 @@ def __init__(self, json_data, status_code):
         self._json_data = json_data
         self.status_code = status_code
         self.text = json.dumps(json_data)
+        self.headers = {}
 
     def json(self):
         return self._json_data
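The second commit follows directly from the first: `transform_response_api_response` now calls `dict(raw_response.headers)`, so any stub standing in for an httpx response must expose a `headers` attribute or the mocked tests fail with an AttributeError. Every patched test file repeats the same fix; condensed below, with the `json` import added so the sketch runs standalone:

    import json

    class MockResponse:
        def __init__(self, json_data, status_code):
            self._json_data = json_data
            self.status_code = status_code
            self.text = json.dumps(json_data)
            # Required since the transformation now reads raw_response.headers.
            self.headers = {}

        def json(self):
            return self._json_data

An empty mapping is sufficient here because these tests never assert on header contents; they only need the attribute lookup to succeed.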