32 changes: 26 additions & 6 deletions litellm/llms/openai/responses/transformation.py
@@ -15,7 +15,7 @@
 from litellm.types.responses.main import *
 from litellm.types.router import GenericLiteLLMParams
 from litellm.types.utils import LlmProviders
-
+from litellm.litellm_core_utils.core_helpers import process_response_headers
 from ..common_utils import OpenAIError
 
 if TYPE_CHECKING:
@@ -171,13 +171,19 @@ def transform_response_api_response(
             raise OpenAIError(
                 message=raw_response.text, status_code=raw_response.status_code
             )
+        raw_response_headers = dict(raw_response.headers)
+        processed_headers = process_response_headers(raw_response_headers)
         try:
-            return ResponsesAPIResponse(**raw_response_json)
+            response = ResponsesAPIResponse(**raw_response_json)
         except Exception:
             verbose_logger.debug(
                 f"Error constructing ResponsesAPIResponse: {raw_response_json}, using model_construct"
             )
-            return ResponsesAPIResponse.model_construct(**raw_response_json)
+            response = ResponsesAPIResponse.model_construct(**raw_response_json)
+
+        response._hidden_params["additional_headers"] = processed_headers
+        response._hidden_params["headers"] = raw_response_headers
+        return response
 
     def validate_environment(
         self, headers: dict, model: str, litellm_params: Optional[GenericLiteLLMParams]
@@ -376,14 +382,21 @@ def transform_get_response_api_response(
     ) -> ResponsesAPIResponse:
         """
         Transform the get response API response into a ResponsesAPIResponse
-        """
+        """
         try:
             raw_response_json = raw_response.json()
         except Exception:
             raise OpenAIError(
                 message=raw_response.text, status_code=raw_response.status_code
             )
-        return ResponsesAPIResponse(**raw_response_json)
+        raw_response_headers = dict(raw_response.headers)
+        processed_headers = process_response_headers(raw_response_headers)
+
+        response = ResponsesAPIResponse(**raw_response_json)
+        response._hidden_params["additional_headers"] = processed_headers
+        response._hidden_params["headers"] = raw_response_headers
+
+        return response
 
     #########################################################
     ########## LIST INPUT ITEMS TRANSFORMATION #############
@@ -460,4 +473,11 @@ def transform_cancel_response_api_response(
             raise OpenAIError(
                 message=raw_response.text, status_code=raw_response.status_code
             )
-        return ResponsesAPIResponse(**raw_response_json)
+        raw_response_headers = dict(raw_response.headers)
+        processed_headers = process_response_headers(raw_response_headers)
+
+        response = ResponsesAPIResponse(**raw_response_json)
+        response._hidden_params["additional_headers"] = processed_headers
+        response._hidden_params["headers"] = raw_response_headers
+
+        return response
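
With this change, the response, get, and cancel transformations all attach provider headers to the returned object, matching what chat completions already exposes. A minimal usage sketch (illustrative only, not part of the diff; assumes a configured OpenAI key):

```python
import litellm

# Sketch: after this PR, provider headers should be readable from _hidden_params
# on responses() results, just as they are for completion().
response = litellm.responses(model="gpt-4o", input="Say hello")

hidden_params = getattr(response, "_hidden_params", {}) or {}
processed = hidden_params.get("additional_headers", {})  # via process_response_headers
raw = hidden_params.get("headers", {})                   # raw response headers

# Rate-limit headers may appear directly or with an llm_provider- prefix.
for name in ("x-ratelimit-remaining-tokens", "llm_provider-x-ratelimit-remaining-tokens"):
    if name in processed:
        print(name, "=", processed[name])
```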
69 changes: 69 additions & 0 deletions tests/llm_responses_api_testing/test_openai_responses_api.py
@@ -189,6 +189,75 @@ async def test_basic_openai_responses_api_non_streaming_with_logging():
     )
 
 
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_openai_responses_api_returns_headers(sync_mode):
+    """
+    Test that OpenAI responses API returns OpenAI headers in _hidden_params.
+    This ensures the proxy can forward these headers to clients.
+
+    Related issue: LiteLLM responses API should return OpenAI headers like chat completions does
+    """
+    litellm._turn_on_debug()
+    litellm.set_verbose = True
+
+    if sync_mode:
+        response = litellm.responses(
+            model="gpt-4o",
+            input="Say hello",
+            max_output_tokens=20,
+        )
+    else:
+        response = await litellm.aresponses(
+            model="gpt-4o",
+            input="Say hello",
+            max_output_tokens=20,
+        )
+
+    # Verify response is valid
+    assert response is not None
+    assert isinstance(response, ResponsesAPIResponse)
+
+    # Verify _hidden_params exists
+    assert hasattr(response, "_hidden_params"), "Response should have _hidden_params attribute"
+    assert response._hidden_params is not None, "_hidden_params should not be None"
+
+    # Verify additional_headers exists in _hidden_params
+    assert "additional_headers" in response._hidden_params, \
+        "_hidden_params should contain 'additional_headers' key"
+
+    additional_headers = response._hidden_params["additional_headers"]
+    assert isinstance(additional_headers, dict), "additional_headers should be a dictionary"
+    assert len(additional_headers) > 0, "additional_headers should not be empty"
+
+    # Check for expected OpenAI rate limit headers
+    # These can be either direct (x-ratelimit-*) or prefixed (llm_provider-x-ratelimit-*)
+    rate_limit_headers = [
+        "x-ratelimit-remaining-tokens",
+        "x-ratelimit-limit-tokens",
+        "x-ratelimit-remaining-requests",
+        "x-ratelimit-limit-requests",
+    ]
+
+    found_headers = []
+    for header_name in rate_limit_headers:
+        if header_name in additional_headers:
+            found_headers.append(header_name)
+        elif f"llm_provider-{header_name}" in additional_headers:
+            found_headers.append(f"llm_provider-{header_name}")
+
+    assert len(found_headers) > 0, \
+        f"Should find at least one OpenAI rate limit header. Headers found: {list(additional_headers.keys())}"
+
+    # Verify headers key also exists (raw headers)
+    assert "headers" in response._hidden_params, \
+        "_hidden_params should contain 'headers' key with raw response headers"
+
+    print(f"✓ Successfully validated OpenAI headers in {'sync' if sync_mode else 'async'} mode")
+    print(f"  Found {len(additional_headers)} headers total")
+    print(f"  Rate limit headers found: {found_headers}")
+
+
 def validate_stream_event(event):
     """
     Validate that a streaming event from litellm.responses() or litellm.aresponses()
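The test's direct-vs-prefixed check reflects how process_response_headers (imported above from litellm.litellm_core_utils.core_helpers) reshapes provider headers. A rough approximation of the behavior the test relies on, for illustration only (not the actual implementation):

```python
# Illustrative approximation -- NOT the real process_response_headers.
# Based on the test's assertions: OpenAI rate-limit headers may surface
# directly (x-ratelimit-*) and/or with an llm_provider- prefix.
def process_response_headers_sketch(raw_headers: dict) -> dict:
    openai_headers = {
        k: v for k, v in raw_headers.items() if k.startswith("x-ratelimit-")
    }
    llm_provider_headers = {
        f"llm_provider-{k}": v for k, v in raw_headers.items()
    }
    return {**openai_headers, **llm_provider_headers}
```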
@@ -49,6 +49,7 @@ def __init__(self, json_data, status_code):
         self._json_data = json_data
         self.status_code = status_code
         self.text = json.dumps(json_data)
+        self.headers = {}
 
     def json(self):
         return self._json_data
@@ -78,6 +78,7 @@ def __init__(self, json_data, status_code):
         self._json_data = json_data
         self.status_code = status_code
         self.text = json.dumps(json_data)
+        self.headers = {}
 
     def json(self):
         return self._json_data
@@ -198,6 +199,7 @@ def __init__(self, json_data, status_code):
         self._json_data = json_data
         self.status_code = status_code
         self.text = json.dumps(json_data)
+        self.headers = {}
 
     def json(self):
         return self._json_data
@@ -326,6 +328,7 @@ def __init__(self, json_data, status_code):
         self._json_data = json_data
         self.status_code = status_code
         self.text = json.dumps(json_data)
+        self.headers = {}
 
     def json(self):
         return self._json_data
@@ -479,6 +482,7 @@ def __init__(self, json_data, status_code):
         self._json_data = json_data
         self.status_code = status_code
         self.text = json.dumps(json_data)
+        self.headers = {}
 
     def json(self):
         return self._json_data
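The one-line headers addition to each mock is load-bearing: the transformations now call dict(raw_response.headers), so any MockResponse without a headers attribute would raise AttributeError before the assertions run. The mock pattern these test files share, as a sketch:

```python
import json

class MockResponse:
    """Minimal stand-in for an httpx.Response, as used in these tests."""

    def __init__(self, json_data, status_code):
        self._json_data = json_data
        self.status_code = status_code
        self.text = json.dumps(json_data)
        # New in this PR: dict(raw_response.headers) in the transformation
        # requires a headers attribute on the mock.
        self.headers = {}

    def json(self):
        return self._json_data
```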