4 changes: 2 additions & 2 deletions README.md
@@ -51,14 +51,14 @@ try:
         model="gpt-5",
         messages=[{"role": "user", "content": "Hello world"}],
     )
-    print(chat.llm_response.choices[0].message.content)
+    print(chat.choices[0].message.content)

     # Or with the Responses API
     resp = client.responses.create(
         model="gpt-5",
         input="What are the main features of your premium plan?",
     )
-    print(resp.llm_response.output_text)
+    print(resp.output_text)
 except GuardrailTripwireTriggered as e:
     print(f"Guardrail triggered: {e}")
 ```
2 changes: 1 addition & 1 deletion docs/index.md
@@ -35,7 +35,7 @@ response = await client.responses.create(
     input="Hello"
 )
 # Guardrails run automatically
-print(response.llm_response.output_text)
+print(response.output_text)
 ```

 ## Next Steps
8 changes: 4 additions & 4 deletions docs/quickstart.md
@@ -70,16 +70,16 @@ async def main():
             input="Hello world"
         )

-        # Access OpenAI response via .llm_response
-        print(response.llm_response.output_text)
+        # Access OpenAI response attributes directly
+        print(response.output_text)

     except GuardrailTripwireTriggered as exc:
         print(f"Guardrail triggered: {exc.guardrail_result.info}")

 asyncio.run(main())
 ```

-**That's it!** Your existing OpenAI code now includes automatic guardrail validation based on your pipeline configuration. Just use `response.llm_response` instead of `response`.
+**That's it!** Your existing OpenAI code now includes automatic guardrail validation based on your pipeline configuration. The response object acts as a drop-in replacement for OpenAI responses with added guardrail results.

 ## Multi-Turn Conversations

@@ -98,7 +98,7 @@ while True:
         model="gpt-4o"
     )

-    response_content = response.llm_response.choices[0].message.content
+    response_content = response.choices[0].message.content
    print(f"Assistant: {response_content}")

     # ✅ Only append AFTER guardrails pass
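The quickstart's "drop-in replacement" claim reduces to the pattern below. A minimal sketch, assuming a top-level `guardrails` import, a `config=` keyword, and a pipeline file named `guardrails_config.json` (none of these are confirmed by this diff):

```python
# Minimal sketch of the flattened access pattern after this change.
# The config path, config= keyword, and model name are illustrative
# assumptions, not taken from this PR.
import asyncio

from guardrails import GuardrailsAsyncOpenAI, GuardrailTripwireTriggered


async def main() -> None:
    client = GuardrailsAsyncOpenAI(config="guardrails_config.json")
    try:
        response = await client.responses.create(
            model="gpt-4.1-mini",
            input="Hello world",
        )
        # Attributes resolve directly, as on a native OpenAI response
        print(response.output_text)
        # Guardrail results stay available on the wrapper
        print(f"Checks run: {len(response.guardrail_results.all_results)}")
    except GuardrailTripwireTriggered as exc:
        print(f"Guardrail triggered: {exc.guardrail_result.info}")


asyncio.run(main())
```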
2 changes: 1 addition & 1 deletion docs/ref/checks/hallucination_detection.md
@@ -76,7 +76,7 @@ response = await client.responses.create(
 )

 # Guardrails automatically validate against your reference documents
-print(response.llm_response.output_text)
+print(response.output_text)
 ```

 ### How It Works
2 changes: 1 addition & 1 deletion docs/tripwires.md
@@ -25,7 +25,7 @@ try:
         model="gpt-5",
         input="Tell me a secret"
     )
-    print(response.llm_response.output_text)
+    print(response.output_text)

 except GuardrailTripwireTriggered as exc:
     print(f"Guardrail triggered: {exc.guardrail_result.info}")
2 changes: 1 addition & 1 deletion examples/basic/azure_implementation.py
@@ -75,7 +75,7 @@ async def process_input(
     )

     # Extract the response content from the GuardrailsResponse
-    response_text = response.llm_response.choices[0].message.content
+    response_text = response.choices[0].message.content

     # Only show output if all guardrails pass
     print(f"\nAssistant: {response_text}")
6 changes: 2 additions & 4 deletions examples/basic/hello_world.py
@@ -48,14 +48,12 @@ async def process_input(
             model="gpt-4.1-mini",
             previous_response_id=response_id,
         )
-
-        console.print(f"\nAssistant output: {response.llm_response.output_text}", end="\n\n")
-
+        console.print(f"\nAssistant output: {response.output_text}", end="\n\n")
         # Show guardrail results if any were run
         if response.guardrail_results.all_results:
             console.print(f"[dim]Guardrails checked: {len(response.guardrail_results.all_results)}[/dim]")

-        return response.llm_response.id
+        return response.id

     except GuardrailTripwireTriggered:
         raise
2 changes: 1 addition & 1 deletion examples/basic/local_model.py
@@ -48,7 +48,7 @@ async def process_input(
     )

     # Access response content using standard OpenAI API
-    response_content = response.llm_response.choices[0].message.content
+    response_content = response.choices[0].message.content
     console.print(f"\nAssistant output: {response_content}", end="\n\n")

     # Add to conversation history
10 changes: 5 additions & 5 deletions examples/basic/multi_bundle.py
@@ -66,15 +66,15 @@ async def process_input(
     with Live(output_text, console=console, refresh_per_second=10) as live:
         try:
             async for chunk in stream:
-                # Access streaming response exactly like native OpenAI API through .llm_response
-                if hasattr(chunk.llm_response, "delta") and chunk.llm_response.delta:
-                    output_text += chunk.llm_response.delta
+                # Access streaming response exactly like native OpenAI API (flattened)
+                if hasattr(chunk, "delta") and chunk.delta:
+                    output_text += chunk.delta
                     live.update(output_text)

             # Get the response ID from the final chunk
             response_id_to_return = None
-            if hasattr(chunk.llm_response, "response") and hasattr(chunk.llm_response.response, "id"):
-                response_id_to_return = chunk.llm_response.response.id
+            if hasattr(chunk, "response") and hasattr(chunk.response, "id"):
+                response_id_to_return = chunk.response.id

             return response_id_to_return

4 changes: 2 additions & 2 deletions examples/basic/multiturn_chat_with_alignment.py
@@ -235,7 +235,7 @@ async def main(malicious: bool = False) -> None:
         tools=tools,
     )
     print_guardrail_results("initial", resp)
-    choice = resp.llm_response.choices[0]
+    choice = resp.choices[0]
     message = choice.message
     tool_calls = getattr(message, "tool_calls", []) or []

@@ -327,7 +327,7 @@ async def main(malicious: bool = False) -> None:
     )

     print_guardrail_results("final", resp)
-    final_message = resp.llm_response.choices[0].message
+    final_message = resp.choices[0].message
     console.print(
         Panel(
             final_message.content or "(no output)",
2 changes: 1 addition & 1 deletion examples/basic/pii_mask_example.py
@@ -90,7 +90,7 @@ async def process_input(
     )

     # Show the LLM response (already masked if PII was detected)
-    content = response.llm_response.choices[0].message.content
+    content = response.choices[0].message.content
     console.print(f"\n[bold blue]Assistant output:[/bold blue] {content}\n")

     # Show PII masking information if detected in pre-flight
4 changes: 2 additions & 2 deletions examples/basic/structured_outputs_example.py
@@ -56,11 +56,11 @@ async def extract_user_info(
         )

         # Access the parsed structured output
-        user_info = response.llm_response.output_parsed
+        user_info = response.output_parsed
         print(f"✅ Successfully extracted: {user_info.name}, {user_info.age}, {user_info.email}")

         # Return user info and response ID (only returned if guardrails pass)
-        return user_info, response.llm_response.id
+        return user_info, response.id

     except GuardrailTripwireTriggered:
         # Guardrail blocked - no response ID returned, conversation history unchanged
4 changes: 2 additions & 2 deletions examples/basic/suppress_tripwire.py
@@ -68,8 +68,8 @@ async def process_input(
         else:
             console.print("[bold green]No guardrails triggered.[/bold green]")

-        console.print(f"\n[bold blue]Assistant output:[/bold blue] {response.llm_response.output_text}\n")
-        return response.llm_response.id
+        console.print(f"\n[bold blue]Assistant output:[/bold blue] {response.output_text}\n")
+        return response.id

     except Exception as e:
         console.print(f"[bold red]Error: {e}[/bold red]")
@@ -52,7 +52,7 @@ async def main():
         model="gpt-4.1-mini",
     )

-    response_content = response.llm_response.choices[0].message.content
+    response_content = response.choices[0].message.content
     console.print(
         Panel(
             f"[bold green]Tripwire not triggered[/bold green]\n\nResponse: {response_content}",
@@ -25,7 +25,7 @@ async def process_input(
         model="gpt-4.1-mini",
     )

-    response_content = response.llm_response.choices[0].message.content
+    response_content = response.choices[0].message.content
     print(f"\nAssistant: {response_content}")

     # Guardrails passed - now safe to add to conversation history
4 changes: 2 additions & 2 deletions examples/implementation_code/blocking/blocking_responses.py
@@ -18,9 +18,9 @@ async def process_input(guardrails_client: GuardrailsAsyncOpenAI, user_input: st
         # including pre-flight, input, and output stages, plus the LLM call
         response = await guardrails_client.responses.create(input=user_input, model="gpt-4.1-mini", previous_response_id=response_id)

-        print(f"\nAssistant: {response.llm_response.output_text}")
+        print(f"\nAssistant: {response.output_text}")

-        return response.llm_response.id
+        return response.id

     except GuardrailTripwireTriggered:
         # GuardrailsClient automatically handles tripwire exceptions
@@ -30,8 +30,8 @@ async def process_input(
         # Stream with output guardrail checks and accumulate response
         response_content = ""
         async for chunk in stream:
-            if chunk.llm_response.choices[0].delta.content:
-                delta = chunk.llm_response.choices[0].delta.content
+            if chunk.choices[0].delta.content:
+                delta = chunk.choices[0].delta.content
                 print(delta, end="", flush=True)
                 response_content += delta

10 changes: 5 additions & 5 deletions examples/implementation_code/streaming/streaming_responses.py
@@ -26,15 +26,15 @@ async def process_input(guardrails_client: GuardrailsAsyncOpenAI, user_input: st

     # Stream with output guardrail checks
     async for chunk in stream:
-        # Access streaming response exactly like native OpenAI API through .llm_response
+        # Access streaming response exactly like native OpenAI API
         # For responses API streaming, check for delta content
-        if hasattr(chunk.llm_response, "delta") and chunk.llm_response.delta:
-            print(chunk.llm_response.delta, end="", flush=True)
+        if hasattr(chunk, "delta") and chunk.delta:
+            print(chunk.delta, end="", flush=True)

     # Get the response ID from the final chunk
     response_id_to_return = None
-    if hasattr(chunk.llm_response, "response") and hasattr(chunk.llm_response.response, "id"):
-        response_id_to_return = chunk.llm_response.response.id
+    if hasattr(chunk, "response") and hasattr(chunk.response, "id"):
+        response_id_to_return = chunk.response.id

     return response_id_to_return

2 changes: 1 addition & 1 deletion examples/internal_examples/custom_context.py
@@ -58,7 +58,7 @@ async def main() -> None:
             model="gpt-4.1-nano",
             messages=messages + [{"role": "user", "content": user_input}],
         )
-        response_content = response.llm_response.choices[0].message.content
+        response_content = response.choices[0].message.content
         print("Assistant:", response_content)

         # Guardrails passed - now safe to add to conversation history
81 changes: 75 additions & 6 deletions src/guardrails/_base_client.py
@@ -7,9 +7,11 @@
 from __future__ import annotations

 import logging
+import warnings
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Final, Union
+from weakref import WeakValueDictionary

 from openai.types import Completion
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
@@ -23,6 +25,32 @@

 logger = logging.getLogger(__name__)

+# Track which GuardrailsResponse instances (by id) have already emitted deprecation warnings
+# Uses WeakValueDictionary to avoid keeping instances alive just for warning tracking
+_warned_instance_ids: WeakValueDictionary[int, Any] = WeakValueDictionary()
+
+
+def _warn_llm_response_deprecation(instance: Any) -> None:
+    """Emit deprecation warning for llm_response access (once per instance).
+
+    This function is called when users explicitly access the llm_response attribute.
+    Uses instance ID tracking to avoid warning multiple times for the same instance.
+
+    Args:
+        instance: The GuardrailsResponse instance accessing llm_response.
+    """
+    instance_id = id(instance)
+    if instance_id not in _warned_instance_ids:
+        warnings.warn(
+            "Accessing 'llm_response' is deprecated. "
+            "Access response attributes directly instead (e.g., use 'response.output_text' "
+            "instead of 'response.llm_response.output_text'). "
+            "The 'llm_response' attribute will be removed in future versions.",
+            DeprecationWarning,
+            stacklevel=3,
+        )
+        _warned_instance_ids[instance_id] = instance
+
 # Type alias for OpenAI response types
 OpenAIResponseType = Union[Completion, ChatCompletion, ChatCompletionChunk, Response]  # noqa: UP007

@@ -54,22 +82,63 @@ def triggered_results(self) -> list[GuardrailResult]:
         return [r for r in self.all_results if r.tripwire_triggered]


-@dataclass(frozen=True, slots=True)
+@dataclass(frozen=True, slots=True, weakref_slot=True)
 class GuardrailsResponse:
     """Wrapper around any OpenAI response with guardrail results.

-    This class provides the same interface as OpenAI responses, with additional
-    guardrail results accessible via the guardrail_results attribute.
+    This class acts as a transparent proxy to the underlying OpenAI response,
+    allowing direct access to all OpenAI response attributes while adding
+    guardrail results.

-    Users should access content the same way as with OpenAI responses:
+    Users can access response attributes directly (recommended):
     - For chat completions: response.choices[0].message.content
     - For responses: response.output_text
     - For streaming: response.choices[0].delta.content
+
+    The guardrail results are accessible via:
+    - response.guardrail_results.preflight
+    - response.guardrail_results.input
+    - response.guardrail_results.output
+
+    For backward compatibility, llm_response is still accessible but deprecated:
+    - response.llm_response (deprecated, emits warning once per instance)
     """

-    llm_response: OpenAIResponseType  # OpenAI response object (chat completion, response, etc.)
+    _llm_response: OpenAIResponseType  # Private: OpenAI response object
     guardrail_results: GuardrailResults

+    @property
+    def llm_response(self) -> OpenAIResponseType:
+        """Access the underlying OpenAI response (deprecated).
+
+        This property is provided for backward compatibility but is deprecated.
+        Users should access response attributes directly instead.
+
+        Returns:
+            The underlying OpenAI response object.
+        """
+        _warn_llm_response_deprecation(self)
+        return self._llm_response
+
+    def __getattr__(self, name: str) -> Any:
+        """Delegate attribute access to _llm_response for transparency.
+
+        This method is called when an attribute is not found on GuardrailsResponse.
+        It delegates the access to the underlying _llm_response object, making
+        GuardrailsResponse act as a transparent proxy.
+
+        Args:
+            name: The attribute name being accessed.
+
+        Returns:
+            The attribute value from _llm_response.
+
+        Raises:
+            AttributeError: If the attribute doesn't exist on _llm_response either.
+        """
+        # Access _llm_response directly without triggering deprecation warning
+        return getattr(self._llm_response, name)


 class GuardrailsBaseClient:
     """Base class with shared functionality for guardrails clients."""
@@ -135,7 +204,7 @@ def _create_guardrails_response(
             output=output_results,
         )
         return GuardrailsResponse(
-            llm_response=llm_response,
+            _llm_response=llm_response,
             guardrail_results=guardrail_results,
         )
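The property-plus-`__getattr__` pair above is the whole mechanism. A self-contained sketch of the same pattern, runnable without the library; `FakeResponse` and `ProxyResponse` are invented stand-ins, and the per-instance `WeakValueDictionary` bookkeeping is omitted, so this version warns on every access (requires Python 3.11+ for `weakref_slot`):

```python
# Standalone sketch of the transparent-proxy pattern used by
# GuardrailsResponse. FakeResponse/ProxyResponse are illustrative
# stand-ins, not names from this PR.
import warnings
from dataclasses import dataclass, field
from typing import Any


@dataclass
class FakeResponse:
    """Stand-in for an OpenAI response object."""
    output_text: str = "hello"


@dataclass(frozen=True, slots=True, weakref_slot=True)
class ProxyResponse:
    _llm_response: FakeResponse
    guardrail_results: list[Any] = field(default_factory=list)

    @property
    def llm_response(self) -> FakeResponse:
        # The real code routes through _warn_llm_response_deprecation()
        # to warn once per instance; this sketch warns on every access.
        warnings.warn(
            "Accessing 'llm_response' is deprecated; access attributes directly.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._llm_response

    def __getattr__(self, name: str) -> Any:
        # Invoked only when normal lookup fails, so the fields and the
        # llm_response property above are never shadowed by delegation.
        return getattr(self._llm_response, name)


resp = ProxyResponse(_llm_response=FakeResponse())
print(resp.output_text)               # delegated -> "hello"
print(resp.llm_response.output_text)  # still works, but warns
```

Because `__getattr__` is consulted only after normal attribute lookup fails, real fields like `guardrail_results` and the `llm_response` property always win over delegation, which is what keeps the proxy transparent without masking anything on the wrapper itself.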
