Skip to content

Commit ad6a0f4

Browse files
authored
Update perplexity cost tracking (#15743)
* Update perplexity cost tracking * fix lint errors * fix code * fix tests in perplexity * fix test related to api call * fix exception test
1 parent 396ab80 commit ad6a0f4

File tree

8 files changed

+599
-79
lines changed

8 files changed

+599
-79
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Perplexity chat completion transformations."""

litellm/llms/perplexity/chat/transformation.py

Lines changed: 103 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,32 @@
1-
"""
2-
Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions`
3-
"""
1+
"""Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions`."""
42

5-
from typing import Any, List, Optional, Tuple
3+
from __future__ import annotations
4+
5+
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
66

7-
import httpx
87
import litellm
98
from litellm._logging import verbose_logger
10-
from litellm.secret_managers.main import get_secret_str
11-
from litellm.types.llms.openai import AllMessageValues
12-
from litellm.types.utils import Usage, PromptTokensDetailsWrapper
13-
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
149
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
15-
from litellm.types.utils import ModelResponse
16-
from litellm.types.llms.openai import ChatCompletionAnnotation
17-
from litellm.types.llms.openai import ChatCompletionAnnotationURLCitation
10+
from litellm.secret_managers.main import get_secret_str
11+
from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage
12+
13+
if TYPE_CHECKING:
14+
import httpx
15+
16+
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
17+
from litellm.types.llms.openai import (
18+
AllMessageValues,
19+
ChatCompletionAnnotation,
20+
ChatCompletionAnnotationURLCitation,
21+
)
1822

1923

2024
class PerplexityChatConfig(OpenAIGPTConfig):
25+
"""Configuration for Perplexity chat completions."""
26+
2127
@property
22-
def custom_llm_provider(self) -> Optional[str]:
28+
def custom_llm_provider(self) -> str | None:
29+
"""Return the custom LLM provider name."""
2330
return "perplexity"
2431

2532
def _get_openai_compatible_provider_info(
@@ -33,6 +40,38 @@ def _get_openai_compatible_provider_info(
3340
)
3441
return api_base, dynamic_api_key
3542

43+
def validate_environment(
44+
self,
45+
headers: dict,
46+
model: str,
47+
messages: list,
48+
optional_params: dict,
49+
litellm_params: dict,
50+
api_key: Optional[str] = None,
51+
api_base: Optional[str] = None,
52+
) -> dict:
53+
"""Validate Perplexity environment and set headers."""
54+
# Get API key from environment if not provided
55+
if api_key is None:
56+
_, api_key = self._get_openai_compatible_provider_info(
57+
api_base=api_base, api_key=api_key
58+
)
59+
60+
# Validate API key is present
61+
if api_key is None:
62+
raise ValueError(
63+
"The api_key client option must be set either by passing api_key to the client or by setting the PERPLEXITY_API_KEY environment variable"
64+
)
65+
66+
# Set authorization header
67+
headers["Authorization"] = f"Bearer {api_key}"
68+
69+
# Ensure Content-Type is set to application/json
70+
if "content-type" not in headers and "Content-Type" not in headers:
71+
headers["Content-Type"] = "application/json"
72+
73+
return headers
74+
3675
def get_supported_openai_params(self, model: str) -> list:
3776
"""
3877
Perplexity supports a subset of OpenAI params
@@ -72,7 +111,8 @@ def get_supported_openai_params(self, model: str) -> list:
72111

73112
return base_openai_params
74113

75-
def transform_response(
114+
115+
def transform_response( # noqa: PLR0913
76116
self,
77117
model: str,
78118
raw_response: httpx.Response,
@@ -82,10 +122,11 @@ def transform_response(
82122
messages: List[AllMessageValues],
83123
optional_params: dict,
84124
litellm_params: dict,
85-
encoding: Any,
125+
encoding: Any,
86126
api_key: Optional[str] = None,
87-
json_mode: Optional[bool] = None,
127+
json_mode: Optional[bool] = None,
88128
) -> ModelResponse:
129+
"""Transform Perplexity response to standard format."""
89130
# Call the parent transform_response first to handle the standard transformation
90131
model_response = super().transform_response(
91132
model=model,
@@ -104,28 +145,29 @@ def transform_response(
104145
# Extract and enhance usage with Perplexity-specific fields
105146
try:
106147
raw_response_json = raw_response.json()
148+
self.add_cost_to_usage(model_response, raw_response_json)
107149
self._enhance_usage_with_perplexity_fields(
108-
model_response, raw_response_json
150+
model_response, raw_response_json,
109151
)
110152
self._add_citations_as_annotations(model_response, raw_response_json)
111-
except Exception as e:
153+
except (ValueError, TypeError, KeyError) as e:
112154
verbose_logger.debug(f"Error extracting Perplexity-specific usage fields: {e}")
113155

114156
return model_response
115157

116-
def _enhance_usage_with_perplexity_fields(
117-
self, model_response: ModelResponse, raw_response_json: dict
158+
def _enhance_usage_with_perplexity_fields(
159+
self, model_response: ModelResponse, raw_response_json: dict,
118160
) -> None:
119-
"""
120-
Extract citation tokens and search queries from Perplexity API response
121-
and add them to the usage object using standard LiteLLM fields.
161+
"""Extract citation tokens and search queries from Perplexity API response.
162+
163+
Add them to the usage object using standard LiteLLM fields.
122164
"""
123165
if not hasattr(model_response, "usage") or model_response.usage is None:
124166
# Create a usage object if it doesn't exist (when usage was None)
125167
model_response.usage = Usage( # type: ignore[attr-defined]
126168
prompt_tokens=0,
127169
completion_tokens=0,
128-
total_tokens=0
170+
total_tokens=0,
129171
)
130172

131173
usage = model_response.usage # type: ignore[attr-defined]
@@ -146,7 +188,7 @@ def _enhance_usage_with_perplexity_fields(
146188
# Extract search queries count from usage or response metadata
147189
# Perplexity might include this in the usage object or as separate metadata
148190
perplexity_usage = raw_response_json.get("usage", {})
149-
191+
150192
# Try to extract search queries from usage field first, then root level
151193
num_search_queries = perplexity_usage.get("num_search_queries")
152194
if num_search_queries is None:
@@ -155,18 +197,18 @@ def _enhance_usage_with_perplexity_fields(
155197
num_search_queries = perplexity_usage.get("search_queries")
156198
if num_search_queries is None:
157199
num_search_queries = raw_response_json.get("search_queries")
158-
200+
159201
# Create or update prompt_tokens_details to include web search requests and citation tokens
160202
if citation_tokens > 0 or (
161203
num_search_queries is not None and num_search_queries > 0
162204
):
163205
if usage.prompt_tokens_details is None:
164206
usage.prompt_tokens_details = PromptTokensDetailsWrapper()
165-
207+
166208
# Store citation tokens count for cost calculation
167209
if citation_tokens > 0:
168-
setattr(usage, "citation_tokens", citation_tokens)
169-
210+
usage.citation_tokens = citation_tokens
211+
170212
# Store search queries count in the standard web_search_requests field
171213
if num_search_queries is not None and num_search_queries > 0:
172214
usage.prompt_tokens_details.web_search_requests = num_search_queries
@@ -248,4 +290,35 @@ def _add_citations_as_annotations(
248290
if citations:
249291
setattr(model_response, "citations", citations)
250292
if search_results:
251-
setattr(model_response, "search_results", search_results)
293+
setattr(model_response, "search_results", search_results)
294+
295+
def add_cost_to_usage(self, model_response: ModelResponse, raw_response_json: dict) -> None:
296+
"""Add the cost to the usage object."""
297+
try:
298+
usage_data = raw_response_json.get("usage")
299+
if usage_data:
300+
# Try different possible cost field locations
301+
response_cost = None
302+
303+
# Check if cost is directly in usage (flat structure)
304+
if "total_cost" in usage_data:
305+
response_cost = usage_data["total_cost"]
306+
# Check if cost is nested (cost.total_cost structure)
307+
elif "cost" in usage_data and isinstance(usage_data["cost"], dict):
308+
response_cost = usage_data["cost"].get("total_cost")
309+
# Check if cost is a simple value
310+
elif "cost" in usage_data:
311+
response_cost = usage_data["cost"]
312+
313+
if response_cost is not None:
314+
# Store cost in hidden params for the cost calculator to use
315+
if not hasattr(model_response, "_hidden_params"):
316+
model_response._hidden_params = {}
317+
if "additional_headers" not in model_response._hidden_params:
318+
model_response._hidden_params["additional_headers"] = {}
319+
model_response._hidden_params["additional_headers"][
320+
"llm_provider-x-litellm-response-cost"
321+
] = float(response_cost)
322+
except (ValueError, TypeError, KeyError) as e:
323+
verbose_logger.debug(f"Error adding cost to usage: {e}")
324+
# If we can't extract cost, continue without it - don't fail the response

litellm/main.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2033,11 +2033,36 @@ def completion( # type: ignore # noqa: PLR0915
20332033
logging.post_call(
20342034
input=messages, api_key=api_key, original_response=response
20352035
)
2036+
elif custom_llm_provider == "perplexity":
2037+
response = base_llm_http_handler.completion(
2038+
model=model,
2039+
messages=messages,
2040+
headers=headers,
2041+
model_response=model_response,
2042+
api_key=api_key,
2043+
api_base=api_base,
2044+
acompletion=acompletion,
2045+
logging_obj=logging,
2046+
optional_params=optional_params,
2047+
litellm_params=litellm_params,
2048+
shared_session=shared_session,
2049+
timeout=timeout,
2050+
client=client,
2051+
custom_llm_provider=custom_llm_provider,
2052+
encoding=encoding,
2053+
stream=stream,
2054+
provider_config=provider_config,
2055+
)
2056+
2057+
## LOGGING - Call after response has been processed by transform_response
2058+
logging.post_call(
2059+
input=messages, api_key=api_key, original_response=response
2060+
)
2061+
20362062
elif (
20372063
model in litellm.open_ai_chat_completion_models
20382064
or custom_llm_provider == "custom_openai"
20392065
or custom_llm_provider == "deepinfra"
2040-
or custom_llm_provider == "perplexity"
20412066
or custom_llm_provider == "nvidia_nim"
20422067
or custom_llm_provider == "cerebras"
20432068
or custom_llm_provider == "baseten"

tests/llm_translation/test_perplexity_reasoning.py

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,12 @@ def test_perplexity_reasoning_effort_mock_completion(self, model):
6262
"""
6363
Test that reasoning_effort is correctly passed in actual completion call (mocked)
6464
"""
65-
from openai import OpenAI
66-
from openai.types.chat.chat_completion import ChatCompletion
65+
import httpx
6766

6867
litellm.set_verbose = True
6968

7069
# Mock successful response with reasoning content
71-
response_object = {
70+
response_json = {
7271
"id": "cmpl-test",
7372
"object": "chat.completion",
7473
"created": 1677652288,
@@ -94,35 +93,37 @@ def test_perplexity_reasoning_effort_mock_completion(self, model):
9493
},
9594
}
9695

97-
pydantic_obj = ChatCompletion(**response_object)
98-
99-
def _return_pydantic_obj(*args, **kwargs):
100-
new_response = MagicMock()
101-
new_response.headers = {"content-type": "application/json"}
102-
new_response.parse.return_value = pydantic_obj
103-
return new_response
104-
105-
openai_client = OpenAI(api_key="fake-api-key")
96+
def mock_post(*args, **kwargs):
97+
# Create a mock response
98+
mock_response = MagicMock(spec=httpx.Response)
99+
mock_response.status_code = 200
100+
mock_response.headers = {"content-type": "application/json"}
101+
mock_response.json.return_value = response_json
102+
mock_response.text = json.dumps(response_json)
103+
104+
# Store the request data for verification
105+
mock_post.last_request_data = kwargs.get("data")
106+
if isinstance(mock_post.last_request_data, (str, bytes)):
107+
mock_post.last_request_data = json.loads(mock_post.last_request_data)
108+
109+
return mock_response
106110

107-
with patch.object(
108-
openai_client.chat.completions.with_raw_response, "create", side_effect=_return_pydantic_obj
109-
) as mock_client:
111+
# Mock at the HTTP handler level
112+
with patch("litellm.llms.custom_httpx.http_handler.HTTPHandler.post", side_effect=mock_post) as mock_http:
110113

111114
response = completion(
112115
model=model,
113116
messages=[{"role": "user", "content": "Hello, please think about this carefully."}],
114117
reasoning_effort="high",
115-
client=openai_client,
118+
api_key="fake-api-key",
116119
)
117120

118121
# Verify the call was made
119-
assert mock_client.called
120-
121-
# Get the request data from the mock call
122-
call_args = mock_client.call_args
123-
request_data = call_args.kwargs
122+
assert mock_http.called
124123

125124
# Verify reasoning_effort was included in the request
125+
request_data = mock_post.last_request_data
126+
assert request_data is not None
126127
assert "reasoning_effort" in request_data
127128
assert request_data["reasoning_effort"] == "high"
128129

tests/local_testing/test_completion.py

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,9 @@ def test_completion_fireworks_ai_dynamic_params(api_key, api_base):
12441244
# @pytest.mark.skip(reason="this test is flaky")
12451245
def test_completion_perplexity_api():
12461246
try:
1247+
import httpx
1248+
import json
1249+
12471250
response_object = {
12481251
"id": "a8f37485-026e-45da-81a9-cf0184896840",
12491252
"model": "llama-3-sonar-small-32k-online",
@@ -1270,25 +1273,17 @@ def test_completion_perplexity_api():
12701273
],
12711274
}
12721275

1273-
from openai import OpenAI
1274-
from openai.types.chat.chat_completion import ChatCompletion
1275-
1276-
pydantic_obj = ChatCompletion(**response_object)
1277-
1278-
def _return_pydantic_obj(*args, **kwargs):
1279-
new_response = MagicMock()
1280-
new_response.headers = {"hello": "world"}
1281-
1282-
new_response.parse.return_value = pydantic_obj
1283-
return new_response
1284-
1285-
openai_client = OpenAI()
1286-
1287-
with patch.object(
1288-
openai_client.chat.completions.with_raw_response,
1289-
"create",
1290-
side_effect=_return_pydantic_obj,
1291-
) as mock_client:
1276+
def mock_post(*args, **kwargs):
1277+
# Create a mock response
1278+
mock_response = MagicMock(spec=httpx.Response)
1279+
mock_response.status_code = 200
1280+
mock_response.headers = {"content-type": "application/json"}
1281+
mock_response.json.return_value = response_object
1282+
mock_response.text = json.dumps(response_object)
1283+
return mock_response
1284+
1285+
# Mock at the HTTP handler level
1286+
with patch("litellm.llms.custom_httpx.http_handler.HTTPHandler.post", side_effect=mock_post):
12921287
# litellm.set_verbose= True
12931288
messages = [
12941289
{"role": "system", "content": "You're a good bot"},
@@ -1302,10 +1297,9 @@ def _return_pydantic_obj(*args, **kwargs):
13021297
},
13031298
]
13041299
response = completion(
1305-
model="mistral-7b-instruct",
1300+
model="perplexity/llama-3-sonar-small-32k-online",
13061301
messages=messages,
1307-
api_base="https://api.perplexity.ai",
1308-
client=openai_client,
1302+
api_key="fake-api-key",
13091303
)
13101304
print(response)
13111305
assert hasattr(response, "citations")

0 commit comments

Comments
 (0)