Commit c7d0af0

Merge pull request #2426 from BerriAI/litellm_whisper_cost_tracking
feat: add cost tracking + caching for `/audio/transcription` calls
2 parents 34ad958 + 1d15dde

File tree

11 files changed (+247, -41 lines)

litellm/caching.py

Lines changed: 73 additions & 9 deletions
```diff
@@ -10,7 +10,7 @@
 import litellm
 import time, logging, asyncio
 import json, traceback, ast, hashlib
-from typing import Optional, Literal, List, Union, Any
+from typing import Optional, Literal, List, Union, Any, BinaryIO
 from openai._models import BaseModel as OpenAIObject
 from litellm._logging import verbose_logger
```

```diff
@@ -765,8 +765,24 @@ def __init__(
         password: Optional[str] = None,
         similarity_threshold: Optional[float] = None,
         supported_call_types: Optional[
-            List[Literal["completion", "acompletion", "embedding", "aembedding"]]
-        ] = ["completion", "acompletion", "embedding", "aembedding"],
+            List[
+                Literal[
+                    "completion",
+                    "acompletion",
+                    "embedding",
+                    "aembedding",
+                    "atranscription",
+                    "transcription",
+                ]
+            ]
+        ] = [
+            "completion",
+            "acompletion",
+            "embedding",
+            "aembedding",
+            "atranscription",
+            "transcription",
+        ],
         # s3 Bucket, boto3 configuration
         s3_bucket_name: Optional[str] = None,
         s3_region_name: Optional[str] = None,
```

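The expanded `supported_call_types` means transcription results can now be cached alongside chat and embedding results. A minimal opt-in sketch (not part of this diff; the in-memory `local` cache type keeps it self-contained):

```python
import litellm
from litellm.caching import Cache

# Sketch: cache transcription results alongside completion results.
# supported_call_types now accepts the two new transcription literals.
litellm.cache = Cache(
    type="local",
    supported_call_types=["completion", "acompletion", "atranscription", "transcription"],
)
```
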
```diff
@@ -881,9 +897,14 @@ def get_cache_key(self, *args, **kwargs):
             "input",
             "encoding_format",
         ]  # embedding kwargs = model, input, user, encoding_format. Model, user are checked in completion_kwargs
-
+        transcription_only_kwargs = [
+            "file",
+            "language",
+        ]
         # combined_kwargs - NEEDS to be ordered across get_cache_key(). Do not use a set()
-        combined_kwargs = completion_kwargs + embedding_only_kwargs
+        combined_kwargs = (
+            completion_kwargs + embedding_only_kwargs + transcription_only_kwargs
+        )
         for param in combined_kwargs:
             # ignore litellm params here
             if param in kwargs:
```

```diff
@@ -915,6 +936,17 @@ def get_cache_key(self, *args, **kwargs):
                     param_value = (
                         caching_group or model_group or kwargs[param]
                     )  # use caching_group, if set then model_group if it exists, else use kwargs["model"]
+                elif param == "file":
+                    metadata_file_name = kwargs.get("metadata", {}).get(
+                        "file_name", None
+                    )
+                    litellm_params_file_name = kwargs.get("litellm_params", {}).get(
+                        "file_name", None
+                    )
+                    if metadata_file_name is not None:
+                        param_value = metadata_file_name
+                    elif litellm_params_file_name is not None:
+                        param_value = litellm_params_file_name
                 else:
                     if kwargs[param] is None:
                         continue  # ignore None params
```

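A file handle has no stable textual representation, so the `file` branch above keys on a `file_name` carried in `metadata` (or `litellm_params`) rather than the file object itself. A rough illustration, assuming `get_cache_key` is invoked with the same kwargs the transcription path passes internally:

```python
import io
import litellm
from litellm.caching import Cache

litellm.cache = Cache(type="local")

# Illustration: two requests naming the same file hash to the same cache key,
# even though the BinaryIO objects themselves differ.
key_a = litellm.cache.get_cache_key(
    model="whisper-1", file=io.BytesIO(b"fake-audio"), metadata={"file_name": "meeting.wav"}
)
key_b = litellm.cache.get_cache_key(
    model="whisper-1", file=io.BytesIO(b"fake-audio"), metadata={"file_name": "meeting.wav"}
)
assert key_a == key_b
```
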
```diff
@@ -1144,8 +1176,24 @@ def enable_cache(
     port: Optional[str] = None,
     password: Optional[str] = None,
     supported_call_types: Optional[
-        List[Literal["completion", "acompletion", "embedding", "aembedding"]]
-    ] = ["completion", "acompletion", "embedding", "aembedding"],
+        List[
+            Literal[
+                "completion",
+                "acompletion",
+                "embedding",
+                "aembedding",
+                "atranscription",
+                "transcription",
+            ]
+        ]
+    ] = [
+        "completion",
+        "acompletion",
+        "embedding",
+        "aembedding",
+        "atranscription",
+        "transcription",
+    ],
     **kwargs,
 ):
     """
```

```diff
@@ -1193,8 +1241,24 @@ def update_cache(
     port: Optional[str] = None,
     password: Optional[str] = None,
     supported_call_types: Optional[
-        List[Literal["completion", "acompletion", "embedding", "aembedding"]]
-    ] = ["completion", "acompletion", "embedding", "aembedding"],
+        List[
+            Literal[
+                "completion",
+                "acompletion",
+                "embedding",
+                "aembedding",
+                "atranscription",
+                "transcription",
+            ]
+        ]
+    ] = [
+        "completion",
+        "acompletion",
+        "embedding",
+        "aembedding",
+        "atranscription",
+        "transcription",
+    ],
     **kwargs,
 ):
     """
```

litellm/llms/azure.py

Lines changed: 4 additions & 2 deletions
```diff
@@ -861,7 +861,8 @@ def audio_transcriptions(
                 additional_args={"complete_input_dict": data},
                 original_response=stringified_response,
             )
-            final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="audio_transcription")  # type: ignore
+            hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
+            final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
             return final_response

     async def async_audio_transcriptions(
@@ -921,7 +922,8 @@ async def async_audio_transcriptions(
                 },
                 original_response=stringified_response,
             )
-            response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="audio_transcription")  # type: ignore
+            hidden_params = {"model": "whisper-1", "custom_llm_provider": "azure"}
+            response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
             return response
         except Exception as e:
             ## LOGGING
```

litellm/llms/openai.py

Lines changed: 5 additions & 2 deletions
```diff
@@ -753,6 +753,7 @@ def image_generation(
             # return response
             return convert_to_model_response_object(response_object=response, model_response_object=model_response, response_type="image_generation")  # type: ignore
         except OpenAIError as e:
+
             exception_mapping_worked = True
             ## LOGGING
             logging_obj.post_call(
@@ -824,7 +825,8 @@ def audio_transcriptions(
                 additional_args={"complete_input_dict": data},
                 original_response=stringified_response,
             )
-            final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="audio_transcription")  # type: ignore
+            hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
+            final_response = convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
             return final_response

     async def async_audio_transcriptions(
@@ -862,7 +864,8 @@ async def async_audio_transcriptions(
                 additional_args={"complete_input_dict": data},
                 original_response=stringified_response,
             )
-            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, response_type="audio_transcription")  # type: ignore
+            hidden_params = {"model": "whisper-1", "custom_llm_provider": "openai"}
+            return convert_to_model_response_object(response_object=stringified_response, model_response_object=model_response, hidden_params=hidden_params, response_type="audio_transcription")  # type: ignore
         except Exception as e:
             ## LOGGING
             logging_obj.post_call(
```

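With the provider and model stamped into `_hidden_params`, `litellm.completion_cost()` can price a transcription by audio duration rather than by tokens. A minimal end-to-end sketch (assumes `OPENAI_API_KEY` is set and `audio.mp3` exists locally):

```python
import litellm

# Sketch: the response now carries provider info in _hidden_params, which
# completion_cost uses to look up whisper-1's per-second rate.
with open("audio.mp3", "rb") as f:
    resp = litellm.transcription(model="whisper-1", file=f)

print(resp._hidden_params)  # includes "custom_llm_provider": "openai"
print(litellm.completion_cost(model="whisper-1", completion_response=resp))
```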

litellm/proxy/proxy_server.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -3295,6 +3295,7 @@ async def audio_transcriptions(
             user_api_key_dict, "team_id", None
         )
         data["metadata"]["endpoint"] = str(request.url)
+        data["metadata"]["file_name"] = file.filename

         ### TEAM-SPECIFIC PARAMS ###
         if user_api_key_dict.team_id is not None:
@@ -3329,7 +3330,7 @@ async def audio_transcriptions(
         data = await proxy_logging_obj.pre_call_hook(
             user_api_key_dict=user_api_key_dict,
             data=data,
-            call_type="moderation",
+            call_type="audio_transcription",
        )

        ## ROUTE TO CORRECT ENDPOINT ##
```

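On the proxy side, `file.filename` from the multipart upload now lands in `data["metadata"]["file_name"]`, feeding both the cache key and spend logs. A hedged sketch of exercising the endpoint with the OpenAI SDK (the proxy URL and key below are placeholders):

```python
from openai import OpenAI

# Sketch: route a transcription through a LiteLLM proxy. The proxy records
# the uploaded file's name as metadata["file_name"].
client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

with open("meeting.wav", "rb") as audio:
    transcript = client.audio.transcriptions.create(model="whisper-1", file=audio)

print(transcript.text)
```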

litellm/proxy/utils.py

Lines changed: 5 additions & 1 deletion
```diff
@@ -96,7 +96,11 @@ async def pre_call_hook(
         user_api_key_dict: UserAPIKeyAuth,
         data: dict,
         call_type: Literal[
-            "completion", "embeddings", "image_generation", "moderation"
+            "completion",
+            "embeddings",
+            "image_generation",
+            "moderation",
+            "audio_transcription",
         ],
     ):
         """
```


litellm/tests/test_completion_cost.py

Lines changed: 60 additions & 1 deletion
```diff
@@ -6,7 +6,12 @@
 )  # Adds the parent directory to the system path
 import time
 import litellm
-from litellm import get_max_tokens, model_cost, open_ai_chat_completion_models
+from litellm import (
+    get_max_tokens,
+    model_cost,
+    open_ai_chat_completion_models,
+    TranscriptionResponse,
+)
 import pytest


@@ -238,3 +243,57 @@ def test_cost_bedrock_pricing_actual_calls():
         messages=[{"role": "user", "content": "Hey, how's it going?"}],
     )
     assert cost > 0
+
+
+def test_whisper_openai():
+    litellm.set_verbose = True
+    transcription = TranscriptionResponse(
+        text="Four score and seven years ago, our fathers brought forth on this continent a new nation, conceived in liberty and dedicated to the proposition that all men are created equal. Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure."
+    )
+    transcription._hidden_params = {
+        "model": "whisper-1",
+        "custom_llm_provider": "openai",
+        "optional_params": {},
+        "model_id": None,
+    }
+    _total_time_in_seconds = 3
+
+    transcription._response_ms = _total_time_in_seconds * 1000
+    cost = litellm.completion_cost(model="whisper-1", completion_response=transcription)
+
+    print(f"cost: {cost}")
+    print(f"whisper dict: {litellm.model_cost['whisper-1']}")
+    expected_cost = round(
+        litellm.model_cost["whisper-1"]["output_cost_per_second"]
+        * _total_time_in_seconds,
+        5,
+    )
+    assert cost == expected_cost
+
+
+def test_whisper_azure():
+    litellm.set_verbose = True
+    transcription = TranscriptionResponse(
+        text="Four score and seven years ago, our fathers brought forth on this continent a new nation, conceived in liberty and dedicated to the proposition that all men are created equal. Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure."
+    )
+    transcription._hidden_params = {
+        "model": "whisper-1",
+        "custom_llm_provider": "azure",
+        "optional_params": {},
+        "model_id": None,
+    }
+    _total_time_in_seconds = 3
+
+    transcription._response_ms = _total_time_in_seconds * 1000
+    cost = litellm.completion_cost(
+        model="azure/azure-whisper", completion_response=transcription
+    )
+
+    print(f"cost: {cost}")
+    print(f"whisper dict: {litellm.model_cost['whisper-1']}")
+    expected_cost = round(
+        litellm.model_cost["whisper-1"]["output_cost_per_second"]
+        * _total_time_in_seconds,
+        5,
+    )
+    assert cost == expected_cost
```

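Both tests assert against the model-cost map rather than a hard-coded figure. As a sanity check on the arithmetic: if `whisper-1`'s `output_cost_per_second` were, say, 0.0001, a 3-second clip would come out to `round(0.0001 * 3, 5) = 0.0003`.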

litellm/tests/test_custom_callback_input.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -973,6 +973,7 @@ def test_image_generation_openai():

     print(f"customHandler_success.errors: {customHandler_success.errors}")
     print(f"customHandler_success.states: {customHandler_success.states}")
+    time.sleep(2)
     assert len(customHandler_success.errors) == 0
     assert len(customHandler_success.states) == 3  # pre, post, success
     # test failure callback
```

litellm/tests/test_custom_logger.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -100,7 +100,7 @@ async def async_test_logging_fn(self, kwargs, completion_obj, start_time, end_ti
 def test_async_chat_openai_stream():
     try:
         tmp_function = TmpFunction()
-        # litellm.set_verbose = True
+        litellm.set_verbose = True
         litellm.success_callback = [tmp_function.async_test_logging_fn]
         complete_streaming_response = ""

```

litellm/tests/test_proxy_server.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -336,6 +336,8 @@ def test_load_router_config():
         "acompletion",
         "embedding",
         "aembedding",
+        "atranscription",
+        "transcription",
     ]  # init with all call types

     litellm.disable_cache()
```
