     ChatCompletionResponseMessage,
     ChatCompletionToolMessage,
 )
-from litellm.types.utils import Choices, Message, ModelResponse
+from litellm.types.utils import (
+    Choices,
+    CompletionTokensDetailsWrapper,
+    Message,
+    ModelResponse,
+    PromptTokensDetailsWrapper,
+    Usage,
+)


 class TestLiteLLMCompletionResponsesConfig:
@@ -675,4 +682,255 @@ def test_function_call_without_call_id_fallback_to_id(self):
         assert len(tool_calls) == 1

         tool_call = tool_calls[0]
-        assert tool_call.get("id") == "fallback_id"
+        assert tool_call.get("id") == "fallback_id"
+
+
+class TestUsageTransformation:
+    """Test cases for usage transformation from Chat Completion to Responses API format"""
+
+    def test_transform_usage_with_cached_tokens_anthropic(self):
+        """Test that cached_tokens from Anthropic are properly transformed to input_tokens_details"""
+        # Setup: Simulate Anthropic usage with cache_read_input_tokens
+        usage = Usage(
+            prompt_tokens=13,
+            completion_tokens=27,
+            total_tokens=40,
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                cached_tokens=5,  # From Anthropic cache_read_input_tokens
+                text_tokens=8,
+            ),
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="claude-sonnet-4",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert
+        assert response_usage.input_tokens == 13
+        assert response_usage.output_tokens == 27
+        assert response_usage.total_tokens == 40
+        assert response_usage.input_tokens_details is not None
+        assert response_usage.input_tokens_details.cached_tokens == 5
+        assert response_usage.input_tokens_details.text_tokens == 8
+
+    def test_transform_usage_with_cached_tokens_gemini(self):
+        """Test that cached_tokens from Gemini are properly transformed to input_tokens_details"""
+        # Setup: Simulate Gemini usage with cachedContentTokenCount
+        usage = Usage(
+            prompt_tokens=9,
+            completion_tokens=27,
+            total_tokens=36,
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                cached_tokens=3,  # From Gemini cachedContentTokenCount
+                text_tokens=6,
+            ),
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="gemini-2.0-flash",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert
+        assert response_usage.input_tokens == 9
+        assert response_usage.output_tokens == 27
+        assert response_usage.total_tokens == 36
+        assert response_usage.input_tokens_details is not None
+        assert response_usage.input_tokens_details.cached_tokens == 3
+        assert response_usage.input_tokens_details.text_tokens == 6
+
+    def test_transform_usage_with_reasoning_tokens_gemini(self):
+        """Test that reasoning_tokens from Gemini are properly transformed to output_tokens_details"""
+        # Setup: Simulate Gemini usage with thoughtsTokenCount
+        usage = Usage(
+            prompt_tokens=10,
+            completion_tokens=100,
+            total_tokens=110,
+            completion_tokens_details=CompletionTokensDetailsWrapper(
+                reasoning_tokens=50,  # From Gemini thoughtsTokenCount
+                text_tokens=50,
+            ),
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="gemini-2.0-flash",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert
+        assert response_usage.output_tokens == 100
+        assert response_usage.output_tokens_details is not None
+        assert response_usage.output_tokens_details.reasoning_tokens == 50
+        assert response_usage.output_tokens_details.text_tokens == 50
+
+    def test_transform_usage_with_cached_and_reasoning_tokens(self):
+        """Test transformation with both cached tokens (input) and reasoning tokens (output)"""
+        # Setup: Combined Anthropic cached tokens and Gemini reasoning tokens
+        usage = Usage(
+            prompt_tokens=13,
+            completion_tokens=100,
+            total_tokens=113,
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                cached_tokens=5,  # Anthropic cache_read_input_tokens
+                text_tokens=8,
+            ),
+            completion_tokens_details=CompletionTokensDetailsWrapper(
+                reasoning_tokens=50,  # Gemini thoughtsTokenCount
+                text_tokens=50,
+            ),
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="claude-sonnet-4",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert
+        assert response_usage.input_tokens == 13
+        assert response_usage.output_tokens == 100
+        assert response_usage.total_tokens == 113
+
+        # Verify input_tokens_details
+        assert response_usage.input_tokens_details is not None
+        assert response_usage.input_tokens_details.cached_tokens == 5
+        assert response_usage.input_tokens_details.text_tokens == 8
+
+        # Verify output_tokens_details
+        assert response_usage.output_tokens_details is not None
+        assert response_usage.output_tokens_details.reasoning_tokens == 50
+        assert response_usage.output_tokens_details.text_tokens == 50
+
+    def test_transform_usage_with_zero_cached_tokens(self):
+        """Test that cached_tokens=0 is properly handled (no cached tokens used)"""
+        # Setup: Usage with cached_tokens=0 (no cache hit)
+        usage = Usage(
+            prompt_tokens=9,
+            completion_tokens=27,
+            total_tokens=36,
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                cached_tokens=0,  # No cache hit
+                text_tokens=9,
+            ),
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="claude-sonnet-4",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert: Should still include cached_tokens=0 in input_tokens_details
+        assert response_usage.input_tokens_details is not None
+        assert response_usage.input_tokens_details.cached_tokens == 0
+        assert response_usage.input_tokens_details.text_tokens == 9
+
+    def test_transform_usage_without_details(self):
+        """Test transformation when prompt_tokens_details and completion_tokens_details are None"""
+        # Setup: Usage without details (basic usage only)
+        usage = Usage(
+            prompt_tokens=9,
+            completion_tokens=27,
+            total_tokens=36,
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="gpt-4o",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert: Basic usage should still be transformed, but details should be None
+        assert response_usage.input_tokens == 9
+        assert response_usage.output_tokens == 27
+        assert response_usage.total_tokens == 36
+        assert response_usage.input_tokens_details is None
+        assert response_usage.output_tokens_details is None