
Commit 7646918

Merge pull request #17641 from BerriAI/litellm_responses_api_usage_populated
Add usage details in responses usage object
2 parents: 60a325e + 41f0cf8

2 files changed: +292 -2 lines changed

litellm/responses/litellm_completion_transformation/transformation.py

Lines changed: 32 additions & 0 deletions
@@ -25,9 +25,11 @@
     ChatCompletionToolParamFunctionChunk,
     ChatCompletionUserMessage,
     GenericChatCompletionMessage,
+    InputTokensDetails,
     OpenAIMcpServerTool,
     OpenAIWebSearchOptions,
     OpenAIWebSearchUserLocation,
+    OutputTokensDetails,
     Reasoning,
     ResponseAPIUsage,
     ResponseInputParam,
@@ -1131,6 +1133,36 @@ def _transform_chat_completion_usage_to_responses_usage(
        if hasattr(usage, "cost") and usage.cost is not None:
            setattr(response_usage, "cost", usage.cost)

+        # Translate prompt_tokens_details to input_tokens_details
+        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details is not None:
+            prompt_details = usage.prompt_tokens_details
+            input_details_dict: Dict[str, Optional[int]] = {}
+
+            if hasattr(prompt_details, "cached_tokens") and prompt_details.cached_tokens is not None:
+                input_details_dict["cached_tokens"] = prompt_details.cached_tokens
+
+            if hasattr(prompt_details, "text_tokens") and prompt_details.text_tokens is not None:
+                input_details_dict["text_tokens"] = prompt_details.text_tokens
+
+            if hasattr(prompt_details, "audio_tokens") and prompt_details.audio_tokens is not None:
+                input_details_dict["audio_tokens"] = prompt_details.audio_tokens
+
+            if input_details_dict:
+                response_usage.input_tokens_details = InputTokensDetails(**input_details_dict)
+
+        # Translate completion_tokens_details to output_tokens_details
+        if hasattr(usage, "completion_tokens_details") and usage.completion_tokens_details is not None:
+            completion_details = usage.completion_tokens_details
+            output_details_dict: Dict[str, Optional[int]] = {}
+            if hasattr(completion_details, "reasoning_tokens") and completion_details.reasoning_tokens is not None:
+                output_details_dict["reasoning_tokens"] = completion_details.reasoning_tokens
+
+            if hasattr(completion_details, "text_tokens") and completion_details.text_tokens is not None:
+                output_details_dict["text_tokens"] = completion_details.text_tokens
+
+            if output_details_dict:
+                response_usage.output_tokens_details = OutputTokensDetails(**output_details_dict)
+
        return response_usage

    @staticmethod
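
For orientation, here is a minimal sketch (not part of this diff) of how the newly populated fields would surface to a caller of LiteLLM's Responses API bridge. The model name and prompt are illustrative, and the sketch assumes litellm.responses() routes this model through the chat-completion transformation shown above; the attribute names (input_tokens_details, output_tokens_details, cached_tokens, reasoning_tokens) are taken from the diff.

import litellm

# Illustrative call; any chat-completion-backed model that reports token
# details (e.g. Anthropic prompt caching, Gemini thinking tokens) applies.
response = litellm.responses(
    model="anthropic/claude-sonnet-4",
    input="Summarize the cached document.",
)

usage = response.usage
print(usage.input_tokens, usage.output_tokens, usage.total_tokens)

# Nested details are populated only when the provider reports them;
# otherwise they remain None (see test_transform_usage_without_details below).
if usage.input_tokens_details is not None:
    print("cached prompt tokens:", usage.input_tokens_details.cached_tokens)
if usage.output_tokens_details is not None:
    print("reasoning tokens:", usage.output_tokens_details.reasoning_tokens)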

tests/test_litellm/responses/litellm_completion_transformation/test_litellm_completion_responses.py

Lines changed: 260 additions & 2 deletions
@@ -12,7 +12,14 @@
    ChatCompletionResponseMessage,
    ChatCompletionToolMessage,
)
-from litellm.types.utils import Choices, Message, ModelResponse
+from litellm.types.utils import (
+    Choices,
+    CompletionTokensDetailsWrapper,
+    Message,
+    ModelResponse,
+    PromptTokensDetailsWrapper,
+    Usage,
+)


class TestLiteLLMCompletionResponsesConfig:
@@ -675,4 +682,255 @@ def test_function_call_without_call_id_fallback_to_id(self):
        assert len(tool_calls) == 1

        tool_call = tool_calls[0]
-        assert tool_call.get("id") == "fallback_id"
+        assert tool_call.get("id") == "fallback_id"
+
+
+class TestUsageTransformation:
+    """Test cases for usage transformation from Chat Completion to Responses API format"""
+
+    def test_transform_usage_with_cached_tokens_anthropic(self):
+        """Test that cached_tokens from Anthropic are properly transformed to input_tokens_details"""
+        # Setup: Simulate Anthropic usage with cache_read_input_tokens
+        usage = Usage(
+            prompt_tokens=13,
+            completion_tokens=27,
+            total_tokens=40,
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                cached_tokens=5,  # From Anthropic cache_read_input_tokens
+                text_tokens=8,
+            ),
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="claude-sonnet-4",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert
+        assert response_usage.input_tokens == 13
+        assert response_usage.output_tokens == 27
+        assert response_usage.total_tokens == 40
+        assert response_usage.input_tokens_details is not None
+        assert response_usage.input_tokens_details.cached_tokens == 5
+        assert response_usage.input_tokens_details.text_tokens == 8
+
+    def test_transform_usage_with_cached_tokens_gemini(self):
+        """Test that cached_tokens from Gemini are properly transformed to input_tokens_details"""
+        # Setup: Simulate Gemini usage with cachedContentTokenCount
+        usage = Usage(
+            prompt_tokens=9,
+            completion_tokens=27,
+            total_tokens=36,
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                cached_tokens=3,  # From Gemini cachedContentTokenCount
+                text_tokens=6,
+            ),
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="gemini-2.0-flash",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert
+        assert response_usage.input_tokens == 9
+        assert response_usage.output_tokens == 27
+        assert response_usage.total_tokens == 36
+        assert response_usage.input_tokens_details is not None
+        assert response_usage.input_tokens_details.cached_tokens == 3
+        assert response_usage.input_tokens_details.text_tokens == 6
+
+    def test_transform_usage_with_reasoning_tokens_gemini(self):
+        """Test that reasoning_tokens from Gemini are properly transformed to output_tokens_details"""
+        # Setup: Simulate Gemini usage with thoughtsTokenCount
+        usage = Usage(
+            prompt_tokens=10,
+            completion_tokens=100,
+            total_tokens=110,
+            completion_tokens_details=CompletionTokensDetailsWrapper(
+                reasoning_tokens=50,  # From Gemini thoughtsTokenCount
+                text_tokens=50,
+            ),
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="gemini-2.0-flash",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert
+        assert response_usage.output_tokens == 100
+        assert response_usage.output_tokens_details is not None
+        assert response_usage.output_tokens_details.reasoning_tokens == 50
+        assert response_usage.output_tokens_details.text_tokens == 50
+
+    def test_transform_usage_with_cached_and_reasoning_tokens(self):
+        """Test transformation with both cached tokens (input) and reasoning tokens (output)"""
+        # Setup: Combined Anthropic cached tokens and Gemini reasoning tokens
+        usage = Usage(
+            prompt_tokens=13,
+            completion_tokens=100,
+            total_tokens=113,
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                cached_tokens=5,  # Anthropic cache_read_input_tokens
+                text_tokens=8,
+            ),
+            completion_tokens_details=CompletionTokensDetailsWrapper(
+                reasoning_tokens=50,  # Gemini thoughtsTokenCount
+                text_tokens=50,
+            ),
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="claude-sonnet-4",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert
+        assert response_usage.input_tokens == 13
+        assert response_usage.output_tokens == 100
+        assert response_usage.total_tokens == 113
+
+        # Verify input_tokens_details
+        assert response_usage.input_tokens_details is not None
+        assert response_usage.input_tokens_details.cached_tokens == 5
+        assert response_usage.input_tokens_details.text_tokens == 8
+
+        # Verify output_tokens_details
+        assert response_usage.output_tokens_details is not None
+        assert response_usage.output_tokens_details.reasoning_tokens == 50
+        assert response_usage.output_tokens_details.text_tokens == 50
+
+    def test_transform_usage_with_zero_cached_tokens(self):
+        """Test that cached_tokens=0 is properly handled (no cached tokens used)"""
+        # Setup: Usage with cached_tokens=0 (no cache hit)
+        usage = Usage(
+            prompt_tokens=9,
+            completion_tokens=27,
+            total_tokens=36,
+            prompt_tokens_details=PromptTokensDetailsWrapper(
+                cached_tokens=0,  # No cache hit
+                text_tokens=9,
+            ),
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="claude-sonnet-4",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert: Should still include cached_tokens=0 in input_tokens_details
+        assert response_usage.input_tokens_details is not None
+        assert response_usage.input_tokens_details.cached_tokens == 0
+        assert response_usage.input_tokens_details.text_tokens == 9
+
+    def test_transform_usage_without_details(self):
+        """Test transformation when prompt_tokens_details and completion_tokens_details are None"""
+        # Setup: Usage without details (basic usage only)
+        usage = Usage(
+            prompt_tokens=9,
+            completion_tokens=27,
+            total_tokens=36,
+        )
+
+        chat_completion_response = ModelResponse(
+            id="test-response-id",
+            created=1234567890,
+            model="gpt-4o",
+            object="chat.completion",
+            usage=usage,
+            choices=[
+                Choices(
+                    finish_reason="stop",
+                    index=0,
+                    message=Message(content="Hello!", role="assistant"),
+                )
+            ],
+        )
+
+        # Execute
+        response_usage = LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
+            chat_completion_response=chat_completion_response
+        )
+
+        # Assert: Basic usage should still be transformed, but details should be None
+        assert response_usage.input_tokens == 9
+        assert response_usage.output_tokens == 27
+        assert response_usage.total_tokens == 36
+        assert response_usage.input_tokens_details is None
+        assert response_usage.output_tokens_details is None
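
To exercise just the new test class locally, a standard pytest invocation against the file added above should suffice (path as it appears in the diff; not part of the commit itself):

pytest tests/test_litellm/responses/litellm_completion_transformation/test_litellm_completion_responses.py -k TestUsageTransformation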
