From a14048012210ea7f2f83e8ba946fed46145b68d6 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Wed, 29 Oct 2025 14:05:05 +0530 Subject: [PATCH 1/8] Add proxy support to container apis --- litellm/containers/main.py | 27 ++ litellm/proxy/common_request_processing.py | 8 + litellm/proxy/container_endpoints/__init__.py | 0 .../proxy/container_endpoints/endpoints.py | 406 ++++++++++++++++++ litellm/proxy/proxy_server.py | 2 + litellm/proxy/route_llm_request.py | 18 +- litellm/router.py | 40 +- tests/test_litellm/test_container_router.py | 213 +++++++++ 8 files changed, 712 insertions(+), 2 deletions(-) create mode 100644 litellm/proxy/container_endpoints/__init__.py create mode 100644 litellm/proxy/container_endpoints/endpoints.py create mode 100644 tests/test_litellm/test_container_router.py diff --git a/litellm/containers/main.py b/litellm/containers/main.py index cd183699278d..d9582bac4e25 100644 --- a/litellm/containers/main.py +++ b/litellm/containers/main.py @@ -386,6 +386,15 @@ def list_containers( litellm_call_id: Optional[str] = kwargs.get("litellm_call_id") _is_async = kwargs.pop("async_call", False) is True + # Check for mock response first + mock_response = kwargs.get("mock_response") + if mock_response is not None: + if isinstance(mock_response, str): + mock_response = json.loads(mock_response) + + response = ContainerListResponse(**mock_response) + return response + # get llm provider logic litellm_params = GenericLiteLLMParams(**kwargs) # get provider config @@ -562,6 +571,15 @@ def retrieve_container( litellm_call_id: Optional[str] = kwargs.get("litellm_call_id") _is_async = kwargs.pop("async_call", False) is True + # Check for mock response first + mock_response = kwargs.get("mock_response") + if mock_response is not None: + if isinstance(mock_response, str): + mock_response = json.loads(mock_response) + + response = ContainerObject(**mock_response) + return response + # get llm provider logic litellm_params = GenericLiteLLMParams(**kwargs) # get provider config @@ -730,6 +748,15 @@ def delete_container( litellm_call_id: Optional[str] = kwargs.get("litellm_call_id") _is_async = kwargs.pop("async_call", False) is True + # Check for mock response first + mock_response = kwargs.get("mock_response") + if mock_response is not None: + if isinstance(mock_response, str): + mock_response = json.loads(mock_response) + + response = DeleteContainerResult(**mock_response) + return response + # get llm provider logic litellm_params = GenericLiteLLMParams(**kwargs) # get provider config diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index 108bb39501e8..82e54c7ee9e1 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -321,6 +321,10 @@ async def common_processing_pre_call_logic( "avideo_status", "avideo_content", "avideo_remix", + "acreate_container", + "alist_containers", + "aretrieve_container", + "adelete_container", ], version: Optional[str] = None, user_model: Optional[str] = None, @@ -419,6 +423,10 @@ async def base_process_llm_request( "avideo_status", "avideo_content", "avideo_remix", + "acreate_container", + "alist_containers", + "aretrieve_container", + "adelete_container", ], proxy_logging_obj: ProxyLogging, general_settings: dict, diff --git a/litellm/proxy/container_endpoints/__init__.py b/litellm/proxy/container_endpoints/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/litellm/proxy/container_endpoints/endpoints.py 
b/litellm/proxy/container_endpoints/endpoints.py new file mode 100644 index 000000000000..1a3d41f0ac9e --- /dev/null +++ b/litellm/proxy/container_endpoints/endpoints.py @@ -0,0 +1,406 @@ +#### Container Endpoints ##### + +from typing import Any, Dict +from fastapi import APIRouter, Depends, Request, Response +from fastapi.responses import ORJSONResponse + +from litellm.proxy._types import * +from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth, user_api_key_auth +from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing +from litellm.proxy.common_utils.http_parsing_utils import _read_request_body +from litellm.proxy.common_utils.openai_endpoint_utils import ( + get_custom_llm_provider_from_request_headers, + get_custom_llm_provider_from_request_query, + get_custom_llm_provider_from_request_body, +) + +router = APIRouter() + + +@router.post( + "/v1/containers", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +@router.post( + "/containers", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +async def create_container( + request: Request, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Container creation endpoint for creating new containers. + + Follows the OpenAI Containers API spec: + https://platform.openai.com/docs/api-reference/containers + + Example: + ```bash + curl -X POST "http://localhost:4000/v1/containers" \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "My Container", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + } + }' + ``` + + Or specify provider via header: + ```bash + curl -X POST "http://localhost:4000/v1/containers" \ + -H "Authorization: Bearer sk-1234" \ + -H "custom-llm-provider: azure" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "My Container" + }' + ``` + """ + from litellm.proxy.proxy_server import ( + general_settings, + llm_router, + proxy_config, + proxy_logging_obj, + select_data_generator, + user_api_base, + user_max_tokens, + user_model, + user_request_timeout, + user_temperature, + version, + ) + + # Read request body + data = await _read_request_body(request=request) + + # Extract custom_llm_provider using priority chain + # Priority: headers > query params > request body > default + custom_llm_provider = ( + get_custom_llm_provider_from_request_headers(request=request) + or get_custom_llm_provider_from_request_query(request=request) + or await get_custom_llm_provider_from_request_body(request=request) + or "openai" + ) + + # Add custom_llm_provider to data + data["custom_llm_provider"] = custom_llm_provider + + # Process request using ProxyBaseLLMRequestProcessing + processor = ProxyBaseLLMRequestProcessing(data=data) + try: + return await processor.base_process_llm_request( + request=request, + fastapi_response=fastapi_response, + user_api_key_dict=user_api_key_dict, + route_type="acreate_container", + proxy_logging_obj=proxy_logging_obj, + llm_router=llm_router, + general_settings=general_settings, + proxy_config=proxy_config, + select_data_generator=select_data_generator, + model=None, + user_model=user_model, + user_temperature=user_temperature, + user_request_timeout=user_request_timeout, + user_max_tokens=user_max_tokens, + user_api_base=user_api_base, + version=version, + ) + except Exception as e: + raise await processor._handle_llm_api_exception( + e=e, + 
user_api_key_dict=user_api_key_dict, + proxy_logging_obj=proxy_logging_obj, + version=version, + ) + + +@router.get( + "/v1/containers", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +@router.get( + "/containers", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +async def list_containers( + request: Request, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Container list endpoint for retrieving a list of containers. + + Follows the OpenAI Containers API spec: + https://platform.openai.com/docs/api-reference/containers + + Example: + ```bash + curl -X GET "http://localhost:4000/v1/containers?limit=20&order=desc" \ + -H "Authorization: Bearer sk-1234" + ``` + + Or specify provider via header or query param: + ```bash + curl -X GET "http://localhost:4000/v1/containers?custom_llm_provider=azure" \ + -H "Authorization: Bearer sk-1234" + ``` + """ + from litellm.proxy.proxy_server import ( + general_settings, + llm_router, + proxy_config, + proxy_logging_obj, + select_data_generator, + user_api_base, + user_max_tokens, + user_model, + user_request_timeout, + user_temperature, + version, + ) + + # Read query parameters + query_params = dict(request.query_params) + data: Dict[str, Any] = {"query_params": query_params} + + # Extract custom_llm_provider using priority chain + custom_llm_provider = ( + get_custom_llm_provider_from_request_headers(request=request) + or get_custom_llm_provider_from_request_query(request=request) + or "openai" + ) + + # Add custom_llm_provider to data + data["custom_llm_provider"] = custom_llm_provider + + # Process request using ProxyBaseLLMRequestProcessing + processor = ProxyBaseLLMRequestProcessing(data=data) + try: + return await processor.base_process_llm_request( + request=request, + fastapi_response=fastapi_response, + user_api_key_dict=user_api_key_dict, + route_type="alist_containers", + proxy_logging_obj=proxy_logging_obj, + llm_router=llm_router, + general_settings=general_settings, + proxy_config=proxy_config, + select_data_generator=select_data_generator, + model=None, + user_model=user_model, + user_temperature=user_temperature, + user_request_timeout=user_request_timeout, + user_max_tokens=user_max_tokens, + user_api_base=user_api_base, + version=version, + ) + except Exception as e: + raise await processor._handle_llm_api_exception( + e=e, + user_api_key_dict=user_api_key_dict, + proxy_logging_obj=proxy_logging_obj, + version=version, + ) + + +@router.get( + "/v1/containers/{container_id}", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +@router.get( + "/containers/{container_id}", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +async def retrieve_container( + request: Request, + container_id: str, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Container retrieve endpoint for getting details of a specific container. 
+ + Follows the OpenAI Containers API spec: + https://platform.openai.com/docs/api-reference/containers + + Example: + ```bash + curl -X GET "http://localhost:4000/v1/containers/cntr_123" \ + -H "Authorization: Bearer sk-1234" + ``` + + Or specify provider via header: + ```bash + curl -X GET "http://localhost:4000/v1/containers/cntr_123" \ + -H "Authorization: Bearer sk-1234" \ + -H "custom-llm-provider: azure" + ``` + """ + from litellm.proxy.proxy_server import ( + general_settings, + llm_router, + proxy_config, + proxy_logging_obj, + select_data_generator, + user_api_base, + user_max_tokens, + user_model, + user_request_timeout, + user_temperature, + version, + ) + + # Include container_id in request data + data: Dict[str, Any] = {"container_id": container_id} + + # Extract custom_llm_provider using priority chain + custom_llm_provider = ( + get_custom_llm_provider_from_request_headers(request=request) + or get_custom_llm_provider_from_request_query(request=request) + or "openai" + ) + + # Add custom_llm_provider to data + data["custom_llm_provider"] = custom_llm_provider + + # Process request using ProxyBaseLLMRequestProcessing + processor = ProxyBaseLLMRequestProcessing(data=data) + try: + return await processor.base_process_llm_request( + request=request, + fastapi_response=fastapi_response, + user_api_key_dict=user_api_key_dict, + route_type="aretrieve_container", + proxy_logging_obj=proxy_logging_obj, + llm_router=llm_router, + general_settings=general_settings, + proxy_config=proxy_config, + select_data_generator=select_data_generator, + model=None, + user_model=user_model, + user_temperature=user_temperature, + user_request_timeout=user_request_timeout, + user_max_tokens=user_max_tokens, + user_api_base=user_api_base, + version=version, + ) + except Exception as e: + raise await processor._handle_llm_api_exception( + e=e, + user_api_key_dict=user_api_key_dict, + proxy_logging_obj=proxy_logging_obj, + version=version, + ) + + +@router.delete( + "/v1/containers/{container_id}", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +@router.delete( + "/containers/{container_id}", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +async def delete_container( + request: Request, + container_id: str, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Container delete endpoint for deleting a specific container. 
+ + Follows the OpenAI Containers API spec: + https://platform.openai.com/docs/api-reference/containers + + Example: + ```bash + curl -X DELETE "http://localhost:4000/v1/containers/cntr_123" \ + -H "Authorization: Bearer sk-1234" + ``` + + Or specify provider via header: + ```bash + curl -X DELETE "http://localhost:4000/v1/containers/cntr_123" \ + -H "Authorization: Bearer sk-1234" \ + -H "custom-llm-provider: azure" + ``` + """ + from litellm.proxy.proxy_server import ( + general_settings, + llm_router, + proxy_config, + proxy_logging_obj, + select_data_generator, + user_api_base, + user_max_tokens, + user_model, + user_request_timeout, + user_temperature, + version, + ) + + # Include container_id in request data + data: Dict[str, Any] = {"container_id": container_id} + + # Extract custom_llm_provider using priority chain + custom_llm_provider = ( + get_custom_llm_provider_from_request_headers(request=request) + or get_custom_llm_provider_from_request_query(request=request) + or "openai" + ) + + # Add custom_llm_provider to data + data["custom_llm_provider"] = custom_llm_provider + + # Process request using ProxyBaseLLMRequestProcessing + processor = ProxyBaseLLMRequestProcessing(data=data) + try: + return await processor.base_process_llm_request( + request=request, + fastapi_response=fastapi_response, + user_api_key_dict=user_api_key_dict, + route_type="adelete_container", + proxy_logging_obj=proxy_logging_obj, + llm_router=llm_router, + general_settings=general_settings, + proxy_config=proxy_config, + select_data_generator=select_data_generator, + model=None, + user_model=user_model, + user_temperature=user_temperature, + user_request_timeout=user_request_timeout, + user_max_tokens=user_max_tokens, + user_api_base=user_api_base, + version=version, + ) + except Exception as e: + raise await processor._handle_llm_api_exception( + e=e, + user_api_key_dict=user_api_key_dict, + proxy_logging_obj=proxy_logging_obj, + version=version, + ) + diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index eba2caa4c2a5..5f16327c7801 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -310,6 +310,7 @@ def generate_feedback_box(): from litellm.proxy.middleware.prometheus_auth_middleware import PrometheusAuthMiddleware from litellm.proxy.ocr_endpoints.endpoints import router as ocr_router from litellm.proxy.video_endpoints.endpoints import router as video_router +from litellm.proxy.container_endpoints.endpoints import router as container_router from litellm.proxy.openai_files_endpoints.files_endpoints import ( router as openai_files_router, ) @@ -9904,6 +9905,7 @@ async def get_routes(): app.include_router(rerank_router) app.include_router(ocr_router) app.include_router(video_router) +app.include_router(container_router) app.include_router(search_router) app.include_router(image_router) app.include_router(fine_tuning_router) diff --git a/litellm/proxy/route_llm_request.py b/litellm/proxy/route_llm_request.py index 735cc955ef9d..bb36bc9d2732 100644 --- a/litellm/proxy/route_llm_request.py +++ b/litellm/proxy/route_llm_request.py @@ -32,6 +32,10 @@ "avideo_status": "/videos/{video_id}", "avideo_content": "/videos/{video_id}/content", "avideo_remix": "/videos/{video_id}/remix", + "acreate_container": "/containers", + "alist_containers": "/containers", + "aretrieve_container": "/containers/{container_id}", + "adelete_container": "/containers/{container_id}", } @@ -112,6 +116,10 @@ async def route_request( "avideo_status", "avideo_content", "avideo_remix", + 
"acreate_container", + "alist_containers", + "aretrieve_container", + "adelete_container", ], ): """ @@ -152,6 +160,10 @@ async def route_request( models = [model.strip() for model in data.pop("model").split(",")] return llm_router.abatch_completion(models=models, **data) elif llm_router is not None: + # Skip model-based routing for container operations + if route_type in ["acreate_container", "alist_containers", "aretrieve_container", "adelete_container"]: + return getattr(llm_router, f"{route_type}")(**data) + team_model_name = ( llm_router.map_team_model(data["model"], team_id) if team_id is not None @@ -194,7 +206,11 @@ async def route_request( "alist_input_items", "avector_store_create", "avector_store_search", - "asearch" + "asearch", + "acreate_container", + "alist_containers", + "aretrieve_container", + "adelete_container", ]: # moderation endpoint does not require `model` parameter return getattr(llm_router, f"{route_type}")(**data) diff --git a/litellm/router.py b/litellm/router.py index d772bcdbe454..28ac39ecf280 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -919,6 +919,28 @@ def initialize_router_endpoints(self): self.avideo_remix = self.factory_function(avideo_remix, call_type="avideo_remix") self.video_remix = self.factory_function(video_remix, call_type="video_remix") + # Container routes + ######################################################### + from litellm.containers import ( + acreate_container, + create_container, + alist_containers, + list_containers, + aretrieve_container, + retrieve_container, + adelete_container, + delete_container, + ) + + self.acreate_container = self.factory_function(acreate_container, call_type="acreate_container") + self.create_container = self.factory_function(create_container, call_type="create_container") + self.alist_containers = self.factory_function(alist_containers, call_type="alist_containers") + self.list_containers = self.factory_function(list_containers, call_type="list_containers") + self.aretrieve_container = self.factory_function(aretrieve_container, call_type="aretrieve_container") + self.retrieve_container = self.factory_function(retrieve_container, call_type="retrieve_container") + self.adelete_container = self.factory_function(adelete_container, call_type="adelete_container") + self.delete_container = self.factory_function(delete_container, call_type="delete_container") + def validate_fallbacks(self, fallback_param: Optional[List]): """ Validate the fallbacks parameter. 
@@ -3663,7 +3685,15 @@ def factory_function( "avideo_content", "video_content", "avideo_remix", - "video_remix" + "video_remix", + "acreate_container", + "create_container", + "alist_containers", + "list_containers", + "aretrieve_container", + "retrieve_container", + "adelete_container", + "delete_container" ] = "assistants", ): """ @@ -3687,6 +3717,10 @@ def factory_function( "video_status", "video_content", "video_remix", + "create_container", + "list_containers", + "retrieve_container", + "delete_container", ): def sync_wrapper( @@ -3741,6 +3775,10 @@ async def async_wrapper( "avideo_status", "avideo_content", "avideo_remix", + "acreate_container", + "alist_containers", + "aretrieve_container", + "adelete_container", ): return await self._ageneric_api_call_with_fallbacks( original_function=original_function, diff --git a/tests/test_litellm/test_container_router.py b/tests/test_litellm/test_container_router.py new file mode 100644 index 000000000000..cc2266ad32be --- /dev/null +++ b/tests/test_litellm/test_container_router.py @@ -0,0 +1,213 @@ +""" +Test suite for Container API router functionality. +Tests that the router method gets called correctly for container operations. +""" + +import pytest +from unittest.mock import Mock, patch, MagicMock +import litellm + + +class TestContainerRouter: + """Test suite for Container API router functionality""" + + def setup_method(self): + """Setup test fixtures""" + self.container_name = "Test Container" + self.container_id = "cntr_123456789" + + @patch("litellm.containers.main.base_llm_http_handler") + def test_create_container_router_call_mock(self, mock_handler): + """Test that create_container calls the router method with mock response""" + # Setup mock response + mock_response = { + "id": self.container_id, + "object": "container", + "created_at": 1747857508, + "status": "running", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + }, + "last_active_at": 1747857508, + "name": self.container_name + } + + # Configure the mock handler + mock_handler.container_create_handler.return_value = mock_response + + # Call the create_container function with mock response + result = litellm.create_container( + name=self.container_name, + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a ContainerObject with the expected data + assert result.id == mock_response["id"] + assert result.object == mock_response["object"] + assert result.name == mock_response["name"] + assert result.status == mock_response["status"] + assert result.created_at == mock_response["created_at"] + + @patch("litellm.containers.main.base_llm_http_handler") + def test_list_containers_router_call_mock(self, mock_handler): + """Test that list_containers calls the router method with mock response""" + # Setup mock response + mock_response = { + "object": "list", + "data": [ + { + "id": "cntr_123", + "object": "container", + "created_at": 1747857508, + "status": "running", + "name": "Container 1" + }, + { + "id": "cntr_456", + "object": "container", + "created_at": 1747857509, + "status": "running", + "name": "Container 2" + } + ], + "has_more": False + } + + # Configure the mock handler + mock_handler.container_list_handler.return_value = mock_response + + # Call the list_containers function with mock response + result = litellm.list_containers( + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a ContainerListResponse with the expected data + assert result.object == "list" + assert 
len(result.data) == 2 + assert result.data[0].id == "cntr_123" + assert result.data[1].id == "cntr_456" + assert result.has_more is False + + @patch("litellm.containers.main.base_llm_http_handler") + def test_retrieve_container_router_call_mock(self, mock_handler): + """Test that retrieve_container calls the router method with mock response""" + # Setup mock response + mock_response = { + "id": self.container_id, + "object": "container", + "created_at": 1747857508, + "status": "running", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + }, + "last_active_at": 1747857508, + "name": self.container_name + } + + # Configure the mock handler + mock_handler.container_retrieve_handler.return_value = mock_response + + # Call the retrieve_container function with mock response + result = litellm.retrieve_container( + container_id=self.container_id, + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a ContainerObject with the expected data + assert result.id == mock_response["id"] + assert result.object == mock_response["object"] + assert result.name == mock_response["name"] + assert result.status == mock_response["status"] + + @patch("litellm.containers.main.base_llm_http_handler") + def test_delete_container_router_call_mock(self, mock_handler): + """Test that delete_container calls the router method with mock response""" + # Setup mock response + mock_response = { + "id": self.container_id, + "object": "container.deleted", + "deleted": True + } + + # Configure the mock handler + mock_handler.container_delete_handler.return_value = mock_response + + # Call the delete_container function with mock response + result = litellm.delete_container( + container_id=self.container_id, + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a DeleteContainerResult with the expected data + assert result.id == mock_response["id"] + assert result.object == mock_response["object"] + assert result.deleted is True + + @pytest.mark.asyncio + @patch("litellm.containers.main.base_llm_http_handler") + async def test_acreate_container_router_call_mock(self, mock_handler): + """Test that acreate_container (async) calls the router method with mock response""" + # Setup mock response + mock_response = { + "id": self.container_id, + "object": "container", + "created_at": 1747857508, + "status": "running", + "name": self.container_name + } + + # Configure the mock handler + mock_handler.container_create_handler.return_value = mock_response + + # Call the async create_container function with mock response + result = await litellm.acreate_container( + name=self.container_name, + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a ContainerObject with the expected data + assert result.id == mock_response["id"] + assert result.object == mock_response["object"] + assert result.name == mock_response["name"] + assert result.status == mock_response["status"] + + @pytest.mark.asyncio + @patch("litellm.containers.main.base_llm_http_handler") + async def test_alist_containers_router_call_mock(self, mock_handler): + """Test that alist_containers (async) calls the router method with mock response""" + # Setup mock response + mock_response = { + "object": "list", + "data": [ + { + "id": "cntr_123", + "object": "container", + "created_at": 1747857508, + "status": "running", + "name": "Container 1" + } + ], + "has_more": False + } + + # Configure the mock handler + 
mock_handler.container_list_handler.return_value = mock_response + + # Call the async list_containers function with mock response + result = await litellm.alist_containers( + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a ContainerListResponse with the expected data + assert result.object == "list" + assert len(result.data) == 1 + assert result.data[0].id == "cntr_123" + From 627e8e0e3692cdaf9f36d70a6e3a4792ae3c4b95 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 15:37:11 +0530 Subject: [PATCH 2/8] Add logging support --- litellm/containers/main.py | 3 --- litellm/litellm_core_utils/litellm_logging.py | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/litellm/containers/main.py b/litellm/containers/main.py index d9582bac4e25..c499f945d68e 100644 --- a/litellm/containers/main.py +++ b/litellm/containers/main.py @@ -21,9 +21,6 @@ from litellm.types.utils import CallTypes from litellm.utils import ProviderConfigManager, client -# Default model for container operations - can be any provider that supports containers -DEFAULT_CONTAINER_ENDPOINT_MODEL = "openai/gpt-4" - __all__ = [ "acreate_container", "adelete_container", diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index e4e675023900..dc98f0b7dd6e 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -116,6 +116,7 @@ Usage, ) from litellm.types.videos.main import VideoObject +from litellm.types.containers.main import ContainerObject from litellm.utils import _get_base_model_from_metadata, executor, print_verbose from ..integrations.argilla import ArgillaLogger @@ -1622,6 +1623,7 @@ def _is_recognized_call_type_for_logging( or isinstance(logging_result, dict) and logging_result.get("object") == "vector_store.search_results.page" or isinstance(logging_result, VideoObject) + or isinstance(logging_result, ContainerObject) or (self.call_type == CallTypes.call_mcp_tool.value) ): return True From 4dc1807b8ebf4bf7d8a1eb74188d911465489ecc Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 22:49:27 +0530 Subject: [PATCH 3/8] prometheus metric measures how often s3_v2 is failing --- .../integrations/prometheus.py | 18 ++ litellm/integrations/custom_logger.py | 31 ++ litellm/integrations/s3_v2.py | 4 +- litellm/litellm_core_utils/litellm_logging.py | 49 ++- litellm/llms/custom_httpx/llm_http_handler.py | 279 ++++++++++++++++-- prometheus.yml | 2 +- .../integrations/test_prometheus.py | 83 ++++++ 7 files changed, 437 insertions(+), 29 deletions(-) diff --git a/enterprise/litellm_enterprise/integrations/prometheus.py b/enterprise/litellm_enterprise/integrations/prometheus.py index 49037678df94..c84af8bd4bc3 100644 --- a/enterprise/litellm_enterprise/integrations/prometheus.py +++ b/enterprise/litellm_enterprise/integrations/prometheus.py @@ -298,6 +298,13 @@ def __init__( self.get_labels_for_metric("litellm_deployment_failed_fallbacks"), ) + # Callback Logging Failure Metrics + self.litellm_callback_logging_failures_metric = self._counter_factory( + name="litellm_callback_logging_failures_metric", + documentation="Total number of failures when emitting logs to callbacks (e.g. 
s3_v2, langfuse, etc)", + labelnames=["callback_name"], + ) + self.litellm_llm_api_failed_requests_metric = self._counter_factory( name="litellm_llm_api_failed_requests_metric", documentation="deprecated - use litellm_proxy_failed_requests_metric", @@ -1723,6 +1730,17 @@ def increment_deployment_cooled_down( litellm_model_name, model_id, api_base, api_provider, exception_status ).inc() + def increment_callback_logging_failure( + self, + callback_name: str, + ): + """ + Increment metric when logging to a callback fails (e.g., s3_v2, langfuse, etc.) + """ + self.litellm_callback_logging_failures_metric.labels( + callback_name=callback_name + ).inc() + def track_provider_remaining_budget( self, provider: str, spend: float, budget_limit: float ): diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index d631c4668658..f5985524f684 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -567,3 +567,34 @@ async def get_proxy_server_request_from_cold_storage_with_object_key( Get the proxy server request from cold storage using the object key directly. """ pass + + def handle_callback_failure(self, callback_name: str): + """ + Handle callback logging failures by incrementing Prometheus metrics. + + Call this method in exception handlers within your callback when logging fails. + """ + try: + try: + from litellm_enterprise.integrations.prometheus import PrometheusLogger + except ImportError: + PrometheusLogger = None + + if PrometheusLogger is None: + return + + import litellm + + all_callbacks = [] + all_callbacks.extend(litellm.callbacks or []) + all_callbacks.extend(litellm._async_success_callback or []) + all_callbacks.extend(litellm.success_callback or []) + + for callback_obj in all_callbacks: + if hasattr(callback_obj, 'increment_callback_logging_failure'): + callback_obj.increment_callback_logging_failure(callback_name=callback_name) + break + + except Exception as e: + from litellm._logging import verbose_logger + verbose_logger.debug(f"Error in handle_callback_failure: {str(e)}") \ No newline at end of file diff --git a/litellm/integrations/s3_v2.py b/litellm/integrations/s3_v2.py index a65500c80dcc..c702096d18e6 100644 --- a/litellm/integrations/s3_v2.py +++ b/litellm/integrations/s3_v2.py @@ -239,7 +239,7 @@ async def _async_log_event_base(self, kwargs, response_obj, start_time, end_time ) except Exception as e: verbose_logger.exception(f"s3 Layer Error - {str(e)}") - pass + self.handle_callback_failure(callback_name="S3Logger") async def async_upload_data_to_s3( self, batch_logging_element: s3BatchLoggingElement @@ -323,6 +323,7 @@ async def async_upload_data_to_s3( response.raise_for_status() except Exception as e: verbose_logger.exception(f"Error uploading to s3: {str(e)}") + self.handle_callback_failure(callback_name="S3Logger") async def async_send_batch(self): """ @@ -471,6 +472,7 @@ def upload_data_to_s3(self, batch_logging_element: s3BatchLoggingElement): response.raise_for_status() except Exception as e: verbose_logger.exception(f"Error uploading to s3: {str(e)}") + self.handle_callback_failure(callback_name="S3Logger") async def _download_object_from_s3(self, s3_object_key: str) -> Optional[dict]: """ diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index dc98f0b7dd6e..113c54e9f5ef 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -1,6 +1,7 @@ # What is this? 
## Common Utility file for Logging handler # Logging function -> log the exact model details + what's being sent | Non-Blocking +import asyncio import copy import datetime import json @@ -2159,6 +2160,11 @@ def success_handler( # noqa: PLR0915 ) if capture_exception: # log this error to sentry for debugging capture_exception(e) + # Track callback logging failures in Prometheus + try: + self._handle_callback_failure(callback=callback) + except Exception: + pass except Exception as e: verbose_logger.exception( "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}".format( @@ -2460,12 +2466,43 @@ async def async_success_handler( # noqa: PLR0915 end_time=end_time, print_verbose=print_verbose, ) - except Exception: + except Exception as e: verbose_logger.error( f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" ) + self._handle_callback_failure(callback=callback) pass + def _handle_callback_failure(self, callback: Any): + """ + Handle callback logging failures by incrementing Prometheus metrics. + + Works for both sync and async contexts since Prometheus counter increment is synchronous. + + Args: + callback: The callback that failed + """ + try: + callback_name = self._get_callback_name(callback) + + if PrometheusLogger is None: + return + + all_callbacks = [] + all_callbacks.extend(litellm.callbacks or []) + all_callbacks.extend(litellm._async_success_callback or []) + all_callbacks.extend(litellm.success_callback or []) + + for callback_obj in all_callbacks: + if hasattr(callback_obj, 'increment_callback_logging_failure'): + callback_obj.increment_callback_logging_failure(callback_name=callback_name) + break # Only increment once + + except Exception as e: + verbose_logger.debug( + f"Error in _handle_callback_failure: {str(e)}" + ) + def _failure_handler_helper_fn( self, exception, traceback_exception, start_time=None, end_time=None ): @@ -2801,6 +2838,10 @@ async def async_failure_handler( str(e), callback ) ) + # Track callback logging failures in Prometheus + asyncio.create_task( + self._handle_callback_failure(callback=callback) + ) def _get_trace_id(self, service_name: Literal["langfuse"]) -> Optional[str]: """ @@ -2933,11 +2974,15 @@ def _get_callback_name(self, cb) -> str: Helper to get the name of a callback function Args: - cb: The callback function/string to get the name of + cb: The callback object/function/string to get the name of Returns: The name of the callback """ + if isinstance(cb, str): + return cb + if hasattr(cb, "__class__"): + return cb.__class__.__name__ if hasattr(cb, "__name__"): return cb.__name__ if hasattr(cb, "__func__"): diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index 0ab72604c9d9..f37c96f8401f 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -2044,13 +2044,97 @@ def response_api_handler( custom_llm_provider=custom_llm_provider, ) else: - # For non-streaming requests - response = sync_httpx_client.post( - url=api_base, - headers=headers, - json=data, - timeout=timeout - or float(response_api_optional_request_params.get("timeout", 0)), + # For non-streaming requests, return a mock OpenAI compatible response and relevant headers + class MockResponse: + def __init__(self, json_data, status_code=200, headers=None): + self._json_data = json_data + self.status_code = status_code + self.headers = headers or {"content-type": "application/json"} + def 
json(self): + return self._json_data + @property + def text(self): + import json as _json + return _json.dumps(self._json_data) + def raise_for_status(self): + pass + + mock_response_json = { + "id": "resp_67ccd2bed1ec8190b14f964abc0542670bb6a6b452d3795b", + "object": "response", + "created_at": 1741476542, + "status": "completed", + "error": None, + "incomplete_details": None, + "instructions": None, + "max_output_tokens": None, + "model": "gpt-4.1-2025-04-14", + "output": [ + { + "type": "message", + "id": "msg_67ccd2bf17f0819081ff3bb2cf6508e60bb6a6b452d3795b", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": ( + "In a peaceful grove beneath a silver moon, a unicorn named Lumina discovered a hidden pool that reflected the stars. " + "As she dipped her horn into the water, the pool began to shimmer, revealing a pathway to a magical realm of endless night skies. " + "Filled with wonder, Lumina whispered a wish for all who dream to find their own hidden magic, and as she glanced back, her hoofprints sparkled like stardust." + ), + "annotations": [] + } + ] + } + ], + "parallel_tool_calls": True, + "previous_response_id": None, + "reasoning": { + "effort": None, + "summary": None + }, + "store": True, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 36, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 87, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 123 + }, + "user": None, + "metadata": {} + } + + mock_response_headers = { + "content-type": "application/json", + "x-ratelimit-limit-requests": "1000", + "x-ratelimit-remaining-requests": "999", + "x-ratelimit-reset-requests": "60", + "x-ratelimit-limit-tokens": "100000", + "x-ratelimit-remaining-tokens": "99999", + "x-ratelimit-reset-tokens": "60", + "OpenAI-Organization": "org-mockorganization123", + "Request-Id": "req-mockrequestid123" + } + response = MockResponse( + json_data=mock_response_json, + status_code=200, + headers=mock_response_headers ) except Exception as e: raise self._handle_error( @@ -2168,12 +2252,103 @@ async def async_response_api_handler( ) else: # For non-streaming, proceed as before - response = await async_httpx_client.post( - url=api_base, - headers=headers, - json=data, - timeout=timeout - or float(response_api_optional_request_params.get("timeout", 0)), + # response = await async_httpx_client.post( + # url=api_base, + # headers=headers, + # json=data, + # timeout=timeout + # or float(response_api_optional_request_params.get("timeout", 0)), + # ) + class MockResponse: + def __init__(self, json_data, status_code=200, headers=None): + self._json_data = json_data + self.status_code = status_code + self.headers = headers or {"content-type": "application/json"} + def json(self): + return self._json_data + @property + def text(self): + import json as _json + return _json.dumps(self._json_data) + def raise_for_status(self): + pass + + mock_response_json = { + "id": "resp_67ccd2bed1ec8190b14f964abc0542670bb6a6b452d3795b", + "object": "response", + "created_at": 1741476542, + "status": "completed", + "error": None, + "incomplete_details": None, + "instructions": None, + "max_output_tokens": None, + "model": "gpt-4.1-2025-04-14", + "output": [ + { + "type": "message", + "id": "msg_67ccd2bf17f0819081ff3bb2cf6508e60bb6a6b452d3795b", + "status": "completed", + "role": "assistant", + 
"content": [ + { + "type": "output_text", + "text": ( + "In a peaceful grove beneath a silver moon, a unicorn named Lumina discovered a hidden pool that reflected the stars. " + "As she dipped her horn into the water, the pool began to shimmer, revealing a pathway to a magical realm of endless night skies. " + "Filled with wonder, Lumina whispered a wish for all who dream to find their own hidden magic, and as she glanced back, her hoofprints sparkled like stardust." + ), + "annotations": [] + } + ] + } + ], + "parallel_tool_calls": True, + "previous_response_id": None, + "reasoning": { + "effort": None, + "summary": None + }, + "store": True, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 36, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 87, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 123 + }, + "user": None, + "metadata": {} + } + + mock_response_headers = { + "content-type": "application/json", + "x-ratelimit-limit-requests": "1000", + "x-ratelimit-remaining-requests": "999", + "x-ratelimit-reset-requests": "60", + "x-ratelimit-limit-tokens": "100000", + "x-ratelimit-remaining-tokens": "99999", + "x-ratelimit-reset-tokens": "60", + "OpenAI-Organization": "org-mockorganization123", + "Request-Id": "req-mockrequestid123" + } + response = MockResponse( + json_data=mock_response_json, + status_code=200, + headers=mock_response_headers ) except Exception as e: @@ -5004,12 +5179,37 @@ def container_create_handler( ) try: - response = sync_httpx_client.post( - url=api_base, - headers=headers, - json=data, - timeout=timeout, - ) + # Mock httpx response for container create + import types + class MockResponse: + def __init__(self, json_data, status_code=200): + self._json_data = json_data + self.status_code = status_code + self.headers = {"content-type": "application/json"} + def json(self): + return self._json_data + @property + def text(self): + import json + return json.dumps(self._json_data) + def raise_for_status(self): + pass + + import uuid + container_uuid = str(uuid.uuid4()) + + response = MockResponse({ + "id": container_uuid, + "object": "container", + "created_at": 1747857508, + "status": "running", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + }, + "last_active_at": 1747857508, + "name": "My Container" + }) return container_provider_config.transform_container_create_response( raw_response=response, @@ -5080,13 +5280,42 @@ async def async_container_create_handler( ) try: - response = await async_httpx_client.post( - url=api_base, - headers=headers, - json=data, - timeout=timeout, - ) + # response = await async_httpx_client.post( + # url=api_base, + # headers=headers, + # json=data, + # timeout=timeout, + # ) + class MockResponse: + def __init__(self, json_data, status_code=200): + self._json_data = json_data + self.status_code = status_code + self.headers = {"content-type": "application/json"} + def json(self): + return self._json_data + @property + def text(self): + import json + return json.dumps(self._json_data) + def raise_for_status(self): + pass + + import uuid + container_uuid = str(uuid.uuid4()) + + response = MockResponse({ + "id": container_uuid, + "object": "container", + "created_at": 1747857508, + "status": "running", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + }, + "last_active_at": 1747857508, + "name": "My Container" + }) 
return container_provider_config.transform_container_create_response( raw_response=response, logging_obj=logging_obj, diff --git a/prometheus.yml b/prometheus.yml index 5cb4f90d787d..7f7f583a7f32 100644 --- a/prometheus.yml +++ b/prometheus.yml @@ -4,4 +4,4 @@ global: scrape_configs: - job_name: 'litellm' static_configs: - - targets: ['litellm:4000'] # Assuming Litellm exposes metrics at port 4000 + - targets: ['host.docker.internal:4000'] # LiteLLM proxy on host machine diff --git a/tests/enterprise/litellm_enterprise/integrations/test_prometheus.py b/tests/enterprise/litellm_enterprise/integrations/test_prometheus.py index 82f4c4bd2985..7e01bda1f264 100644 --- a/tests/enterprise/litellm_enterprise/integrations/test_prometheus.py +++ b/tests/enterprise/litellm_enterprise/integrations/test_prometheus.py @@ -838,3 +838,86 @@ async def test_spend_counter_semantics(mock_prometheus_logger): # ============================================================================== # END SEMANTIC VALIDATION TESTS # ============================================================================== + + +# ============================================================================== +# CALLBACK FAILURE METRICS TESTS +# ============================================================================== + +def test_callback_failure_metric_increments(prometheus_logger): + """ + Test that the callback logging failure metric can be incremented. + + This tests the litellm_callback_logging_failures_metric counter. + """ + # Get initial value + initial_value = 0 + try: + initial_value = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name="S3Logger" + )._value.get() + except Exception: + initial_value = 0 + + # Increment the metric + prometheus_logger.increment_callback_logging_failure(callback_name="S3Logger") + + # Verify it incremented by 1 + current_value = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name="S3Logger" + )._value.get() + + assert current_value == initial_value + 1, \ + f"Expected callback failure metric to increment by 1, got {current_value - initial_value}" + + # Increment again for different callback + prometheus_logger.increment_callback_logging_failure(callback_name="LangFuseLogger") + + langfuse_value = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name="LangFuseLogger" + )._value.get() + + assert langfuse_value == 1, "LangFuseLogger metric should be 1" + + # S3Logger should still be initial + 1 + s3_value = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name="S3Logger" + )._value.get() + assert s3_value == initial_value + 1, "S3Logger metric should not change" + + print(f"✓ Callback failure metric test passed: S3Logger={s3_value}, LangFuseLogger={langfuse_value}") + + +def test_callback_failure_metric_different_callbacks(prometheus_logger): + """ + Test that different callbacks are tracked separately with their own labels. 
+ """ + callbacks_to_test = ["S3Logger", "LangFuseLogger", "DataDogLogger", "CustomCallback"] + + for callback_name in callbacks_to_test: + # Get initial value + initial = 0 + try: + initial = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name=callback_name + )._value.get() + except Exception: + initial = 0 + + # Increment + prometheus_logger.increment_callback_logging_failure(callback_name=callback_name) + + # Verify incremented + current = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name=callback_name + )._value.get() + + assert current == initial + 1, \ + f"{callback_name} should increment by 1" + + print(f"✓ Multiple callback tracking test passed for {len(callbacks_to_test)} callbacks") + + +# ============================================================================== +# END CALLBACK FAILURE METRICS TESTS +# ============================================================================== From 3885514198321483ecc7c55f2bb8f5d2046a8268 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 22:58:45 +0530 Subject: [PATCH 4/8] remove not needed files --- litellm/integrations/custom_logger.py | 8 - litellm/litellm_core_utils/litellm_logging.py | 2 +- litellm/llms/custom_httpx/llm_http_handler.py | 281 ++---------------- 3 files changed, 27 insertions(+), 264 deletions(-) diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index f5985524f684..b6542c7456b2 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -575,14 +575,6 @@ def handle_callback_failure(self, callback_name: str): Call this method in exception handlers within your callback when logging fails. """ try: - try: - from litellm_enterprise.integrations.prometheus import PrometheusLogger - except ImportError: - PrometheusLogger = None - - if PrometheusLogger is None: - return - import litellm all_callbacks = [] diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 113c54e9f5ef..b1b2b0610ef8 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -2466,7 +2466,7 @@ async def async_success_handler( # noqa: PLR0915 end_time=end_time, print_verbose=print_verbose, ) - except Exception as e: + except Exception: verbose_logger.error( f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" ) diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index f37c96f8401f..5af63a9c9367 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -2044,97 +2044,13 @@ def response_api_handler( custom_llm_provider=custom_llm_provider, ) else: - # For non-streaming requests, return a mock OpenAI compatible response and relevant headers - class MockResponse: - def __init__(self, json_data, status_code=200, headers=None): - self._json_data = json_data - self.status_code = status_code - self.headers = headers or {"content-type": "application/json"} - def json(self): - return self._json_data - @property - def text(self): - import json as _json - return _json.dumps(self._json_data) - def raise_for_status(self): - pass - - mock_response_json = { - "id": "resp_67ccd2bed1ec8190b14f964abc0542670bb6a6b452d3795b", - "object": "response", - "created_at": 1741476542, - "status": "completed", - "error": None, - "incomplete_details": None, 
- "instructions": None, - "max_output_tokens": None, - "model": "gpt-4.1-2025-04-14", - "output": [ - { - "type": "message", - "id": "msg_67ccd2bf17f0819081ff3bb2cf6508e60bb6a6b452d3795b", - "status": "completed", - "role": "assistant", - "content": [ - { - "type": "output_text", - "text": ( - "In a peaceful grove beneath a silver moon, a unicorn named Lumina discovered a hidden pool that reflected the stars. " - "As she dipped her horn into the water, the pool began to shimmer, revealing a pathway to a magical realm of endless night skies. " - "Filled with wonder, Lumina whispered a wish for all who dream to find their own hidden magic, and as she glanced back, her hoofprints sparkled like stardust." - ), - "annotations": [] - } - ] - } - ], - "parallel_tool_calls": True, - "previous_response_id": None, - "reasoning": { - "effort": None, - "summary": None - }, - "store": True, - "temperature": 1.0, - "text": { - "format": { - "type": "text" - } - }, - "tool_choice": "auto", - "tools": [], - "top_p": 1.0, - "truncation": "disabled", - "usage": { - "input_tokens": 36, - "input_tokens_details": { - "cached_tokens": 0 - }, - "output_tokens": 87, - "output_tokens_details": { - "reasoning_tokens": 0 - }, - "total_tokens": 123 - }, - "user": None, - "metadata": {} - } - - mock_response_headers = { - "content-type": "application/json", - "x-ratelimit-limit-requests": "1000", - "x-ratelimit-remaining-requests": "999", - "x-ratelimit-reset-requests": "60", - "x-ratelimit-limit-tokens": "100000", - "x-ratelimit-remaining-tokens": "99999", - "x-ratelimit-reset-tokens": "60", - "OpenAI-Organization": "org-mockorganization123", - "Request-Id": "req-mockrequestid123" - } - response = MockResponse( - json_data=mock_response_json, - status_code=200, - headers=mock_response_headers + # For non-streaming requests + response = sync_httpx_client.post( + url=api_base, + headers=headers, + json=data, + timeout=timeout + or float(response_api_optional_request_params.get("timeout", 0)), ) except Exception as e: raise self._handle_error( @@ -2252,103 +2168,12 @@ async def async_response_api_handler( ) else: # For non-streaming, proceed as before - # response = await async_httpx_client.post( - # url=api_base, - # headers=headers, - # json=data, - # timeout=timeout - # or float(response_api_optional_request_params.get("timeout", 0)), - # ) - class MockResponse: - def __init__(self, json_data, status_code=200, headers=None): - self._json_data = json_data - self.status_code = status_code - self.headers = headers or {"content-type": "application/json"} - def json(self): - return self._json_data - @property - def text(self): - import json as _json - return _json.dumps(self._json_data) - def raise_for_status(self): - pass - - mock_response_json = { - "id": "resp_67ccd2bed1ec8190b14f964abc0542670bb6a6b452d3795b", - "object": "response", - "created_at": 1741476542, - "status": "completed", - "error": None, - "incomplete_details": None, - "instructions": None, - "max_output_tokens": None, - "model": "gpt-4.1-2025-04-14", - "output": [ - { - "type": "message", - "id": "msg_67ccd2bf17f0819081ff3bb2cf6508e60bb6a6b452d3795b", - "status": "completed", - "role": "assistant", - "content": [ - { - "type": "output_text", - "text": ( - "In a peaceful grove beneath a silver moon, a unicorn named Lumina discovered a hidden pool that reflected the stars. " - "As she dipped her horn into the water, the pool began to shimmer, revealing a pathway to a magical realm of endless night skies. 
" - "Filled with wonder, Lumina whispered a wish for all who dream to find their own hidden magic, and as she glanced back, her hoofprints sparkled like stardust." - ), - "annotations": [] - } - ] - } - ], - "parallel_tool_calls": True, - "previous_response_id": None, - "reasoning": { - "effort": None, - "summary": None - }, - "store": True, - "temperature": 1.0, - "text": { - "format": { - "type": "text" - } - }, - "tool_choice": "auto", - "tools": [], - "top_p": 1.0, - "truncation": "disabled", - "usage": { - "input_tokens": 36, - "input_tokens_details": { - "cached_tokens": 0 - }, - "output_tokens": 87, - "output_tokens_details": { - "reasoning_tokens": 0 - }, - "total_tokens": 123 - }, - "user": None, - "metadata": {} - } - - mock_response_headers = { - "content-type": "application/json", - "x-ratelimit-limit-requests": "1000", - "x-ratelimit-remaining-requests": "999", - "x-ratelimit-reset-requests": "60", - "x-ratelimit-limit-tokens": "100000", - "x-ratelimit-remaining-tokens": "99999", - "x-ratelimit-reset-tokens": "60", - "OpenAI-Organization": "org-mockorganization123", - "Request-Id": "req-mockrequestid123" - } - response = MockResponse( - json_data=mock_response_json, - status_code=200, - headers=mock_response_headers + response = await async_httpx_client.post( + url=api_base, + headers=headers, + json=data, + timeout=timeout + or float(response_api_optional_request_params.get("timeout", 0)), ) except Exception as e: @@ -5179,37 +5004,12 @@ def container_create_handler( ) try: - # Mock httpx response for container create - import types - class MockResponse: - def __init__(self, json_data, status_code=200): - self._json_data = json_data - self.status_code = status_code - self.headers = {"content-type": "application/json"} - def json(self): - return self._json_data - @property - def text(self): - import json - return json.dumps(self._json_data) - def raise_for_status(self): - pass - - import uuid - container_uuid = str(uuid.uuid4()) - - response = MockResponse({ - "id": container_uuid, - "object": "container", - "created_at": 1747857508, - "status": "running", - "expires_after": { - "anchor": "last_active_at", - "minutes": 20 - }, - "last_active_at": 1747857508, - "name": "My Container" - }) + response = sync_httpx_client.post( + url=api_base, + headers=headers, + json=data, + timeout=timeout, + ) return container_provider_config.transform_container_create_response( raw_response=response, @@ -5280,42 +5080,13 @@ async def async_container_create_handler( ) try: - # response = await async_httpx_client.post( - # url=api_base, - # headers=headers, - # json=data, - # timeout=timeout, - # ) - class MockResponse: - def __init__(self, json_data, status_code=200): - self._json_data = json_data - self.status_code = status_code - self.headers = {"content-type": "application/json"} - def json(self): - return self._json_data - @property - def text(self): - import json - return json.dumps(self._json_data) - def raise_for_status(self): - pass + response = await async_httpx_client.post( + url=api_base, + headers=headers, + json=data, + timeout=timeout, + ) - - import uuid - container_uuid = str(uuid.uuid4()) - - response = MockResponse({ - "id": container_uuid, - "object": "container", - "created_at": 1747857508, - "status": "running", - "expires_after": { - "anchor": "last_active_at", - "minutes": 20 - }, - "last_active_at": 1747857508, - "name": "My Container" - }) return container_provider_config.transform_container_create_response( raw_response=response, logging_obj=logging_obj, @@ -6606,4 
+6377,4 @@ async def async_text_to_speech_handler( model=model, raw_response=response, logging_obj=logging_obj, - ) + ) \ No newline at end of file From c18cbf0f38989e07ba91b2fa7576e68691526e5f Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 22:59:26 +0530 Subject: [PATCH 5/8] remove not needed files --- prometheus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus.yml b/prometheus.yml index 7f7f583a7f32..5cb4f90d787d 100644 --- a/prometheus.yml +++ b/prometheus.yml @@ -4,4 +4,4 @@ global: scrape_configs: - job_name: 'litellm' static_configs: - - targets: ['host.docker.internal:4000'] # LiteLLM proxy on host machine + - targets: ['litellm:4000'] # Assuming Litellm exposes metrics at port 4000 From 0c6db47b4529f6cf9e6f131ee80de6d26631c242 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 23:00:48 +0530 Subject: [PATCH 6/8] remove not needed files --- litellm/litellm_core_utils/litellm_logging.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index b1b2b0610ef8..b0a011cd6d21 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -2485,9 +2485,6 @@ def _handle_callback_failure(self, callback: Any): try: callback_name = self._get_callback_name(callback) - if PrometheusLogger is None: - return - all_callbacks = [] all_callbacks.extend(litellm.callbacks or []) all_callbacks.extend(litellm._async_success_callback or []) From ffaaa14c71645b11a220423d80c50d0982aee179 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 23:14:11 +0530 Subject: [PATCH 7/8] fix mypy errors --- litellm/integrations/custom_logger.py | 8 ++++---- litellm/litellm_core_utils/litellm_logging.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index b6542c7456b2..f93925513e66 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -578,13 +578,13 @@ def handle_callback_failure(self, callback_name: str): import litellm all_callbacks = [] - all_callbacks.extend(litellm.callbacks or []) - all_callbacks.extend(litellm._async_success_callback or []) - all_callbacks.extend(litellm.success_callback or []) + all_callbacks.extend(litellm.callbacks or []) # type: ignore + all_callbacks.extend(litellm._async_success_callback or []) # type: ignore + all_callbacks.extend(litellm.success_callback or []) # type: ignore for callback_obj in all_callbacks: if hasattr(callback_obj, 'increment_callback_logging_failure'): - callback_obj.increment_callback_logging_failure(callback_name=callback_name) + callback_obj.increment_callback_logging_failure(callback_name=callback_name) # type: ignore break except Exception as e: diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index b0a011cd6d21..0793dff57eba 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -2486,13 +2486,13 @@ def _handle_callback_failure(self, callback: Any): callback_name = self._get_callback_name(callback) all_callbacks = [] - all_callbacks.extend(litellm.callbacks or []) - all_callbacks.extend(litellm._async_success_callback or []) - all_callbacks.extend(litellm.success_callback or []) + all_callbacks.extend(litellm.callbacks or []) # type: ignore + 
all_callbacks.extend(litellm._async_success_callback or []) # type: ignore + all_callbacks.extend(litellm.success_callback or []) # type: ignore for callback_obj in all_callbacks: if hasattr(callback_obj, 'increment_callback_logging_failure'): - callback_obj.increment_callback_logging_failure(callback_name=callback_name) + callback_obj.increment_callback_logging_failure(callback_name=callback_name) # type: ignore break # Only increment once except Exception as e: From 5263a2f284a1f14e2823f3e900dcc2882c1bb5c5 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Mon, 3 Nov 2025 21:39:14 +0530 Subject: [PATCH 8/8] Use logging_callback_manager to get all the callbacks --- litellm/integrations/custom_logger.py | 18 +++++++++++------- litellm/litellm_core_utils/litellm_logging.py | 5 +---- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index f93925513e66..006a95e61c8c 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -576,17 +576,21 @@ def handle_callback_failure(self, callback_name: str): """ try: import litellm + from litellm._logging import verbose_logger + + all_callbacks = litellm.logging_callback_manager._get_all_callbacks() - all_callbacks = [] - all_callbacks.extend(litellm.callbacks or []) # type: ignore - all_callbacks.extend(litellm._async_success_callback or []) # type: ignore - all_callbacks.extend(litellm.success_callback or []) # type: ignore - for callback_obj in all_callbacks: if hasattr(callback_obj, 'increment_callback_logging_failure'): + verbose_logger.debug(f"Incrementing callback failure metric for {callback_name}") callback_obj.increment_callback_logging_failure(callback_name=callback_name) # type: ignore - break + return + + verbose_logger.debug( + f"No callback with increment_callback_logging_failure method found for {callback_name}. " + "Ensure 'prometheus' is in your callbacks config." + ) except Exception as e: from litellm._logging import verbose_logger - verbose_logger.debug(f"Error in handle_callback_failure: {str(e)}") \ No newline at end of file + verbose_logger.debug(f"Error in handle_callback_failure for {callback_name}: {str(e)}") \ No newline at end of file diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 0793dff57eba..fefc6edc097f 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -2485,10 +2485,7 @@ def _handle_callback_failure(self, callback: Any): try: callback_name = self._get_callback_name(callback) - all_callbacks = [] - all_callbacks.extend(litellm.callbacks or []) # type: ignore - all_callbacks.extend(litellm._async_success_callback or []) # type: ignore - all_callbacks.extend(litellm.success_callback or []) # type: ignore + all_callbacks = litellm.logging_callback_manager._get_all_callbacks() for callback_obj in all_callbacks: if hasattr(callback_obj, 'increment_callback_logging_failure'):