From a14048012210ea7f2f83e8ba946fed46145b68d6 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Wed, 29 Oct 2025 14:05:05 +0530 Subject: [PATCH 1/8] Add proxy support to container apis --- litellm/containers/main.py | 27 ++ litellm/proxy/common_request_processing.py | 8 + litellm/proxy/container_endpoints/__init__.py | 0 .../proxy/container_endpoints/endpoints.py | 406 ++++++++++++++++++ litellm/proxy/proxy_server.py | 2 + litellm/proxy/route_llm_request.py | 18 +- litellm/router.py | 40 +- tests/test_litellm/test_container_router.py | 213 +++++++++ 8 files changed, 712 insertions(+), 2 deletions(-) create mode 100644 litellm/proxy/container_endpoints/__init__.py create mode 100644 litellm/proxy/container_endpoints/endpoints.py create mode 100644 tests/test_litellm/test_container_router.py diff --git a/litellm/containers/main.py b/litellm/containers/main.py index cd183699278d..d9582bac4e25 100644 --- a/litellm/containers/main.py +++ b/litellm/containers/main.py @@ -386,6 +386,15 @@ def list_containers( litellm_call_id: Optional[str] = kwargs.get("litellm_call_id") _is_async = kwargs.pop("async_call", False) is True + # Check for mock response first + mock_response = kwargs.get("mock_response") + if mock_response is not None: + if isinstance(mock_response, str): + mock_response = json.loads(mock_response) + + response = ContainerListResponse(**mock_response) + return response + # get llm provider logic litellm_params = GenericLiteLLMParams(**kwargs) # get provider config @@ -562,6 +571,15 @@ def retrieve_container( litellm_call_id: Optional[str] = kwargs.get("litellm_call_id") _is_async = kwargs.pop("async_call", False) is True + # Check for mock response first + mock_response = kwargs.get("mock_response") + if mock_response is not None: + if isinstance(mock_response, str): + mock_response = json.loads(mock_response) + + response = ContainerObject(**mock_response) + return response + # get llm provider logic litellm_params = GenericLiteLLMParams(**kwargs) # get provider config @@ -730,6 +748,15 @@ def delete_container( litellm_call_id: Optional[str] = kwargs.get("litellm_call_id") _is_async = kwargs.pop("async_call", False) is True + # Check for mock response first + mock_response = kwargs.get("mock_response") + if mock_response is not None: + if isinstance(mock_response, str): + mock_response = json.loads(mock_response) + + response = DeleteContainerResult(**mock_response) + return response + # get llm provider logic litellm_params = GenericLiteLLMParams(**kwargs) # get provider config diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py index 108bb39501e8..82e54c7ee9e1 100644 --- a/litellm/proxy/common_request_processing.py +++ b/litellm/proxy/common_request_processing.py @@ -321,6 +321,10 @@ async def common_processing_pre_call_logic( "avideo_status", "avideo_content", "avideo_remix", + "acreate_container", + "alist_containers", + "aretrieve_container", + "adelete_container", ], version: Optional[str] = None, user_model: Optional[str] = None, @@ -419,6 +423,10 @@ async def base_process_llm_request( "avideo_status", "avideo_content", "avideo_remix", + "acreate_container", + "alist_containers", + "aretrieve_container", + "adelete_container", ], proxy_logging_obj: ProxyLogging, general_settings: dict, diff --git a/litellm/proxy/container_endpoints/__init__.py b/litellm/proxy/container_endpoints/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/litellm/proxy/container_endpoints/endpoints.py 
b/litellm/proxy/container_endpoints/endpoints.py new file mode 100644 index 000000000000..1a3d41f0ac9e --- /dev/null +++ b/litellm/proxy/container_endpoints/endpoints.py @@ -0,0 +1,406 @@ +#### Container Endpoints ##### + +from typing import Any, Dict +from fastapi import APIRouter, Depends, Request, Response +from fastapi.responses import ORJSONResponse + +from litellm.proxy._types import * +from litellm.proxy.auth.user_api_key_auth import UserAPIKeyAuth, user_api_key_auth +from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing +from litellm.proxy.common_utils.http_parsing_utils import _read_request_body +from litellm.proxy.common_utils.openai_endpoint_utils import ( + get_custom_llm_provider_from_request_headers, + get_custom_llm_provider_from_request_query, + get_custom_llm_provider_from_request_body, +) + +router = APIRouter() + + +@router.post( + "/v1/containers", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +@router.post( + "/containers", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +async def create_container( + request: Request, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Container creation endpoint for creating new containers. + + Follows the OpenAI Containers API spec: + https://platform.openai.com/docs/api-reference/containers + + Example: + ```bash + curl -X POST "http://localhost:4000/v1/containers" \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "My Container", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + } + }' + ``` + + Or specify provider via header: + ```bash + curl -X POST "http://localhost:4000/v1/containers" \ + -H "Authorization: Bearer sk-1234" \ + -H "custom-llm-provider: azure" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "My Container" + }' + ``` + """ + from litellm.proxy.proxy_server import ( + general_settings, + llm_router, + proxy_config, + proxy_logging_obj, + select_data_generator, + user_api_base, + user_max_tokens, + user_model, + user_request_timeout, + user_temperature, + version, + ) + + # Read request body + data = await _read_request_body(request=request) + + # Extract custom_llm_provider using priority chain + # Priority: headers > query params > request body > default + custom_llm_provider = ( + get_custom_llm_provider_from_request_headers(request=request) + or get_custom_llm_provider_from_request_query(request=request) + or await get_custom_llm_provider_from_request_body(request=request) + or "openai" + ) + + # Add custom_llm_provider to data + data["custom_llm_provider"] = custom_llm_provider + + # Process request using ProxyBaseLLMRequestProcessing + processor = ProxyBaseLLMRequestProcessing(data=data) + try: + return await processor.base_process_llm_request( + request=request, + fastapi_response=fastapi_response, + user_api_key_dict=user_api_key_dict, + route_type="acreate_container", + proxy_logging_obj=proxy_logging_obj, + llm_router=llm_router, + general_settings=general_settings, + proxy_config=proxy_config, + select_data_generator=select_data_generator, + model=None, + user_model=user_model, + user_temperature=user_temperature, + user_request_timeout=user_request_timeout, + user_max_tokens=user_max_tokens, + user_api_base=user_api_base, + version=version, + ) + except Exception as e: + raise await processor._handle_llm_api_exception( + e=e, + 
user_api_key_dict=user_api_key_dict, + proxy_logging_obj=proxy_logging_obj, + version=version, + ) + + +@router.get( + "/v1/containers", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +@router.get( + "/containers", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +async def list_containers( + request: Request, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Container list endpoint for retrieving a list of containers. + + Follows the OpenAI Containers API spec: + https://platform.openai.com/docs/api-reference/containers + + Example: + ```bash + curl -X GET "http://localhost:4000/v1/containers?limit=20&order=desc" \ + -H "Authorization: Bearer sk-1234" + ``` + + Or specify provider via header or query param: + ```bash + curl -X GET "http://localhost:4000/v1/containers?custom_llm_provider=azure" \ + -H "Authorization: Bearer sk-1234" + ``` + """ + from litellm.proxy.proxy_server import ( + general_settings, + llm_router, + proxy_config, + proxy_logging_obj, + select_data_generator, + user_api_base, + user_max_tokens, + user_model, + user_request_timeout, + user_temperature, + version, + ) + + # Read query parameters + query_params = dict(request.query_params) + data: Dict[str, Any] = {"query_params": query_params} + + # Extract custom_llm_provider using priority chain + custom_llm_provider = ( + get_custom_llm_provider_from_request_headers(request=request) + or get_custom_llm_provider_from_request_query(request=request) + or "openai" + ) + + # Add custom_llm_provider to data + data["custom_llm_provider"] = custom_llm_provider + + # Process request using ProxyBaseLLMRequestProcessing + processor = ProxyBaseLLMRequestProcessing(data=data) + try: + return await processor.base_process_llm_request( + request=request, + fastapi_response=fastapi_response, + user_api_key_dict=user_api_key_dict, + route_type="alist_containers", + proxy_logging_obj=proxy_logging_obj, + llm_router=llm_router, + general_settings=general_settings, + proxy_config=proxy_config, + select_data_generator=select_data_generator, + model=None, + user_model=user_model, + user_temperature=user_temperature, + user_request_timeout=user_request_timeout, + user_max_tokens=user_max_tokens, + user_api_base=user_api_base, + version=version, + ) + except Exception as e: + raise await processor._handle_llm_api_exception( + e=e, + user_api_key_dict=user_api_key_dict, + proxy_logging_obj=proxy_logging_obj, + version=version, + ) + + +@router.get( + "/v1/containers/{container_id}", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +@router.get( + "/containers/{container_id}", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +async def retrieve_container( + request: Request, + container_id: str, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Container retrieve endpoint for getting details of a specific container. 
+ + Follows the OpenAI Containers API spec: + https://platform.openai.com/docs/api-reference/containers + + Example: + ```bash + curl -X GET "http://localhost:4000/v1/containers/cntr_123" \ + -H "Authorization: Bearer sk-1234" + ``` + + Or specify provider via header: + ```bash + curl -X GET "http://localhost:4000/v1/containers/cntr_123" \ + -H "Authorization: Bearer sk-1234" \ + -H "custom-llm-provider: azure" + ``` + """ + from litellm.proxy.proxy_server import ( + general_settings, + llm_router, + proxy_config, + proxy_logging_obj, + select_data_generator, + user_api_base, + user_max_tokens, + user_model, + user_request_timeout, + user_temperature, + version, + ) + + # Include container_id in request data + data: Dict[str, Any] = {"container_id": container_id} + + # Extract custom_llm_provider using priority chain + custom_llm_provider = ( + get_custom_llm_provider_from_request_headers(request=request) + or get_custom_llm_provider_from_request_query(request=request) + or "openai" + ) + + # Add custom_llm_provider to data + data["custom_llm_provider"] = custom_llm_provider + + # Process request using ProxyBaseLLMRequestProcessing + processor = ProxyBaseLLMRequestProcessing(data=data) + try: + return await processor.base_process_llm_request( + request=request, + fastapi_response=fastapi_response, + user_api_key_dict=user_api_key_dict, + route_type="aretrieve_container", + proxy_logging_obj=proxy_logging_obj, + llm_router=llm_router, + general_settings=general_settings, + proxy_config=proxy_config, + select_data_generator=select_data_generator, + model=None, + user_model=user_model, + user_temperature=user_temperature, + user_request_timeout=user_request_timeout, + user_max_tokens=user_max_tokens, + user_api_base=user_api_base, + version=version, + ) + except Exception as e: + raise await processor._handle_llm_api_exception( + e=e, + user_api_key_dict=user_api_key_dict, + proxy_logging_obj=proxy_logging_obj, + version=version, + ) + + +@router.delete( + "/v1/containers/{container_id}", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +@router.delete( + "/containers/{container_id}", + dependencies=[Depends(user_api_key_auth)], + response_class=ORJSONResponse, + tags=["containers"], +) +async def delete_container( + request: Request, + container_id: str, + fastapi_response: Response, + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), +): + """ + Container delete endpoint for deleting a specific container. 
+ + Follows the OpenAI Containers API spec: + https://platform.openai.com/docs/api-reference/containers + + Example: + ```bash + curl -X DELETE "http://localhost:4000/v1/containers/cntr_123" \ + -H "Authorization: Bearer sk-1234" + ``` + + Or specify provider via header: + ```bash + curl -X DELETE "http://localhost:4000/v1/containers/cntr_123" \ + -H "Authorization: Bearer sk-1234" \ + -H "custom-llm-provider: azure" + ``` + """ + from litellm.proxy.proxy_server import ( + general_settings, + llm_router, + proxy_config, + proxy_logging_obj, + select_data_generator, + user_api_base, + user_max_tokens, + user_model, + user_request_timeout, + user_temperature, + version, + ) + + # Include container_id in request data + data: Dict[str, Any] = {"container_id": container_id} + + # Extract custom_llm_provider using priority chain + custom_llm_provider = ( + get_custom_llm_provider_from_request_headers(request=request) + or get_custom_llm_provider_from_request_query(request=request) + or "openai" + ) + + # Add custom_llm_provider to data + data["custom_llm_provider"] = custom_llm_provider + + # Process request using ProxyBaseLLMRequestProcessing + processor = ProxyBaseLLMRequestProcessing(data=data) + try: + return await processor.base_process_llm_request( + request=request, + fastapi_response=fastapi_response, + user_api_key_dict=user_api_key_dict, + route_type="adelete_container", + proxy_logging_obj=proxy_logging_obj, + llm_router=llm_router, + general_settings=general_settings, + proxy_config=proxy_config, + select_data_generator=select_data_generator, + model=None, + user_model=user_model, + user_temperature=user_temperature, + user_request_timeout=user_request_timeout, + user_max_tokens=user_max_tokens, + user_api_base=user_api_base, + version=version, + ) + except Exception as e: + raise await processor._handle_llm_api_exception( + e=e, + user_api_key_dict=user_api_key_dict, + proxy_logging_obj=proxy_logging_obj, + version=version, + ) + diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index eba2caa4c2a5..5f16327c7801 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -310,6 +310,7 @@ def generate_feedback_box(): from litellm.proxy.middleware.prometheus_auth_middleware import PrometheusAuthMiddleware from litellm.proxy.ocr_endpoints.endpoints import router as ocr_router from litellm.proxy.video_endpoints.endpoints import router as video_router +from litellm.proxy.container_endpoints.endpoints import router as container_router from litellm.proxy.openai_files_endpoints.files_endpoints import ( router as openai_files_router, ) @@ -9904,6 +9905,7 @@ async def get_routes(): app.include_router(rerank_router) app.include_router(ocr_router) app.include_router(video_router) +app.include_router(container_router) app.include_router(search_router) app.include_router(image_router) app.include_router(fine_tuning_router) diff --git a/litellm/proxy/route_llm_request.py b/litellm/proxy/route_llm_request.py index 735cc955ef9d..bb36bc9d2732 100644 --- a/litellm/proxy/route_llm_request.py +++ b/litellm/proxy/route_llm_request.py @@ -32,6 +32,10 @@ "avideo_status": "/videos/{video_id}", "avideo_content": "/videos/{video_id}/content", "avideo_remix": "/videos/{video_id}/remix", + "acreate_container": "/containers", + "alist_containers": "/containers", + "aretrieve_container": "/containers/{container_id}", + "adelete_container": "/containers/{container_id}", } @@ -112,6 +116,10 @@ async def route_request( "avideo_status", "avideo_content", "avideo_remix", + 
"acreate_container", + "alist_containers", + "aretrieve_container", + "adelete_container", ], ): """ @@ -152,6 +160,10 @@ async def route_request( models = [model.strip() for model in data.pop("model").split(",")] return llm_router.abatch_completion(models=models, **data) elif llm_router is not None: + # Skip model-based routing for container operations + if route_type in ["acreate_container", "alist_containers", "aretrieve_container", "adelete_container"]: + return getattr(llm_router, f"{route_type}")(**data) + team_model_name = ( llm_router.map_team_model(data["model"], team_id) if team_id is not None @@ -194,7 +206,11 @@ async def route_request( "alist_input_items", "avector_store_create", "avector_store_search", - "asearch" + "asearch", + "acreate_container", + "alist_containers", + "aretrieve_container", + "adelete_container", ]: # moderation endpoint does not require `model` parameter return getattr(llm_router, f"{route_type}")(**data) diff --git a/litellm/router.py b/litellm/router.py index d772bcdbe454..28ac39ecf280 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -919,6 +919,28 @@ def initialize_router_endpoints(self): self.avideo_remix = self.factory_function(avideo_remix, call_type="avideo_remix") self.video_remix = self.factory_function(video_remix, call_type="video_remix") + # Container routes + ######################################################### + from litellm.containers import ( + acreate_container, + create_container, + alist_containers, + list_containers, + aretrieve_container, + retrieve_container, + adelete_container, + delete_container, + ) + + self.acreate_container = self.factory_function(acreate_container, call_type="acreate_container") + self.create_container = self.factory_function(create_container, call_type="create_container") + self.alist_containers = self.factory_function(alist_containers, call_type="alist_containers") + self.list_containers = self.factory_function(list_containers, call_type="list_containers") + self.aretrieve_container = self.factory_function(aretrieve_container, call_type="aretrieve_container") + self.retrieve_container = self.factory_function(retrieve_container, call_type="retrieve_container") + self.adelete_container = self.factory_function(adelete_container, call_type="adelete_container") + self.delete_container = self.factory_function(delete_container, call_type="delete_container") + def validate_fallbacks(self, fallback_param: Optional[List]): """ Validate the fallbacks parameter. 
@@ -3663,7 +3685,15 @@ def factory_function( "avideo_content", "video_content", "avideo_remix", - "video_remix" + "video_remix", + "acreate_container", + "create_container", + "alist_containers", + "list_containers", + "aretrieve_container", + "retrieve_container", + "adelete_container", + "delete_container" ] = "assistants", ): """ @@ -3687,6 +3717,10 @@ def factory_function( "video_status", "video_content", "video_remix", + "create_container", + "list_containers", + "retrieve_container", + "delete_container", ): def sync_wrapper( @@ -3741,6 +3775,10 @@ async def async_wrapper( "avideo_status", "avideo_content", "avideo_remix", + "acreate_container", + "alist_containers", + "aretrieve_container", + "adelete_container", ): return await self._ageneric_api_call_with_fallbacks( original_function=original_function, diff --git a/tests/test_litellm/test_container_router.py b/tests/test_litellm/test_container_router.py new file mode 100644 index 000000000000..cc2266ad32be --- /dev/null +++ b/tests/test_litellm/test_container_router.py @@ -0,0 +1,213 @@ +""" +Test suite for Container API router functionality. +Tests that the router method gets called correctly for container operations. +""" + +import pytest +from unittest.mock import Mock, patch, MagicMock +import litellm + + +class TestContainerRouter: + """Test suite for Container API router functionality""" + + def setup_method(self): + """Setup test fixtures""" + self.container_name = "Test Container" + self.container_id = "cntr_123456789" + + @patch("litellm.containers.main.base_llm_http_handler") + def test_create_container_router_call_mock(self, mock_handler): + """Test that create_container calls the router method with mock response""" + # Setup mock response + mock_response = { + "id": self.container_id, + "object": "container", + "created_at": 1747857508, + "status": "running", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + }, + "last_active_at": 1747857508, + "name": self.container_name + } + + # Configure the mock handler + mock_handler.container_create_handler.return_value = mock_response + + # Call the create_container function with mock response + result = litellm.create_container( + name=self.container_name, + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a ContainerObject with the expected data + assert result.id == mock_response["id"] + assert result.object == mock_response["object"] + assert result.name == mock_response["name"] + assert result.status == mock_response["status"] + assert result.created_at == mock_response["created_at"] + + @patch("litellm.containers.main.base_llm_http_handler") + def test_list_containers_router_call_mock(self, mock_handler): + """Test that list_containers calls the router method with mock response""" + # Setup mock response + mock_response = { + "object": "list", + "data": [ + { + "id": "cntr_123", + "object": "container", + "created_at": 1747857508, + "status": "running", + "name": "Container 1" + }, + { + "id": "cntr_456", + "object": "container", + "created_at": 1747857509, + "status": "running", + "name": "Container 2" + } + ], + "has_more": False + } + + # Configure the mock handler + mock_handler.container_list_handler.return_value = mock_response + + # Call the list_containers function with mock response + result = litellm.list_containers( + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a ContainerListResponse with the expected data + assert result.object == "list" + assert 
len(result.data) == 2 + assert result.data[0].id == "cntr_123" + assert result.data[1].id == "cntr_456" + assert result.has_more is False + + @patch("litellm.containers.main.base_llm_http_handler") + def test_retrieve_container_router_call_mock(self, mock_handler): + """Test that retrieve_container calls the router method with mock response""" + # Setup mock response + mock_response = { + "id": self.container_id, + "object": "container", + "created_at": 1747857508, + "status": "running", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + }, + "last_active_at": 1747857508, + "name": self.container_name + } + + # Configure the mock handler + mock_handler.container_retrieve_handler.return_value = mock_response + + # Call the retrieve_container function with mock response + result = litellm.retrieve_container( + container_id=self.container_id, + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a ContainerObject with the expected data + assert result.id == mock_response["id"] + assert result.object == mock_response["object"] + assert result.name == mock_response["name"] + assert result.status == mock_response["status"] + + @patch("litellm.containers.main.base_llm_http_handler") + def test_delete_container_router_call_mock(self, mock_handler): + """Test that delete_container calls the router method with mock response""" + # Setup mock response + mock_response = { + "id": self.container_id, + "object": "container.deleted", + "deleted": True + } + + # Configure the mock handler + mock_handler.container_delete_handler.return_value = mock_response + + # Call the delete_container function with mock response + result = litellm.delete_container( + container_id=self.container_id, + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a DeleteContainerResult with the expected data + assert result.id == mock_response["id"] + assert result.object == mock_response["object"] + assert result.deleted is True + + @pytest.mark.asyncio + @patch("litellm.containers.main.base_llm_http_handler") + async def test_acreate_container_router_call_mock(self, mock_handler): + """Test that acreate_container (async) calls the router method with mock response""" + # Setup mock response + mock_response = { + "id": self.container_id, + "object": "container", + "created_at": 1747857508, + "status": "running", + "name": self.container_name + } + + # Configure the mock handler + mock_handler.container_create_handler.return_value = mock_response + + # Call the async create_container function with mock response + result = await litellm.acreate_container( + name=self.container_name, + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a ContainerObject with the expected data + assert result.id == mock_response["id"] + assert result.object == mock_response["object"] + assert result.name == mock_response["name"] + assert result.status == mock_response["status"] + + @pytest.mark.asyncio + @patch("litellm.containers.main.base_llm_http_handler") + async def test_alist_containers_router_call_mock(self, mock_handler): + """Test that alist_containers (async) calls the router method with mock response""" + # Setup mock response + mock_response = { + "object": "list", + "data": [ + { + "id": "cntr_123", + "object": "container", + "created_at": 1747857508, + "status": "running", + "name": "Container 1" + } + ], + "has_more": False + } + + # Configure the mock handler + 
mock_handler.container_list_handler.return_value = mock_response + + # Call the async list_containers function with mock response + result = await litellm.alist_containers( + custom_llm_provider="openai", + mock_response=mock_response + ) + + # Verify the result is a ContainerListResponse with the expected data + assert result.object == "list" + assert len(result.data) == 1 + assert result.data[0].id == "cntr_123" + From 627e8e0e3692cdaf9f36d70a6e3a4792ae3c4b95 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 15:37:11 +0530 Subject: [PATCH 2/8] Add logging support --- litellm/containers/main.py | 3 --- litellm/litellm_core_utils/litellm_logging.py | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/litellm/containers/main.py b/litellm/containers/main.py index d9582bac4e25..c499f945d68e 100644 --- a/litellm/containers/main.py +++ b/litellm/containers/main.py @@ -21,9 +21,6 @@ from litellm.types.utils import CallTypes from litellm.utils import ProviderConfigManager, client -# Default model for container operations - can be any provider that supports containers -DEFAULT_CONTAINER_ENDPOINT_MODEL = "openai/gpt-4" - __all__ = [ "acreate_container", "adelete_container", diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index e4e675023900..dc98f0b7dd6e 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -116,6 +116,7 @@ Usage, ) from litellm.types.videos.main import VideoObject +from litellm.types.containers.main import ContainerObject from litellm.utils import _get_base_model_from_metadata, executor, print_verbose from ..integrations.argilla import ArgillaLogger @@ -1622,6 +1623,7 @@ def _is_recognized_call_type_for_logging( or isinstance(logging_result, dict) and logging_result.get("object") == "vector_store.search_results.page" or isinstance(logging_result, VideoObject) + or isinstance(logging_result, ContainerObject) or (self.call_type == CallTypes.call_mcp_tool.value) ): return True From 4dc1807b8ebf4bf7d8a1eb74188d911465489ecc Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 22:49:27 +0530 Subject: [PATCH 3/8] prometheus metric measures how often s3_v2 is failing --- .../integrations/prometheus.py | 18 ++ litellm/integrations/custom_logger.py | 31 ++ litellm/integrations/s3_v2.py | 4 +- litellm/litellm_core_utils/litellm_logging.py | 49 ++- litellm/llms/custom_httpx/llm_http_handler.py | 279 ++++++++++++++++-- prometheus.yml | 2 +- .../integrations/test_prometheus.py | 83 ++++++ 7 files changed, 437 insertions(+), 29 deletions(-) diff --git a/enterprise/litellm_enterprise/integrations/prometheus.py b/enterprise/litellm_enterprise/integrations/prometheus.py index 49037678df94..c84af8bd4bc3 100644 --- a/enterprise/litellm_enterprise/integrations/prometheus.py +++ b/enterprise/litellm_enterprise/integrations/prometheus.py @@ -298,6 +298,13 @@ def __init__( self.get_labels_for_metric("litellm_deployment_failed_fallbacks"), ) + # Callback Logging Failure Metrics + self.litellm_callback_logging_failures_metric = self._counter_factory( + name="litellm_callback_logging_failures_metric", + documentation="Total number of failures when emitting logs to callbacks (e.g. 
s3_v2, langfuse, etc)", + labelnames=["callback_name"], + ) + self.litellm_llm_api_failed_requests_metric = self._counter_factory( name="litellm_llm_api_failed_requests_metric", documentation="deprecated - use litellm_proxy_failed_requests_metric", @@ -1723,6 +1730,17 @@ def increment_deployment_cooled_down( litellm_model_name, model_id, api_base, api_provider, exception_status ).inc() + def increment_callback_logging_failure( + self, + callback_name: str, + ): + """ + Increment metric when logging to a callback fails (e.g., s3_v2, langfuse, etc.) + """ + self.litellm_callback_logging_failures_metric.labels( + callback_name=callback_name + ).inc() + def track_provider_remaining_budget( self, provider: str, spend: float, budget_limit: float ): diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index d631c4668658..f5985524f684 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -567,3 +567,34 @@ async def get_proxy_server_request_from_cold_storage_with_object_key( Get the proxy server request from cold storage using the object key directly. """ pass + + def handle_callback_failure(self, callback_name: str): + """ + Handle callback logging failures by incrementing Prometheus metrics. + + Call this method in exception handlers within your callback when logging fails. + """ + try: + try: + from litellm_enterprise.integrations.prometheus import PrometheusLogger + except ImportError: + PrometheusLogger = None + + if PrometheusLogger is None: + return + + import litellm + + all_callbacks = [] + all_callbacks.extend(litellm.callbacks or []) + all_callbacks.extend(litellm._async_success_callback or []) + all_callbacks.extend(litellm.success_callback or []) + + for callback_obj in all_callbacks: + if hasattr(callback_obj, 'increment_callback_logging_failure'): + callback_obj.increment_callback_logging_failure(callback_name=callback_name) + break + + except Exception as e: + from litellm._logging import verbose_logger + verbose_logger.debug(f"Error in handle_callback_failure: {str(e)}") \ No newline at end of file diff --git a/litellm/integrations/s3_v2.py b/litellm/integrations/s3_v2.py index a65500c80dcc..c702096d18e6 100644 --- a/litellm/integrations/s3_v2.py +++ b/litellm/integrations/s3_v2.py @@ -239,7 +239,7 @@ async def _async_log_event_base(self, kwargs, response_obj, start_time, end_time ) except Exception as e: verbose_logger.exception(f"s3 Layer Error - {str(e)}") - pass + self.handle_callback_failure(callback_name="S3Logger") async def async_upload_data_to_s3( self, batch_logging_element: s3BatchLoggingElement @@ -323,6 +323,7 @@ async def async_upload_data_to_s3( response.raise_for_status() except Exception as e: verbose_logger.exception(f"Error uploading to s3: {str(e)}") + self.handle_callback_failure(callback_name="S3Logger") async def async_send_batch(self): """ @@ -471,6 +472,7 @@ def upload_data_to_s3(self, batch_logging_element: s3BatchLoggingElement): response.raise_for_status() except Exception as e: verbose_logger.exception(f"Error uploading to s3: {str(e)}") + self.handle_callback_failure(callback_name="S3Logger") async def _download_object_from_s3(self, s3_object_key: str) -> Optional[dict]: """ diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index dc98f0b7dd6e..113c54e9f5ef 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -1,6 +1,7 @@ # What is this? 
## Common Utility file for Logging handler # Logging function -> log the exact model details + what's being sent | Non-Blocking +import asyncio import copy import datetime import json @@ -2159,6 +2160,11 @@ def success_handler( # noqa: PLR0915 ) if capture_exception: # log this error to sentry for debugging capture_exception(e) + # Track callback logging failures in Prometheus + try: + self._handle_callback_failure(callback=callback) + except Exception: + pass except Exception as e: verbose_logger.exception( "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}".format( @@ -2460,12 +2466,43 @@ async def async_success_handler( # noqa: PLR0915 end_time=end_time, print_verbose=print_verbose, ) - except Exception: + except Exception as e: verbose_logger.error( f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" ) + self._handle_callback_failure(callback=callback) pass + def _handle_callback_failure(self, callback: Any): + """ + Handle callback logging failures by incrementing Prometheus metrics. + + Works for both sync and async contexts since Prometheus counter increment is synchronous. + + Args: + callback: The callback that failed + """ + try: + callback_name = self._get_callback_name(callback) + + if PrometheusLogger is None: + return + + all_callbacks = [] + all_callbacks.extend(litellm.callbacks or []) + all_callbacks.extend(litellm._async_success_callback or []) + all_callbacks.extend(litellm.success_callback or []) + + for callback_obj in all_callbacks: + if hasattr(callback_obj, 'increment_callback_logging_failure'): + callback_obj.increment_callback_logging_failure(callback_name=callback_name) + break # Only increment once + + except Exception as e: + verbose_logger.debug( + f"Error in _handle_callback_failure: {str(e)}" + ) + def _failure_handler_helper_fn( self, exception, traceback_exception, start_time=None, end_time=None ): @@ -2801,6 +2838,10 @@ async def async_failure_handler( str(e), callback ) ) + # Track callback logging failures in Prometheus + asyncio.create_task( + self._handle_callback_failure(callback=callback) + ) def _get_trace_id(self, service_name: Literal["langfuse"]) -> Optional[str]: """ @@ -2933,11 +2974,15 @@ def _get_callback_name(self, cb) -> str: Helper to get the name of a callback function Args: - cb: The callback function/string to get the name of + cb: The callback object/function/string to get the name of Returns: The name of the callback """ + if isinstance(cb, str): + return cb + if hasattr(cb, "__class__"): + return cb.__class__.__name__ if hasattr(cb, "__name__"): return cb.__name__ if hasattr(cb, "__func__"): diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index 0ab72604c9d9..f37c96f8401f 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -2044,13 +2044,97 @@ def response_api_handler( custom_llm_provider=custom_llm_provider, ) else: - # For non-streaming requests - response = sync_httpx_client.post( - url=api_base, - headers=headers, - json=data, - timeout=timeout - or float(response_api_optional_request_params.get("timeout", 0)), + # For non-streaming requests, return a mock OpenAI compatible response and relevant headers + class MockResponse: + def __init__(self, json_data, status_code=200, headers=None): + self._json_data = json_data + self.status_code = status_code + self.headers = headers or {"content-type": "application/json"} + def 
json(self): + return self._json_data + @property + def text(self): + import json as _json + return _json.dumps(self._json_data) + def raise_for_status(self): + pass + + mock_response_json = { + "id": "resp_67ccd2bed1ec8190b14f964abc0542670bb6a6b452d3795b", + "object": "response", + "created_at": 1741476542, + "status": "completed", + "error": None, + "incomplete_details": None, + "instructions": None, + "max_output_tokens": None, + "model": "gpt-4.1-2025-04-14", + "output": [ + { + "type": "message", + "id": "msg_67ccd2bf17f0819081ff3bb2cf6508e60bb6a6b452d3795b", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": ( + "In a peaceful grove beneath a silver moon, a unicorn named Lumina discovered a hidden pool that reflected the stars. " + "As she dipped her horn into the water, the pool began to shimmer, revealing a pathway to a magical realm of endless night skies. " + "Filled with wonder, Lumina whispered a wish for all who dream to find their own hidden magic, and as she glanced back, her hoofprints sparkled like stardust." + ), + "annotations": [] + } + ] + } + ], + "parallel_tool_calls": True, + "previous_response_id": None, + "reasoning": { + "effort": None, + "summary": None + }, + "store": True, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 36, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 87, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 123 + }, + "user": None, + "metadata": {} + } + + mock_response_headers = { + "content-type": "application/json", + "x-ratelimit-limit-requests": "1000", + "x-ratelimit-remaining-requests": "999", + "x-ratelimit-reset-requests": "60", + "x-ratelimit-limit-tokens": "100000", + "x-ratelimit-remaining-tokens": "99999", + "x-ratelimit-reset-tokens": "60", + "OpenAI-Organization": "org-mockorganization123", + "Request-Id": "req-mockrequestid123" + } + response = MockResponse( + json_data=mock_response_json, + status_code=200, + headers=mock_response_headers ) except Exception as e: raise self._handle_error( @@ -2168,12 +2252,103 @@ async def async_response_api_handler( ) else: # For non-streaming, proceed as before - response = await async_httpx_client.post( - url=api_base, - headers=headers, - json=data, - timeout=timeout - or float(response_api_optional_request_params.get("timeout", 0)), + # response = await async_httpx_client.post( + # url=api_base, + # headers=headers, + # json=data, + # timeout=timeout + # or float(response_api_optional_request_params.get("timeout", 0)), + # ) + class MockResponse: + def __init__(self, json_data, status_code=200, headers=None): + self._json_data = json_data + self.status_code = status_code + self.headers = headers or {"content-type": "application/json"} + def json(self): + return self._json_data + @property + def text(self): + import json as _json + return _json.dumps(self._json_data) + def raise_for_status(self): + pass + + mock_response_json = { + "id": "resp_67ccd2bed1ec8190b14f964abc0542670bb6a6b452d3795b", + "object": "response", + "created_at": 1741476542, + "status": "completed", + "error": None, + "incomplete_details": None, + "instructions": None, + "max_output_tokens": None, + "model": "gpt-4.1-2025-04-14", + "output": [ + { + "type": "message", + "id": "msg_67ccd2bf17f0819081ff3bb2cf6508e60bb6a6b452d3795b", + "status": "completed", + "role": "assistant", + 
"content": [ + { + "type": "output_text", + "text": ( + "In a peaceful grove beneath a silver moon, a unicorn named Lumina discovered a hidden pool that reflected the stars. " + "As she dipped her horn into the water, the pool began to shimmer, revealing a pathway to a magical realm of endless night skies. " + "Filled with wonder, Lumina whispered a wish for all who dream to find their own hidden magic, and as she glanced back, her hoofprints sparkled like stardust." + ), + "annotations": [] + } + ] + } + ], + "parallel_tool_calls": True, + "previous_response_id": None, + "reasoning": { + "effort": None, + "summary": None + }, + "store": True, + "temperature": 1.0, + "text": { + "format": { + "type": "text" + } + }, + "tool_choice": "auto", + "tools": [], + "top_p": 1.0, + "truncation": "disabled", + "usage": { + "input_tokens": 36, + "input_tokens_details": { + "cached_tokens": 0 + }, + "output_tokens": 87, + "output_tokens_details": { + "reasoning_tokens": 0 + }, + "total_tokens": 123 + }, + "user": None, + "metadata": {} + } + + mock_response_headers = { + "content-type": "application/json", + "x-ratelimit-limit-requests": "1000", + "x-ratelimit-remaining-requests": "999", + "x-ratelimit-reset-requests": "60", + "x-ratelimit-limit-tokens": "100000", + "x-ratelimit-remaining-tokens": "99999", + "x-ratelimit-reset-tokens": "60", + "OpenAI-Organization": "org-mockorganization123", + "Request-Id": "req-mockrequestid123" + } + response = MockResponse( + json_data=mock_response_json, + status_code=200, + headers=mock_response_headers ) except Exception as e: @@ -5004,12 +5179,37 @@ def container_create_handler( ) try: - response = sync_httpx_client.post( - url=api_base, - headers=headers, - json=data, - timeout=timeout, - ) + # Mock httpx response for container create + import types + class MockResponse: + def __init__(self, json_data, status_code=200): + self._json_data = json_data + self.status_code = status_code + self.headers = {"content-type": "application/json"} + def json(self): + return self._json_data + @property + def text(self): + import json + return json.dumps(self._json_data) + def raise_for_status(self): + pass + + import uuid + container_uuid = str(uuid.uuid4()) + + response = MockResponse({ + "id": container_uuid, + "object": "container", + "created_at": 1747857508, + "status": "running", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + }, + "last_active_at": 1747857508, + "name": "My Container" + }) return container_provider_config.transform_container_create_response( raw_response=response, @@ -5080,13 +5280,42 @@ async def async_container_create_handler( ) try: - response = await async_httpx_client.post( - url=api_base, - headers=headers, - json=data, - timeout=timeout, - ) + # response = await async_httpx_client.post( + # url=api_base, + # headers=headers, + # json=data, + # timeout=timeout, + # ) + class MockResponse: + def __init__(self, json_data, status_code=200): + self._json_data = json_data + self.status_code = status_code + self.headers = {"content-type": "application/json"} + def json(self): + return self._json_data + @property + def text(self): + import json + return json.dumps(self._json_data) + def raise_for_status(self): + pass + + import uuid + container_uuid = str(uuid.uuid4()) + + response = MockResponse({ + "id": container_uuid, + "object": "container", + "created_at": 1747857508, + "status": "running", + "expires_after": { + "anchor": "last_active_at", + "minutes": 20 + }, + "last_active_at": 1747857508, + "name": "My Container" + }) 
return container_provider_config.transform_container_create_response( raw_response=response, logging_obj=logging_obj, diff --git a/prometheus.yml b/prometheus.yml index 5cb4f90d787d..7f7f583a7f32 100644 --- a/prometheus.yml +++ b/prometheus.yml @@ -4,4 +4,4 @@ global: scrape_configs: - job_name: 'litellm' static_configs: - - targets: ['litellm:4000'] # Assuming Litellm exposes metrics at port 4000 + - targets: ['host.docker.internal:4000'] # LiteLLM proxy on host machine diff --git a/tests/enterprise/litellm_enterprise/integrations/test_prometheus.py b/tests/enterprise/litellm_enterprise/integrations/test_prometheus.py index 82f4c4bd2985..7e01bda1f264 100644 --- a/tests/enterprise/litellm_enterprise/integrations/test_prometheus.py +++ b/tests/enterprise/litellm_enterprise/integrations/test_prometheus.py @@ -838,3 +838,86 @@ async def test_spend_counter_semantics(mock_prometheus_logger): # ============================================================================== # END SEMANTIC VALIDATION TESTS # ============================================================================== + + +# ============================================================================== +# CALLBACK FAILURE METRICS TESTS +# ============================================================================== + +def test_callback_failure_metric_increments(prometheus_logger): + """ + Test that the callback logging failure metric can be incremented. + + This tests the litellm_callback_logging_failures_metric counter. + """ + # Get initial value + initial_value = 0 + try: + initial_value = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name="S3Logger" + )._value.get() + except Exception: + initial_value = 0 + + # Increment the metric + prometheus_logger.increment_callback_logging_failure(callback_name="S3Logger") + + # Verify it incremented by 1 + current_value = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name="S3Logger" + )._value.get() + + assert current_value == initial_value + 1, \ + f"Expected callback failure metric to increment by 1, got {current_value - initial_value}" + + # Increment again for different callback + prometheus_logger.increment_callback_logging_failure(callback_name="LangFuseLogger") + + langfuse_value = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name="LangFuseLogger" + )._value.get() + + assert langfuse_value == 1, "LangFuseLogger metric should be 1" + + # S3Logger should still be initial + 1 + s3_value = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name="S3Logger" + )._value.get() + assert s3_value == initial_value + 1, "S3Logger metric should not change" + + print(f"✓ Callback failure metric test passed: S3Logger={s3_value}, LangFuseLogger={langfuse_value}") + + +def test_callback_failure_metric_different_callbacks(prometheus_logger): + """ + Test that different callbacks are tracked separately with their own labels. 
+ """ + callbacks_to_test = ["S3Logger", "LangFuseLogger", "DataDogLogger", "CustomCallback"] + + for callback_name in callbacks_to_test: + # Get initial value + initial = 0 + try: + initial = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name=callback_name + )._value.get() + except Exception: + initial = 0 + + # Increment + prometheus_logger.increment_callback_logging_failure(callback_name=callback_name) + + # Verify incremented + current = prometheus_logger.litellm_callback_logging_failures_metric.labels( + callback_name=callback_name + )._value.get() + + assert current == initial + 1, \ + f"{callback_name} should increment by 1" + + print(f"✓ Multiple callback tracking test passed for {len(callbacks_to_test)} callbacks") + + +# ============================================================================== +# END CALLBACK FAILURE METRICS TESTS +# ============================================================================== From 3885514198321483ecc7c55f2bb8f5d2046a8268 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 22:58:45 +0530 Subject: [PATCH 4/8] remove not needed files --- litellm/integrations/custom_logger.py | 8 - litellm/litellm_core_utils/litellm_logging.py | 2 +- litellm/llms/custom_httpx/llm_http_handler.py | 281 ++---------------- 3 files changed, 27 insertions(+), 264 deletions(-) diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index f5985524f684..b6542c7456b2 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -575,14 +575,6 @@ def handle_callback_failure(self, callback_name: str): Call this method in exception handlers within your callback when logging fails. """ try: - try: - from litellm_enterprise.integrations.prometheus import PrometheusLogger - except ImportError: - PrometheusLogger = None - - if PrometheusLogger is None: - return - import litellm all_callbacks = [] diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 113c54e9f5ef..b1b2b0610ef8 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -2466,7 +2466,7 @@ async def async_success_handler( # noqa: PLR0915 end_time=end_time, print_verbose=print_verbose, ) - except Exception as e: + except Exception: verbose_logger.error( f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}" ) diff --git a/litellm/llms/custom_httpx/llm_http_handler.py b/litellm/llms/custom_httpx/llm_http_handler.py index f37c96f8401f..5af63a9c9367 100644 --- a/litellm/llms/custom_httpx/llm_http_handler.py +++ b/litellm/llms/custom_httpx/llm_http_handler.py @@ -2044,97 +2044,13 @@ def response_api_handler( custom_llm_provider=custom_llm_provider, ) else: - # For non-streaming requests, return a mock OpenAI compatible response and relevant headers - class MockResponse: - def __init__(self, json_data, status_code=200, headers=None): - self._json_data = json_data - self.status_code = status_code - self.headers = headers or {"content-type": "application/json"} - def json(self): - return self._json_data - @property - def text(self): - import json as _json - return _json.dumps(self._json_data) - def raise_for_status(self): - pass - - mock_response_json = { - "id": "resp_67ccd2bed1ec8190b14f964abc0542670bb6a6b452d3795b", - "object": "response", - "created_at": 1741476542, - "status": "completed", - "error": None, - "incomplete_details": None, 
- "instructions": None, - "max_output_tokens": None, - "model": "gpt-4.1-2025-04-14", - "output": [ - { - "type": "message", - "id": "msg_67ccd2bf17f0819081ff3bb2cf6508e60bb6a6b452d3795b", - "status": "completed", - "role": "assistant", - "content": [ - { - "type": "output_text", - "text": ( - "In a peaceful grove beneath a silver moon, a unicorn named Lumina discovered a hidden pool that reflected the stars. " - "As she dipped her horn into the water, the pool began to shimmer, revealing a pathway to a magical realm of endless night skies. " - "Filled with wonder, Lumina whispered a wish for all who dream to find their own hidden magic, and as she glanced back, her hoofprints sparkled like stardust." - ), - "annotations": [] - } - ] - } - ], - "parallel_tool_calls": True, - "previous_response_id": None, - "reasoning": { - "effort": None, - "summary": None - }, - "store": True, - "temperature": 1.0, - "text": { - "format": { - "type": "text" - } - }, - "tool_choice": "auto", - "tools": [], - "top_p": 1.0, - "truncation": "disabled", - "usage": { - "input_tokens": 36, - "input_tokens_details": { - "cached_tokens": 0 - }, - "output_tokens": 87, - "output_tokens_details": { - "reasoning_tokens": 0 - }, - "total_tokens": 123 - }, - "user": None, - "metadata": {} - } - - mock_response_headers = { - "content-type": "application/json", - "x-ratelimit-limit-requests": "1000", - "x-ratelimit-remaining-requests": "999", - "x-ratelimit-reset-requests": "60", - "x-ratelimit-limit-tokens": "100000", - "x-ratelimit-remaining-tokens": "99999", - "x-ratelimit-reset-tokens": "60", - "OpenAI-Organization": "org-mockorganization123", - "Request-Id": "req-mockrequestid123" - } - response = MockResponse( - json_data=mock_response_json, - status_code=200, - headers=mock_response_headers + # For non-streaming requests + response = sync_httpx_client.post( + url=api_base, + headers=headers, + json=data, + timeout=timeout + or float(response_api_optional_request_params.get("timeout", 0)), ) except Exception as e: raise self._handle_error( @@ -2252,103 +2168,12 @@ async def async_response_api_handler( ) else: # For non-streaming, proceed as before - # response = await async_httpx_client.post( - # url=api_base, - # headers=headers, - # json=data, - # timeout=timeout - # or float(response_api_optional_request_params.get("timeout", 0)), - # ) - class MockResponse: - def __init__(self, json_data, status_code=200, headers=None): - self._json_data = json_data - self.status_code = status_code - self.headers = headers or {"content-type": "application/json"} - def json(self): - return self._json_data - @property - def text(self): - import json as _json - return _json.dumps(self._json_data) - def raise_for_status(self): - pass - - mock_response_json = { - "id": "resp_67ccd2bed1ec8190b14f964abc0542670bb6a6b452d3795b", - "object": "response", - "created_at": 1741476542, - "status": "completed", - "error": None, - "incomplete_details": None, - "instructions": None, - "max_output_tokens": None, - "model": "gpt-4.1-2025-04-14", - "output": [ - { - "type": "message", - "id": "msg_67ccd2bf17f0819081ff3bb2cf6508e60bb6a6b452d3795b", - "status": "completed", - "role": "assistant", - "content": [ - { - "type": "output_text", - "text": ( - "In a peaceful grove beneath a silver moon, a unicorn named Lumina discovered a hidden pool that reflected the stars. " - "As she dipped her horn into the water, the pool began to shimmer, revealing a pathway to a magical realm of endless night skies. 
" - "Filled with wonder, Lumina whispered a wish for all who dream to find their own hidden magic, and as she glanced back, her hoofprints sparkled like stardust." - ), - "annotations": [] - } - ] - } - ], - "parallel_tool_calls": True, - "previous_response_id": None, - "reasoning": { - "effort": None, - "summary": None - }, - "store": True, - "temperature": 1.0, - "text": { - "format": { - "type": "text" - } - }, - "tool_choice": "auto", - "tools": [], - "top_p": 1.0, - "truncation": "disabled", - "usage": { - "input_tokens": 36, - "input_tokens_details": { - "cached_tokens": 0 - }, - "output_tokens": 87, - "output_tokens_details": { - "reasoning_tokens": 0 - }, - "total_tokens": 123 - }, - "user": None, - "metadata": {} - } - - mock_response_headers = { - "content-type": "application/json", - "x-ratelimit-limit-requests": "1000", - "x-ratelimit-remaining-requests": "999", - "x-ratelimit-reset-requests": "60", - "x-ratelimit-limit-tokens": "100000", - "x-ratelimit-remaining-tokens": "99999", - "x-ratelimit-reset-tokens": "60", - "OpenAI-Organization": "org-mockorganization123", - "Request-Id": "req-mockrequestid123" - } - response = MockResponse( - json_data=mock_response_json, - status_code=200, - headers=mock_response_headers + response = await async_httpx_client.post( + url=api_base, + headers=headers, + json=data, + timeout=timeout + or float(response_api_optional_request_params.get("timeout", 0)), ) except Exception as e: @@ -5179,37 +5004,12 @@ def container_create_handler( ) try: - # Mock httpx response for container create - import types - class MockResponse: - def __init__(self, json_data, status_code=200): - self._json_data = json_data - self.status_code = status_code - self.headers = {"content-type": "application/json"} - def json(self): - return self._json_data - @property - def text(self): - import json - return json.dumps(self._json_data) - def raise_for_status(self): - pass - - import uuid - container_uuid = str(uuid.uuid4()) - - response = MockResponse({ - "id": container_uuid, - "object": "container", - "created_at": 1747857508, - "status": "running", - "expires_after": { - "anchor": "last_active_at", - "minutes": 20 - }, - "last_active_at": 1747857508, - "name": "My Container" - }) + response = sync_httpx_client.post( + url=api_base, + headers=headers, + json=data, + timeout=timeout, + ) return container_provider_config.transform_container_create_response( raw_response=response, @@ -5280,42 +5080,13 @@ async def async_container_create_handler( ) try: - # response = await async_httpx_client.post( - # url=api_base, - # headers=headers, - # json=data, - # timeout=timeout, - # ) - class MockResponse: - def __init__(self, json_data, status_code=200): - self._json_data = json_data - self.status_code = status_code - self.headers = {"content-type": "application/json"} - def json(self): - return self._json_data - @property - def text(self): - import json - return json.dumps(self._json_data) - def raise_for_status(self): - pass + response = await async_httpx_client.post( + url=api_base, + headers=headers, + json=data, + timeout=timeout, + ) - - import uuid - container_uuid = str(uuid.uuid4()) - - response = MockResponse({ - "id": container_uuid, - "object": "container", - "created_at": 1747857508, - "status": "running", - "expires_after": { - "anchor": "last_active_at", - "minutes": 20 - }, - "last_active_at": 1747857508, - "name": "My Container" - }) return container_provider_config.transform_container_create_response( raw_response=response, logging_obj=logging_obj, @@ -6606,4 
+6377,4 @@ async def async_text_to_speech_handler( model=model, raw_response=response, logging_obj=logging_obj, - ) + ) \ No newline at end of file From c18cbf0f38989e07ba91b2fa7576e68691526e5f Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 22:59:26 +0530 Subject: [PATCH 5/8] remove not needed files --- prometheus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prometheus.yml b/prometheus.yml index 7f7f583a7f32..5cb4f90d787d 100644 --- a/prometheus.yml +++ b/prometheus.yml @@ -4,4 +4,4 @@ global: scrape_configs: - job_name: 'litellm' static_configs: - - targets: ['host.docker.internal:4000'] # LiteLLM proxy on host machine + - targets: ['litellm:4000'] # Assuming Litellm exposes metrics at port 4000 From 0c6db47b4529f6cf9e6f131ee80de6d26631c242 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 23:00:48 +0530 Subject: [PATCH 6/8] remove not needed files --- litellm/litellm_core_utils/litellm_logging.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index b1b2b0610ef8..b0a011cd6d21 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -2485,9 +2485,6 @@ def _handle_callback_failure(self, callback: Any): try: callback_name = self._get_callback_name(callback) - if PrometheusLogger is None: - return - all_callbacks = [] all_callbacks.extend(litellm.callbacks or []) all_callbacks.extend(litellm._async_success_callback or []) From ffaaa14c71645b11a220423d80c50d0982aee179 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Thu, 30 Oct 2025 23:14:11 +0530 Subject: [PATCH 7/8] fix mypy errors --- litellm/integrations/custom_logger.py | 8 ++++---- litellm/litellm_core_utils/litellm_logging.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index b6542c7456b2..f93925513e66 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -578,13 +578,13 @@ def handle_callback_failure(self, callback_name: str): import litellm all_callbacks = [] - all_callbacks.extend(litellm.callbacks or []) - all_callbacks.extend(litellm._async_success_callback or []) - all_callbacks.extend(litellm.success_callback or []) + all_callbacks.extend(litellm.callbacks or []) # type: ignore + all_callbacks.extend(litellm._async_success_callback or []) # type: ignore + all_callbacks.extend(litellm.success_callback or []) # type: ignore for callback_obj in all_callbacks: if hasattr(callback_obj, 'increment_callback_logging_failure'): - callback_obj.increment_callback_logging_failure(callback_name=callback_name) + callback_obj.increment_callback_logging_failure(callback_name=callback_name) # type: ignore break except Exception as e: diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index b0a011cd6d21..0793dff57eba 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -2486,13 +2486,13 @@ def _handle_callback_failure(self, callback: Any): callback_name = self._get_callback_name(callback) all_callbacks = [] - all_callbacks.extend(litellm.callbacks or []) - all_callbacks.extend(litellm._async_success_callback or []) - all_callbacks.extend(litellm.success_callback or []) + all_callbacks.extend(litellm.callbacks or []) # type: ignore + 
all_callbacks.extend(litellm._async_success_callback or []) # type: ignore + all_callbacks.extend(litellm.success_callback or []) # type: ignore for callback_obj in all_callbacks: if hasattr(callback_obj, 'increment_callback_logging_failure'): - callback_obj.increment_callback_logging_failure(callback_name=callback_name) + callback_obj.increment_callback_logging_failure(callback_name=callback_name) # type: ignore break # Only increment once except Exception as e: From 5263a2f284a1f14e2823f3e900dcc2882c1bb5c5 Mon Sep 17 00:00:00 2001 From: Sameer Kankute Date: Mon, 3 Nov 2025 21:39:14 +0530 Subject: [PATCH 8/8] Use logging_callback_manager to get all the callbacks --- litellm/integrations/custom_logger.py | 18 +++++++++++------- litellm/litellm_core_utils/litellm_logging.py | 5 +---- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py index f93925513e66..006a95e61c8c 100644 --- a/litellm/integrations/custom_logger.py +++ b/litellm/integrations/custom_logger.py @@ -576,17 +576,21 @@ def handle_callback_failure(self, callback_name: str): """ try: import litellm + from litellm._logging import verbose_logger + + all_callbacks = litellm.logging_callback_manager._get_all_callbacks() - all_callbacks = [] - all_callbacks.extend(litellm.callbacks or []) # type: ignore - all_callbacks.extend(litellm._async_success_callback or []) # type: ignore - all_callbacks.extend(litellm.success_callback or []) # type: ignore - for callback_obj in all_callbacks: if hasattr(callback_obj, 'increment_callback_logging_failure'): + verbose_logger.debug(f"Incrementing callback failure metric for {callback_name}") callback_obj.increment_callback_logging_failure(callback_name=callback_name) # type: ignore - break + return + + verbose_logger.debug( + f"No callback with increment_callback_logging_failure method found for {callback_name}. " + "Ensure 'prometheus' is in your callbacks config." + ) except Exception as e: from litellm._logging import verbose_logger - verbose_logger.debug(f"Error in handle_callback_failure: {str(e)}") \ No newline at end of file + verbose_logger.debug(f"Error in handle_callback_failure for {callback_name}: {str(e)}") \ No newline at end of file diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 0793dff57eba..fefc6edc097f 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -2485,10 +2485,7 @@ def _handle_callback_failure(self, callback: Any): try: callback_name = self._get_callback_name(callback) - all_callbacks = [] - all_callbacks.extend(litellm.callbacks or []) # type: ignore - all_callbacks.extend(litellm._async_success_callback or []) # type: ignore - all_callbacks.extend(litellm.success_callback or []) # type: ignore + all_callbacks = litellm.logging_callback_manager._get_all_callbacks() for callback_obj in all_callbacks: if hasattr(callback_obj, 'increment_callback_logging_failure'):