18 changes: 18 additions & 0 deletions enterprise/litellm_enterprise/integrations/prometheus.py
@@ -298,6 +298,13 @@ def __init__(
self.get_labels_for_metric("litellm_deployment_failed_fallbacks"),
)

# Callback Logging Failure Metrics
self.litellm_callback_logging_failures_metric = self._counter_factory(
name="litellm_callback_logging_failures_metric",
documentation="Total number of failures when emitting logs to callbacks (e.g. s3_v2, langfuse, etc)",
labelnames=["callback_name"],
)

self.litellm_llm_api_failed_requests_metric = self._counter_factory(
name="litellm_llm_api_failed_requests_metric",
documentation="deprecated - use litellm_proxy_failed_requests_metric",
@@ -1723,6 +1730,17 @@ def increment_deployment_cooled_down(
litellm_model_name, model_id, api_base, api_provider, exception_status
).inc()

def increment_callback_logging_failure(
self,
callback_name: str,
):
"""
Increment metric when logging to a callback fails (e.g., s3_v2, langfuse, etc.)
"""
self.litellm_callback_logging_failures_metric.labels(
callback_name=callback_name
).inc()
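A minimal standalone sketch of how this counter behaves, using prometheus_client directly instead of LiteLLM's _counter_factory (the registry wiring is illustrative; the metric name, documentation, and label are taken from the diff above):

from prometheus_client import CollectorRegistry, Counter, generate_latest

# Stand-in for the counter created via self._counter_factory above.
registry = CollectorRegistry()
callback_failures = Counter(
    name="litellm_callback_logging_failures_metric",
    documentation="Total number of failures when emitting logs to callbacks",
    labelnames=["callback_name"],
    registry=registry,
)

# Equivalent of increment_callback_logging_failure(callback_name="S3Logger").
callback_failures.labels(callback_name="S3Logger").inc()

# prometheus_client appends a _total suffix on the scrape output, e.g.:
# litellm_callback_logging_failures_metric_total{callback_name="S3Logger"} 1.0
print(generate_latest(registry).decode())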

def track_provider_remaining_budget(
self, provider: str, spend: float, budget_limit: float
):
30 changes: 27 additions & 3 deletions litellm/containers/main.py
@@ -21,9 +21,6 @@
from litellm.types.utils import CallTypes
from litellm.utils import ProviderConfigManager, client

# Default model for container operations - can be any provider that supports containers
DEFAULT_CONTAINER_ENDPOINT_MODEL = "openai/gpt-4"

__all__ = [
"acreate_container",
"adelete_container",
@@ -386,6 +383,15 @@ def list_containers(
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id")
_is_async = kwargs.pop("async_call", False) is True

# Check for mock response first
mock_response = kwargs.get("mock_response")
if mock_response is not None:
if isinstance(mock_response, str):
mock_response = json.loads(mock_response)

response = ContainerListResponse(**mock_response)
return response
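The same mock-response short-circuit is repeated in retrieve_container and delete_container below. A hedged usage sketch — it assumes list_containers is re-exported at the package level like the helpers in __all__ above, and the payload fields are illustrative rather than taken from ContainerListResponse's actual schema:

import json

import litellm

# Hypothetical payload; the real ContainerListResponse fields may differ.
mock = json.dumps({"object": "list", "data": []})

# The string is json.loads-ed and splatted into ContainerListResponse,
# so no provider credentials or network call are needed.
containers = litellm.list_containers(mock_response=mock)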

# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
# get provider config
@@ -562,6 +568,15 @@ def retrieve_container(
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id")
_is_async = kwargs.pop("async_call", False) is True

# Check for mock response first
mock_response = kwargs.get("mock_response")
if mock_response is not None:
if isinstance(mock_response, str):
mock_response = json.loads(mock_response)

response = ContainerObject(**mock_response)
return response

# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
# get provider config
@@ -730,6 +745,15 @@ def delete_container(
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id")
_is_async = kwargs.pop("async_call", False) is True

# Check for mock response first
mock_response = kwargs.get("mock_response")
if mock_response is not None:
if isinstance(mock_response, str):
mock_response = json.loads(mock_response)

response = DeleteContainerResult(**mock_response)
return response

# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
# get provider config
23 changes: 23 additions & 0 deletions litellm/integrations/custom_logger.py
@@ -567,3 +567,26 @@ async def get_proxy_server_request_from_cold_storage_with_object_key(
Get the proxy server request from cold storage using the object key directly.
"""
pass

def handle_callback_failure(self, callback_name: str):
"""
Handle callback logging failures by incrementing Prometheus metrics.

Call this method in exception handlers within your callback when logging fails.
"""
try:
import litellm

Contributor: you can use logging_callback_manager to get you all the callbacks and then just run this

Contributor: agreed @Sameerlite can you please make the changes

Collaborator (author): Updated the code

all_callbacks = []
all_callbacks.extend(litellm.callbacks or []) # type: ignore
all_callbacks.extend(litellm._async_success_callback or []) # type: ignore
all_callbacks.extend(litellm.success_callback or []) # type: ignore

for callback_obj in all_callbacks:
if hasattr(callback_obj, 'increment_callback_logging_failure'):
callback_obj.increment_callback_logging_failure(callback_name=callback_name) # type: ignore
break

except Exception as e:
from litellm._logging import verbose_logger
verbose_logger.debug(f"Error in handle_callback_failure: {str(e)}")
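A minimal sketch of the intended call site — a hypothetical integration invoking handle_callback_failure from its own exception handler, mirroring the s3_v2 changes below (MyLogger and _ship_to_backend are illustrative names, not part of LiteLLM):

from litellm.integrations.custom_logger import CustomLogger

class MyLogger(CustomLogger):
    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        try:
            await self._ship_to_backend(kwargs, response_obj)  # hypothetical transport
        except Exception:
            # Non-blocking: record the failure in Prometheus and move on.
            self.handle_callback_failure(callback_name="MyLogger")

    async def _ship_to_backend(self, kwargs, response_obj):
        raise NotImplementedError  # placeholder for real upload logic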
4 changes: 3 additions & 1 deletion litellm/integrations/s3_v2.py
@@ -239,7 +239,7 @@ async def _async_log_event_base(self, kwargs, response_obj, start_time, end_time
)
except Exception as e:
verbose_logger.exception(f"s3 Layer Error - {str(e)}")
pass
self.handle_callback_failure(callback_name="S3Logger")
Contributor: which would be less work over time:

  • requiring each instance to implement this
  • OR having integrations just bubble the error and have litellm_logging handle this?
    @Sameerlite

Collaborator (author, @Sameerlite, Oct 31, 2025): @krrishdholakia The 2nd one is less work, but the problem is that periodic_flush and the methods it uses don't raise errors or propagate them to litellm_logging. Plus there are fire-and-forget tasks whose errors I couldn't find a way to bubble up. The approach I used makes sure that if an error occurs, it gets logged in Prometheus.

Contributor: @krrishdholakia is this good with you?

Contributor: ok, let's start here


async def async_upload_data_to_s3(
self, batch_logging_element: s3BatchLoggingElement
@@ -323,6 +323,7 @@ async def async_upload_data_to_s3(
response.raise_for_status()
except Exception as e:
verbose_logger.exception(f"Error uploading to s3: {str(e)}")
self.handle_callback_failure(callback_name="S3Logger")

async def async_send_batch(self):
"""
@@ -471,6 +472,7 @@ def upload_data_to_s3(self, batch_logging_element: s3BatchLoggingElement):
response.raise_for_status()
except Exception as e:
verbose_logger.exception(f"Error uploading to s3: {str(e)}")
self.handle_callback_failure(callback_name="S3Logger")

async def _download_object_from_s3(self, s3_object_key: str) -> Optional[dict]:
"""
46 changes: 45 additions & 1 deletion litellm/litellm_core_utils/litellm_logging.py
@@ -1,6 +1,7 @@
# What is this?
## Common Utility file for Logging handler
# Logging function -> log the exact model details + what's being sent | Non-Blocking
import asyncio
import copy
import datetime
import json
@@ -116,6 +117,7 @@
Usage,
)
from litellm.types.videos.main import VideoObject
from litellm.types.containers.main import ContainerObject
from litellm.utils import _get_base_model_from_metadata, executor, print_verbose

from ..integrations.argilla import ArgillaLogger
@@ -1622,6 +1624,7 @@ def _is_recognized_call_type_for_logging(
or isinstance(logging_result, dict)
and logging_result.get("object") == "vector_store.search_results.page"
or isinstance(logging_result, VideoObject)
or isinstance(logging_result, ContainerObject)
or (self.call_type == CallTypes.call_mcp_tool.value)
):
return True
@@ -2157,6 +2160,11 @@ def success_handler( # noqa: PLR0915
)
if capture_exception: # log this error to sentry for debugging
capture_exception(e)
# Track callback logging failures in Prometheus
try:
self._handle_callback_failure(callback=callback)
except Exception:
pass
except Exception as e:
verbose_logger.exception(
"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {}".format(
@@ -2462,8 +2470,36 @@ async def async_success_handler( # noqa: PLR0915
verbose_logger.error(
f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
)
self._handle_callback_failure(callback=callback)
pass

def _handle_callback_failure(self, callback: Any):
"""
Handle callback logging failures by incrementing Prometheus metrics.

Works for both sync and async contexts since Prometheus counter increment is synchronous.

Args:
callback: The callback that failed
"""
try:
callback_name = self._get_callback_name(callback)

all_callbacks = []
all_callbacks.extend(litellm.callbacks or []) # type: ignore
all_callbacks.extend(litellm._async_success_callback or []) # type: ignore
all_callbacks.extend(litellm.success_callback or []) # type: ignore

for callback_obj in all_callbacks:
if hasattr(callback_obj, 'increment_callback_logging_failure'):
callback_obj.increment_callback_logging_failure(callback_name=callback_name) # type: ignore
break # Only increment once

except Exception as e:
verbose_logger.debug(
f"Error in _handle_callback_failure: {str(e)}"
)
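The dispatch can be sanity-checked without a running proxy. A sketch using a MagicMock in place of the PrometheusLogger — note that a MagicMock passes any hasattr check, which is exactly what makes it a convenient stand-in here:

from unittest.mock import MagicMock

import litellm

prometheus_stub = MagicMock()  # stands in for PrometheusLogger
litellm.callbacks = [prometheus_stub]  # overwrites globals; fine for a throwaway script
litellm.success_callback = []
litellm._async_success_callback = []

# Mirror of the dispatch loop in _handle_callback_failure above.
for cb in [*litellm.callbacks, *litellm._async_success_callback, *litellm.success_callback]:
    if hasattr(cb, "increment_callback_logging_failure"):
        cb.increment_callback_logging_failure(callback_name="langfuse")
        break  # only increment once

prometheus_stub.increment_callback_logging_failure.assert_called_once_with(
    callback_name="langfuse"
)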

def _failure_handler_helper_fn(
self, exception, traceback_exception, start_time=None, end_time=None
):
@@ -2799,6 +2835,10 @@ async def async_failure_handler(
str(e), callback
)
)
# Track callback logging failures in Prometheus.
# _handle_callback_failure is synchronous (the Prometheus counter increment
# does not await anything), so it is called directly rather than wrapped in
# asyncio.create_task, which requires a coroutine.
self._handle_callback_failure(callback=callback)

def _get_trace_id(self, service_name: Literal["langfuse"]) -> Optional[str]:
"""
Expand Down Expand Up @@ -2931,11 +2971,15 @@ def _get_callback_name(self, cb) -> str:
Helper to get the name of a callback function

Args:
cb: The callback function/string to get the name of
cb: The callback object/function/string to get the name of

Returns:
The name of the callback
"""
if isinstance(cb, str):
return cb
if hasattr(cb, "__name__"):
return cb.__name__
# Fall back to the class name for callback instances (e.g. PrometheusLogger);
# checked after __name__ so plain function callbacks keep their own names.
if hasattr(cb, "__class__"):
return cb.__class__.__name__
if hasattr(cb, "__func__"):
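A quick standalone check of the name-resolution order after this change — get_callback_name below mirrors the checks in _get_callback_name, and PrometheusLogger/my_log_fn are stand-ins defined only for the example:

def get_callback_name(cb) -> str:
    if isinstance(cb, str):
        return cb
    if hasattr(cb, "__name__"):
        return cb.__name__
    if hasattr(cb, "__class__"):
        return cb.__class__.__name__
    return str(cb)

class PrometheusLogger:  # stand-in for the real integration class
    pass

def my_log_fn():  # hypothetical function callback
    pass

assert get_callback_name("langfuse") == "langfuse"  # string callbacks
assert get_callback_name(my_log_fn) == "my_log_fn"  # plain functions
assert get_callback_name(PrometheusLogger()) == "PrometheusLogger"  # instances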
2 changes: 1 addition & 1 deletion litellm/llms/custom_httpx/llm_http_handler.py
@@ -6377,4 +6377,4 @@ async def async_text_to_speech_handler(
model=model,
raw_response=response,
logging_obj=logging_obj,
)
)
8 changes: 8 additions & 0 deletions litellm/proxy/common_request_processing.py
@@ -321,6 +321,10 @@ async def common_processing_pre_call_logic(
"avideo_status",
"avideo_content",
"avideo_remix",
"acreate_container",
"alist_containers",
"aretrieve_container",
"adelete_container",
],
version: Optional[str] = None,
user_model: Optional[str] = None,
@@ -419,6 +423,10 @@ async def base_process_llm_request(
"avideo_status",
"avideo_content",
"avideo_remix",
"acreate_container",
"alist_containers",
"aretrieve_container",
"adelete_container",
],
proxy_logging_obj: ProxyLogging,
general_settings: dict,
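With these route types registered, the proxy's container endpoints should be callable like any other OpenAI-compatible route. A hedged sketch — the path, port, and key below mirror OpenAI's /v1/containers layout and LiteLLM's usual proxy defaults, and are assumptions rather than values taken from this diff:

import httpx

resp = httpx.get(
    "http://localhost:4000/v1/containers",  # assumed local proxy address
    headers={"Authorization": "Bearer sk-1234"},  # assumed master key
)
resp.raise_for_status()
print(resp.json())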