Commit 44b3a85

Merge branch 'BerriAI:main' into dev1206
2 parents 1ae1ae8 + 60a325e commit 44b3a85

File tree

3 files changed (+8, -5 lines)


docs/my-website/docs/proxy/config_settings.md

Lines changed: 2 additions & 0 deletions
@@ -739,6 +739,8 @@ router_settings:
 | OPENMETER_API_ENDPOINT | API endpoint for OpenMeter integration
 | OPENMETER_API_KEY | API key for OpenMeter services
 | OPENMETER_EVENT_TYPE | Type of events sent to OpenMeter
+| ONYX_API_BASE | Base URL for Onyx Security AI Guard service (defaults to https://ai-guard.onyx.security)
+| ONYX_API_KEY | API key for Onyx Security AI Guard service
 | OTEL_ENDPOINT | OpenTelemetry endpoint for traces
 | OTEL_EXPORTER_OTLP_ENDPOINT | OpenTelemetry endpoint for traces
 | OTEL_ENVIRONMENT_NAME | Environment name for OpenTelemetry
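
The two added rows follow the table's pattern: one variable with a documented default, one without. A minimal sketch of how a client might resolve them, assuming only what the table states (the helper name and error handling here are illustrative, not LiteLLM's actual code):

```python
import os


def resolve_onyx_settings() -> tuple:
    """Illustrative helper: read the Onyx AI Guard settings from the environment."""
    # ONYX_API_BASE is optional and falls back to the documented default endpoint.
    api_base = os.environ.get("ONYX_API_BASE", "https://ai-guard.onyx.security")
    # ONYX_API_KEY has no documented default, so treat its absence as an error.
    api_key = os.environ.get("ONYX_API_KEY")
    if not api_key:
        raise RuntimeError("ONYX_API_KEY is not set")
    return api_base, api_key
```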

litellm/llms/anthropic/chat/handler.py

Lines changed: 4 additions & 4 deletions
@@ -874,7 +874,7 @@ def _handle_message_delta(self, chunk: dict) -> Tuple[str, Optional[Usage]]:
 
     def _handle_accumulated_json_chunk(
         self, data_str: str
-    ) -> Optional[GenericStreamingChunk]:
+    ) -> Optional[ModelResponseStream]:
         """
         Handle partial JSON chunks by accumulating them until valid JSON is received.
 
@@ -885,7 +885,7 @@ def _handle_accumulated_json_chunk(
             data_str: The JSON string to parse (without "data:" prefix)
 
         Returns:
-            GenericStreamingChunk if JSON is complete, None if still accumulating
+            ModelResponseStream if JSON is complete, None if still accumulating
         """
         # Accumulate JSON data
         self.accumulated_json += data_str
@@ -899,15 +899,15 @@ def _handle_accumulated_json_chunk(
             # If it's not valid JSON yet, continue to the next chunk
             return None
 
-    def _parse_sse_data(self, str_line: str) -> Optional[GenericStreamingChunk]:
+    def _parse_sse_data(self, str_line: str) -> Optional[ModelResponseStream]:
         """
         Parse SSE data line, handling both complete and partial JSON chunks.
 
         Args:
             str_line: The SSE line starting with "data:"
 
        Returns:
-            GenericStreamingChunk if parsing succeeded, None if accumulating partial JSON
+            ModelResponseStream if parsing succeeded, None if accumulating partial JSON
         """
         data_str = str_line[5:]  # Remove "data:" prefix

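The signature change is cosmetic from the caller's perspective: both methods still return None while the JSON is incomplete and a chunk object once a full event has arrived. A self-contained sketch of that accumulate-until-valid pattern, using a plain dict in place of ModelResponseStream (a simplification; the real handler builds litellm's typed object):

```python
import json
from typing import Any, Dict, Optional


class SSEAccumulator:
    """Simplified model of the handler's partial-JSON accumulation."""

    def __init__(self) -> None:
        self.accumulated_json = ""

    def parse_sse_data(self, str_line: str) -> Optional[Dict[str, Any]]:
        data_str = str_line[5:]  # Remove "data:" prefix
        self.accumulated_json += data_str
        try:
            event = json.loads(self.accumulated_json)
        except json.JSONDecodeError:
            return None  # Still accumulating a partial event
        self.accumulated_json = ""  # Complete event parsed; reset the buffer
        return event


# A single JSON event split across two SSE lines parses only on the second call.
acc = SSEAccumulator()
assert acc.parse_sse_data('data:{"type": "message_del') is None
assert acc.parse_sse_data('data:ta", "delta": {}}') is not None
```
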
litellm/proxy/response_polling/background_streaming.py

Lines changed: 2 additions & 1 deletion
@@ -9,6 +9,7 @@
 """
 import asyncio
 import json
+from typing import Any, Dict
 
 from fastapi import Request, Response
 
@@ -85,7 +86,7 @@ async def background_streaming_task(  # noqa: PLR0915
 
     # Process streaming response following OpenAI events format
     # https://platform.openai.com/docs/api-reference/responses-streaming
-    output_items = {}  # Track output items by ID
+    output_items: Dict[str, Dict[str, Any]] = {}  # Track output items by ID
     accumulated_text = {}  # Track accumulated text deltas by (item_id, content_index)
 
     # ResponsesAPIResponse fields to extract from response.completed
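
The added annotation only makes the existing shape explicit for type checkers: output_items maps an item ID to that item's dict. A sketch of the tracking pattern, assuming event field names from the OpenAI Responses streaming reference linked in the comment above (this is not the proxy's actual event loop):

```python
from typing import Any, Dict, Tuple

output_items: Dict[str, Dict[str, Any]] = {}  # Output items keyed by item ID
accumulated_text: Dict[Tuple[str, int], str] = {}  # Deltas keyed by (item_id, content_index)


def track_event(event: Dict[str, Any]) -> None:
    """Illustrative handler for two Responses-streaming event types."""
    if event.get("type") == "response.output_item.added":
        item = event["item"]
        output_items[item["id"]] = item  # Remember the item until it completes
    elif event.get("type") == "response.output_text.delta":
        key = (event["item_id"], event["content_index"])
        accumulated_text[key] = accumulated_text.get(key, "") + event["delta"]
```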
