Skip to content

Commit a61275f

Browse files
Merge pull request #13796 from 0x-fang/fix_aip_pass_through_08_20
Fix(Bedrock): fix application inference profile for pass-through endpoints for bedrock
2 parents 7c2c1c1 + d2b943f commit a61275f

File tree

6 files changed

+272
-4
lines changed

6 files changed

+272
-4
lines changed

litellm/passthrough/main.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
2626
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
2727
from litellm.utils import client
28+
from litellm.proxy.pass_through_endpoints.common_utils import encode_bedrock_runtime_modelid_arn
2829

2930
base_llm_http_handler = BaseLLMHTTPHandler()
3031
from .utils import BasePassthroughUtils
@@ -241,6 +242,12 @@ def llm_passthrough_route(
241242
request_query_params=request_query_params,
242243
litellm_params=litellm_params_dict,
243244
)
245+
246+
# need to encode the id of application-inference-profile for bedrock
247+
if custom_llm_provider == "bedrock" and "application-inference-profile" in endpoint:
248+
encoded_url_str = encode_bedrock_runtime_modelid_arn(str(updated_url))
249+
updated_url = httpx.URL(encoded_url_str)
250+
244251
# Add or update query parameters
245252
provider_api_key = provider_config.get_api_key(api_key)
246253

litellm/proxy/pass_through_endpoints/common_utils.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,55 @@ def get_litellm_virtual_key(request: Request) -> str:
1414
if litellm_api_key:
1515
return f"Bearer {litellm_api_key}"
1616
return request.headers.get("Authorization", "")
17+
18+
19+
def encode_bedrock_runtime_modelid_arn(endpoint: str) -> str:
20+
"""
21+
Encodes any "/" found in the modelId of an AWS Bedrock Runtime Endpoint when arns are passed in.
22+
- modelID value can be an ARN which contains slashes that SHOULD NOT be treated as path separators.
23+
e.g. endpoint: /model/<modelId>/invoke
24+
<modelId> containing arns with slashes need to be encoded from
25+
arn:aws:bedrock:ap-southeast-1:123456789012:application-inference-profile/abdefg12334 =>
26+
arn:aws:bedrock:ap-southeast-1:123456789012:application-inference-profile%2Fabdefg12334
27+
so that it is treated as one part of the path.
28+
Otherwise, the encoded endpoint will return 500 error when passed to Bedrock endpoint.
29+
30+
See the apis in https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html
31+
for more details on the regex patterns of modelId which we use in the regex logic below.
32+
33+
Args:
34+
endpoint (str): The original endpoint string which may contain ARNs that contain slashes.
35+
36+
Returns:
37+
str: The endpoint with properly encoded ARN slashes
38+
"""
39+
import re
40+
41+
# Early exit: if no ARN detected, return unchanged
42+
if 'arn:aws:' not in endpoint:
43+
return endpoint
44+
45+
# Handle all patterns in one go - more efficient and cleaner
46+
patterns = [
47+
# Custom model with 2 slashes (order matters - do this first)
48+
(r'(custom-model)/([a-z0-9.-]+)/([a-z0-9]+)', r'\1%2F\2%2F\3'),
49+
50+
# All other resource types with 1 slash
51+
(r'(:application-inference-profile)/', r'\1%2F'),
52+
(r'(:inference-profile)/', r'\1%2F'),
53+
(r'(:foundation-model)/', r'\1%2F'),
54+
(r'(:imported-model)/', r'\1%2F'),
55+
(r'(:provisioned-model)/', r'\1%2F'),
56+
(r'(:prompt)/', r'\1%2F'),
57+
(r'(:endpoint)/', r'\1%2F'),
58+
(r'(:prompt-router)/', r'\1%2F'),
59+
(r'(:default-prompt-router)/', r'\1%2F'),
60+
]
61+
62+
for pattern, replacement in patterns:
63+
# Check if pattern exists before applying regex (early exit optimization)
64+
if re.search(pattern, endpoint):
65+
endpoint = re.sub(pattern, replacement, endpoint)
66+
break # Exit after first match since each ARN has only one resource type
67+
68+
return endpoint

litellm/proxy/pass_through_endpoints/llm_passthrough_endpoints.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -492,20 +492,26 @@ async def bedrock_llm_proxy_route(
492492
data: Dict[str, Any] = {}
493493
base_llm_response_processor = ProxyBaseLLMRequestProcessing(data=data)
494494
try:
495-
model = endpoint.split("/")[1]
495+
endpoint_parts = endpoint.split("/")
496+
if "application-inference-profile" in endpoint:
497+
# For application-inference-profile, include the profile ID part as well
498+
model = "/".join(endpoint_parts[1:3])
499+
else:
500+
model = endpoint_parts[1]
496501
except Exception:
497502
raise HTTPException(
498503
status_code=400,
499504
detail={
500505
"error": "Model missing from endpoint. Expected format: /model/<Model>/<endpoint>. Got: "
501506
+ endpoint,
502507
},
503-
)
508+
)
504509

505510
data["method"] = request.method
506511
data["endpoint"] = endpoint
507512
data["data"] = request_body
508-
513+
data["custom_llm_provider"] = "bedrock"
514+
509515
try:
510516
result = await base_llm_response_processor.base_passthrough_process_llm_request(
511517
request=request,

tests/test_litellm/passthrough/test_passthrough_main.py

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44

55
import pytest
66
from fastapi.testclient import TestClient
7-
7+
from litellm.llms.custom_httpx.http_handler import HTTPHandler
8+
from unittest.mock import MagicMock, patch
9+
import httpx
10+
811
sys.path.insert(
912
0, os.path.abspath("../../..")
1013
) # Adds the parent directory to the system path
@@ -45,3 +48,89 @@ def test_llm_passthrough_route():
4548

4649
assert response.status_code == 200
4750
assert response.json == {"message": "Hello, world!"}
51+
52+
53+
def test_bedrock_application_inference_profile_url_encoding():
54+
client = HTTPHandler()
55+
56+
mock_provider_config = MagicMock()
57+
mock_provider_config.get_complete_url.return_value = (
58+
httpx.URL("https://bedrock-runtime.us-east-1.amazonaws.com/model/arn:aws:bedrock:us-east-1:123456789123:application-inference-profile/r742sbn2zckd/converse"),
59+
"https://bedrock-runtime.us-east-1.amazonaws.com"
60+
)
61+
mock_provider_config.get_api_key.return_value = "test-key"
62+
mock_provider_config.validate_environment.return_value = {}
63+
mock_provider_config.sign_request.return_value = ({}, None)
64+
mock_provider_config.is_streaming_request.return_value = False
65+
66+
with patch("litellm.utils.ProviderConfigManager.get_provider_passthrough_config", return_value=mock_provider_config), \
67+
patch("litellm.litellm_core_utils.get_litellm_params.get_litellm_params", return_value={}), \
68+
patch("litellm.litellm_core_utils.get_llm_provider_logic.get_llm_provider", return_value=("test-model", "bedrock", "test-key", "test-base")), \
69+
patch.object(client.client, "send", return_value=MagicMock(status_code=200)) as mock_send, \
70+
patch.object(client.client, "build_request") as mock_build_request:
71+
72+
# Mock logging object
73+
mock_logging_obj = MagicMock()
74+
mock_logging_obj.update_environment_variables = MagicMock()
75+
76+
response = llm_passthrough_route(
77+
model="arn:aws:bedrock:us-east-1:123456789123:application-inference-profile/r742sbn2zckd",
78+
endpoint="model/arn:aws:bedrock:us-east-1:123456789123:application-inference-profile/r742sbn2zckd/converse",
79+
method="POST",
80+
custom_llm_provider="bedrock",
81+
client=client,
82+
litellm_logging_obj=mock_logging_obj,
83+
)
84+
85+
# Verify that build_request was called with the encoded URL
86+
mock_build_request.assert_called_once()
87+
call_args = mock_build_request.call_args
88+
89+
# The URL should have the application-inference-profile ID encoded
90+
actual_url = str(call_args.kwargs["url"])
91+
assert "application-inference-profile%2Fr742sbn2zckd" in actual_url
92+
assert response.status_code == 200
93+
94+
95+
def test_bedrock_non_application_inference_profile_no_encoding():
96+
client = HTTPHandler()
97+
98+
# Mock the provider config and its methods
99+
mock_provider_config = MagicMock()
100+
mock_provider_config.get_complete_url.return_value = (
101+
httpx.URL("https://bedrock-runtime.us-east-1.amazonaws.com/model/anthropic.claude-3-sonnet-20240229-v1:0/converse"),
102+
"https://bedrock-runtime.us-east-1.amazonaws.com"
103+
)
104+
mock_provider_config.get_api_key.return_value = "test-key"
105+
mock_provider_config.validate_environment.return_value = {}
106+
mock_provider_config.sign_request.return_value = ({}, None)
107+
mock_provider_config.is_streaming_request.return_value = False
108+
109+
with patch("litellm.utils.ProviderConfigManager.get_provider_passthrough_config", return_value=mock_provider_config), \
110+
patch("litellm.litellm_core_utils.get_litellm_params.get_litellm_params", return_value={}), \
111+
patch("litellm.litellm_core_utils.get_llm_provider_logic.get_llm_provider", return_value=("test-model", "bedrock", "test-key", "test-base")), \
112+
patch.object(client.client, "send", return_value=MagicMock(status_code=200)) as mock_send, \
113+
patch.object(client.client, "build_request") as mock_build_request:
114+
115+
# Mock logging object
116+
mock_logging_obj = MagicMock()
117+
mock_logging_obj.update_environment_variables = MagicMock()
118+
119+
response = llm_passthrough_route(
120+
model="anthropic.claude-3-sonnet-20240229-v1:0",
121+
endpoint="model/anthropic.claude-3-sonnet-20240229-v1:0/converse",
122+
method="POST",
123+
custom_llm_provider="bedrock",
124+
client=client,
125+
litellm_logging_obj=mock_logging_obj,
126+
)
127+
128+
# Verify that build_request was called with the original URL (no encoding)
129+
mock_build_request.assert_called_once()
130+
call_args = mock_build_request.call_args
131+
132+
# The URL should NOT have application-inference-profile encoding
133+
actual_url = str(call_args.kwargs["url"])
134+
assert "application-inference-profile%2F" not in actual_url
135+
assert "anthropic.claude-3-sonnet-20240229-v1:0" in actual_url
136+
assert response.status_code == 200

tests/test_litellm/proxy/pass_through_endpoints/test_llm_pass_through_endpoints.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
create_pass_through_route,
2222
vertex_discovery_proxy_route,
2323
vertex_proxy_route,
24+
bedrock_llm_proxy_route,
2425
)
2526
from litellm.types.passthrough_endpoints.vertex_ai import VertexPassThroughCredentials
2627

@@ -853,3 +854,63 @@ async def test_is_streaming_request_fn():
853854
mock_request.headers = {"content-type": "multipart/form-data"}
854855
mock_request.form = AsyncMock(return_value={"stream": "true"})
855856
assert await is_streaming_request_fn(mock_request) is True
857+
858+
class TestBedrockLLMProxyRoute:
859+
@pytest.mark.asyncio
860+
async def test_bedrock_llm_proxy_route_application_inference_profile(self):
861+
mock_request = Mock()
862+
mock_request.method = "POST"
863+
mock_response = Mock()
864+
mock_user_api_key_dict = Mock()
865+
mock_request_body = {"messages": [{"role": "user", "content": "test"}]}
866+
mock_processor = Mock()
867+
mock_processor.base_passthrough_process_llm_request = AsyncMock(return_value="success")
868+
869+
with patch("litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints._read_request_body", return_value=mock_request_body), \
870+
patch("litellm.proxy.common_request_processing.ProxyBaseLLMRequestProcessing", return_value=mock_processor):
871+
872+
# Test application-inference-profile endpoint
873+
endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/r742sbn2zckd/converse"
874+
875+
result = await bedrock_llm_proxy_route(
876+
endpoint=endpoint,
877+
request=mock_request,
878+
fastapi_response=mock_response,
879+
user_api_key_dict=mock_user_api_key_dict,
880+
)
881+
882+
mock_processor.base_passthrough_process_llm_request.assert_called_once()
883+
call_kwargs = mock_processor.base_passthrough_process_llm_request.call_args.kwargs
884+
885+
# For application-inference-profile, model should be "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/r742sbn2zckd"
886+
assert call_kwargs["model"] == "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/r742sbn2zckd"
887+
assert result == "success"
888+
889+
@pytest.mark.asyncio
890+
async def test_bedrock_llm_proxy_route_regular_model(self):
891+
mock_request = Mock()
892+
mock_request.method = "POST"
893+
mock_response = Mock()
894+
mock_user_api_key_dict = Mock()
895+
mock_request_body = {"messages": [{"role": "user", "content": "test"}]}
896+
mock_processor = Mock()
897+
mock_processor.base_passthrough_process_llm_request = AsyncMock(return_value="success")
898+
899+
with patch("litellm.proxy.pass_through_endpoints.llm_passthrough_endpoints._read_request_body", return_value=mock_request_body), \
900+
patch("litellm.proxy.common_request_processing.ProxyBaseLLMRequestProcessing", return_value=mock_processor):
901+
902+
# Test regular model endpoint
903+
endpoint = "model/anthropic.claude-3-sonnet-20240229-v1:0/converse"
904+
905+
result = await bedrock_llm_proxy_route(
906+
endpoint=endpoint,
907+
request=mock_request,
908+
fastapi_response=mock_response,
909+
user_api_key_dict=mock_user_api_key_dict,
910+
)
911+
mock_processor.base_passthrough_process_llm_request.assert_called_once()
912+
call_kwargs = mock_processor.base_passthrough_process_llm_request.call_args.kwargs
913+
914+
# For regular models, model should be just the model ID
915+
assert call_kwargs["model"] == "anthropic.claude-3-sonnet-20240229-v1:0"
916+
assert result == "success"

tests/test_litellm/proxy/pass_through_endpoints/test_passthrough_endpoints_common_utils.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import pytest
1010
from fastapi import Request, Response
1111
from fastapi.testclient import TestClient
12+
from litellm.proxy.pass_through_endpoints.common_utils import encode_bedrock_runtime_modelid_arn
1213

1314
sys.path.insert(
1415
0, os.path.abspath("../../../..")
@@ -42,3 +43,55 @@ async def test_get_litellm_virtual_key():
4243
}
4344
result = get_litellm_virtual_key(mock_request)
4445
assert result == "Bearer test-key-123"
46+
47+
def test_encode_bedrock_runtime_modelid_arn():
48+
# Test application-inference-profile ARN
49+
endpoint = "model/arn:aws:bedrock:us-east-1:123456789123:application-inference-profile/r742sbn2zckd/converse"
50+
expected = "model/arn:aws:bedrock:us-east-1:123456789123:application-inference-profile%2Fr742sbn2zckd/converse"
51+
result = encode_bedrock_runtime_modelid_arn(endpoint)
52+
assert result == expected
53+
54+
# Test inference-profile ARN
55+
endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:inference-profile/test-profile/invoke"
56+
expected = "model/arn:aws:bedrock:us-east-1:123456789012:inference-profile%2Ftest-profile/invoke"
57+
result = encode_bedrock_runtime_modelid_arn(endpoint)
58+
assert result == expected
59+
60+
# Test foundation-model ARN
61+
endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:foundation-model/anthropic.claude-3/converse"
62+
expected = "model/arn:aws:bedrock:us-east-1:123456789012:foundation-model%2Fanthropic.claude-3/converse"
63+
result = encode_bedrock_runtime_modelid_arn(endpoint)
64+
assert result == expected
65+
66+
# Test custom-model ARN (2 slashes)
67+
endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:custom-model/my-model.fine-tuned/abc123/invoke"
68+
expected = "model/arn:aws:bedrock:us-east-1:123456789012:custom-model%2Fmy-model.fine-tuned%2Fabc123/invoke"
69+
result = encode_bedrock_runtime_modelid_arn(endpoint)
70+
assert result == expected
71+
72+
# Test provisioned-model ARN
73+
endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:provisioned-model/test-model/converse"
74+
expected = "model/arn:aws:bedrock:us-east-1:123456789012:provisioned-model%2Ftest-model/converse"
75+
result = encode_bedrock_runtime_modelid_arn(endpoint)
76+
assert result == expected
77+
78+
79+
def test_encode_bedrock_runtime_modelid_arn_no_arn():
80+
# Test regular model ID (no ARN)
81+
endpoint = "model/anthropic.claude-3-sonnet-20240229-v1:0/converse"
82+
result = encode_bedrock_runtime_modelid_arn(endpoint)
83+
assert result == endpoint
84+
85+
86+
def test_encode_bedrock_runtime_modelid_arn_edge_cases():
87+
# Test that only the first matching resource-type pattern is encoded (loop breaks after one match)
88+
endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test1/converse"
89+
expected = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile%2Ftest1/converse"
90+
result = encode_bedrock_runtime_modelid_arn(endpoint)
91+
assert result == expected
92+
93+
# Test ARN with special characters in resource ID
94+
endpoint = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/test-profile.v1/invoke"
95+
expected = "model/arn:aws:bedrock:us-east-1:123456789012:application-inference-profile%2Ftest-profile.v1/invoke"
96+
result = encode_bedrock_runtime_modelid_arn(endpoint)
97+
assert result == expected

0 commit comments

Comments
 (0)