
Commit 49cb9bd

Merge pull request #13843 from BerriAI/litellm_dev_08_29_2025_p3
SSO - Free SSO usage for up to 5 users + remove deprecated dbrx models (dbrx-instruct, llama 3.1)
2 parents f2c5e80 + 988434a commit 49cb9bd

File tree

5 files changed: +96 -102 lines changed

enterprise/litellm_enterprise/proxy/management_endpoints/internal_user_endpoints.py

Lines changed: 11 additions & 5 deletions
@@ -2,6 +2,8 @@
 Enterprise internal user management endpoints
 """
 
+import os
+
 from fastapi import APIRouter, Depends, HTTPException
 
 from litellm.proxy._types import UserAPIKeyAuth
@@ -21,7 +23,7 @@ async def available_enterprise_users(
     """
     For keys with `max_users` set, return the list of users that are allowed to use the key.
     """
-    from litellm.proxy._types import CommonProxyErrors
+    from litellm.proxy._types import CommonProxyErrors, EnterpriseLicenseData
    from litellm.proxy.proxy_server import (
        premium_user,
        premium_user_data,
@@ -34,10 +36,14 @@ async def available_enterprise_users(
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )
 
-    if premium_user is None:
-        raise HTTPException(
-            status_code=500, detail={"error": CommonProxyErrors.not_premium_user.value}
-        )
+    if not premium_user:
+        # check if SSO is enabled - show 5 user limit
+        from litellm.proxy.auth.auth_utils import _has_user_setup_sso
+
+        if _has_user_setup_sso():
+            premium_user_data = EnterpriseLicenseData(
+                max_users=5,
+            )
 
    # Count number of rows in LiteLLM_UserTable
    user_count = await prisma_client.db.litellm_usertable.count()
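
Net effect of this hunk: on a non-premium deployment that has SSO configured, the endpoint now reports a 5-user allowance instead of failing with a 500. A minimal sketch of the resulting seat accounting, reusing the max_users and user count from the diff above (remaining_seats is a hypothetical helper, not part of the endpoint):

from typing import Optional

def remaining_seats(max_users: Optional[int], user_count: int) -> Optional[int]:
    # None means no known cap (neither a premium license nor SSO is set up).
    if max_users is None:
        return None
    return max(max_users - user_count, 0)

print(remaining_seats(5, 3))  # 2 seats left under the free SSO tier
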

litellm/model_prices_and_context_window_backup.json

Lines changed: 0 additions & 16 deletions
@@ -17691,22 +17691,6 @@
        },
        "supports_tool_choice": true
    },
-    "databricks/databricks-meta-llama-3-1-70b-instruct": {
-        "max_tokens": 128000,
-        "max_input_tokens": 128000,
-        "max_output_tokens": 128000,
-        "input_cost_per_token": 1.00002e-06,
-        "input_dbu_cost_per_token": 1.4286e-05,
-        "output_cost_per_token": 2.99999e-06,
-        "output_dbu_cost_per_token": 4.2857e-05,
-        "litellm_provider": "databricks",
-        "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
-        "metadata": {
-            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
-        },
-        "supports_tool_choice": true
-    },
    "databricks/databricks-meta-llama-3-3-70b-instruct": {
        "max_tokens": 128000,
        "max_input_tokens": 128000,

litellm/proxy/_types.py

Lines changed: 19 additions & 16 deletions
@@ -530,7 +530,7 @@ class LiteLLMRoutes(enum.Enum):
    # Routes accessible by Admin Viewer (read-only admin access)
    admin_viewer_routes = [
        "/user/list",
-        "/user/available_users",
+        "/user/available_users",
        "/user/available_roles",
        "/user/daily/activity",
        "/team/daily/activity",
@@ -540,7 +540,10 @@
 
    # All routes accesible by an Org Admin
    org_admin_allowed_routes = (
-        org_admin_only_routes + management_routes + self_managed_routes + admin_viewer_routes
+        org_admin_only_routes
+        + management_routes
+        + self_managed_routes
+        + admin_viewer_routes
    )
 
 
@@ -585,13 +588,14 @@ def check_llm_api_params(cls, values):
 ######### Request Class Definition ######
 class ProxyChatCompletionRequest(LiteLLMPydanticObjectBase):
    """
-    Pydantic model for chat completion requests that includes both OpenAI standard fields
+    Pydantic model for chat completion requests that includes both OpenAI standard fields
    and LiteLLM-specific parameters. This replaces the previous TypedDict version.
    """
+
    # Required fields (from ChatCompletionRequest)
    model: str
    messages: List[AllMessageValues]
-
+
    # Standard OpenAI completion parameters (all optional)
    frequency_penalty: Optional[float] = None
    logit_bias: Optional[Dict[str, float]] = None
@@ -614,10 +618,10 @@ class ProxyChatCompletionRequest(LiteLLMPydanticObjectBase):
    functions: Optional[List[Dict[str, Any]]] = None
    user: Optional[str] = None
    stream: Optional[bool] = None
-
+
    # LiteLLM-specific metadata param (from original ChatCompletionRequest)
    metadata: Optional[Dict[str, Any]] = None
-
+
    # Optional LiteLLM params
    guardrails: Optional[List[str]] = None
    caching: Optional[bool] = None
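
These two hunks only normalize blank lines inside ProxyChatCompletionRequest, but they show the model's shape. A minimal usage sketch using only fields visible in the diff; the values are illustrative:

# Assuming: from litellm.proxy._types import ProxyChatCompletionRequest
# Only `model` and `messages` are required.
request = ProxyChatCompletionRequest(
    model="gpt-4o",
    messages=[{"role": "user", "content": "hello"}],
    stream=False,
    metadata={"trace_id": "abc-123"},  # LiteLLM-specific metadata param
    guardrails=["pii-mask"],           # optional LiteLLM param
)
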
@@ -1873,7 +1877,8 @@ def get_litellm_internal_health_check_user_api_key_auth(cls) -> "UserAPIKeyAuth"
        key_alias=LITTELM_INTERNAL_HEALTH_SERVICE_ACCOUNT_NAME,
        team_alias=LITTELM_INTERNAL_HEALTH_SERVICE_ACCOUNT_NAME,
    )
-
+
+
 class UserInfoResponse(LiteLLMPydanticObjectBase):
    user_id: Optional[str]
    user_info: Optional[Union[dict, BaseModel]]
@@ -2120,7 +2125,6 @@ class TokenCountRequest(LiteLLMPydanticObjectBase):
    Anthropic token counting endpoint uses /messages
    """
 
-
    contents: Optional[List[dict]] = None
    """
    Google /countTokens endpoint expects contents to be a list of dicts with the following structure:
@@ -2265,7 +2269,7 @@ class AllCallbacks(LiteLLMPydanticObjectBase):
 
    braintrust: CallbackOnUI = CallbackOnUI(
        litellm_callback_name="braintrust",
-        litellm_callback_params=["BRAINTRUST_API_KEY","BRAINTRUST_API_BASE"],
+        litellm_callback_params=["BRAINTRUST_API_KEY", "BRAINTRUST_API_BASE"],
        ui_callback_name="Braintrust",
    )
 
@@ -2319,7 +2323,9 @@ class SpendLogsMetadata(TypedDict):
    error_information: Optional[StandardLoggingPayloadErrorInformation]
    usage_object: Optional[dict]
    model_map_information: Optional[StandardLoggingModelInformation]
-    cold_storage_object_key: Optional[str]  # S3/GCS object key for cold storage retrieval
+    cold_storage_object_key: Optional[
+        str
+    ]  # S3/GCS object key for cold storage retrieval
 
 
 class SpendLogsPayload(TypedDict):
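
SpendLogsMetadata is a TypedDict, so instances are plain dicts and the rewrapped annotation is unchanged in meaning. A sketch of how the cold storage key might be populated; the dict literal is partial and the key value illustrative, not the full required shape:

log_metadata = {
    "usage_object": {"prompt_tokens": 12, "completion_tokens": 34},
    # S3/GCS object key for cold storage retrieval, or None if not archived
    "cold_storage_object_key": "spend-logs/2025/08/29/request-abc123.json",
}
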
@@ -2646,7 +2652,7 @@ def safe_get_team_member_rpm_limit(self) -> Optional[int]:
        if self.litellm_budget_table is not None:
            return self.litellm_budget_table.rpm_limit
        return None
-
+
    def safe_get_team_member_tpm_limit(self) -> Optional[int]:
        if self.litellm_budget_table is not None:
            return self.litellm_budget_table.tpm_limit
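
Both safe_get_* methods follow the same guard-before-access pattern. A generic equivalent, as a hypothetical standalone helper (not part of the diff):

from typing import Any, Optional

def safe_getattr(obj: Optional[Any], attr: str) -> Optional[Any]:
    # Return obj.<attr> if obj exists, else None, mirroring the
    # litellm_budget_table None-check above.
    return getattr(obj, attr) if obj is not None else None
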
@@ -2763,14 +2769,11 @@ class TeamMemberUpdateRequest(TeamMemberDeleteRequest):
    max_budget_in_team: Optional[float] = None
    role: Optional[Literal["admin", "user"]] = None
    tpm_limit: Optional[int] = Field(
-        default=None,
-        description="Tokens per minute limit for this team member"
+        default=None, description="Tokens per minute limit for this team member"
    )
    rpm_limit: Optional[int] = Field(
-        default=None,
-        description="Requests per minute limit for this team member"
+        default=None, description="Requests per minute limit for this team member"
    )
-
 
 
 class TeamMemberUpdateResponse(MemberUpdateResponse):
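
The collapsed Field(...) calls are purely stylistic; both limits still default to None with the same descriptions. A self-contained stand-in model (LimitsSketch is hypothetical, not from the diff) demonstrating the behavior:

from typing import Optional
from pydantic import BaseModel, Field

class LimitsSketch(BaseModel):
    # Stand-in showing the collapsed Field(...) style from the hunk above.
    tpm_limit: Optional[int] = Field(
        default=None, description="Tokens per minute limit for this team member"
    )
    rpm_limit: Optional[int] = Field(
        default=None, description="Requests per minute limit for this team member"
    )

print(LimitsSketch().tpm_limit)               # None by default
print(LimitsSketch(rpm_limit=100).rpm_limit)  # 100
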
