
Commit 49cb9bd

Merge pull request #13843 from BerriAI/litellm_dev_08_29_2025_p3
SSO - Free SSO usage for up to 5 users + remove deprecated dbrx models (dbrx-instruct, llama 3.1)
2 parents f2c5e80 + 988434a commit 49cb9bd

File tree

5 files changed: +96 -102 lines changed

enterprise/litellm_enterprise/proxy/management_endpoints/internal_user_endpoints.py

Lines changed: 11 additions & 5 deletions
@@ -2,6 +2,8 @@
 Enterprise internal user management endpoints
 """
 
+import os
+
 from fastapi import APIRouter, Depends, HTTPException
 
 from litellm.proxy._types import UserAPIKeyAuth
@@ -21,7 +23,7 @@ async def available_enterprise_users(
     """
     For keys with `max_users` set, return the list of users that are allowed to use the key.
     """
-    from litellm.proxy._types import CommonProxyErrors
+    from litellm.proxy._types import CommonProxyErrors, EnterpriseLicenseData
    from litellm.proxy.proxy_server import (
        premium_user,
        premium_user_data,
@@ -34,10 +36,14 @@ async def available_enterprise_users(
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )
 
-    if premium_user is None:
-        raise HTTPException(
-            status_code=500, detail={"error": CommonProxyErrors.not_premium_user.value}
-        )
+    if not premium_user:
+        # check if SSO is enabled - show 5 user limit
+        from litellm.proxy.auth.auth_utils import _has_user_setup_sso
+
+        if _has_user_setup_sso():
+            premium_user_data = EnterpriseLicenseData(
+                max_users=5,
+            )
 
    # Count number of rows in LiteLLM_UserTable
    user_count = await prisma_client.db.litellm_usertable.count()
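
Net effect of this hunk: on a non-premium deployment that has SSO configured, the endpoint now reports a 5-user allowance instead of failing with a 500. A minimal sketch of the resulting seat accounting, reusing the max_users and user count from the diff above (remaining_seats is a hypothetical helper, not part of the endpoint):

from typing import Optional

def remaining_seats(max_users: Optional[int], user_count: int) -> Optional[int]:
    # None means no known cap (neither a premium license nor SSO is set up).
    if max_users is None:
        return None
    return max(max_users - user_count, 0)

print(remaining_seats(5, 3))  # 2 seats left under the free SSO tier
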

litellm/model_prices_and_context_window_backup.json

Lines changed: 0 additions & 16 deletions
@@ -17691,22 +17691,6 @@
        },
        "supports_tool_choice": true
    },
-    "databricks/databricks-meta-llama-3-1-70b-instruct": {
-        "max_tokens": 128000,
-        "max_input_tokens": 128000,
-        "max_output_tokens": 128000,
-        "input_cost_per_token": 1.00002e-06,
-        "input_dbu_cost_per_token": 1.4286e-05,
-        "output_cost_per_token": 2.99999e-06,
-        "output_dbu_cost_per_token": 4.2857e-05,
-        "litellm_provider": "databricks",
-        "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
-        "metadata": {
-            "notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."
-        },
-        "supports_tool_choice": true
-    },
    "databricks/databricks-meta-llama-3-3-70b-instruct": {
        "max_tokens": 128000,
        "max_input_tokens": 128000,

litellm/proxy/_types.py

Lines changed: 19 additions & 16 deletions
@@ -530,7 +530,7 @@ class LiteLLMRoutes(enum.Enum):
    # Routes accessible by Admin Viewer (read-only admin access)
    admin_viewer_routes = [
        "/user/list",
-        "/user/available_users",
+        "/user/available_users",
        "/user/available_roles",
        "/user/daily/activity",
        "/team/daily/activity",
@@ -540,7 +540,10 @@
 
    # All routes accesible by an Org Admin
    org_admin_allowed_routes = (
-        org_admin_only_routes + management_routes + self_managed_routes + admin_viewer_routes
+        org_admin_only_routes
+        + management_routes
+        + self_managed_routes
+        + admin_viewer_routes
    )
 
 
@@ -585,13 +588,14 @@ def check_llm_api_params(cls, values):
 ######### Request Class Definition ######
 class ProxyChatCompletionRequest(LiteLLMPydanticObjectBase):
    """
-    Pydantic model for chat completion requests that includes both OpenAI standard fields
+    Pydantic model for chat completion requests that includes both OpenAI standard fields
    and LiteLLM-specific parameters. This replaces the previous TypedDict version.
    """
+
    # Required fields (from ChatCompletionRequest)
    model: str
    messages: List[AllMessageValues]
-
+
    # Standard OpenAI completion parameters (all optional)
    frequency_penalty: Optional[float] = None
    logit_bias: Optional[Dict[str, float]] = None
@@ -614,10 +618,10 @@ class ProxyChatCompletionRequest(LiteLLMPydanticObjectBase):
    functions: Optional[List[Dict[str, Any]]] = None
    user: Optional[str] = None
    stream: Optional[bool] = None
-
+
    # LiteLLM-specific metadata param (from original ChatCompletionRequest)
    metadata: Optional[Dict[str, Any]] = None
-
+
    # Optional LiteLLM params
    guardrails: Optional[List[str]] = None
    caching: Optional[bool] = None
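
These two hunks only normalize blank lines inside ProxyChatCompletionRequest, but they show the model's shape. A minimal usage sketch using only fields visible in the diff; the values are illustrative:

# Assuming: from litellm.proxy._types import ProxyChatCompletionRequest
# Only `model` and `messages` are required.
request = ProxyChatCompletionRequest(
    model="gpt-4o",
    messages=[{"role": "user", "content": "hello"}],
    stream=False,
    metadata={"trace_id": "abc-123"},  # LiteLLM-specific metadata param
    guardrails=["pii-mask"],           # optional LiteLLM param
)
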
@@ -1873,7 +1877,8 @@ def get_litellm_internal_health_check_user_api_key_auth(cls) -> "UserAPIKeyAuth"
        key_alias=LITTELM_INTERNAL_HEALTH_SERVICE_ACCOUNT_NAME,
        team_alias=LITTELM_INTERNAL_HEALTH_SERVICE_ACCOUNT_NAME,
    )
-
+
+
 class UserInfoResponse(LiteLLMPydanticObjectBase):
    user_id: Optional[str]
    user_info: Optional[Union[dict, BaseModel]]
@@ -2120,7 +2125,6 @@ class TokenCountRequest(LiteLLMPydanticObjectBase):
    Anthropic token counting endpoint uses /messages
    """
 
-
    contents: Optional[List[dict]] = None
    """
    Google /countTokens endpoint expects contents to be a list of dicts with the following structure:
@@ -2265,7 +2269,7 @@ class AllCallbacks(LiteLLMPydanticObjectBase):
 
    braintrust: CallbackOnUI = CallbackOnUI(
        litellm_callback_name="braintrust",
-        litellm_callback_params=["BRAINTRUST_API_KEY","BRAINTRUST_API_BASE"],
+        litellm_callback_params=["BRAINTRUST_API_KEY", "BRAINTRUST_API_BASE"],
        ui_callback_name="Braintrust",
    )
 
@@ -2319,7 +2323,9 @@ class SpendLogsMetadata(TypedDict):
    error_information: Optional[StandardLoggingPayloadErrorInformation]
    usage_object: Optional[dict]
    model_map_information: Optional[StandardLoggingModelInformation]
-    cold_storage_object_key: Optional[str]  # S3/GCS object key for cold storage retrieval
+    cold_storage_object_key: Optional[
+        str
+    ]  # S3/GCS object key for cold storage retrieval
 
 
 class SpendLogsPayload(TypedDict):
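
SpendLogsMetadata is a TypedDict, so instances are plain dicts and the rewrapped annotation is unchanged in meaning. A sketch of how the cold storage key might be populated; the dict literal is partial and the key value illustrative, not the full required shape:

log_metadata = {
    "usage_object": {"prompt_tokens": 12, "completion_tokens": 34},
    # S3/GCS object key for cold storage retrieval, or None if not archived
    "cold_storage_object_key": "spend-logs/2025/08/29/request-abc123.json",
}
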
@@ -2646,7 +2652,7 @@ def safe_get_team_member_rpm_limit(self) -> Optional[int]:
        if self.litellm_budget_table is not None:
            return self.litellm_budget_table.rpm_limit
        return None
-
+
    def safe_get_team_member_tpm_limit(self) -> Optional[int]:
        if self.litellm_budget_table is not None:
            return self.litellm_budget_table.tpm_limit
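
Both safe_get_* methods follow the same guard-before-access pattern. A generic equivalent, as a hypothetical standalone helper (not part of the diff):

from typing import Any, Optional

def safe_getattr(obj: Optional[Any], attr: str) -> Optional[Any]:
    # Return obj.<attr> if obj exists, else None, mirroring the
    # litellm_budget_table None-check above.
    return getattr(obj, attr) if obj is not None else None
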
@@ -2763,14 +2769,11 @@ class TeamMemberUpdateRequest(TeamMemberDeleteRequest):
    max_budget_in_team: Optional[float] = None
    role: Optional[Literal["admin", "user"]] = None
    tpm_limit: Optional[int] = Field(
-        default=None,
-        description="Tokens per minute limit for this team member"
+        default=None, description="Tokens per minute limit for this team member"
    )
    rpm_limit: Optional[int] = Field(
-        default=None,
-        description="Requests per minute limit for this team member"
+        default=None, description="Requests per minute limit for this team member"
    )
-
 
 
 class TeamMemberUpdateResponse(MemberUpdateResponse):
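
The collapsed Field(...) calls are purely stylistic; both limits still default to None with the same descriptions. A self-contained stand-in model (LimitsSketch is hypothetical, not from the diff) demonstrating the behavior:

from typing import Optional
from pydantic import BaseModel, Field

class LimitsSketch(BaseModel):
    # Stand-in showing the collapsed Field(...) style from the hunk above.
    tpm_limit: Optional[int] = Field(
        default=None, description="Tokens per minute limit for this team member"
    )
    rpm_limit: Optional[int] = Field(
        default=None, description="Requests per minute limit for this team member"
    )

print(LimitsSketch().tpm_limit)               # None by default
print(LimitsSketch(rpm_limit=100).rpm_limit)  # 100
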
