Skip to content

Commit 9df6f4e

Browse files
Merge branch 'main' into litellm_dev_08_20_2025_p1
2 parents b23e35f + 49cb9bd commit 9df6f4e

File tree

112 files changed

+8621
-1862
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+8621
-1862
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,4 +94,5 @@ test.py
9494

9595
litellm_config.yaml
9696
.cursor
97-
.vscode/launch.json
97+
.vscode/launch.json
98+
litellm/proxy/to_delete_loadtest_work/*

cookbook/liteLLM_Baseten.ipynb

Lines changed: 61 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,21 @@
66
"id": "gZx-wHJapG5w"
77
},
88
"source": [
9-
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
9+
"# LiteLLM with Baseten Model APIs\n",
1010
"\n",
11-
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
12-
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
13-
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
11+
"This notebook demonstrates how to use LiteLLM with Baseten's Model APIs instead of dedicated deployments.\n",
1412
"\n",
15-
"\n",
16-
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
17-
"Example call\n",
13+
"## Example Usage\n",
1814
"```python\n",
19-
"model = \"q841o8w\" # baseten model version ID\n",
20-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
21-
"```"
15+
"response = completion(\n",
16+
" model=\"baseten/openai/gpt-oss-120b\",\n",
17+
" messages=[{\"role\": \"user\", \"content\": \"Hello!\"}],\n",
18+
" max_tokens=1000,\n",
19+
" temperature=0.7\n",
20+
")\n",
21+
"```\n",
22+
"\n",
23+
"## Setup"
2224
]
2325
},
2426
{
@@ -29,20 +31,25 @@
2931
},
3032
"outputs": [],
3133
"source": [
32-
"!pip install litellm==0.1.399\n",
33-
"!pip install baseten urllib3"
34+
"%pip install litellm"
3435
]
3536
},
3637
{
3738
"cell_type": "code",
38-
"execution_count": 2,
39+
"execution_count": null,
3940
"metadata": {
4041
"id": "VEukLhDzo4vw"
4142
},
4243
"outputs": [],
4344
"source": [
4445
"import os\n",
45-
"from litellm import completion"
46+
"from litellm import completion\n",
47+
"\n",
48+
"# Set your Baseten API key\n",
49+
"os.environ['BASETEN_API_KEY'] = \"\" #@param {type:\"string\"}\n",
50+
"\n",
51+
"# Test message\n",
52+
"messages = [{\"role\": \"user\", \"content\": \"What is AGI?\"}]"
4653
]
4754
},
4855
{
@@ -51,19 +58,31 @@
5158
"id": "4STYM2OHFNlc"
5259
},
5360
"source": [
54-
"## Setup"
61+
"## Example 1: Basic Completion\n",
62+
"\n",
63+
"Simple completion with the GPT-OSS 120B model"
5564
]
5665
},
5766
{
5867
"cell_type": "code",
59-
"execution_count": 21,
68+
"execution_count": null,
6069
"metadata": {
6170
"id": "DorpLxw1FHbC"
6271
},
6372
"outputs": [],
6473
"source": [
65-
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
66-
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
74+
"print(\"=== Basic Completion ===\")\n",
75+
"response = completion(\n",
76+
" model=\"baseten/openai/gpt-oss-120b\",\n",
77+
" messages=messages,\n",
78+
" max_tokens=1000,\n",
79+
" temperature=0.7,\n",
80+
" top_p=0.9,\n",
81+
" presence_penalty=0.1,\n",
82+
" frequency_penalty=0.1,\n",
83+
")\n",
84+
"print(f\"Response: {response.choices[0].message.content}\")\n",
85+
"print(f\"Usage: {response.usage}\")"
6786
]
6887
},
6988
{
@@ -72,151 +91,41 @@
7291
"id": "syF3dTdKFSQQ"
7392
},
7493
"source": [
75-
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
76-
"### Pass Your Baseten model `Version ID` as `model`"
94+
"## Example 2: Streaming Completion\n",
95+
"\n",
96+
"Streaming completion with usage statistics"
7797
]
7898
},
7999
{
80100
"cell_type": "code",
81-
"execution_count": 18,
101+
"execution_count": null,
82102
"metadata": {
83103
"colab": {
84104
"base_uri": "https://localhost:8080/"
85105
},
86106
"id": "rPgSoMlsojz0",
87107
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
88108
},
89-
"outputs": [
90-
{
91-
"name": "stderr",
92-
"output_type": "stream",
93-
"text": [
94-
"\u001b[32mINFO\u001b[0m API key set.\n",
95-
"INFO:baseten:API key set.\n"
96-
]
97-
},
98-
{
99-
"data": {
100-
"text/plain": [
101-
"{'choices': [{'finish_reason': 'stop',\n",
102-
" 'index': 0,\n",
103-
" 'message': {'role': 'assistant',\n",
104-
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
105-
" 'created': 1692135883.699066,\n",
106-
" 'model': 'qvv0xeq'}"
107-
]
108-
},
109-
"execution_count": 18,
110-
"metadata": {},
111-
"output_type": "execute_result"
112-
}
113-
],
114-
"source": [
115-
"model = \"qvv0xeq\"\n",
116-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
117-
"response"
118-
]
119-
},
120-
{
121-
"cell_type": "markdown",
122-
"metadata": {
123-
"id": "7n21UroEGCGa"
124-
},
125-
"source": [
126-
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
127-
"### Pass Your Baseten model `Version ID` as `model`"
128-
]
129-
},
130-
{
131-
"cell_type": "code",
132-
"execution_count": 19,
133-
"metadata": {
134-
"colab": {
135-
"base_uri": "https://localhost:8080/"
136-
},
137-
"id": "uLVWFH899lAF",
138-
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
139-
},
140-
"outputs": [
141-
{
142-
"name": "stderr",
143-
"output_type": "stream",
144-
"text": [
145-
"\u001b[32mINFO\u001b[0m API key set.\n",
146-
"INFO:baseten:API key set.\n"
147-
]
148-
},
149-
{
150-
"data": {
151-
"text/plain": [
152-
"{'choices': [{'finish_reason': 'stop',\n",
153-
" 'index': 0,\n",
154-
" 'message': {'role': 'assistant',\n",
155-
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
156-
" 'created': 1692135900.2806294,\n",
157-
" 'model': 'q841o8w'}"
158-
]
159-
},
160-
"execution_count": 19,
161-
"metadata": {},
162-
"output_type": "execute_result"
163-
}
164-
],
165-
"source": [
166-
"model = \"q841o8w\"\n",
167-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
168-
"response"
169-
]
170-
},
171-
{
172-
"cell_type": "markdown",
173-
"metadata": {
174-
"id": "6-TFwmPAGPXq"
175-
},
176-
"source": [
177-
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
178-
"### Pass Your Baseten model `Version ID` as `model`"
179-
]
180-
},
181-
{
182-
"cell_type": "code",
183-
"execution_count": 20,
184-
"metadata": {
185-
"colab": {
186-
"base_uri": "https://localhost:8080/"
187-
},
188-
"id": "gbeYZOrUE_Bp",
189-
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
190-
},
191-
"outputs": [
192-
{
193-
"name": "stderr",
194-
"output_type": "stream",
195-
"text": [
196-
"\u001b[32mINFO\u001b[0m API key set.\n",
197-
"INFO:baseten:API key set.\n"
198-
]
199-
},
200-
{
201-
"data": {
202-
"text/plain": [
203-
"{'choices': [{'finish_reason': 'stop',\n",
204-
" 'index': 0,\n",
205-
" 'message': {'role': 'assistant',\n",
206-
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
207-
" 'created': 1692135914.7472186,\n",
208-
" 'model': '31dxrj3'}"
209-
]
210-
},
211-
"execution_count": 20,
212-
"metadata": {},
213-
"output_type": "execute_result"
214-
}
215-
],
109+
"outputs": [],
216110
"source": [
217-
"model = \"31dxrj3\"\n",
218-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
219-
"response"
111+
"print(\"=== Streaming Completion ===\")\n",
112+
"response = completion(\n",
113+
" model=\"baseten/openai/gpt-oss-120b\",\n",
114+
" messages=[{\"role\": \"user\", \"content\": \"Write a short poem about AI\"}],\n",
115+
" stream=True,\n",
116+
" max_tokens=500,\n",
117+
" temperature=0.8,\n",
118+
" stream_options={\n",
119+
" \"include_usage\": True,\n",
120+
" \"continuous_usage_stats\": True\n",
121+
" },\n",
122+
")\n",
123+
"\n",
124+
"print(\"Streaming response:\")\n",
125+
"for chunk in response:\n",
126+
" if chunk.choices and chunk.choices[0].delta.content:\n",
127+
" print(chunk.choices[0].delta.content, end=\"\", flush=True)\n",
128+
"print(\"\\n\")"
220129
]
221130
}
222131
],
@@ -234,4 +143,4 @@
234143
},
235144
"nbformat": 4,
236145
"nbformat_minor": 0
237-
}
146+
}

deploy/charts/litellm-helm/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ If `db.useStackgresOperator` is used (not yet implemented):
2424
| `replicaCount` | The number of LiteLLM Proxy pods to be deployed | `1` |
2525
| `masterkeySecretName` | The name of the Kubernetes Secret that contains the Master API Key for LiteLLM. If not specified, use the generated secret name. | N/A |
2626
| `masterkeySecretKey` | The key within the Kubernetes Secret that contains the Master API Key for LiteLLM. If not specified, use `masterkey` as the key. | N/A |
27-
| `masterkey` | The Master API Key for LiteLLM. If not specified, a random key is generated. | N/A |
27+
| `masterkey` | The Master API Key for LiteLLM. If not specified, a random key in the `sk-...` format is generated. | N/A |
2828
| `environmentSecrets` | An optional array of Secret object names. The keys and values in these secrets will be presented to the LiteLLM proxy pod as environment variables. See below for an example Secret object. | `[]` |
2929
| `environmentConfigMaps` | An optional array of ConfigMap object names. The keys and values in these configmaps will be presented to the LiteLLM proxy pod as environment variables. See below for an example Secret object. | `[]` |
3030
| `image.repository` | LiteLLM Proxy image repository | `ghcr.io/berriai/litellm` |
@@ -135,7 +135,7 @@ service, the **Proxy Endpoint** should be set to `http://<RELEASE>-litellm:4000`
135135

136136
The **Proxy Key** is the value specified for `masterkey` or, if a `masterkey`
137137
was not provided to the helm command line, the `masterkey` is a randomly
138-
generated string stored in the `<RELEASE>-litellm-masterkey` Kubernetes Secret.
138+
generated string in the `sk-...` format, stored in the `<RELEASE>-litellm-masterkey` Kubernetes Secret.
139139

140140
```bash
141141
kubectl -n litellm get secret <RELEASE>-litellm-masterkey -o jsonpath="{.data.masterkey}"

deploy/charts/litellm-helm/templates/deployment.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,14 @@ spec:
7171
name: {{ .Values.db.secret.name }}
7272
key: {{ .Values.db.secret.passwordKey }}
7373
- name: DATABASE_HOST
74+
{{- if .Values.db.secret.endpointKey }}
75+
valueFrom:
76+
secretKeyRef:
77+
name: {{ .Values.db.secret.name }}
78+
key: {{ .Values.db.secret.endpointKey }}
79+
{{- else }}
7480
value: {{ .Values.db.endpoint }}
81+
{{- end }}
7582
- name: DATABASE_NAME
7683
value: {{ .Values.db.database }}
7784
- name: DATABASE_URL

deploy/charts/litellm-helm/templates/migrations-job.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,14 @@ spec:
4949
name: {{ .Values.db.secret.name }}
5050
key: {{ .Values.db.secret.passwordKey }}
5151
- name: DATABASE_HOST
52+
{{- if .Values.db.secret.endpointKey }}
53+
valueFrom:
54+
secretKeyRef:
55+
name: {{ .Values.db.secret.name }}
56+
key: {{ .Values.db.secret.endpointKey }}
57+
{{- else }}
5258
value: {{ .Values.db.endpoint }}
59+
{{- end }}
5360
- name: DATABASE_NAME
5461
value: {{ .Values.db.database }}
5562
- name: DATABASE_URL

deploy/charts/litellm-helm/templates/secret-masterkey.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{{- if not .Values.masterkeySecretName }}
2-
{{ $masterkey := (.Values.masterkey | default (randAlphaNum 17)) }}
2+
{{ $masterkey := (.Values.masterkey | default (printf "sk-%s" (randAlphaNum 18))) }}
33
apiVersion: v1
44
kind: Secret
55
metadata:

deploy/charts/litellm-helm/tests/masterkey-secret_tests.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,19 @@ suite: test masterkey secret
22
templates:
33
- secret-masterkey.yaml
44
tests:
5-
- it: should create a secret if masterkeySecretName is not set
5+
- it: should create a secret if masterkeySecretName is not set. should start with sk-xxxx (base64 encoded as c2st*)
66
template: secret-masterkey.yaml
77
set:
88
masterkeySecretName: ""
99
asserts:
1010
- isKind:
1111
of: Secret
12+
- matchRegex:
13+
path: data.masterkey
14+
pattern: ^c2st
15+
# Note: The masterkey is generated as "sk-<18-random-chars>" in plain text,
16+
# but Kubernetes requires Secret `data` values to be base64-encoded, so it is stored encoded.
17+
# "sk-" base64 encodes to "c2st", so we check for "^c2st" pattern.
1218
- it: should not create a secret if masterkeySecretName is set
1319
template: secret-masterkey.yaml
1420
set:

deploy/charts/litellm-helm/values.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ db:
161161
name: postgres
162162
usernameKey: username
163163
passwordKey: password
164+
# Optional: when set, DATABASE_HOST is read from this key of the `db.secret.name` Secret instead of from `db.endpoint`
165+
endpointKey: ""
164166

165167
# Use the Stackgres Helm chart to deploy an instance of a Stackgres cluster.
166168
# The Stackgres Operator must already be installed within the target

docker/Dockerfile.non_root

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ RUN mkdir -p /nonexistent /.npm && \
7070
chown -R nobody:nogroup /app && \
7171
chown -R nobody:nogroup /nonexistent /.npm && \
7272
PRISMA_PATH=$(python -c "import os, prisma; print(os.path.dirname(prisma.__file__))") && \
73-
chown -R nobody:nogroup $PRISMA_PATH
73+
chown -R nobody:nogroup $PRISMA_PATH && \
74+
LITELLM_PKG_MIGRATIONS_PATH="$(python -c 'import os, litellm_proxy_extras; print(os.path.dirname(litellm_proxy_extras.__file__))' 2>/dev/null || echo '')/migrations" && \
75+
[ -n "$LITELLM_PKG_MIGRATIONS_PATH" ] && chown -R nobody:nogroup $LITELLM_PKG_MIGRATIONS_PATH
7476

7577
# --- OpenShift Compatibility: Apply Red Hat recommended pattern ---
7678
# Get paths for directories that need write access at runtime

0 commit comments

Comments
 (0)