BerriAI
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 1 deletion b/‎.gitignore‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎cookbook/liteLLM_Baseten.ipynb‎
Lines changed: 61 additions & 152 deletions b/‎cookbook/liteLLM_Baseten.ipynb‎
Lines changed: 61 additions & 152 deletions
diff --git a/‎deploy/charts/litellm-helm/README.md‎
Lines changed: 2 additions & 2 deletions b/‎deploy/charts/litellm-helm/README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎deploy/charts/litellm-helm/templates/deployment.yaml‎
Lines changed: 7 additions & 0 deletions b/‎deploy/charts/litellm-helm/templates/deployment.yaml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎deploy/charts/litellm-helm/templates/migrations-job.yaml‎
Lines changed: 7 additions & 0 deletions b/‎deploy/charts/litellm-helm/templates/migrations-job.yaml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎deploy/charts/litellm-helm/templates/secret-masterkey.yaml‎
Lines changed: 1 addition & 1 deletion b/‎deploy/charts/litellm-helm/templates/secret-masterkey.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎deploy/charts/litellm-helm/tests/masterkey-secret_tests.yaml‎
Lines changed: 7 additions & 1 deletion b/‎deploy/charts/litellm-helm/tests/masterkey-secret_tests.yaml‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎deploy/charts/litellm-helm/values.yaml‎
Lines changed: 2 additions & 0 deletions b/‎deploy/charts/litellm-helm/values.yaml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docker/Dockerfile.non_root‎
Lines changed: 3 additions & 1 deletion b/‎docker/Dockerfile.non_root‎
Lines changed: 3 additions & 1 deletion
@@ -94,4 +94,5 @@ test.py
 
 litellm_config.yaml
 .cursor
-.vscode/launch.json
+.vscode/launch.json
+litellm/proxy/to_delete_loadtest_work/*
@@ -6,19 +6,21 @@
     "id": "gZx-wHJapG5w"
    },
    "source": [
-    "# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
+    "# LiteLLM with Baseten Model APIs\n",
     "\n",
-    "* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
-    "* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
-    "* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
+    "This notebook demonstrates how to use LiteLLM with Baseten's Model APIs instead of dedicated deployments.\n",
     "\n",
-    "\n",
-    "## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
-    "Example call\n",
+    "## Example Usage\n",
     "```python\n",
-    "model = \"q841o8w\" # baseten model version ID\n",
-    "response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
-    "```"
+    "response = completion(\n",
+    "    model=\"baseten/openai/gpt-oss-120b\",\n",
+    "    messages=[{\"role\": \"user\", \"content\": \"Hello!\"}],\n",
+    "    max_tokens=1000,\n",
+    "    temperature=0.7\n",
+    ")\n",
+    "```\n",
+    "\n",
+    "## Setup"
    ]
   },
   {
@@ -29,20 +31,25 @@
    },
    "outputs": [],
    "source": [
-    "!pip install litellm==0.1.399\n",
-    "!pip install baseten urllib3"
+    "%pip install litellm"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {
     "id": "VEukLhDzo4vw"
    },
    "outputs": [],
    "source": [
     "import os\n",
-    "from litellm import completion"
+    "from litellm import completion\n",
+    "\n",
+    "# Set your Baseten API key\n",
+    "os.environ['BASETEN_API_KEY'] = \"\" #@param {type:\"string\"}\n",
+    "\n",
+    "# Test message\n",
+    "messages = [{\"role\": \"user\", \"content\": \"What is AGI?\"}]"
    ]
   },
   {
@@ -51,19 +58,31 @@
     "id": "4STYM2OHFNlc"
    },
    "source": [
-    "## Setup"
+    "## Example 1: Basic Completion\n",
+    "\n",
+    "Simple completion with the GPT-OSS 120B model"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "metadata": {
     "id": "DorpLxw1FHbC"
    },
    "outputs": [],
    "source": [
-    "os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
-    "messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
+    "print(\"=== Basic Completion ===\")\n",
+    "response = completion(\n",
+    "    model=\"baseten/openai/gpt-oss-120b\",\n",
+    "    messages=messages,\n",
+    "    max_tokens=1000,\n",
+    "    temperature=0.7,\n",
+    "    top_p=0.9,\n",
+    "    presence_penalty=0.1,\n",
+    "    frequency_penalty=0.1,\n",
+    ")\n",
+    "print(f\"Response: {response.choices[0].message.content}\")\n",
+    "print(f\"Usage: {response.usage}\")"
    ]
   },
   {
@@ -72,151 +91,41 @@
     "id": "syF3dTdKFSQQ"
    },
    "source": [
-    "## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
-    "### Pass Your Baseten model `Version ID` as `model`"
+    "## Example 2: Streaming Completion\n",
+    "\n",
+    "Streaming completion with usage statistics"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
     },
     "id": "rPgSoMlsojz0",
     "outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32mINFO\u001b[0m API key set.\n",
-      "INFO:baseten:API key set.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'choices': [{'finish_reason': 'stop',\n",
-       "   'index': 0,\n",
-       "   'message': {'role': 'assistant',\n",
-       "    'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
-       " 'created': 1692135883.699066,\n",
-       " 'model': 'qvv0xeq'}"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model = \"qvv0xeq\"\n",
-    "response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
-    "response"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "7n21UroEGCGa"
-   },
-   "source": [
-    "## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
-    "### Pass Your Baseten model `Version ID` as `model`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "uLVWFH899lAF",
-    "outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32mINFO\u001b[0m API key set.\n",
-      "INFO:baseten:API key set.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'choices': [{'finish_reason': 'stop',\n",
-       "   'index': 0,\n",
-       "   'message': {'role': 'assistant',\n",
-       "    'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
-       " 'created': 1692135900.2806294,\n",
-       " 'model': 'q841o8w'}"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model = \"q841o8w\"\n",
-    "response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
-    "response"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "6-TFwmPAGPXq"
-   },
-   "source": [
-    "## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
-    "### Pass Your Baseten model `Version ID` as `model`"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "gbeYZOrUE_Bp",
-    "outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[32mINFO\u001b[0m API key set.\n",
-      "INFO:baseten:API key set.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'choices': [{'finish_reason': 'stop',\n",
-       "   'index': 0,\n",
-       "   'message': {'role': 'assistant',\n",
-       "    'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
-       " 'created': 1692135914.7472186,\n",
-       " 'model': '31dxrj3'}"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "model = \"31dxrj3\"\n",
-    "response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
-    "response"
+    "print(\"=== Streaming Completion ===\")\n",
+    "response = completion(\n",
+    "    model=\"baseten/openai/gpt-oss-120b\",\n",
+    "    messages=[{\"role\": \"user\", \"content\": \"Write a short poem about AI\"}],\n",
+    "    stream=True,\n",
+    "    max_tokens=500,\n",
+    "    temperature=0.8,\n",
+    "    stream_options={\n",
+    "        \"include_usage\": True,\n",
+    "        \"continuous_usage_stats\": True\n",
+    "    },\n",
+    ")\n",
+    "\n",
+    "print(\"Streaming response:\")\n",
+    "for chunk in response:\n",
+    "    if chunk.choices and chunk.choices[0].delta.content:\n",
+    "        print(chunk.choices[0].delta.content, end=\"\", flush=True)\n",
+    "print(\"\\n\")"
    ]
   }
  ],
@@ -234,4 +143,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 0
-}
+}
@@ -24,7 +24,7 @@ If `db.useStackgresOperator` is used (not yet implemented):
 | `replicaCount`                                             | The number of LiteLLM Proxy pods to be deployed                                                                                                                                       | `1`  |
 | `masterkeySecretName`                                      | The name of the Kubernetes Secret that contains the Master API Key for LiteLLM.  If not specified, use the generated secret name.                                                                                                         | N/A  |
 | `masterkeySecretKey`                                      | The key within the Kubernetes Secret that contains the Master API Key for LiteLLM.  If not specified, use `masterkey` as the key.                                                                                                         | N/A  |
-| `masterkey`                                                | The Master API Key for LiteLLM.  If not specified, a random key is generated.                                                                                                         | N/A  |
+| `masterkey`                                                | The Master API Key for LiteLLM.  If not specified, a random key in the `sk-...` format is generated.                                                                                   | N/A  |
 | `environmentSecrets`                                       | An optional array of Secret object names.  The keys and values in these secrets will be presented to the LiteLLM proxy pod as environment variables.  See below for an example Secret object.  | `[]`  |
 | `environmentConfigMaps`                                       | An optional array of ConfigMap object names.  The keys and values in these configmaps will be presented to the LiteLLM proxy pod as environment variables.  See below for an example Secret object.  | `[]`  |
 | `image.repository`                                         | LiteLLM Proxy image repository                                                                                                                                                        | `ghcr.io/berriai/litellm`  |
@@ -135,7 +135,7 @@ service, the **Proxy Endpoint** should be set to `http://<RELEASE>-litellm:4000`
 
 The **Proxy Key** is the value specified for `masterkey` or, if a `masterkey`
 was not provided to the helm command line, the `masterkey` is a randomly
-generated string stored in the `<RELEASE>-litellm-masterkey` Kubernetes Secret.
+generated string in the `sk-...` format stored in the `<RELEASE>-litellm-masterkey` Kubernetes Secret.
 
 ```bash
 kubectl -n litellm get secret <RELEASE>-litellm-masterkey -o jsonpath="{.data.masterkey}"
 
@@ -71,7 +71,14 @@ spec:
                   name: {{ .Values.db.secret.name }}
                   key: {{ .Values.db.secret.passwordKey }}
             - name: DATABASE_HOST
+              {{- if .Values.db.secret.endpointKey }}
+              valueFrom:
+                secretKeyRef:
+                  name: {{ .Values.db.secret.name }}
+                  key: {{ .Values.db.secret.endpointKey }}
+              {{- else }}
               value: {{ .Values.db.endpoint }}
+              {{- end }}
             - name: DATABASE_NAME
               value: {{ .Values.db.database }}
             - name: DATABASE_URL
 
@@ -49,7 +49,14 @@ spec:
                   name: {{ .Values.db.secret.name }}
                   key: {{ .Values.db.secret.passwordKey }}
             - name: DATABASE_HOST
+              {{- if .Values.db.secret.endpointKey }}
+              valueFrom:
+                secretKeyRef:
+                  name: {{ .Values.db.secret.name }}
+                  key: {{ .Values.db.secret.endpointKey }}
+              {{- else }}
               value: {{ .Values.db.endpoint }}
+              {{- end }}
             - name: DATABASE_NAME
               value: {{ .Values.db.database }}
             - name: DATABASE_URL
 
@@ -1,5 +1,5 @@
 {{- if not .Values.masterkeySecretName }}
-{{ $masterkey := (.Values.masterkey | default (randAlphaNum 17)) }}
+{{ $masterkey := (.Values.masterkey | default (printf "sk-%s" (randAlphaNum 18))) }}
 apiVersion: v1
 kind: Secret
 metadata:
 
@@ -2,13 +2,19 @@ suite: test masterkey secret
 templates:
   - secret-masterkey.yaml
 tests:
-  - it: should create a secret if masterkeySecretName is not set
+  - it: should create a secret if masterkeySecretName is not set. should start with sk-xxxx (base64 encoded as c2st*)
     template: secret-masterkey.yaml
     set:
       masterkeySecretName: ""
     asserts:
       - isKind:
           of: Secret
+      - matchRegex:
+          path: data.masterkey
+          pattern: ^c2st
+          # Note: the masterkey is generated in plain text as "sk-<18-random-chars>",
+          # but Kubernetes requires Secret `data` values to be base64-encoded.
+          # "sk-" base64-encodes to "c2st", so the assertion matches the "^c2st" prefix.
   - it: should not create a secret if masterkeySecretName is set
     template: secret-masterkey.yaml
     set:
 
@@ -161,6 +161,8 @@ db:
     name: postgres
     usernameKey: username
     passwordKey: password
+    # Optional: when set, DATABASE_HOST will be sourced from this secret key instead of db.endpoint
+    endpointKey: ""
 
   # Use the Stackgres Helm chart to deploy an instance of a Stackgres cluster.
   #  The Stackgres Operator must already be installed within the target
 
@@ -70,7 +70,9 @@ RUN mkdir -p /nonexistent /.npm && \
   chown -R nobody:nogroup /app && \
   chown -R nobody:nogroup /nonexistent /.npm && \
   PRISMA_PATH=$(python -c "import os, prisma; print(os.path.dirname(prisma.__file__))") && \
-  chown -R nobody:nogroup $PRISMA_PATH
+  chown -R nobody:nogroup $PRISMA_PATH && \
+  LITELLM_PKG_DIR="$(python -c 'import os, litellm_proxy_extras; print(os.path.dirname(litellm_proxy_extras.__file__))' 2>/dev/null || true)" && \
+  if [ -n "$LITELLM_PKG_DIR" ] && [ -d "$LITELLM_PKG_DIR/migrations" ]; then chown -R nobody:nogroup "$LITELLM_PKG_DIR/migrations"; fi
 
 # --- OpenShift Compatibility: Apply Red Hat recommended pattern ---
 # Get paths for directories that need write access at runtime