Skip to content

Commit 7c2c1c1

Browse files
Merge pull request #13783 from philipkiely-baseten/main
Update Baseten LiteLLM integration
2 parents: 6b2039f + 5fa4a18 — commit 7c2c1c1

File tree

11 files changed

+354
-375
lines changed

11 files changed

+354
-375
lines changed

cookbook/liteLLM_Baseten.ipynb

Lines changed: 61 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,21 @@
66
"id": "gZx-wHJapG5w"
77
},
88
"source": [
9-
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
9+
"# LiteLLM with Baseten Model APIs\n",
1010
"\n",
11-
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
12-
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
13-
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
11+
"This notebook demonstrates how to use LiteLLM with Baseten's Model APIs instead of dedicated deployments.\n",
1412
"\n",
15-
"\n",
16-
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
17-
"Example call\n",
13+
"## Example Usage\n",
1814
"```python\n",
19-
"model = \"q841o8w\" # baseten model version ID\n",
20-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
21-
"```"
15+
"response = completion(\n",
16+
" model=\"baseten/openai/gpt-oss-120b\",\n",
17+
" messages=[{\"role\": \"user\", \"content\": \"Hello!\"}],\n",
18+
" max_tokens=1000,\n",
19+
" temperature=0.7\n",
20+
")\n",
21+
"```\n",
22+
"\n",
23+
"## Setup"
2224
]
2325
},
2426
{
@@ -29,20 +31,25 @@
2931
},
3032
"outputs": [],
3133
"source": [
32-
"!pip install litellm==0.1.399\n",
33-
"!pip install baseten urllib3"
34+
"%pip install litellm"
3435
]
3536
},
3637
{
3738
"cell_type": "code",
38-
"execution_count": 2,
39+
"execution_count": null,
3940
"metadata": {
4041
"id": "VEukLhDzo4vw"
4142
},
4243
"outputs": [],
4344
"source": [
4445
"import os\n",
45-
"from litellm import completion"
46+
"from litellm import completion\n",
47+
"\n",
48+
"# Set your Baseten API key\n",
49+
"os.environ['BASETEN_API_KEY'] = \"\" #@param {type:\"string\"}\n",
50+
"\n",
51+
"# Test message\n",
52+
"messages = [{\"role\": \"user\", \"content\": \"What is AGI?\"}]"
4653
]
4754
},
4855
{
@@ -51,19 +58,31 @@
5158
"id": "4STYM2OHFNlc"
5259
},
5360
"source": [
54-
"## Setup"
61+
"## Example 1: Basic Completion\n",
62+
"\n",
63+
"Simple completion with the GPT-OSS 120B model"
5564
]
5665
},
5766
{
5867
"cell_type": "code",
59-
"execution_count": 21,
68+
"execution_count": null,
6069
"metadata": {
6170
"id": "DorpLxw1FHbC"
6271
},
6372
"outputs": [],
6473
"source": [
65-
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
66-
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
74+
"print(\"=== Basic Completion ===\")\n",
75+
"response = completion(\n",
76+
" model=\"baseten/openai/gpt-oss-120b\",\n",
77+
" messages=messages,\n",
78+
" max_tokens=1000,\n",
79+
" temperature=0.7,\n",
80+
" top_p=0.9,\n",
81+
" presence_penalty=0.1,\n",
82+
" frequency_penalty=0.1,\n",
83+
")\n",
84+
"print(f\"Response: {response.choices[0].message.content}\")\n",
85+
"print(f\"Usage: {response.usage}\")"
6786
]
6887
},
6988
{
@@ -72,151 +91,41 @@
7291
"id": "syF3dTdKFSQQ"
7392
},
7493
"source": [
75-
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
76-
"### Pass Your Baseten model `Version ID` as `model`"
94+
"## Example 2: Streaming Completion\n",
95+
"\n",
96+
"Streaming completion with usage statistics"
7797
]
7898
},
7999
{
80100
"cell_type": "code",
81-
"execution_count": 18,
101+
"execution_count": null,
82102
"metadata": {
83103
"colab": {
84104
"base_uri": "https://localhost:8080/"
85105
},
86106
"id": "rPgSoMlsojz0",
87107
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
88108
},
89-
"outputs": [
90-
{
91-
"name": "stderr",
92-
"output_type": "stream",
93-
"text": [
94-
"\u001b[32mINFO\u001b[0m API key set.\n",
95-
"INFO:baseten:API key set.\n"
96-
]
97-
},
98-
{
99-
"data": {
100-
"text/plain": [
101-
"{'choices': [{'finish_reason': 'stop',\n",
102-
" 'index': 0,\n",
103-
" 'message': {'role': 'assistant',\n",
104-
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
105-
" 'created': 1692135883.699066,\n",
106-
" 'model': 'qvv0xeq'}"
107-
]
108-
},
109-
"execution_count": 18,
110-
"metadata": {},
111-
"output_type": "execute_result"
112-
}
113-
],
114-
"source": [
115-
"model = \"qvv0xeq\"\n",
116-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
117-
"response"
118-
]
119-
},
120-
{
121-
"cell_type": "markdown",
122-
"metadata": {
123-
"id": "7n21UroEGCGa"
124-
},
125-
"source": [
126-
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
127-
"### Pass Your Baseten model `Version ID` as `model`"
128-
]
129-
},
130-
{
131-
"cell_type": "code",
132-
"execution_count": 19,
133-
"metadata": {
134-
"colab": {
135-
"base_uri": "https://localhost:8080/"
136-
},
137-
"id": "uLVWFH899lAF",
138-
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
139-
},
140-
"outputs": [
141-
{
142-
"name": "stderr",
143-
"output_type": "stream",
144-
"text": [
145-
"\u001b[32mINFO\u001b[0m API key set.\n",
146-
"INFO:baseten:API key set.\n"
147-
]
148-
},
149-
{
150-
"data": {
151-
"text/plain": [
152-
"{'choices': [{'finish_reason': 'stop',\n",
153-
" 'index': 0,\n",
154-
" 'message': {'role': 'assistant',\n",
155-
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
156-
" 'created': 1692135900.2806294,\n",
157-
" 'model': 'q841o8w'}"
158-
]
159-
},
160-
"execution_count": 19,
161-
"metadata": {},
162-
"output_type": "execute_result"
163-
}
164-
],
165-
"source": [
166-
"model = \"q841o8w\"\n",
167-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
168-
"response"
169-
]
170-
},
171-
{
172-
"cell_type": "markdown",
173-
"metadata": {
174-
"id": "6-TFwmPAGPXq"
175-
},
176-
"source": [
177-
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
178-
"### Pass Your Baseten model `Version ID` as `model`"
179-
]
180-
},
181-
{
182-
"cell_type": "code",
183-
"execution_count": 20,
184-
"metadata": {
185-
"colab": {
186-
"base_uri": "https://localhost:8080/"
187-
},
188-
"id": "gbeYZOrUE_Bp",
189-
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
190-
},
191-
"outputs": [
192-
{
193-
"name": "stderr",
194-
"output_type": "stream",
195-
"text": [
196-
"\u001b[32mINFO\u001b[0m API key set.\n",
197-
"INFO:baseten:API key set.\n"
198-
]
199-
},
200-
{
201-
"data": {
202-
"text/plain": [
203-
"{'choices': [{'finish_reason': 'stop',\n",
204-
" 'index': 0,\n",
205-
" 'message': {'role': 'assistant',\n",
206-
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
207-
" 'created': 1692135914.7472186,\n",
208-
" 'model': '31dxrj3'}"
209-
]
210-
},
211-
"execution_count": 20,
212-
"metadata": {},
213-
"output_type": "execute_result"
214-
}
215-
],
109+
"outputs": [],
216110
"source": [
217-
"model = \"31dxrj3\"\n",
218-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
219-
"response"
111+
"print(\"=== Streaming Completion ===\")\n",
112+
"response = completion(\n",
113+
" model=\"baseten/openai/gpt-oss-120b\",\n",
114+
" messages=[{\"role\": \"user\", \"content\": \"Write a short poem about AI\"}],\n",
115+
" stream=True,\n",
116+
" max_tokens=500,\n",
117+
" temperature=0.8,\n",
118+
" stream_options={\n",
119+
" \"include_usage\": True,\n",
120+
" \"continuous_usage_stats\": True\n",
121+
" },\n",
122+
")\n",
123+
"\n",
124+
"print(\"Streaming response:\")\n",
125+
"for chunk in response:\n",
126+
" if chunk.choices and chunk.choices[0].delta.content:\n",
127+
" print(chunk.choices[0].delta.content, end=\"\", flush=True)\n",
128+
"print(\"\\n\")"
220129
]
221130
}
222131
],
@@ -234,4 +143,4 @@
234143
},
235144
"nbformat": 4,
236145
"nbformat_minor": 0
237-
}
146+
}

0 commit comments

Comments (0)