Skip to content

Commit 7c2c1c1

Browse files
Merge pull request #13783 from philipkiely-baseten/main
Update Baseten LiteLLM integration
2 parents: 6b2039f + 5fa4a18 — commit 7c2c1c1

File tree

11 files changed

+354
-375
lines changed

11 files changed

+354
-375
lines changed

cookbook/liteLLM_Baseten.ipynb

Lines changed: 61 additions & 152 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,21 @@
66
"id": "gZx-wHJapG5w"
77
},
88
"source": [
9-
"# Use liteLLM to call Falcon, Wizard, MPT 7B using OpenAI chatGPT Input/output\n",
9+
"# LiteLLM with Baseten Model APIs\n",
1010
"\n",
11-
"* Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
12-
"* Wizard LM: https://app.baseten.co/explore/wizardlm\n",
13-
"* MPT 7B Base: https://app.baseten.co/explore/mpt_7b_instruct\n",
11+
"This notebook demonstrates how to use LiteLLM with Baseten's Model APIs instead of dedicated deployments.\n",
1412
"\n",
15-
"\n",
16-
"## Call all baseten llm models using OpenAI chatGPT Input/Output using liteLLM\n",
17-
"Example call\n",
13+
"## Example Usage\n",
1814
"```python\n",
19-
"model = \"q841o8w\" # baseten model version ID\n",
20-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
21-
"```"
15+
"response = completion(\n",
16+
" model=\"baseten/openai/gpt-oss-120b\",\n",
17+
" messages=[{\"role\": \"user\", \"content\": \"Hello!\"}],\n",
18+
" max_tokens=1000,\n",
19+
" temperature=0.7\n",
20+
")\n",
21+
"```\n",
22+
"\n",
23+
"## Setup"
2224
]
2325
},
2426
{
@@ -29,20 +31,25 @@
2931
},
3032
"outputs": [],
3133
"source": [
32-
"!pip install litellm==0.1.399\n",
33-
"!pip install baseten urllib3"
34+
"%pip install litellm"
3435
]
3536
},
3637
{
3738
"cell_type": "code",
38-
"execution_count": 2,
39+
"execution_count": null,
3940
"metadata": {
4041
"id": "VEukLhDzo4vw"
4142
},
4243
"outputs": [],
4344
"source": [
4445
"import os\n",
45-
"from litellm import completion"
46+
"from litellm import completion\n",
47+
"\n",
48+
"# Set your Baseten API key\n",
49+
"os.environ['BASETEN_API_KEY'] = \"\" #@param {type:\"string\"}\n",
50+
"\n",
51+
"# Test message\n",
52+
"messages = [{\"role\": \"user\", \"content\": \"What is AGI?\"}]"
4653
]
4754
},
4855
{
@@ -51,19 +58,31 @@
5158
"id": "4STYM2OHFNlc"
5259
},
5360
"source": [
54-
"## Setup"
61+
"## Example 1: Basic Completion\n",
62+
"\n",
63+
"Simple completion with the GPT-OSS 120B model"
5564
]
5665
},
5766
{
5867
"cell_type": "code",
59-
"execution_count": 21,
68+
"execution_count": null,
6069
"metadata": {
6170
"id": "DorpLxw1FHbC"
6271
},
6372
"outputs": [],
6473
"source": [
65-
"os.environ['BASETEN_API_KEY'] = \"\" #@param\n",
66-
"messages = [{ \"content\": \"what does Baseten do? \",\"role\": \"user\"}]"
74+
"print(\"=== Basic Completion ===\")\n",
75+
"response = completion(\n",
76+
" model=\"baseten/openai/gpt-oss-120b\",\n",
77+
" messages=messages,\n",
78+
" max_tokens=1000,\n",
79+
" temperature=0.7,\n",
80+
" top_p=0.9,\n",
81+
" presence_penalty=0.1,\n",
82+
" frequency_penalty=0.1,\n",
83+
")\n",
84+
"print(f\"Response: {response.choices[0].message.content}\")\n",
85+
"print(f\"Usage: {response.usage}\")"
6786
]
6887
},
6988
{
@@ -72,151 +91,41 @@
7291
"id": "syF3dTdKFSQQ"
7392
},
7493
"source": [
75-
"## Calling Falcon 7B: https://app.baseten.co/explore/falcon_7b\n",
76-
"### Pass Your Baseten model `Version ID` as `model`"
94+
"## Example 2: Streaming Completion\n",
95+
"\n",
96+
"Streaming completion with usage statistics"
7797
]
7898
},
7999
{
80100
"cell_type": "code",
81-
"execution_count": 18,
101+
"execution_count": null,
82102
"metadata": {
83103
"colab": {
84104
"base_uri": "https://localhost:8080/"
85105
},
86106
"id": "rPgSoMlsojz0",
87107
"outputId": "81d6dc7b-1681-4ae4-e4c8-5684eb1bd050"
88108
},
89-
"outputs": [
90-
{
91-
"name": "stderr",
92-
"output_type": "stream",
93-
"text": [
94-
"\u001b[32mINFO\u001b[0m API key set.\n",
95-
"INFO:baseten:API key set.\n"
96-
]
97-
},
98-
{
99-
"data": {
100-
"text/plain": [
101-
"{'choices': [{'finish_reason': 'stop',\n",
102-
" 'index': 0,\n",
103-
" 'message': {'role': 'assistant',\n",
104-
" 'content': \"what does Baseten do? \\nI'm sorry, I cannot provide a specific answer as\"}}],\n",
105-
" 'created': 1692135883.699066,\n",
106-
" 'model': 'qvv0xeq'}"
107-
]
108-
},
109-
"execution_count": 18,
110-
"metadata": {},
111-
"output_type": "execute_result"
112-
}
113-
],
114-
"source": [
115-
"model = \"qvv0xeq\"\n",
116-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
117-
"response"
118-
]
119-
},
120-
{
121-
"cell_type": "markdown",
122-
"metadata": {
123-
"id": "7n21UroEGCGa"
124-
},
125-
"source": [
126-
"## Calling Wizard LM https://app.baseten.co/explore/wizardlm\n",
127-
"### Pass Your Baseten model `Version ID` as `model`"
128-
]
129-
},
130-
{
131-
"cell_type": "code",
132-
"execution_count": 19,
133-
"metadata": {
134-
"colab": {
135-
"base_uri": "https://localhost:8080/"
136-
},
137-
"id": "uLVWFH899lAF",
138-
"outputId": "61c2bc74-673b-413e-bb40-179cf408523d"
139-
},
140-
"outputs": [
141-
{
142-
"name": "stderr",
143-
"output_type": "stream",
144-
"text": [
145-
"\u001b[32mINFO\u001b[0m API key set.\n",
146-
"INFO:baseten:API key set.\n"
147-
]
148-
},
149-
{
150-
"data": {
151-
"text/plain": [
152-
"{'choices': [{'finish_reason': 'stop',\n",
153-
" 'index': 0,\n",
154-
" 'message': {'role': 'assistant',\n",
155-
" 'content': 'As an AI language model, I do not have personal beliefs or practices, but based on the information available online, Baseten is a popular name for a traditional Ethiopian dish made with injera, a spongy flatbread, and wat, a spicy stew made with meat or vegetables. It is typically served for breakfast or dinner and is a staple in Ethiopian cuisine. The name Baseten is also used to refer to a traditional Ethiopian coffee ceremony, where coffee is brewed and served in a special ceremony with music and food.'}}],\n",
156-
" 'created': 1692135900.2806294,\n",
157-
" 'model': 'q841o8w'}"
158-
]
159-
},
160-
"execution_count": 19,
161-
"metadata": {},
162-
"output_type": "execute_result"
163-
}
164-
],
165-
"source": [
166-
"model = \"q841o8w\"\n",
167-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
168-
"response"
169-
]
170-
},
171-
{
172-
"cell_type": "markdown",
173-
"metadata": {
174-
"id": "6-TFwmPAGPXq"
175-
},
176-
"source": [
177-
"## Calling mosaicml/mpt-7b https://app.baseten.co/explore/mpt_7b_instruct\n",
178-
"### Pass Your Baseten model `Version ID` as `model`"
179-
]
180-
},
181-
{
182-
"cell_type": "code",
183-
"execution_count": 20,
184-
"metadata": {
185-
"colab": {
186-
"base_uri": "https://localhost:8080/"
187-
},
188-
"id": "gbeYZOrUE_Bp",
189-
"outputId": "838d86ea-2143-4cb3-bc80-2acc2346c37a"
190-
},
191-
"outputs": [
192-
{
193-
"name": "stderr",
194-
"output_type": "stream",
195-
"text": [
196-
"\u001b[32mINFO\u001b[0m API key set.\n",
197-
"INFO:baseten:API key set.\n"
198-
]
199-
},
200-
{
201-
"data": {
202-
"text/plain": [
203-
"{'choices': [{'finish_reason': 'stop',\n",
204-
" 'index': 0,\n",
205-
" 'message': {'role': 'assistant',\n",
206-
" 'content': \"\\n===================\\n\\nIt's a tool to build a local version of a game on your own machine to host\\non your website.\\n\\nIt's used to make game demos and show them on Twitter, Tumblr, and Facebook.\\n\\n\\n\\n## What's built\\n\\n- A directory of all your game directories, named with a version name and build number, with images linked to.\\n- Includes HTML to include in another site.\\n- Includes images for your icons and\"}}],\n",
207-
" 'created': 1692135914.7472186,\n",
208-
" 'model': '31dxrj3'}"
209-
]
210-
},
211-
"execution_count": 20,
212-
"metadata": {},
213-
"output_type": "execute_result"
214-
}
215-
],
109+
"outputs": [],
216110
"source": [
217-
"model = \"31dxrj3\"\n",
218-
"response = completion(model=model, messages=messages, custom_llm_provider=\"baseten\")\n",
219-
"response"
111+
"print(\"=== Streaming Completion ===\")\n",
112+
"response = completion(\n",
113+
" model=\"baseten/openai/gpt-oss-120b\",\n",
114+
" messages=[{\"role\": \"user\", \"content\": \"Write a short poem about AI\"}],\n",
115+
" stream=True,\n",
116+
" max_tokens=500,\n",
117+
" temperature=0.8,\n",
118+
" stream_options={\n",
119+
" \"include_usage\": True,\n",
120+
" \"continuous_usage_stats\": True\n",
121+
" },\n",
122+
")\n",
123+
"\n",
124+
"print(\"Streaming response:\")\n",
125+
"for chunk in response:\n",
126+
" if chunk.choices and chunk.choices[0].delta.content:\n",
127+
" print(chunk.choices[0].delta.content, end=\"\", flush=True)\n",
128+
"print(\"\\n\")"
220129
]
221130
}
222131
],
@@ -234,4 +143,4 @@
234143
},
235144
"nbformat": 4,
236145
"nbformat_minor": 0
237-
}
146+
}

0 commit comments

Comments (0)