Skip to content
Merged
Show file tree
Hide file tree
Changes from 62 commits
Commits
Show all changes
68 commits
Select commit Hold shift + click to select a range
118aae6
add
ywang96 Aug 22, 2025
3b92bcf
typo
ywang96 Aug 22, 2025
53dd7c7
fix import
ywang96 Aug 22, 2025
2483c40
Add uuid to mm data.
huachenheli Aug 22, 2025
57e114d
Merge branch 'main' into allow-passing-mm-hash
ywang96 Aug 24, 2025
cea5c09
revert
ywang96 Aug 24, 2025
611827f
revert
ywang96 Aug 24, 2025
1f31339
allow missing entry
ywang96 Aug 24, 2025
1756606
update
ywang96 Aug 24, 2025
a82a865
update typing
ywang96 Aug 24, 2025
c6a1e6a
rename
Aug 25, 2025
0af3999
comment
Aug 25, 2025
6defa1d
comments
Aug 25, 2025
cb406f3
tweak
Aug 25, 2025
977811b
typing
Aug 25, 2025
f26890b
clarify
Aug 25, 2025
46b849b
Track mm uuid separately.
huachenheli Aug 25, 2025
5c928c6
Update unit tests
huachenheli Aug 25, 2025
6b399aa
Fix pydantic error
huachenheli Aug 25, 2025
a4be8cb
More tests.
huachenheli Aug 25, 2025
027a4f9
Formats
huachenheli Aug 25, 2025
ca8609f
fix type
huachenheli Aug 26, 2025
06a69b0
relax on user-input data structure
ywang96 Aug 26, 2025
d9c97fc
fix import
ywang96 Aug 26, 2025
0927f7f
Merge branch 'main' into allow-passing-mm-hash
ywang96 Aug 26, 2025
2663d0e
typing
ywang96 Aug 26, 2025
86d7e46
typing cleanup
ywang96 Aug 26, 2025
df36488
Merge branch 'main' into mm_uuid
huachenheli Aug 26, 2025
46c874e
Merge branch 'main' into allow-passing-mm-hash
ywang96 Aug 27, 2025
175bfe4
update
ywang96 Aug 27, 2025
fb8aa34
fix
ywang96 Aug 27, 2025
c874889
typing
ywang96 Aug 27, 2025
4811dc6
update
ywang96 Aug 28, 2025
b6be45a
update
ywang96 Aug 28, 2025
779d2b2
use typealias
ywang96 Aug 28, 2025
e5ca736
mapping
ywang96 Aug 28, 2025
d3b227e
fix
ywang96 Aug 28, 2025
0848d86
typing
ywang96 Aug 28, 2025
ce7c9b0
typing
ywang96 Aug 28, 2025
2f0a02c
add tests
ywang96 Aug 28, 2025
06d31af
add tests
ywang96 Aug 28, 2025
6ea9ee4
Merge branch 'main' into allow-passing-mm-hash
ywang96 Aug 28, 2025
9677f00
Merge remote-tracking branch 'ywang-vllm/allow-passing-mm-hash' into …
huachenheli Aug 29, 2025
58bf1a5
Merge branch 'main' into mm_uuid
huachenheli Sep 2, 2025
45517ce
Remove unused import.
huachenheli Sep 2, 2025
51b042d
Merge branch 'main' into mm_uuid
huachenheli Sep 3, 2025
6add882
Merge branch 'main' into mm_uuid
huachenheli Sep 3, 2025
d81aa20
Merge branch 'main' into mm_uuid
huachenheli Sep 3, 2025
c21fab2
Add e2e test.
huachenheli Sep 3, 2025
3f54bac
lint
huachenheli Sep 3, 2025
764c5a6
move to test_vision.py
huachenheli Sep 4, 2025
b6abe82
use the same test assets
huachenheli Sep 4, 2025
82cdc63
Merge branch 'main' into mm_uuid
huachenheli Sep 4, 2025
6ab8a34
doc
huachenheli Sep 4, 2025
e6e4d40
Merge branch 'main' into mm_uuid
huachenheli Sep 4, 2025
7a06ece
Merge branch 'main' into mm_uuid
huachenheli Sep 4, 2025
d238ddb
Merge branch 'main' into mm_uuid
ywang96 Sep 5, 2025
b16bc3b
Merge branch 'main' into mm_uuid
ywang96 Sep 5, 2025
2a2b43a
Merge branch 'main' into mm_uuid
huachenheli Sep 6, 2025
5287c94
Merge branch 'main' into mm_uuid
huachenheli Sep 6, 2025
b732457
Merge branch 'main' into mm_uuid
huachenheli Sep 6, 2025
9309f3f
Merge branch 'main' into mm_uuid
ywang96 Sep 7, 2025
e0235e2
fix
ywang96 Sep 7, 2025
2a6c6ba
add validation
ywang96 Sep 7, 2025
af2f766
update
ywang96 Sep 7, 2025
2376e02
Merge branch 'main' into mm_uuid
ywang96 Sep 8, 2025
1ef0961
Merge branch 'main' into mm_uuid
ywang96 Sep 8, 2025
a49d3eb
Update tests/entrypoints/test_chat_utils.py
DarkLight1337 Sep 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 33 additions & 9 deletions docs/features/multimodal_inputs.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,19 +215,19 @@ When loading RGBA images (images with transparency), vLLM converts them to RGB f

```python
from vllm import LLM

# Default white background (no configuration needed)
llm = LLM(model="llava-hf/llava-1.5-7b-hf")

# Custom black background for dark theme
llm = LLM(
model="llava-hf/llava-1.5-7b-hf",
media_io_kwargs={"image": {"rgba_background_color": [0, 0, 0]}}
)

# Custom brand color background (e.g., blue)
llm = LLM(
model="llava-hf/llava-1.5-7b-hf",
model="llava-hf/llava-1.5-7b-hf",
media_io_kwargs={"image": {"rgba_background_color": [0, 0, 255]}}
)
```
Expand Down Expand Up @@ -388,7 +388,7 @@ For Qwen2-VL and MiniCPM-V, we accept additional parameters alongside the embedd

## Online Serving

Our OpenAI-compatible server accepts multi-modal data via the [Chat Completions API](https://platform.openai.com/docs/api-reference/chat).
Our OpenAI-compatible server accepts multi-modal data via the [Chat Completions API](https://platform.openai.com/docs/api-reference/chat). Each media input also supports an optional user-provided UUID that uniquely identifies it, which is used to cache the processed media results across requests.

!!! important
A chat template is **required** to use Chat Completions API.
Expand Down Expand Up @@ -438,7 +438,13 @@ Then, you can use the OpenAI client as follows:
# NOTE: The prompt formatting with the image token `<image>` is not needed
# since the prompt will be processed automatically by the API server.
{"type": "text", "text": "What’s in this image?"},
{"type": "image_url", "image_url": {"url": image_url}},
{
"type": "image_url",
"image_url": {
                        "url": image_url
},
"uuid": image_url # Optional
},
],
}],
)
Expand All @@ -454,8 +460,20 @@ Then, you can use the OpenAI client as follows:
"role": "user",
"content": [
{"type": "text", "text": "What are the animals in these images?"},
{"type": "image_url", "image_url": {"url": image_url_duck}},
{"type": "image_url", "image_url": {"url": image_url_lion}},
{
"type": "image_url",
"image_url": {
"url": image_url_duck
},
"uuid": image_url_duck # Optional
},
{
"type": "image_url",
"image_url": {
"url": image_url_lion
},
"uuid": image_url_lion # Optional
},
],
}],
)
Expand Down Expand Up @@ -522,6 +540,7 @@ Then, you can use the OpenAI client as follows:
"video_url": {
"url": video_url
},
"uuid": video_url # Optional
},
],
}],
Expand Down Expand Up @@ -613,6 +632,7 @@ Then, you can use the OpenAI client as follows:
"data": audio_base64,
"format": "wav"
},
"uuid": audio_url # Optional
},
],
}],
Expand Down Expand Up @@ -642,6 +662,7 @@ Alternatively, you can pass `audio_url`, which is the audio counterpart of `imag
"audio_url": {
"url": audio_url
},
"uuid": audio_url # Optional
},
],
}],
Expand Down Expand Up @@ -695,7 +716,8 @@ The following example demonstrates how to pass image embeddings to the OpenAI se
model = "llava-hf/llava-1.5-7b-hf"
embeds = {
"type": "image_embeds",
"image_embeds": f"{base64_image_embedding}"
"image_embeds": f"{base64_image_embedding}",
"uuid": image_url # Optional
}

# Pass additional parameters (available to Qwen2-VL and MiniCPM-V)
Expand All @@ -706,6 +728,7 @@ The following example demonstrates how to pass image embeddings to the OpenAI se
"image_embeds": f"{base64_image_embedding}" , # Required
"image_grid_thw": f"{base64_image_grid_thw}" # Required by Qwen/Qwen2-VL-2B-Instruct
},
"uuid": image_url # Optional
}
model = "openbmb/MiniCPM-V-2_6"
embeds = {
Expand All @@ -714,6 +737,7 @@ The following example demonstrates how to pass image embeddings to the OpenAI se
"image_embeds": f"{base64_image_embedding}" , # Required
"image_sizes": f"{base64_image_sizes}" # Required by openbmb/MiniCPM-V-2_6
},
"uuid": image_url # Optional
}
chat_completion = client.chat.completions.create(
messages=[
Expand Down
129 changes: 129 additions & 0 deletions tests/entrypoints/openai/test_vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,3 +436,132 @@ async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
)
message = chat_completion.choices[0].message
assert message.content is not None and len(message.content) >= 0


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
    indirect=True)
async def test_completions_with_image(
    client: openai.AsyncOpenAI,
    model_name: str,
    image_urls: list[str],
):
    """Each image URL (without a UUID) should yield a non-empty completion."""
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant."
    }
    for url in image_urls:
        # Build the user turn: a text instruction plus one image part.
        user_message = {
            "role":
            "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe this image.",
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": url,
                    }
                },
            ],
        }
        chat_completion = await client.chat.completions.create(
            messages=[system_message, user_message],
            model=model_name,
        )
        content = chat_completion.choices[0].message.content
        assert content is not None
        assert isinstance(content, str)
        assert len(content) > 0


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
    indirect=True)
async def test_completions_with_image_with_uuid(
    client: openai.AsyncOpenAI,
    model_name: str,
    image_urls: list[str],
):
    """Supplying an optional media "uuid" should still yield a completion."""
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant."
    }
    for url in image_urls:
        # The image part carries a "uuid" key (here just the URL itself),
        # exercising the optional media-UUID pathway.
        image_part = {
            "type": "image_url",
            "image_url": {
                "url": url,
            },
            "uuid": url
        }
        user_message = {
            "role":
            "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe this image.",
                },
                image_part,
            ],
        }
        chat_completion = await client.chat.completions.create(
            messages=[system_message, user_message],
            model=model_name,
        )
        content = chat_completion.choices[0].message.content
        assert content is not None
        assert isinstance(content, str)
        assert len(content) > 0


@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
    indirect=True)
async def test_completions_with_image_with_incorrect_uuid_format(
    client: openai.AsyncOpenAI,
    model_name: str,
    image_urls: list[str],
):
    """Unknown UUID-like keys must be tolerated and still yield a completion."""
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant."
    }
    for url in image_urls:
        # Deliberately misplace the UUID under unrecognized keys, both inside
        # the image_url object and at the content-part level; the server is
        # expected to ignore them rather than reject the request.
        image_part = {
            "type": "image_url",
            "image_url": {
                "url": url,
                "incorrect_uuid_key": url,
            },
            "also_incorrect_uuid_key": url,
        }
        user_message = {
            "role":
            "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe this image.",
                },
                image_part,
            ],
        }
        chat_completion = await client.chat.completions.create(
            messages=[system_message, user_message],
            model=model_name,
        )
        content = chat_completion.choices[0].message.content
        assert content is not None
        assert isinstance(content, str)
        assert len(content) > 0
Loading