Commit 61f340b

Author: Roger Wang
Commit message: add details

Signed-off-by: Roger Wang <[email protected]>

1 parent b19e3d4 commit 61f340b

File tree

1 file changed: +8 additions, -3 deletions

vllm/v1/engine/processor.py

Lines changed: 8 additions & 3 deletions
@@ -473,9 +473,14 @@ def _validate_model_input(
         # NOTE: tokenizer.max_token_id is the tokenizer's vocab size while
         # self.model_config.get_vocab_size() is the model's vocab size.
-        # For some models, certain tokens exist in one but not the other,
-        # so we take the max of the two to decide if a token id is truly
-        # out-of-vocabulary.
+        # For Qwen3 models, the language model has extra tokens that do
+        # not exist in the tokenizer, and vice versa for multimodal
+        # placeholder tokens in some multimodal models.
+        # See https://github.com/QwenLM/Qwen3/issues/29#issuecomment-1933720399 # noqa: E501
+        # and https://github.com/vllm-project/vllm/pull/22471#discussion_r2312251421 # noqa: E501
+
+        # Here we take the max of the two to determine if a token id is
+        # truly out-of-vocabulary.
         if max_input_id > max(tokenizer.max_token_id,
                               self.model_config.get_vocab_size() - 1):
             raise ValueError(
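The validation this commit annotates can be sketched as a standalone function. This is a minimal illustration of the "max of the two vocab sizes" rule, not vLLM's actual API: validate_token_ids and its parameter names are hypothetical stand-ins for tokenizer.max_token_id and model_config.get_vocab_size() in the real code.

```python
def validate_token_ids(token_ids, tokenizer_max_token_id, model_vocab_size):
    """Reject prompts containing truly out-of-vocabulary token ids.

    A token id is considered out-of-vocabulary only if it exceeds BOTH
    the tokenizer's largest id and the model's vocab size minus one,
    since (as the commit notes for Qwen3 and some multimodal models)
    each vocabulary may contain ids the other lacks.
    """
    max_allowed = max(tokenizer_max_token_id, model_vocab_size - 1)
    max_input_id = max(token_ids)
    if max_input_id > max_allowed:
        raise ValueError(
            f"Token id {max_input_id} is out of vocabulary "
            f"(largest valid id is {max_allowed})")
    return token_ids
```

For example, with a tokenizer whose largest id is 10 and a model vocab of size 8, an input containing id 10 passes (the tokenizer knows it), while with both bounds below 20 an input containing id 20 is rejected.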
