2 files changed, +3 -3 lines changed
@@ -40,7 +40,7 @@ export PYTORCH_NPU_ALLOC_CONF=max_split_size_mb:256
 ### Online Inference
 
 ```bash
-vllm serve Qwen/Qwen3-Embedding-8B --task embed
+vllm serve Qwen/Qwen3-Embedding-8B --runner pooling
 ```
 
 Once your server is started, you can query the model with input prompts.
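
Note: once the server from this hunk is running, requests go to vLLM's OpenAI-compatible `/v1/embeddings` route. A minimal query sketch, where the host/port are vLLM's defaults and an assumption here:

```bash
# Minimal sketch; localhost:8000 is vLLM's default bind and an assumption.
curl http://localhost:8000/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{"model": "Qwen/Qwen3-Embedding-8B", "input": ["Example input text"]}'
```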
@@ -81,7 +81,7 @@ if __name__=="__main__":
     input_texts = queries + documents
 
     model = LLM(model="Qwen/Qwen3-Embedding-8B",
-                task="embed",
+                runner="pooling",
                 distributed_executor_backend="mp")
 
     outputs = model.embed(input_texts)
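
Note: `distributed_executor_backend="mp"` only selects multiprocessing workers; sharding the 8B weights across devices is a separate knob. A hedged sketch using vLLM's `tensor_parallel_size`, where the device count is an assumption, not taken from this diff:

```python
from vllm import LLM

# Sketch only: tensor_parallel_size shards the weights across devices.
# The value 2 is an assumption for illustration.
model = LLM(
    model="Qwen/Qwen3-Embedding-8B",
    runner="pooling",
    distributed_executor_backend="mp",
    tensor_parallel_size=2,
)
```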
@@ -44,7 +44,7 @@ def main():
     ]
     input_texts = queries + documents
 
-    model = LLM(model="Qwen/Qwen3-Embedding-0.6B", task="embed")
+    model = LLM(model="Qwen/Qwen3-Embedding-0.6B", runner="pooling")
 
     outputs = model.embed(input_texts)
     embeddings = torch.tensor([o.outputs.embedding for o in outputs])
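
Note: the `embeddings` tensor built on the last context line is typically scored against the query rows with cosine similarity. A minimal sketch, assuming `outputs` comes from the `model.embed(input_texts)` call above and that the first two rows of `input_texts` were the queries:

```python
import torch
import torch.nn.functional as F

# Sketch: cosine-similarity scores between query and document vectors.
embeddings = torch.tensor([o.outputs.embedding for o in outputs])
embeddings = F.normalize(embeddings, p=2, dim=1)  # unit-normalize each row
num_queries = 2  # assumption for illustration
scores = embeddings[:num_queries] @ embeddings[num_queries:].T
print(scores.tolist())
```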