Skip to content

Commit d05d29f

Browse files
jiangyunfan1, Potabk, Yikun
authored
Enable nightly test and add qwen3 32b test case (#3370)
### What this PR does / why we need it?

This PR adds a nightly test case for qwen3_32b bf16.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

By running the case.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: jiangyunfan1 <[email protected]>
Signed-off-by: wangli <[email protected]>
Signed-off-by: Yikun Jiang <[email protected]>
Co-authored-by: wangli <[email protected]>
Co-authored-by: Yikun Jiang <[email protected]>
1 parent 0d59a3c commit d05d29f

File tree

5 files changed

+238
-4
lines changed

5 files changed

+238
-4
lines changed
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#

# Reusable workflow: starts a container on the requested runner, installs
# vLLM and vllm-ascend from source, then runs the pytest target(s) supplied
# by the caller.
name: 'e2e nightly test'

on:
  workflow_call:
    inputs:
      # Git ref of vllm-project/vllm to check out and install.
      vllm:
        required: true
        type: string
      # Label of the runner the job is scheduled on.
      runner:
        required: true
        type: string
      # Container image the job's steps run in.
      image:
        required: false
        type: string
        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11"
      # Argument(s) handed to `pytest -sv`.
      tests:
        required: true
        type: string

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

# Only cancel an in-progress run of the same invocation.
# Inside a called workflow the `github` context belongs to the caller, so
# `github.workflow`/`github.ref` alone are identical for every call made from
# one caller run. The runner and test target are part of the key so that
# parallel calls (e.g. several matrix entries) do not cancel each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.runner }}-${{ inputs.tests }}
  cancel-in-progress: true

jobs:
  e2e-nightly:
    name: e2e-nightly
    runs-on: ${{ inputs.runner }}
    container:
      image: ${{ inputs.image }}
      env:
        # Quoted so the value is an explicit string rather than a YAML boolean.
        VLLM_USE_MODELSCOPE: "True"
    steps:
      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
          apt-get update -y
          apt-get install -y git
          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4

      - name: Install system dependencies
        run: |
          apt-get -y install $(cat packages.txt)
          apt-get -y install gcc g++ cmake libnuma-dev

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          ref: ${{ inputs.vllm }}
          path: ./vllm-empty

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: |
          VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Install vllm-project/vllm-ascend
        env:
          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
        run: |
          pip install -r requirements-dev.txt
          pip install -v -e .

      - name: Run vllm-project/vllm-ascend test
        env:
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          VLLM_USE_MODELSCOPE: "True"
          # Handed over via the environment instead of being expanded into the
          # script text, so the input cannot alter the shell command itself.
          NIGHTLY_TESTS: ${{ inputs.tests }}
        run: |
          # TODO: enable more tests
          # Left unquoted on purpose: several space-separated targets still
          # split into separate pytest arguments.
          pytest -sv ${NIGHTLY_TESTS}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#

# Nightly entry point: calls the reusable e2e nightly workflow once per
# model job and runner in the matrix.
name: "ascend test / nightly"

on:
  schedule:
    # 16:00 UTC every day, which is midnight Beijing time (UTC+8).
    - cron: '0 16 * * *'
  workflow_dispatch:
  pull_request:
    branches:
      - "main"
      - "*-dev"
    # Pull requests only trigger a run when the nightly tests or this
    # workflow file change.
    paths:
      - "tests/e2e/nightly/**"
      - ".github/workflows/vllm_ascend_test_nightly.yaml"

# Login-shell bash ("-l") is the default for every `run` step because plain
# bash does not read ~/.profile or ~/.bashrc; the ascend-toolkit environment
# variables are activated that way.
defaults:
  run:
    shell: bash -el {0}

# A newer run on the same ref cancels the one still in progress.
concurrency:
  group: ascend-nightly-${{ github.ref }}
  cancel-in-progress: true

jobs:
  qwen3-32b:
    strategy:
      matrix:
        # An A3 chip runner should be added here once one is available.
        os:
          - linux-aarch64-a2-4
    # Note (yikun): if CI resources are limited, this job can be split into
    # two chained jobs.
    uses: ./.github/workflows/_e2e_nightly.yaml
    with:
      vllm: "v0.11.0"
      runner: ${{ matrix.os }}
      tests: "tests/e2e/nightly/models/test_qwen3_32b.py"

tests/e2e/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,10 @@ def _start_server(self, model: str, vllm_serve_args: list[str],
110110

111111
def __init__(self,
112112
model: str,
113-
server_host: str,
114-
server_port: int,
115113
vllm_serve_args: list[str],
116114
*,
115+
server_host: str = "0.0.0.0",
116+
server_port: int = 8080,
117117
env_dict: Optional[dict[str, str]] = None,
118118
seed: Optional[int] = 0,
119119
auto_port: bool = True,

tests/e2e/multi_node/test_multi_dp.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ def test_multi_dp(config: MultiNodeConfig) -> None:
3030

3131
with RemoteOpenAIServer(
3232
model_name,
33-
config.server_host,
34-
config.server_port,
3533
server_args,
34+
server_host=config.server_host,
35+
server_port=config.server_port,
3636
env_dict=env_dict,
3737
auto_port=False,
3838
seed=1024,
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
2+
# Copyright 2023 The vLLM team.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
# This file is a part of the vllm-ascend project.
16+
#
17+
from typing import Any
18+
19+
import openai
20+
import pytest
21+
22+
from tests.e2e.conftest import RemoteOpenAIServer
23+
# Model identifiers the test is parametrized over.
MODELS = ["Qwen/Qwen3-32B"]

# Values passed to the server's --tensor-parallel-size option.
TENSOR_PARALLELS = [4]

# Prompts submitted together in a single completions request.
prompts = ["San Francisco is a"]

# Keyword arguments forwarded to the completions API call.
api_keyword_args = {"max_tokens": 10}
37+
38+
@pytest.mark.asyncio
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("tp_size", TENSOR_PARALLELS)
async def test_models(model: str, tp_size: int) -> None:
    """Start a server for ``model`` and check that completions come back.

    The server is launched through ``RemoteOpenAIServer`` with ``tp_size``
    as the tensor-parallel size, every entry of ``prompts`` is sent in one
    completions request, and each returned choice must contain text.

    Args:
        model: Model identifier passed to the server and to the request.
        tp_size: Value given to ``--tensor-parallel-size``.
    """
    # auto_port=False is used below, so the port is supplied explicitly both
    # on the server command line and to the wrapper; keep the two in sync by
    # deriving them from this single value.
    port = 20002
    env_dict = {
        "TASK_QUEUE_ENABLE": "1",
        "OMP_PROC_BIND": "false",
        "HCCL_OP_EXPANSION_MODE": "AIV",
        "PAGED_ATTENTION_MASK_LEN": "5500"
    }
    server_args = [
        "--no-enable-prefix-caching", "--tensor-parallel-size",
        str(tp_size), "--port",
        str(port), "--max-model-len", "36864",
        "--max-num-batched-tokens", "36864", "--block-size", "128",
        "--trust-remote-code", "--gpu-memory-utilization", "0.9",
        "--additional-config", '{"enable_weight_nz_layout":true}'
    ]
    request_keyword_args: dict[str, Any] = {
        **api_keyword_args,
    }
    with RemoteOpenAIServer(model,
                            server_args,
                            server_port=port,
                            env_dict=env_dict,
                            auto_port=False) as server:
        client = server.get_async_client()
        batch = await client.completions.create(
            model=model,
            prompt=prompts,
            **request_keyword_args,
        )
        choices: list[openai.types.CompletionChoice] = batch.choices
        # One choice is expected per submitted prompt; check all of them
        # rather than only the first.
        assert len(choices) == len(prompts), "unexpected number of choices"
        for choice in choices:
            assert choice.text, "empty response"

0 commit comments

Comments
 (0)