Skip to content

Commit 7855d00

Browse files
committed
Merge branch 'main' into event-sync
Signed-off-by: jesse <[email protected]>
2 parents 5347f2d + d0cc9c1 commit 7855d00

File tree

264 files changed

+10288
-8764
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

264 files changed

+10288
-8764
lines changed

.github/workflows/_accuracy_test.yaml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,16 @@ jobs:
7373
working-directory: ./vllm-empty
7474
run: |
7575
VLLM_TARGET_DEVICE=empty pip install -e .
76+
77+
- name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
78+
if: ${{ inputs.model_name == 'Qwen3-Next-80B-A3B-Instruct' }}
79+
shell: bash -l {0}
80+
run: |
81+
wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run -O /tmp/Ascend-BiSheng-toolkit_aarch64.run
82+
chmod a+x /tmp/Ascend-BiSheng-toolkit_aarch64.run
83+
/tmp/Ascend-BiSheng-toolkit_aarch64.run --install
84+
. /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
85+
python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
7686
7787
- name: Resolve vllm-ascend version
7888
run: |
@@ -166,7 +176,7 @@ jobs:
166176
167177
- name: Upload Report
168178
if: ${{ inputs.upload == true }}
169-
uses: actions/upload-artifact@v4
179+
uses: actions/upload-artifact@v5
170180
with:
171181
name: "report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
172182
path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
name: 'e2e nightly test multi_node'
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
soc_version:
7+
required: true
8+
type: string
9+
description: use a2 or a3
10+
runner:
11+
required: false
12+
type: string
13+
default: linux-aarch64-a3-0
14+
image:
15+
required: false
16+
type: string
17+
description: base image for pods
18+
default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11"
19+
config_file_path:
20+
required: true
21+
type: string
22+
description: the model config for multi_node test
23+
replicas:
24+
required: false
25+
default: "1"
26+
type: string
27+
description: replicas of the k8s cluster
28+
size:
29+
required: false
30+
default: "2"
31+
type: string
32+
description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
33+
vllm_version:
34+
required: false
35+
default: "v0.11.0"
36+
type: string
37+
description: vllm version to use
38+
vllm_ascend_remote_url:
39+
required: false
40+
default: https://github.com/vllm-project/vllm-ascend.git
41+
type: string
42+
description: used for pr level tests
43+
vllm_ascend_ref:
44+
required: false
45+
default: main
46+
type: string
47+
description: used for pr level tests
48+
secrets:
49+
KUBECONFIG_B64:
50+
required: true
51+
52+
53+
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
54+
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
55+
# The login shell is needed to activate the ascend-toolkit environment variables.
56+
defaults:
57+
run:
58+
shell: bash -el {0}
59+
60+
# only cancel in-progress runs of the same workflow
61+
# and ignore the lint / 8 cards test type
62+
concurrency:
63+
group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.config_file_path }}
64+
cancel-in-progress: true
65+
66+
jobs:
67+
e2e:
68+
name: ${{ inputs.config_file_path }}
69+
# This runner has no NPU; it only serves as the k8s controller.
70+
runs-on: ${{ inputs.runner }}
71+
container:
72+
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
73+
env:
74+
KUBECONFIG: /tmp/kubeconfig
75+
KUBECTL: /root/.cache/.kube/kubectl
76+
NAMESPACE: vllm-project
77+
LEADER_POD: vllm-0
78+
RESULT_FILE: /root/.cache/tests/ret_${{ inputs.soc_version }}
79+
steps:
80+
- name: Install system dependencies
81+
run: |
82+
# configure apt and pip source
83+
sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
84+
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
85+
pip install jinja2-cli
86+
87+
#apt-get update -y && apt-get install -y git curl
88+
89+
- name: Install kubectl
90+
run: |
91+
# Install kubectl
92+
arch=$(uname -m)
93+
94+
if echo "$arch" | grep -qiE "arm|aarch64"; then
95+
echo "Detected ARM architecture: $arch"
96+
KUBECTL="$KUBECTL"_arm
97+
fi
98+
install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
99+
100+
# Verify kubectl installation
101+
kubectl version --client=true
102+
103+
- name: Decode kubeconfig from secrets
104+
run: |
105+
# Decode and save kubeconfig
106+
echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG
107+
108+
- name: Checkout code
109+
uses: actions/checkout@v4
110+
111+
- name: Prepare scripts
112+
run: |
113+
# prepare for lws entrypoint scripts
114+
install -D tests/e2e/nightly/multi_node/scripts/run.sh /root/.cache/tests/run.sh
115+
116+
- name: Clear resources
117+
run: |
118+
# pre clear the crd resources created by lws
119+
kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found
120+
- name: Launch cluster
121+
id: launcher
122+
run: |
123+
set -e
124+
125+
size="${{ inputs.size }}"
126+
replicas="${{ inputs.replicas }}"
127+
image="${{ inputs.image }}"
128+
config_file_path="${{ inputs.config_file_path }}"
129+
vllm_version="${{ inputs.vllm_version }}"
130+
vllm_ascend_ref="${{ inputs.vllm_ascend_ref }}"
131+
vllm_ascend_remote_url="${{ inputs.vllm_ascend_remote_url }}"
132+
result_file_path="$RESULT_FILE"
133+
fail_tag=FAIL_TAG_"${{ inputs.config_file_path }}"
134+
echo "FAIL_TAG=${fail_tag}" >> $GITHUB_ENV
135+
136+
required_params=("size" "replicas" "image" "config_file_path")
137+
for param in "${required_params[@]}"; do
138+
if [ -z "${!param}" ]; then
139+
echo "Error: Parameter '$param' is required but empty"
140+
exit 1
141+
fi
142+
done
143+
144+
if [ "${{ inputs.soc_version }}" = "a3" ]; then
145+
npu_per_node=16
146+
else
147+
npu_per_node=8
148+
fi
149+
150+
jinja2 tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 \
151+
-D size="$size" \
152+
-D replicas="$replicas" \
153+
-D image="$image" \
154+
-D config_file_path="$config_file_path" \
155+
-D vllm_version="$vllm_version" \
156+
-D vllm_ascend_remote_url="$vllm_ascend_remote_url" \
157+
-D vllm_ascend_ref="$vllm_ascend_ref" \
158+
-D result_file_path="$result_file_path" \
159+
-D npu_per_node="$npu_per_node" \
160+
-D fail_tag="$fail_tag" \
161+
--outfile lws.yaml
162+
163+
kubectl apply -f ./lws.yaml
164+
165+
- name: Waiting for pod ready
166+
run: |
167+
echo "waiting for Pod [$LEADER_POD] in namespace [$NAMESPACE] to Ready..."
168+
169+
while true; do
170+
# get pod status
171+
READY_STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}')
172+
173+
if [[ "$READY_STATUS" == "true" ]]; then
174+
echo "Pod [$LEADER_POD] is Ready!"
175+
break
176+
else
177+
echo "Pod [$LEADER_POD] not ready, waiting..."
178+
sleep 3
179+
fi
180+
done
181+
182+
- name: Stream logs
183+
run: |
184+
set -euo pipefail
185+
echo "Looking for logs containing: $FAIL_TAG"
186+
kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" | while read -r line; do
187+
echo "$line"
188+
if echo "$line" | grep -q "$FAIL_TAG"; then
189+
exit 1 # workflow step failed
190+
fi
191+
done
192+
193+
- name: Post process
194+
if: always()
195+
run: |
196+
kubectl get pods -n $NAMESPACE
197+
# Cleanup runs under `if: always()`; tolerate resources that are already gone,
# matching the other LeaderWorkerSet delete commands in these workflows.
kubectl delete -f ./lws.yaml --ignore-not-found

.github/workflows/_e2e_nightly.yaml renamed to .github/workflows/_e2e_nightly_single_node.yaml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,17 +44,15 @@ defaults:
4444
# only cancel in-progress runs of the same workflow
4545
# and ignore the lint / 1 card / 4 cards test type
4646
concurrency:
47-
group: ${{ github.workflow }}-${{ github.ref }}
47+
group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.tests }}
4848
cancel-in-progress: true
4949

5050
jobs:
5151
e2e-nightly:
52-
name: e2e-nightly
52+
name: ${{ inputs.tests }}
5353
runs-on: ${{ inputs.runner }}
5454
container:
5555
image: ${{ inputs.image }}
56-
env:
57-
VLLM_USE_MODELSCOPE: True
5856
steps:
5957
- name: Check npu and CANN info
6058
run: |
@@ -111,5 +109,4 @@ jobs:
111109
VLLM_USE_MODELSCOPE: True
112110
VLLM_CI_RUNNER: ${{ inputs.runner }}
113111
run: |
114-
# TODO: enable more tests
115112
pytest -sv ${{ inputs.tests }}

.github/workflows/_e2e_test.yaml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,27 +88,31 @@ jobs:
8888
# We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
8989
# the test separately.
9090
91+
pytest -sv tests/e2e/singlecard/test_completion_with_prompt_embeds.py
9192
pytest -sv tests/e2e/singlecard/test_aclgraph.py
93+
pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
9294
pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
9395
pytest -sv tests/e2e/singlecard/test_bge_model.py
9496
pytest -sv tests/e2e/singlecard/test_camem.py
9597
pytest -sv tests/e2e/singlecard/test_chunked.py
9698
pytest -sv tests/e2e/singlecard/test_embedding.py
97-
pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
99+
# pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
98100
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
99101
pytest -sv tests/e2e/singlecard/test_ilama_lora.py
100102
pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
101103
pytest -sv tests/e2e/singlecard/test_quantization.py
102104
pytest -sv tests/e2e/singlecard/test_sampler.py
103105
pytest -sv tests/e2e/singlecard/test_vlm.py
106+
pytest -sv tests/e2e/singlecard/multi-modal/test_internvl.py
104107
105108
# ------------------------------------ v1 spec decode test ------------------------------------ #
106109
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
107110
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
108111
# Fix me: OOM error
109-
#pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
112+
# pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
110113
111-
pytest -sv tests/e2e/singlecard/ops/
114+
# TODO: Move ops test to nightly test
115+
#pytest -sv tests/e2e/singlecard/ops/
112116
113117
e2e-2-cards:
114118
name: multicard
@@ -176,7 +180,7 @@ jobs:
176180
run: |
177181
pytest -sv tests/e2e/multicard/test_data_parallel.py
178182
pytest -sv tests/e2e/multicard/test_expert_parallel.py
179-
pytest -sv tests/e2e/multicard/test_external_launcher.py
183+
# pytest -sv tests/e2e/multicard/test_external_launcher.py
180184
pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
181185
pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
182186
pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
name: 'resource clear'
2+
3+
on:
4+
workflow_call:
5+
inputs:
6+
runner:
7+
required: false
8+
type: string
9+
default: linux-aarch64-a3-0
10+
secrets:
11+
KUBECONFIG_B64:
12+
required: true
13+
14+
15+
# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
16+
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
17+
# The login shell is needed to activate the ascend-toolkit environment variables.
18+
defaults:
19+
run:
20+
shell: bash -el {0}
21+
22+
jobs:
23+
resource_clear:
24+
# This runner has no NPU; it only serves as the k8s controller.
25+
runs-on: ${{ inputs.runner }}
26+
container:
27+
image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
28+
env:
29+
KUBECONFIG: /tmp/kubeconfig
30+
KUBECTL: /root/.cache/.kube/kubectl
31+
NAMESPACE: vllm-project
32+
LEADER_POD: vllm-0
33+
RESULT_FILE: /root/.cache/tests/ret/test_result.txt
34+
steps:
35+
- name: Install kubectl
36+
run: |
37+
# Install kubectl
38+
arch=$(uname -m)
39+
40+
if echo "$arch" | grep -qiE "arm|aarch64"; then
41+
echo "Detected ARM architecture: $arch"
42+
KUBECTL="$KUBECTL"_arm
43+
fi
44+
install -o root -g root -m 0755 $KUBECTL /usr/local/bin/kubectl
45+
46+
# Verify kubectl installation
47+
kubectl version --client=true
48+
49+
- name: Decode kubeconfig from secrets
50+
run: |
51+
# Decode and save kubeconfig
52+
echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG
53+
54+
- name: Clear LWS resources
55+
if: always()
56+
run: |
57+
kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found

.github/workflows/accuracy_test.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ jobs:
5757
model_name: Qwen3-VL-30B-A3B-Instruct
5858
- runner: a2-2
5959
model_name: DeepSeek-V2-Lite
60+
- runner: a2-4
61+
model_name: Qwen3-Next-80B-A3B-Instruct
6062
fail-fast: false
6163
# test will be triggered when tag 'accuracy-test' & 'ready-for-test'
6264
if: >-

.github/workflows/format_pr_body.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636

3737
- name: Get vLLM version
3838
run: |
39-
VLLM_COMMIT=v0.11.0
39+
VLLM_COMMIT=83f478bb19489b41e9d208b47b4bb5a95ac171ac
4040
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
4141
4242
- name: Checkout repository

0 commit comments

Comments
 (0)