Skip to content

Commit 0001f22

Browse files
[Test] Add accuracy test report
Signed-off-by: hfadzxy <[email protected]>
1 parent fa4a5d9 commit 0001f22

File tree

6 files changed

+590
-2
lines changed

6 files changed

+590
-2
lines changed

.github/actionlint.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
self-hosted-runner:
  # Labels of self-hosted runner in array of strings.
  # These must match the `runs-on:` labels used by the workflows,
  # e.g. 'linux-arm64-npu-2' in the accuracy test job.
  labels:
    - linux-arm64-npu-1
    - linux-arm64-npu-2
    - linux-arm64-npu-4
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#

# Collects the accuracy-report artifacts produced by the "Accuracy Tests"
# workflow and opens a PR that updates the markdown reports under
# docs/source/developer_guide/evaluation/accuracy_report.
name: Accuracy Report

on:
  workflow_run:
    # NOTE: `workflows` matches the triggering workflow by its `name:`.
    # This must be exactly "Accuracy Tests" (the name declared in the
    # accuracy test workflow); the previous value "accuarcy test" was
    # misspelled and never matched, so this trigger never fired.
    workflows: ["Accuracy Tests"]
    types: [completed]
  workflow_dispatch:
    inputs:
      branch:
        description: 'choose a dev branch to pr'
        required: true
        type: string
      vllm-ascend-version:
        description: 'what vllm-ascend version to accuracy test?'
        required: true
        type: string

jobs:
  download:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.branch }}

      - name: Debug List Artifacts
        run: gh api /repos/${{ github.repository }}/actions/artifacts
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Each "Query artifact run id" step finds the newest artifact whose
      # name matches "<vllm-ascend-version>-<model>-V0-report" and exports
      # the workflow run id that produced it, so the matching download step
      # can fetch the artifact from that run.
      - name: Query artifact run id for Qwen2.5-VL-7B-Instruct V0 latest artifact
        id: get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0
        run: |
          ARTIFACT_JSON=$(gh api "repos/${{ github.repository }}/actions/artifacts")
          RUN_ID=$(echo "$ARTIFACT_JSON" | \
            jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-VL-7B-Instruct-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
          echo "runid=$RUN_ID" >> "$GITHUB_OUTPUT"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Query artifact run id for Qwen2.5-7B-Instruct V0 latest artifact
        id: get_Qwen2_5_7B_Instruct_latest_run_id_V0
        run: |
          ARTIFACT_JSON=$(gh api "repos/${{ github.repository }}/actions/artifacts")
          RUN_ID=$(echo "$ARTIFACT_JSON" | \
            jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-7B-Instruct-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
          echo "runid=$RUN_ID" >> "$GITHUB_OUTPUT"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Query artifact run id for Llama-3.1-8B-Instruct V0 latest artifact
        id: get_Llama_3_1_8B_Instruct_latest_run_id_V0
        run: |
          ARTIFACT_JSON=$(gh api "repos/${{ github.repository }}/actions/artifacts")
          RUN_ID=$(echo "$ARTIFACT_JSON" | \
            jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Llama-3.1-8B-Instruct-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
          echo "runid=$RUN_ID" >> "$GITHUB_OUTPUT"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Query artifact run id for Qwen3-8B V0 latest artifact
        id: get_Qwen3_8B_latest_run_id_V0
        run: |
          ARTIFACT_JSON=$(gh api "repos/${{ github.repository }}/actions/artifacts")
          RUN_ID=$(echo "$ARTIFACT_JSON" | \
            jq -r '[.artifacts[] | select(.name=="${{ github.event.inputs.vllm-ascend-version }}-Qwen3-8B-V0-report")] | sort_by(.created_at) | last | .workflow_run.id')
          echo "runid=$RUN_ID" >> "$GITHUB_OUTPUT"
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Cross-run artifact downloads require an explicit token, repository
      # and run-id (actions/download-artifact@v4 behavior).
      - name: Download Qwen/Qwen2.5-VL-7B-Instruct V0 Artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-VL-7B-Instruct-V0-report
          path: ./docs/source/developer_guide/evaluation/accuracy_report
          github-token: ${{ secrets.GITHUB_TOKEN }}
          repository: vllm-project/vllm-ascend
          run-id: ${{ steps.get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0.outputs.runid }}

      - name: Download Qwen/Qwen2.5-7B-Instruct Artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ github.event.inputs.vllm-ascend-version }}-Qwen2.5-7B-Instruct-V0-report
          path: ./docs/source/developer_guide/evaluation/accuracy_report
          github-token: ${{ secrets.GITHUB_TOKEN }}
          repository: vllm-project/vllm-ascend
          run-id: ${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}

      - name: Download meta-llama/Llama-3.1-8B-Instruct Artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ github.event.inputs.vllm-ascend-version }}-Llama-3.1-8B-Instruct-V0-report
          path: ./docs/source/developer_guide/evaluation/accuracy_report
          github-token: ${{ secrets.GITHUB_TOKEN }}
          repository: vllm-project/vllm-ascend
          run-id: ${{ steps.get_Llama_3_1_8B_Instruct_latest_run_id_V0.outputs.runid }}

      - name: Download Qwen/Qwen3-8B Artifact
        uses: actions/download-artifact@v4
        with:
          name: ${{ github.event.inputs.vllm-ascend-version }}-Qwen3-8B-V0-report
          path: ./docs/source/developer_guide/evaluation/accuracy_report
          github-token: ${{ secrets.GITHUB_TOKEN }}
          repository: vllm-project/vllm-ascend
          run-id: ${{ steps.get_Qwen3_8B_latest_run_id_V0.outputs.runid }}

      - name: Display Files
        working-directory: ./docs/source/developer_guide/evaluation/accuracy_report
        run: |
          cat ./Qwen2.5-VL-7B-Instruct.md
          cat ./Llama-3.1-8B-Instruct.md
          cat ./Qwen2.5-7B-Instruct.md
          cat ./Qwen3-8B.md

      - name: Create Pull Request for markdown update
        uses: peter-evans/create-pull-request@v7
        with:
          token: ${{ secrets.PR_TOKEN }}
          base: ${{ github.ref_name }}
          branch: auto-pr/accuracy-test
          commit-message: "Update accuracy report for ${{ github.event.inputs.branch }}"
          add-paths: ./docs/source/developer_guide/evaluation/accuracy_report/*.md
          title: "[Doc]Update accuracy report for ${{ github.event.inputs.branch }}"
          body: |
            The accuracy results running on Ascend NPU have changed, I'm updating the report.
            Please review the changes.

            - [Workflow run][1]
            - [Qwen2.5-7B-Instruct accuracy report][2]
            - [Llama-3.1-8B-Instruct accuracy report][3]
            - [Qwen2.5-VL-7B-Instruct accuracy report][4]
            - [Qwen3-8B accuracy report][5]

            [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
            [2]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_7B_Instruct_latest_run_id_V0.outputs.runid }}
            [3]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Llama_3_1_8B_Instruct_latest_run_id_V0.outputs.runid }}
            [4]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen2_5_VL_7B_Instruct_latest_run_id_V0.outputs.runid }}
            [5]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ steps.get_Qwen3_8B_latest_run_id_V0.outputs.runid }}
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#

# Runs lm-evaluation-harness accuracy tests for the selected model(s) on
# Ascend NPU and uploads one markdown report per model as an artifact.
# NOTE: the `name:` below is referenced by the Accuracy Report workflow's
# `workflow_run.workflows` entry — keep them in sync.
name: Accuracy Tests

on:
  workflow_dispatch:
    inputs:
      vllm-version:
        description: 'what vllm version to accuracy test?'
        required: true
        type: string
      vllm-ascend-version:
        description: 'what vllm-ascend version to accuracy test?'
        required: true
        type: string
      models:
        description: 'choose model(all/Qwen2.5-7B-Instruct/Llama-3.1-8B-Instruct/Qwen2.5-VL-7B-Instruct/Qwen3-8B)'
        required: true
        type: choice
        options:
          - all
          - Qwen/Qwen2.5-7B-Instruct
          - meta-llama/Llama-3.1-8B-Instruct
          - Qwen/Qwen2.5-VL-7B-Instruct
          - Qwen/Qwen3-8B
        default: 'all'

# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
# declared as "shell: bash -el {0}" on steps that need to be properly activated.
# It's used to activate ascend-toolkit environment variables.
defaults:
  run:
    shell: bash -el {0}

jobs:
  model_tests:
    name: Model Test - ${{ matrix.model_name }}
    runs-on: 'linux-arm64-npu-2'
    strategy:
      # Build the matrix from the `models` input: 'all' expands to every
      # supported model; any other choice runs just that one model.
      matrix:
        include: ${{ fromJSON(
          (github.event.inputs.models == 'all' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"},{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"},{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}, {"model_name":"Qwen/Qwen3-8B","output_file":"Qwen3-8B"}]') ||
          (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-7B-Instruct","output_file":"Qwen2.5-7B-Instruct"}]') ||
          (github.event.inputs.models == 'meta-llama/Llama-3.1-8B-Instruct' && '[{"model_name":"meta-llama/Llama-3.1-8B-Instruct","output_file":"Llama-3.1-8B-Instruct"}]') ||
          (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' && '[{"model_name":"Qwen/Qwen2.5-VL-7B-Instruct","output_file":"Qwen2.5-VL-7B-Instruct"}]') ||
          (github.event.inputs.models == 'Qwen/Qwen3-8B' && '[{"model_name":"Qwen/Qwen3-8B","output_file":"Qwen3-8B"}]')
          ) }}
      fail-fast: false

    container:
      image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10
      env:
        HF_ENDPOINT: https://hf-mirror.com
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
        DATASET_SOURCE: ModelScope

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Check npu and CANN info
        run: |
          npu-smi info
          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info

      - name: Config mirrors
        run: |
          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
          apt-get update -y
          apt install git -y
          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

      # This step was previously duplicated verbatim; the redundant copy
      # has been removed so the apt installs run only once.
      - name: Install system dependencies
        run: |
          apt-get -y install `cat packages.txt`
          apt-get -y install gcc g++ cmake libnuma-dev

      - name: Checkout vllm-project/vllm repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm
          path: ./vllm-empty
          ref: ${{ github.event.inputs.vllm-version }}

      - name: Install vllm-project/vllm from source
        working-directory: ./vllm-empty
        run: VLLM_TARGET_DEVICE=empty pip install -e .

      - name: Checkout vllm-project/vllm-ascend repo
        uses: actions/checkout@v4
        with:
          repository: vllm-project/vllm-ascend
          path: ./vllm-ascend
          ref: ${{ github.event.inputs.vllm-ascend-version }}
          fetch-depth: 0

      # Download the torch_npu (PTA) wheel once and cache it under
      # /root/.cache/pta for subsequent runs on the same runner.
      - name: Install pta
        run: |
          if [ ! -d /root/.cache/pta ]; then
            mkdir -p /root/.cache/pta
          fi
          if [ ! -f /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl ]; then
            cd /root/.cache/pta
            rm -rf pytorch_v2.5.1_py310*
            wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz
            tar -zxvf pytorch_v2.5.1_py310.tar.gz
          fi
          pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

      - name: Install vllm-project/vllm-ascend
        working-directory: ./vllm-ascend
        run: |
          pip install -r requirements-dev.txt
          pip install -e .

      - name: Checkout EleutherAI/lm-evaluation-harness repo
        uses: actions/checkout@v4
        with:
          repository: EleutherAI/lm-evaluation-harness
          path: ./lm-eval
          fetch-depth: 0

      - name: Install EleutherAI/lm-evaluation-harness
        working-directory: ./lm-eval
        run: |
          pip install -e .
          pip install ray datasets==2.16.0 transformers==4.50.3 huggingface-hub==0.29.3

      # Resolve the installed CANN / torch / torch_npu / vllm versions and
      # export them via GITHUB_ENV for the report generator.
      - name: Collect version info
        run: |
          for dir in /usr/local/Ascend/ascend-toolkit/*; do
            dname=$(basename "$dir")
            if [ "$dname" != "latest" ]; then
              TOOLKIT_DIR="$dname"
              break
            fi
          done
          INFO_FILE="/usr/local/Ascend/ascend-toolkit/${TOOLKIT_DIR}/$(uname -i)-linux/ascend_toolkit_install.info"
          CANN_VERSION=$(grep "version=" "$INFO_FILE" \
                         | head -n1 \
                         | cut -d'=' -f2 \
                         | tr -d '"')
          {
            echo "CANN_VERSION=$CANN_VERSION"
            pip show torch | grep "Version:" | awk '{print "TORCH_VERSION="$2}'
            pip show torch_npu | grep "Version:" | awk '{print "TORCH_NPU_VERSION="$2}'
            pip show vllm | grep "Version:" | awk '{print "VLLM_VERSION="$2}' | sed 's/+.*//'
          } >> "$GITHUB_ENV"

      - name: Print versions
        run: |
          echo "CANN: ${{ env.CANN_VERSION }}"
          echo "Torch NPU: ${{ env.TORCH_NPU_VERSION }}"
          echo "Torch: ${{ env.TORCH_VERSION }}"
          echo "vLLM: ${{ env.VLLM_VERSION }}"

      - name: Run Accuracy Test for V0
        working-directory: ./benchmarks
        env:
          VLLM_USE_V1: 0
          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
        run: |
          mkdir -p ./accuracy/V0
          python ./scripts/run_accuracy.py \
            --model "${{ matrix.model_name }}" \
            --output "./accuracy/V0/${{ matrix.output_file }}.md" \
            --vllm_ascend_version "${{ github.event.inputs.vllm-ascend-version }}" \
            --cann_version "${{ env.CANN_VERSION }}" \
            --torch_npu_version "${{ env.TORCH_NPU_VERSION }}" \
            --torch_version "${{ env.TORCH_VERSION }}" \
            --vllm_version "${{ env.VLLM_VERSION }}"

      - name: Upload Report for V0
        uses: actions/upload-artifact@v4
        with:
          name: "${{ github.event.inputs.vllm-ascend-version }}-${{ matrix.output_file }}-V0-report"
          path: ./benchmarks/accuracy/V0/${{ matrix.output_file }}.md
          if-no-files-found: warn
          retention-days: 90
          overwrite: true

0 commit comments

Comments
 (0)