Commit e56b001

[TEST]Add aisbench log and A2 cases (#3841)
### What this PR does / why we need it?
This PR adds 2 more A2 cases which we need to test daily. It also enhances the logging for aisbench test failures to improve issue identification.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
By running the test.

- vLLM version: v0.11.0rc3
- vLLM main: vllm-project/vllm@releases/v0.11.1

---------

Signed-off-by: jiangyunfan1 <[email protected]>
1 parent d08401d commit e56b001

File tree

6 files changed (+87, -17 lines)


.github/workflows/vllm_ascend_test_nightly_a2.yaml

Lines changed: 3 additions & 0 deletions
@@ -73,6 +73,9 @@ jobs:
         - name: multi-node-deepseek-dp
           config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml
           size: 2
+        - name: multi-node-deepseek-dp-torchair
+          config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml
+          size: 2
     uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
     with:
       soc_version: a2

tests/e2e/nightly/models/test_deepseek_r1_0528_w8a8.py

Lines changed: 4 additions & 1 deletion
@@ -133,4 +133,7 @@ async def test_models(model: str, mode: str) -> None:
     if mode in ["single", "no_chunkprefill"]:
         return
     # aisbench test
-    run_aisbench_cases(model, port, aisbench_cases)
+    run_aisbench_cases(model,
+                       port,
+                       aisbench_cases,
+                       server_args=server_args)
tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2-torchair.yaml

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
+test_name: "test DeepSeek-R1-W8A8 torchair on A2"
+model: "vllm-ascend/DeepSeek-R1-0528-W8A8"
+num_nodes: 2
+npu_per_node: 8
+env_common:
+  VLLM_USE_MODELSCOPE: true
+  HCCL_BUFFSIZE: 1024
+  SERVER_PORT: 8080
+  OMP_PROC_BIND: false
+  OMP_NUM_THREADS: 10
+
+
+deployment:
+  -
+    server_cmd: >
+      vllm serve vllm-ascend/DeepSeek-R1-0528-W8A8
+      --host 0.0.0.0
+      --port $SERVER_PORT
+      --data-parallel-size 4
+      --data-parallel-size-local 2
+      --data-parallel-address $LOCAL_IP
+      --data-parallel-rpc-port 13399
+      --no-enable-prefix-caching
+      --max-num-seqs 16
+      --tensor-parallel-size 4
+      --max-model-len 36864
+      --max-num-batched-tokens 6000
+      --enable-expert-parallel
+      --trust-remote-code
+      --quantization ascend
+      --gpu-memory-utilization 0.9
+      --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
+      --additional-config '{"ascend_scheduler_config":{"enabled":false},"torchair_graph_config":{"enabled":true,"enable_multistream_moe":true},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
+
+  -
+    server_cmd: >
+      vllm serve vllm-ascend/DeepSeek-R1-0528-W8A8
+      --headless
+      --data-parallel-size 4
+      --data-parallel-rpc-port 13399
+      --data-parallel-size-local 2
+      --data-parallel-start-rank 2
+      --data-parallel-address $MASTER_IP
+      --no-enable-prefix-caching
+      --max-num-seqs 16
+      --tensor-parallel-size 4
+      --max-model-len 36864
+      --max-num-batched-tokens 6000
+      --enable-expert-parallel
+      --trust-remote-code
+      --quantization ascend
+      --gpu-memory-utilization 0.9
+      --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
+      --additional-config '{"ascend_scheduler_config":{"enabled":false},"torchair_graph_config":{"enabled":true,"enable_multistream_moe":true},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
+benchmarks:
+  acc:
+    case_type: accuracy
+    dataset_path: vllm-ascend/gsm8k
+    request_conf: vllm_api_general_chat
+    dataset_conf: gsm8k/gsm8k_gen_0_shot_cot_chat_prompt
+    max_out_len: 32768
+    batch_size: 512
+    baseline: 95
+    threshold: 5
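
The acc benchmark above is judged against its baseline/threshold pair: the measured gsm8k accuracy must fall within baseline ± threshold, which is the assertion performed in tools/aisbench.py further down. A minimal sketch of that check; check_accuracy is a hypothetical helper for illustration only, not part of the repo:

# Minimal sketch of how baseline/threshold from the YAML are interpreted.
# check_accuracy is hypothetical; the real check is the assert inside
# AisbenchRunner._accuracy_verify in tools/aisbench.py.
def check_accuracy(acc_value: float, baseline: float = 95, threshold: float = 5) -> None:
    # The measured accuracy must land inside [baseline - threshold, baseline + threshold].
    assert baseline - threshold <= acc_value <= baseline + threshold, (
        f"Accuracy verification failed: {acc_value} is not within "
        f"{threshold} relative to baseline {baseline}.")


check_accuracy(96.4)  # passes: 90 <= 96.4 <= 100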

tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8-A2.yaml

Lines changed: 2 additions & 2 deletions
@@ -30,7 +30,7 @@ deployment:
       --quantization ascend
       --gpu-memory-utilization 0.9
       --enforce-eager
-      --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' \
+      --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
       --additional-config '{"ascend_scheduler_config":{"enabled":false},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'

   -
@@ -52,6 +52,6 @@ deployment:
       --quantization ascend
       --gpu-memory-utilization 0.9
       --enforce-eager
-      --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}' \
+      --speculative-config '{"num_speculative_tokens": 1, "method":"deepseek_mtp"}'
       --additional-config '{"ascend_scheduler_config":{"enabled":false},"chunked_prefill_for_mla":true,"enable_weight_nz_layout":true}'
 benchmarks:

tests/e2e/nightly/multi_node/test_multi_node.py

Lines changed: 2 additions & 4 deletions
@@ -117,9 +117,7 @@ async def test_multi_node() -> None:
     if config.is_master:
         port = proxy_port if disaggregated_prefill else server_port
         # aisbench test
-        if acc_cmd:
-            run_aisbench_cases(local_model_path, port, acc_cmd)
-        if perf_cmd:
-            run_aisbench_cases(local_model_path, port, perf_cmd)
+        aisbench_cases = [acc_cmd, perf_cmd]
+        run_aisbench_cases(local_model_path, port, aisbench_cases)
     else:
         remote_server.hang_until_terminated()
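
The simplified call is safe because run_aisbench_cases (updated in tools/aisbench.py below) now skips falsy entries, so an unset acc_cmd or perf_cmd becomes a no-op. A minimal sketch of that behaviour with placeholder values, not the actual test fixtures:

# Placeholder commands purely for illustration; real values come from the
# multi-node YAML configs.
acc_cmd = {"case_type": "accuracy", "dataset_path": "vllm-ascend/gsm8k"}
perf_cmd = None  # e.g. no performance case configured for this model

for case in [acc_cmd, perf_cmd]:
    if not case:  # mirrors the new guard inside run_aisbench_cases
        continue
    print(f"would run aisbench case: {case}")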

tools/aisbench.py

Lines changed: 12 additions & 10 deletions
@@ -16,6 +16,7 @@
 #
 import hashlib
 import json
+import logging
 import os
 import re
 import subprocess
@@ -188,8 +189,8 @@ def _wait_for_exp_folder(self):
                                          line).group(1)
                 return
             if "ERROR" in line:
-                raise RuntimeError(
-                    "Some errors happen to Aisbench task.") from None
+                error_msg = f"Some errors happened to Aisbench runtime, the first error is {line}"
+                raise RuntimeError(error_msg) from None

     def _wait_for_task(self):
         self._wait_for_exp_folder()
@@ -201,8 +202,8 @@ def _wait_for_task(self):
                 self.result_line = line
                 return
             if "ERROR" in line:
-                raise RuntimeError(
-                    "Some errors happen to Aisbench task.") from None
+                error_msg = f"Some errors happened to Aisbench runtime, the first error is {line}"
+                raise RuntimeError(error_msg) from None

     def _get_result_performance(self):
         result_dir = re.search(r'Performance Result files locate in (.*)',
@@ -237,12 +238,12 @@ def _accuracy_verify(self):
         assert self.baseline - self.threshold <= acc_value <= self.baseline + self.threshold, f"Accuracy verification failed. The accuracy of {self.dataset_path} is {acc_value}, which is not within {self.threshold} relative to baseline {self.baseline}."


-def run_aisbench_cases(model, port, aisbench_cases):
-    if isinstance(aisbench_cases, dict):
-        aisbench_cases = [aisbench_cases]
+def run_aisbench_cases(model, port, aisbench_cases, server_args=""):
     aisbench_results = []
     aisbench_errors = []
     for aisbench_case in aisbench_cases:
+        if not aisbench_case:
+            continue
         try:
             with AisbenchRunner(model, port, aisbench_case) as aisbench:
                 aisbench_results.append(aisbench.result)
@@ -251,9 +252,10 @@ def run_aisbench_cases(model, port, aisbench_cases):
             aisbench_errors.append([aisbench_case, e])
             print(e)
     for failed_case, error_info in aisbench_errors:
-        print(
-            f"The following aisbench case failed: {failed_case}, reason is {error_info}."
-        )
+        error_msg = f"The following aisbench case failed: {failed_case}, reason is {error_info}"
+        if server_args:
+            error_msg += f"\nserver_args are {server_args}"
+        logging.error(error_msg)
     assert not aisbench_errors, "some aisbench cases failed, info were shown above."
     return aisbench_results
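
With the new optional server_args parameter, callers can attach the server launch configuration to the failure log, which is what the updated test_deepseek_r1_0528_w8a8.py does. A rough usage sketch, assuming the import path tools.aisbench and an already-running server; the model name, port, case dict and server_args string here are illustrative, not taken from the CI configs:

from tools.aisbench import run_aisbench_cases  # import path assumed

# Illustrative accuracy case; keys mirror the benchmarks section of the
# nightly YAML configs.
aisbench_cases = [{
    "case_type": "accuracy",
    "dataset_path": "vllm-ascend/gsm8k",
    "request_conf": "vllm_api_general_chat",
    "dataset_conf": "gsm8k/gsm8k_gen_0_shot_cot_chat_prompt",
    "max_out_len": 32768,
    "batch_size": 512,
    "baseline": 95,
    "threshold": 5,
}]

# server_args is only echoed into the error log when a case fails, making it
# easier to tell which server configuration produced the failure.
results = run_aisbench_cases("vllm-ascend/DeepSeek-R1-0528-W8A8",
                             8080,
                             aisbench_cases,
                             server_args="--tensor-parallel-size 4 --quantization ascend")

Note that the isinstance(dict) wrapping was dropped, so aisbench_cases must now be a list; a single case should be wrapped in a list by the caller.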
