Skip to content

Commit 5b05aef

Browse files
authored
Merge branch 'main' into triton-cyx-new
Signed-off-by: yuxingcyx <[email protected]>
2 parents a60e4c8 + eac72f5 commit 5b05aef

File tree

254 files changed

+14581
-21099
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

254 files changed

+14581
-21099
lines changed

.github/Dockerfile.buildwheel

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,10 @@
1717
ARG PY_VERSION=3.11
1818
FROM quay.io/ascend/manylinux:8.3.rc2-910b-manylinux_2_28-py${PY_VERSION}
1919

20-
ARG COMPILE_CUSTOM_KERNELS=1
2120
ARG SOC_VERSION="ascend910b1"
2221

2322
# Define environments
2423
ENV DEBIAN_FRONTEND=noninteractive
25-
ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
2624
ENV SOC_VERSION=$SOC_VERSION
2725
RUN yum update -y && \
2826
yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \

.github/Dockerfile.nightly.a2

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git"
2323

2424
# Define environments
2525
ENV DEBIAN_FRONTEND=noninteractive
26-
ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
2726

2827
WORKDIR /workspace
2928

.github/Dockerfile.nightly.a3

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git"
2323

2424
# Define environments
2525
ENV DEBIAN_FRONTEND=noninteractive
26-
ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
2726

2827
WORKDIR /workspace
2928

.github/workflows/_e2e_test.yaml

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ jobs:
2525
env:
2626
VLLM_LOGGING_LEVEL: ERROR
2727
VLLM_USE_MODELSCOPE: True
28+
TRANSFORMERS_OFFLINE: 1
2829
steps:
2930
- name: Check npu and CANN info
3031
run: |
@@ -74,9 +75,10 @@ jobs:
7475
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
7576
if: ${{ inputs.type == 'light' }}
7677
run: |
77-
# pytest -sv tests/e2e/singlecard/test_aclgraph.py
78+
# pytest -sv tests/e2e/singlecard/test_aclgraph_accuracy.py
7879
# pytest -sv tests/e2e/singlecard/test_quantization.py
7980
pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
81+
pytest -sv tests/e2e/singlecard/pooling/test_classification.py::test_classify_correctness
8082
8183
- name: Run e2e test
8284
env:
@@ -89,25 +91,21 @@ jobs:
8991
# the test separately.
9092
9193
pytest -sv tests/e2e/singlecard/test_completion_with_prompt_embeds.py
92-
pytest -sv tests/e2e/singlecard/test_aclgraph.py
94+
pytest -sv tests/e2e/singlecard/test_aclgraph_accuracy.py
9395
pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
94-
pytest -sv tests/e2e/singlecard/test_bge_model.py
9596
pytest -sv tests/e2e/singlecard/test_camem.py
96-
pytest -sv tests/e2e/singlecard/test_embedding.py
97-
# pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
9897
pytest -sv tests/e2e/singlecard/test_guided_decoding.py
9998
# torch 2.8 doesn't work with lora, fix me
10099
#pytest -sv tests/e2e/singlecard/test_ilama_lora.py
101100
pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
102101
pytest -sv tests/e2e/singlecard/test_quantization.py
103102
pytest -sv tests/e2e/singlecard/test_sampler.py
104103
pytest -sv tests/e2e/singlecard/test_vlm.py
105-
pytest -sv tests/e2e/singlecard/multi-modal/test_internvl.py
106104
pytest -sv tests/e2e/singlecard/test_xlite.py
105+
pytest -sv tests/e2e/singlecard/pooling/
107106
108107
# ------------------------------------ v1 spec decode test ------------------------------------ #
109108
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
110-
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
111109
pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
112110
113111
e2e-2-cards:
@@ -119,6 +117,7 @@ jobs:
119117
VLLM_LOGGING_LEVEL: ERROR
120118
VLLM_USE_MODELSCOPE: True
121119
HCCL_BUFFSIZE: 1024
120+
TRANSFORMERS_OFFLINE: 1
122121
steps:
123122
- name: Check npu and CANN info
124123
run: |
@@ -170,10 +169,6 @@ jobs:
170169
if: ${{ inputs.type == 'light' }}
171170
run: |
172171
pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
173-
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_qwen3_moe_with_torchair
174-
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_deepseekv2lite_with_torchair
175-
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_deepseekv2lite_with_torchair_v1scheduler
176-
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_deepseekv2lite_with_nz
177172
178173
- name: Run vllm-project/vllm-ascend test (full)
179174
env:
@@ -183,7 +178,6 @@ jobs:
183178
run: |
184179
pytest -sv tests/e2e/multicard/test_quantization.py
185180
pytest -sv tests/e2e/multicard/test_aclgraph_capture_replay.py
186-
pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
187181
pytest -sv tests/e2e/multicard/test_full_graph_mode.py
188182
pytest -sv tests/e2e/multicard/test_data_parallel.py
189183
pytest -sv tests/e2e/multicard/test_expert_parallel.py
@@ -220,6 +214,7 @@ jobs:
220214
env:
221215
VLLM_LOGGING_LEVEL: ERROR
222216
VLLM_USE_MODELSCOPE: True
217+
TRANSFORMERS_OFFLINE: 1
223218
steps:
224219
- name: Check npu and CANN info
225220
run: |
@@ -272,6 +267,7 @@ jobs:
272267
run: |
273268
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
274269
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
270+
pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Kimi_K2_Thinking_W4A16
275271
# pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
276272
# pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_W8A8_WITH_EP
277273
pytest -sv tests/e2e/multicard/test_data_parallel_tp2.py

.github/workflows/vllm_ascend_test_nightly_a2.yaml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@ jobs:
8282
- os: linux-aarch64-a2-1
8383
model_list:
8484
- Qwen3-8B
85-
- Qwen2.5-VL-7B-Instruct
8685
- Qwen2-Audio-7B-Instruct
8786
- Qwen3-8B-W8A8
8887
- Qwen3-VL-8B-Instruct
@@ -91,7 +90,6 @@ jobs:
9190
- os: linux-aarch64-a2-1
9291
model_list:
9392
- ERNIE-4.5-21B-A3B-PT
94-
- gemma-2-9b-it
9593
- gemma-3-4b-it
9694
- internlm-7b
9795
- InternVL3_5-8B-hf
@@ -101,7 +99,6 @@ jobs:
10199
model_list:
102100
- Qwen3-30B-A3B
103101
- Qwen3-VL-30B-A3B-Instruct
104-
- DeepSeek-V2-Lite
105102
- Qwen3-30B-A3B-W8A8
106103
- os: linux-aarch64-a2-4
107104
model_list:
@@ -127,9 +124,6 @@ jobs:
127124
- name: multi-node-deepseek-dp
128125
config_file_path: DeepSeek-R1-W8A8-A2.yaml
129126
size: 2
130-
- name: multi-node-deepseek-dp-torchair
131-
config_file_path: DeepSeek-R1-W8A8-A2-torchair.yaml
132-
size: 2
133127
uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
134128
with:
135129
soc_version: a2

.github/workflows/vllm_ascend_test_nightly_a3.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ jobs:
5353
config_file_path: DeepSeek-V3.yaml
5454
size: 2
5555
- name: multi-node-qwen3-dp
56-
config_file_path: Qwen3-235B-A3B.yaml
56+
config_file_path: Qwen3-235B-A22B.yaml
5757
size: 2
5858
- name: multi-node-dpsk-4node-pd
5959
config_file_path: DeepSeek-R1-W8A8.yaml

.github/workflows/vllm_ascend_test_pr_full.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ jobs:
4444
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
4545
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
4646
steps:
47+
- name: Setup git proxy
48+
run: |
49+
git config --global --add safe.directory "$GITHUB_WORKSPACE"
50+
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
4751
# NOTE: Do not update the version of checkout; higher versions have some issues on self-hosted runners
4852
- uses: actions/checkout@v4
4953
- uses: dorny/paths-filter@v3

.github/workflows/vllm_ascend_test_pr_light.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ jobs:
4949
e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
5050
ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
5151
steps:
52+
- name: Setup git proxy
53+
run: |
54+
git config --global --add safe.directory "$GITHUB_WORKSPACE"
55+
git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
5256
# NOTE: Do not update the version of checkout; higher versions have some issues on self-hosted runners
5357
- uses: actions/checkout@v4
5458
- uses: dorny/paths-filter@v3
@@ -130,12 +134,9 @@ jobs:
130134
run: |
131135
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/arm64-linux/devlib
132136
pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
133-
--ignore tests/ut/torchair/models/test_torchair_deepseek_mtp.py \
134-
--ignore tests/ut/torchair/models/test_torchair_deepseek_v2.py \
135137
--ignore tests/ut/model_loader/netloader/test_netloader_elastic.py \
136138
--ignore tests/ut/kv_connector/test_remote_prefill_lifecycle.py \
137139
--ignore tests/ut/kv_connector/test_remote_decode_lifecycle.py \
138-
--ignore tests/ut/kv_connector/test_llmdatadist_connector.py \
139140
--ignore tests/ut/core/test_scheduler_dynamic_batch.py
140141
141142
- name: Upload coverage to Codecov

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ repos:
1212
- id: codespell
1313
args: [
1414
--toml, pyproject.toml,
15-
'--skip', 'tests/e2e/multicard/test_torchair_graph_mode.py,csrc/**,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml',
15+
'--skip', 'csrc/**,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml',
1616
'-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,ArchType,AND,ND'
1717
]
1818
additional_dependencies:

Dockerfile

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,11 @@
1818
FROM quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11
1919

2020
ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
21-
ARG COMPILE_CUSTOM_KERNELS=1
2221
ARG MOONCAKE_TAG="v0.3.7.post2"
2322
ARG SOC_VERSION="ascend910b1"
2423

2524
# Define environments
2625
ENV DEBIAN_FRONTEND=noninteractive
27-
ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
2826
ENV SOC_VERSION=$SOC_VERSION
2927

3028
WORKDIR /workspace

0 commit comments

Comments
 (0)