diff --git a/.github/workflows/_accuracy_test.yaml b/.github/workflows/_accuracy_test.yaml index 62d2970860e..eb7322ee751 100644 --- a/.github/workflows/_accuracy_test.yaml +++ b/.github/workflows/_accuracy_test.yaml @@ -30,7 +30,7 @@ jobs: runs-on: ${{ inputs.runner }} name: ${{ inputs.model_name }} accuracy container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 env: VLLM_USE_MODELSCOPE: True # 1. If version specified (work_dispatch), do specified branch accuracy test diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml index b0c101353f0..72e76db8a5e 100644 --- a/.github/workflows/accuracy_test.yaml +++ b/.github/workflows/accuracy_test.yaml @@ -68,5 +68,5 @@ jobs: with: vllm: v0.11.0 runner: linux-aarch64-${{ matrix.runner }} - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 model_name: ${{ matrix.model_name }} diff --git a/.github/workflows/multi_node_test.yaml b/.github/workflows/multi_node_test.yaml index fcf34510507..1d9a33cdbc2 100644 --- a/.github/workflows/multi_node_test.yaml +++ b/.github/workflows/multi_node_test.yaml @@ -23,7 +23,7 @@ jobs: # This is a runner with no NPU for k8s controller runs-on: linux-aarch64-a3-0 container: - image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11 env: KUBECONFIG: /tmp/kubeconfig KUBECTL: /root/.cache/.kube/kubectl diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 76c0c37f120..0c6eab76ba9 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -56,7 +56,7 @@ jobs: vllm_use_v1: 1 max-parallel: 1 container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 volumes: - /usr/local/dcmi:/usr/local/dcmi - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi diff --git a/.github/workflows/vllm_ascend_dist.yaml b/.github/workflows/vllm_ascend_dist.yaml index 216e62d8d4c..c5dc2d9c86f 100644 --- a/.github/workflows/vllm_ascend_dist.yaml +++ b/.github/workflows/vllm_ascend_dist.yaml @@ -47,7 +47,7 @@ jobs: name: vLLM Ascend test runs-on: ${{ matrix.os }} container: - image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11 env: DEBIAN_FRONTEND: noninteractive steps: @@ -97,4 +97,4 @@ jobs: VLLM_USE_MODELSCOPE: True run: | # TODO: enable more tests - pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe \ No newline at end of file + pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 079c0ec64f4..013fadf6ff1 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -145,5 +145,5 @@ jobs: with: vllm: ${{ matrix.vllm_version }} runner: linux-aarch64-a2 - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 type: light diff --git a/.github/workflows/vllm_ascend_test_310p.yaml b/.github/workflows/vllm_ascend_test_310p.yaml index 099f3e07fe0..348dc0224dd 100644 --- a/.github/workflows/vllm_ascend_test_310p.yaml +++ b/.github/workflows/vllm_ascend_test_310p.yaml @@ -58,7 +58,7 @@ jobs: runs-on: ${{ matrix.os }} container: # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-310p-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-310p-ubuntu22.04-py3.11 env: VLLM_LOGGING_LEVEL: ERROR VLLM_USE_MODELSCOPE: True diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml index 18b541a7f32..ec9b195ce35 100644 --- a/.github/workflows/vllm_ascend_test_full.yaml +++ b/.github/workflows/vllm_ascend_test_full.yaml @@ -76,5 +76,5 @@ jobs: with: vllm: ${{ matrix.vllm_version }} runner: linux-aarch64-a2 - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 type: full diff --git a/.github/workflows/vllm_ascend_test_full_vllm_main.yaml b/.github/workflows/vllm_ascend_test_full_vllm_main.yaml index dbd632912af..0c93b7742c0 100644 --- a/.github/workflows/vllm_ascend_test_full_vllm_main.yaml +++ b/.github/workflows/vllm_ascend_test_full_vllm_main.yaml @@ -41,5 +41,5 @@ jobs: with: vllm: main runner: linux-aarch64-a2 - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 type: full diff --git a/.github/workflows/vllm_ascend_test_models.yaml b/.github/workflows/vllm_ascend_test_models.yaml index 855eb21fccf..83c9fb1a740 100644 --- a/.github/workflows/vllm_ascend_test_models.yaml +++ b/.github/workflows/vllm_ascend_test_models.yaml @@ -79,7 +79,7 @@ jobs: with: vllm: v0.11.0 runner: linux-aarch64-${{ matrix.runner }} - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 model_name: ${{ matrix.model_name }} upload: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }} diff --git a/.github/workflows/vllm_ascend_test_pd.yaml b/.github/workflows/vllm_ascend_test_pd.yaml index 778d83b6375..b031e088a76 100644 --- a/.github/workflows/vllm_ascend_test_pd.yaml +++ b/.github/workflows/vllm_ascend_test_pd.yaml @@ -49,7 +49,7 @@ jobs: runs-on: linux-arm64-npu-static-8 container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11 + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11 volumes: - /usr/local/dcmi:/usr/local/dcmi - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi @@ -109,4 +109,4 @@ jobs: - name: Run vllm-project/vllm-ascend PD Disaggregation edge test run: | git config --global --add safe.directory/__w/vllm-ascend/vllm-ascend - bash tests/e2e/pd_disaggreate/run_edge_case_test.sh \ No newline at end of file + bash tests/e2e/pd_disaggreate/run_edge_case_test.sh diff --git a/Dockerfile b/Dockerfile index c7d43c6c559..64fb4b1c90e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,7 +15,7 @@ # This file is a part of the vllm-ascend project. # -FROM quay.io/ascend/cann:8.3.rc1-910b-ubuntu22.04-py3.11 +FROM quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 diff --git a/Dockerfile.310p b/Dockerfile.310p index f9948912741..38508b774f0 100644 --- a/Dockerfile.310p +++ b/Dockerfile.310p @@ -15,7 +15,7 @@ # This file is a part of the vllm-ascend project. # -FROM quay.io/ascend/cann:8.3.rc1-310p-ubuntu22.04-py3.11 +FROM quay.io/ascend/cann:8.3.rc2-310p-ubuntu22.04-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler index 5a7b950480f..159fe006d3f 100644 --- a/Dockerfile.310p.openEuler +++ b/Dockerfile.310p.openEuler @@ -15,7 +15,7 @@ # This file is a part of the vllm-ascend project. # -FROM quay.io/ascend/cann:8.3.rc1-310p-openeuler24.03-py3.11 +FROM quay.io/ascend/cann:8.3.rc2-310p-openeuler24.03-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 diff --git a/Dockerfile.a3 b/Dockerfile.a3 index efebed4e3a0..29a20b6da14 100644 --- a/Dockerfile.a3 +++ b/Dockerfile.a3 @@ -15,7 +15,7 @@ # This file is a part of the vllm-ascend project. # -FROM quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11 +FROM quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 @@ -57,4 +57,4 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \ python3 -m pip cache purge -CMD ["/bin/bash"] \ No newline at end of file +CMD ["/bin/bash"] diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler index 835df2e6135..7101285b439 100644 --- a/Dockerfile.a3.openEuler +++ b/Dockerfile.a3.openEuler @@ -15,7 +15,7 @@ # This file is a part of the vllm-ascend project. # -FROM quay.io/ascend/cann:8.3.rc1-a3-openeuler24.03-py3.11 +FROM quay.io/ascend/cann:8.3.rc2-a3-openeuler24.03-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 @@ -55,4 +55,4 @@ RUN export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi RUN python3 -m pip install modelscope 'ray>=2.47.1' 'protobuf>3.20.0' && \ python3 -m pip cache purge -CMD ["/bin/bash"] \ No newline at end of file +CMD ["/bin/bash"] diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index 77abf09b675..c59c61ea950 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -15,7 +15,7 @@ # This file is a part of the vllm-ascend project. # -FROM quay.io/ascend/cann:8.3.rc1-910b-openeuler24.03-py3.11 +FROM quay.io/ascend/cann:8.3.rc2-910b-openeuler24.03-py3.11 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple" ARG COMPILE_CUSTOM_KERNELS=1 diff --git a/docs/source/conf.py b/docs/source/conf.py index 27c57a9e2e4..4cd99f1d922 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -75,7 +75,7 @@ 'pip_vllm_ascend_version': "0.11.0rc0", 'pip_vllm_version': "0.11.0", # CANN image tag - 'cann_image_tag': "8.3.rc1-910b-ubuntu22.04-py3.11", + 'cann_image_tag': "8.3.rc2-910b-ubuntu22.04-py3.11", # vllm version in ci 'ci_vllm_version': 'v0.11.0rc3', } diff --git a/docs/source/installation.md b/docs/source/installation.md index 20ea07a3238..86da0552be8 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -79,19 +79,19 @@ source vllm-ascend-env/bin/activate pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple attrs 'numpy<2.0.0' decorator sympy cffi pyyaml pathlib2 psutil protobuf scipy requests absl-py wheel typing_extensions # Download and install the CANN package. -wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run -chmod +x ./Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run -./Ascend-cann-toolkit_8.3.RC1_linux-"$(uname -i)".run --full -# https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-kernels-910b_8.3.rc1_linux-aarch64.run +wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run +chmod +x ./Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run +./Ascend-cann-toolkit_8.3.RC2_linux-"$(uname -i)".run --full +# https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Milan-ASL/Milan-ASL%20V100R001C22B800TP052/Ascend-cann-kernels-910b_8.3.rc2_linux-aarch64.run source /usr/local/Ascend/ascend-toolkit/set_env.sh -wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run -chmod +x ./Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run -./Ascend-cann-kernels-910b_8.3.RC1_linux-"$(uname -i)".run --install +wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run +chmod +x ./Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run +./Ascend-cann-kernels-910b_8.3.RC2_linux-"$(uname -i)".run --install -wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC1/Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run -chmod +x ./Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run -./Ascend-cann-nnal_8.3.RC1_linux-"$(uname -i)".run --install +wget --header="Referer: https://www.hiascend.com/" https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%208.3.RC2/Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run +chmod +x ./Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run +./Ascend-cann-nnal_8.3.RC2_linux-"$(uname -i)".run --install source /usr/local/Ascend/nnal/atb/set_env.sh ``` diff --git a/docs/source/tutorials/multi_npu_qwen3_next.md b/docs/source/tutorials/multi_npu_qwen3_next.md index c9e8b4080bc..c9103604cdb 100644 --- a/docs/source/tutorials/multi_npu_qwen3_next.md +++ b/docs/source/tutorials/multi_npu_qwen3_next.md @@ -51,7 +51,7 @@ Install the Ascend BiSheng toolkit: wget https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run chmod a+x Ascend-BiSheng-toolkit_aarch64.run ./Ascend-BiSheng-toolkit_aarch64.run --install -source /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh +source /usr/local/Ascend/8.3.RC2/bisheng_toolkit/set_env.sh ``` Install Triton Ascend: @@ -75,7 +75,7 @@ Coming soon ... Please make sure you have already executed the command: ```bash -source /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh +source /usr/local/Ascend/8.3.RC2/bisheng_toolkit/set_env.sh ``` :::::{tab-set} diff --git a/tests/e2e/nightly/multi_node/scripts/lws.yaml b/tests/e2e/nightly/multi_node/scripts/lws.yaml index 6db4778c8b4..c2198e8436c 100644 --- a/tests/e2e/nightly/multi_node/scripts/lws.yaml +++ b/tests/e2e/nightly/multi_node/scripts/lws.yaml @@ -15,7 +15,7 @@ spec: spec: containers: - name: vllm-leader - image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11 env: - name: WORKSPACE value: "/root/workspace" @@ -70,7 +70,7 @@ spec: spec: containers: - name: vllm-worker - image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11 + image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11 env: - name: WORKSPACE value: "/root/workspace" diff --git a/tests/e2e/vllm_interface/vllm_test.cfg b/tests/e2e/vllm_interface/vllm_test.cfg index 9723d49cad7..dfd540384bc 100644 --- a/tests/e2e/vllm_interface/vllm_test.cfg +++ b/tests/e2e/vllm_interface/vllm_test.cfg @@ -1,2 +1,2 @@ # Base docker image used to build the vllm-ascend e2e test image, which is built in the vLLM repository -BASE_IMAGE_NAME="quay.io/ascend/cann:8.3.rc1-910b-ubuntu22.04-py3.11" +BASE_IMAGE_NAME="quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11"