vllm-project
diff --git a/‎.github/workflows/_accuracy_test.yaml‎
Lines changed: 11 additions & 1 deletion b/‎.github/workflows/_accuracy_test.yaml‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎.github/workflows/_e2e_nightly_multi_node.yaml‎
Lines changed: 197 additions & 0 deletions b/‎.github/workflows/_e2e_nightly_multi_node.yaml‎
Lines changed: 197 additions & 0 deletions
diff --git a/‎.github/workflows/_e2e_nightly.yaml‎ ‎…/workflows/_e2e_nightly_single_node.yaml‎.github/workflows/_e2e_nightly.yaml renamed to .github/workflows/_e2e_nightly_single_node.yaml
Lines changed: 2 additions & 5 deletions b/‎.github/workflows/_e2e_nightly.yaml‎ ‎…/workflows/_e2e_nightly_single_node.yaml‎.github/workflows/_e2e_nightly.yaml renamed to .github/workflows/_e2e_nightly_single_node.yaml
Lines changed: 2 additions & 5 deletions
diff --git a/‎.github/workflows/_e2e_test.yaml‎
Lines changed: 8 additions & 4 deletions b/‎.github/workflows/_e2e_test.yaml‎
Lines changed: 8 additions & 4 deletions
diff --git a/‎.github/workflows/_kill_lws_resources.yaml‎
Lines changed: 57 additions & 0 deletions b/‎.github/workflows/_kill_lws_resources.yaml‎
Lines changed: 57 additions & 0 deletions
diff --git a/‎.github/workflows/accuracy_test.yaml‎
Lines changed: 2 additions & 0 deletions b/‎.github/workflows/accuracy_test.yaml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.github/workflows/format_pr_body.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/format_pr_body.yaml‎
Lines changed: 1 addition & 1 deletion
@@ -73,6 +73,16 @@ jobs:
         working-directory: ./vllm-empty
         run: |
           VLLM_TARGET_DEVICE=empty pip install -e .
+        
+      # Only needed for Qwen3-Next-80B-A3B-Instruct: installs the BiSheng toolkit and
+      # the triton_ascend wheel from the project's OBS bucket.
+      # NOTE(review): variables exported by set_env.sh only live for this step's
+      # shell; confirm whether later steps need them as well.
+      - name: Install Ascend toolkit & triton_ascend (for Qwen3-Next-80B-A3B-Instruct)
+        if: ${{ inputs.model_name == 'Qwen3-Next-80B-A3B-Instruct' }}
+        shell: bash -l {0}
+        run: |
+          # "bash -l {0}" does not enable errexit, so turn it on explicitly: otherwise
+          # a failed download or installer run is ignored and only the final
+          # "pip install" decides whether the step passes.
+          set -eo pipefail
+
+          toolkit_run=/tmp/Ascend-BiSheng-toolkit_aarch64.run
+          wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run -O "$toolkit_run"
+          chmod a+x "$toolkit_run"
+          "$toolkit_run" --install
+          . /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
+          python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
 
       - name: Resolve vllm-ascend version
         run: |
@@ -166,7 +176,7 @@ jobs:
 
       - name: Upload Report
         if: ${{ inputs.upload == true }}
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v5
         with:
           name: "report-${{ env.GHA_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
           path: ./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md
 
@@ -0,0 +1,197 @@
+# Reusable workflow (workflow_call) for the multi-node nightly e2e test: it renders
+# tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 from the inputs, applies the
+# manifest with kubectl, waits for the leader pod, streams its logs (failing when
+# the FAIL_TAG marker appears) and finally deletes the applied manifest.
+name: 'e2e nightly test multi_node'
+
+on:
+  workflow_call:
+    inputs:
+      soc_version:
+        required: true
+        type: string
+        description: use a2 or a3
+      runner:
+        required: false
+        type: string
+        default: linux-aarch64-a3-0
+      image:
+        required: false
+        type: string
+        description: base image for pods
+        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11"
+      config_file_path:
+        required: true
+        type: string
+        description: the model config for multi_node test
+      replicas:
+        required: false
+        default: "1"
+        type: string
+        description: replicas of the k8s cluster
+      size:
+        required: false
+        default: "2"
+        type: string
+        description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
+      vllm_version:
+        required: false
+        default: "v0.11.0"
+        type: string
+        description: vllm version to use
+      vllm_ascend_remote_url:
+        required: false
+        default: https://github.com/vllm-project/vllm-ascend.git
+        type: string
+        description: used for pr level tests
+      vllm_ascend_ref:
+        required: false
+        default: main
+        type: string
+        description: used for pr level tests
+    secrets:
+      KUBECONFIG_B64:
+        required: true
+
+
+# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+# Only cancel in-progress runs that share the same workflow ref, git ref and
+# config file; runs started for other config files are left untouched.
+concurrency:
+  group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.config_file_path }}
+  cancel-in-progress: true
+
+jobs:
+  e2e:
+    name: ${{ inputs.config_file_path }}
+    # This is the runner with no NPU for k8s controller
+    runs-on: ${{ inputs.runner }}
+    container:
+      image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
+      env:
+        KUBECONFIG: /tmp/kubeconfig
+        KUBECTL: /root/.cache/.kube/kubectl
+        NAMESPACE: vllm-project
+        LEADER_POD: vllm-0
+        RESULT_FILE: /root/.cache/tests/ret_${{ inputs.soc_version }}
+    steps:
+        # Points apt and pip at the Tsinghua mirrors and installs jinja2-cli, which
+        # provides the "jinja2" command used later to render the LWS manifest.
+        - name: Install system dependencies
+          run: |
+            # configure apt and pip source
+            sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+            pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+            pip install jinja2-cli
+
+            # apt-get update -y && apt-get install -y git curl
+
+        # kubectl is not downloaded here: the binary is copied from the path held in
+        # $KUBECTL (container env); on ARM hosts the "_arm" suffixed file is used.
+        - name: Install kubectl
+          run: |
+            # Install kubectl
+            arch=$(uname -m)
+
+            if echo "$arch" | grep -qiE "arm|aarch64"; then
+              echo "Detected ARM architecture: $arch"
+              KUBECTL="${KUBECTL}_arm"
+            fi
+            # Quote the path so it is always passed to install as a single argument.
+            install -o root -g root -m 0755 "$KUBECTL" /usr/local/bin/kubectl
+
+            # Verify kubectl installation
+            kubectl version --client=true
+
+        # Writes the base64-encoded kubeconfig secret to the path in $KUBECONFIG.
+        - name: Decode kubeconfig from secrets
+          env:
+            # Hand the secret over through the environment instead of expanding it
+            # into the script text, so its content is never parsed as shell code.
+            KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
+          run: |
+            # Decode and save kubeconfig; keep the credentials file owner-only.
+            umask 077
+            printf '%s' "$KUBECONFIG_B64" | base64 -d > "$KUBECONFIG"
+            chmod 600 "$KUBECONFIG"
+
+        - name: Checkout code
+          uses: actions/checkout@v4
+
+        # Copies run.sh from the checkout to /root/.cache/tests/run.sh; "install -D"
+        # creates missing parent directories of the target.
+        # NOTE(review): presumably /root/.cache is a volume shared with the pods;
+        # confirm against the lws.yaml.jinja2 template.
+        - name: Prepare scripts
+          run: |
+            # prepare for lws entrypoint scripts
+            install -D tests/e2e/nightly/multi_node/scripts/run.sh /root/.cache/tests/run.sh
+
+        # Removes a LeaderWorkerSet named "vllm" left in $NAMESPACE before launching a
+        # new one; --ignore-not-found keeps the step green when there is none.
+        - name: Clear resources
+          run: |
+            # pre clear the crd resources created by lws
+            kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found
+        # Renders the LeaderWorkerSet manifest from the Jinja2 template and applies it.
+        # Also exports FAIL_TAG through GITHUB_ENV so later steps can read it.
+        - name: Launch cluster
+          id: launcher
+          env:
+            # Workflow inputs are handed over through the environment instead of being
+            # expanded into the script text, so their content is never parsed as
+            # shell code.
+            LWS_SIZE: ${{ inputs.size }}
+            LWS_REPLICAS: ${{ inputs.replicas }}
+            LWS_IMAGE: ${{ inputs.image }}
+            LWS_CONFIG_FILE_PATH: ${{ inputs.config_file_path }}
+            LWS_VLLM_VERSION: ${{ inputs.vllm_version }}
+            LWS_VLLM_ASCEND_REF: ${{ inputs.vllm_ascend_ref }}
+            LWS_VLLM_ASCEND_REMOTE_URL: ${{ inputs.vllm_ascend_remote_url }}
+            LWS_SOC_VERSION: ${{ inputs.soc_version }}
+          run: |
+            set -e
+
+            size="$LWS_SIZE"
+            replicas="$LWS_REPLICAS"
+            image="$LWS_IMAGE"
+            config_file_path="$LWS_CONFIG_FILE_PATH"
+            vllm_version="$LWS_VLLM_VERSION"
+            vllm_ascend_ref="$LWS_VLLM_ASCEND_REF"
+            vllm_ascend_remote_url="$LWS_VLLM_ASCEND_REMOTE_URL"
+            result_file_path="$RESULT_FILE"
+
+            # Marker that identifies a failed run of this config in the pod logs;
+            # exported so that subsequent steps see it as $FAIL_TAG.
+            fail_tag="FAIL_TAG_${config_file_path}"
+            echo "FAIL_TAG=${fail_tag}" >> "$GITHUB_ENV"
+
+            # Abort early when a value the template cannot do without is empty.
+            required_params=("size" "replicas" "image" "config_file_path")
+            for param in "${required_params[@]}"; do
+              if [ -z "${!param}" ]; then
+                echo "Error: Parameter '$param' is required but empty"
+                exit 1
+              fi
+            done
+
+            # 16 NPUs per node for "a3"; every other soc_version is treated as 8.
+            if [ "$LWS_SOC_VERSION" = "a3" ]; then
+              npu_per_node=16
+            else
+              npu_per_node=8
+            fi
+
+            jinja2 tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2 \
+              -D size="$size" \
+              -D replicas="$replicas" \
+              -D image="$image" \
+              -D config_file_path="$config_file_path" \
+              -D vllm_version="$vllm_version" \
+              -D vllm_ascend_remote_url="$vllm_ascend_remote_url" \
+              -D vllm_ascend_ref="$vllm_ascend_ref" \
+              -D result_file_path="$result_file_path" \
+              -D npu_per_node="$npu_per_node" \
+              -D fail_tag="$fail_tag" \
+              --outfile lws.yaml
+
+            kubectl apply -f ./lws.yaml
+
+        # Polls the leader pod every 3 seconds until all of its containers report
+        # ready, or fails once POD_READY_TIMEOUT seconds (default 3600) have passed.
+        - name: Waiting for pod ready
+          run: |
+            echo "waiting for Pod [$LEADER_POD] in namespace [$NAMESPACE] to Ready..."
+
+            # Bound the wait instead of polling until the job-level timeout expires.
+            timeout_s="${POD_READY_TIMEOUT:-3600}"
+            SECONDS=0
+
+            while true; do
+              # The pod may not exist yet right after "kubectl apply"; the shell runs
+              # with -e, so a failed lookup must not abort the step.
+              READY_STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" \
+                -o jsonpath='{.status.containerStatuses[*].ready}' || true)
+
+              # jsonpath prints one value per container ("true true ..."): the pod is
+              # ready when at least one status is reported and none of them is false.
+              if [[ -n "$READY_STATUS" && "$READY_STATUS" != *false* ]]; then
+                echo "Pod [$LEADER_POD] is Ready!"
+                break
+              fi
+
+              if (( SECONDS >= timeout_s )); then
+                echo "Timed out after ${timeout_s}s waiting for Pod [$LEADER_POD]"
+                kubectl describe pod "$LEADER_POD" -n "$NAMESPACE" || true
+                exit 1
+              fi
+
+              echo "Pod [$LEADER_POD] not ready, waiting..."
+              sleep 3
+            done
+
+        # Follows the leader pod's log and fails the step as soon as a line contains
+        # the FAIL_TAG marker; the step passes when the log stream ends without it.
+        - name: Stream logs
+          run: |
+            set -euo pipefail
+            echo "Looking for logs containing: $FAIL_TAG"
+            # IFS= keeps each line's leading/trailing whitespace intact when echoing.
+            kubectl logs -f "$LEADER_POD" -n "$NAMESPACE" | while IFS= read -r line; do
+              printf '%s\n' "$line"
+              # FAIL_TAG embeds a file path, so compare it as a literal substring
+              # rather than as a regular expression ("." would match any character).
+              if [[ "$line" == *"$FAIL_TAG"* ]]; then
+                exit 1   # workflow step failed
+              fi
+            done
+
+        # Always runs: prints the pods in the namespace for diagnosis, then deletes
+        # the resources created from lws.yaml.
+        - name: Post process
+          if: always()
+          run: |
+            # Diagnostics are best effort: the shell runs with -e, so a failure here
+            # must not prevent the cleanup below.
+            kubectl get pods -n "$NAMESPACE" || true
+
+            # lws.yaml only exists when an earlier step got as far as rendering it.
+            if [ -f ./lws.yaml ]; then
+              kubectl delete -f ./lws.yaml --ignore-not-found
+            else
+              echo "lws.yaml was not generated; nothing to delete"
+            fi
@@ -44,17 +44,15 @@ defaults:
 # only cancel in-progress runs of the same workflow
 # and ignore the lint / 1 card / 4 cards test type
 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
+  group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.tests }}
   cancel-in-progress: true
 
 jobs:
   e2e-nightly:
-    name: e2e-nightly
+    name: ${{ inputs.tests }}
     runs-on: ${{ inputs.runner }}
     container:
       image: ${{ inputs.image }}
-      env:
-        VLLM_USE_MODELSCOPE: True
     steps:
       - name: Check npu and CANN info
         run: |
@@ -111,5 +109,4 @@ jobs:
           VLLM_USE_MODELSCOPE: True
           VLLM_CI_RUNNER: ${{ inputs.runner }}
         run: |
-          # TODO: enable more tests
           pytest -sv ${{ inputs.tests }}
@@ -88,27 +88,31 @@ jobs:
           # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run
           # the test separately.
 
+          pytest -sv tests/e2e/singlecard/test_completion_with_prompt_embeds.py
           pytest -sv tests/e2e/singlecard/test_aclgraph.py
+          pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
           pytest -sv tests/e2e/singlecard/test_ascend_scheduler.py
           pytest -sv tests/e2e/singlecard/test_bge_model.py
           pytest -sv tests/e2e/singlecard/test_camem.py
           pytest -sv tests/e2e/singlecard/test_chunked.py
           pytest -sv tests/e2e/singlecard/test_embedding.py
-          pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
+          # pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
           pytest -sv tests/e2e/singlecard/test_guided_decoding.py
           pytest -sv tests/e2e/singlecard/test_ilama_lora.py
           pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
           pytest -sv tests/e2e/singlecard/test_quantization.py
           pytest -sv tests/e2e/singlecard/test_sampler.py
           pytest -sv tests/e2e/singlecard/test_vlm.py
+          pytest -sv tests/e2e/singlecard/multi-modal/test_internvl.py
 
           # ------------------------------------ v1 spec decode test ------------------------------------ #
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
           # Fix me: OOM error
-          #pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
+          # pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
 
-          pytest -sv tests/e2e/singlecard/ops/
+          # TODO: Move ops test to nightly test
+          #pytest -sv tests/e2e/singlecard/ops/
 
   e2e-2-cards:
     name: multicard
@@ -176,7 +180,7 @@ jobs:
         run: |
           pytest -sv tests/e2e/multicard/test_data_parallel.py
           pytest -sv tests/e2e/multicard/test_expert_parallel.py
-          pytest -sv tests/e2e/multicard/test_external_launcher.py
+          # pytest -sv tests/e2e/multicard/test_external_launcher.py
           pytest -sv tests/e2e/multicard/test_single_request_aclgraph.py
           pytest -sv tests/e2e/multicard/test_fused_moe_allgather_ep.py
           pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
 
@@ -0,0 +1,57 @@
+# Reusable workflow (workflow_call) that deletes the "vllm" LeaderWorkerSet from the
+# namespace configured in the job's container env, using kubectl and the
+# KUBECONFIG_B64 secret.
+name: 'resource clear'
+
+on:
+  workflow_call:
+    inputs:
+      runner:
+        required: false
+        type: string
+        default: linux-aarch64-a3-0
+    secrets:
+      KUBECONFIG_B64:
+        required: true
+
+
+# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+jobs:
+  resource_clear:
+    # This is a runner with no NPU for k8s controller
+    runs-on: ${{ inputs.runner }}
+    container:
+      image: m.daocloud.io/quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
+      env:
+        KUBECONFIG: /tmp/kubeconfig
+        KUBECTL: /root/.cache/.kube/kubectl
+        NAMESPACE: vllm-project
+        LEADER_POD: vllm-0
+        RESULT_FILE: /root/.cache/tests/ret/test_result.txt
+    steps:
+        # kubectl is not downloaded here: the binary is copied from the path held in
+        # $KUBECTL (container env); on ARM hosts the "_arm" suffixed file is used.
+        - name: Install kubectl
+          run: |
+            # Install kubectl
+            arch=$(uname -m)
+
+            if echo "$arch" | grep -qiE "arm|aarch64"; then
+              echo "Detected ARM architecture: $arch"
+              KUBECTL="${KUBECTL}_arm"
+            fi
+            # Quote the path so it is always passed to install as a single argument.
+            install -o root -g root -m 0755 "$KUBECTL" /usr/local/bin/kubectl
+
+            # Verify kubectl installation
+            kubectl version --client=true
+
+        # Writes the base64-encoded kubeconfig secret to the path in $KUBECONFIG.
+        - name: Decode kubeconfig from secrets
+          env:
+            # Hand the secret over through the environment instead of expanding it
+            # into the script text, so its content is never parsed as shell code.
+            KUBECONFIG_B64: ${{ secrets.KUBECONFIG_B64 }}
+          run: |
+            # Decode and save kubeconfig; keep the credentials file owner-only.
+            umask 077
+            printf '%s' "$KUBECONFIG_B64" | base64 -d > "$KUBECONFIG"
+            chmod 600 "$KUBECONFIG"
+
+        # Deletes the LeaderWorkerSet named "vllm" in $NAMESPACE. "if: always()" runs
+        # the step even when an earlier step failed, and --ignore-not-found keeps it
+        # green when there is nothing to delete.
+        - name: Clear LWS resources
+          if: always()
+          run: |
+            kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found
@@ -57,6 +57,8 @@ jobs:
             model_name: Qwen3-VL-30B-A3B-Instruct
           - runner: a2-2
             model_name: DeepSeek-V2-Lite
+          - runner: a2-4
+            model_name: Qwen3-Next-80B-A3B-Instruct
       fail-fast: false
     # test will be triggered when tag 'accuracy-test' & 'ready-for-test'
     if:  >-
 
@@ -36,7 +36,7 @@ jobs:
 
       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=v0.11.0
+          VLLM_COMMIT=83f478bb19489b41e9d208b47b4bb5a95ac171ac
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
 
       - name: Checkout repository