
Commit 6c7c895

rebase main
Signed-off-by: daishixun <[email protected]>
2 parents 59c9b0e + 178ca16

File tree: 426 files changed, +35156 additions, -10973 deletions


.github/Dockerfile.buildwheel

Lines changed: 3 additions & 1 deletion
@@ -15,13 +15,15 @@
 # This file is a part of the vllm-ascend project.
 #
 ARG PY_VERSION=3.11
-FROM quay.io/ascend/manylinux:8.3.rc1-910b-manylinux_2_28-py${PY_VERSION}
+FROM quay.io/ascend/manylinux:8.3.rc2-910b-manylinux_2_28-py${PY_VERSION}

 ARG COMPILE_CUSTOM_KERNELS=1
+ARG SOC_VERSION="ascend910b1"

 # Define environments
 ENV DEBIAN_FRONTEND=noninteractive
 ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
+ENV SOC_VERSION=$SOC_VERSION
 RUN yum update -y && \
     yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
     rm -rf /var/cache/yum
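
The new SOC_VERSION build argument is exported into the build environment so the target SoC can be chosen when the wheel image is built. A minimal local invocation sketch, assuming the image is built from the repository root with Docker; the tag name is illustrative and the values shown are simply the defaults from the Dockerfile above:

    docker build \
      -f .github/Dockerfile.buildwheel \
      --build-arg PY_VERSION=3.11 \
      --build-arg SOC_VERSION=ascend910b1 \
      --build-arg COMPILE_CUSTOM_KERNELS=1 \
      -t vllm-ascend-wheel-builder:local .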

.github/workflows/_e2e_nightly_multi_node.yaml

Lines changed: 85 additions & 15 deletions
@@ -15,7 +15,7 @@ on:
         required: false
         type: string
         description: base image for pods
-        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
+        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11"
       config_file_path:
         required: true
         type: string
@@ -32,7 +32,7 @@ on:
         description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
       vllm_version:
         required: false
-        default: "v0.11.0"
+        default: "v0.12.0"
         type: string
         description: vllm version to use
       vllm_ascend_remote_url:
@@ -60,7 +60,7 @@ defaults:
 # only cancel in-progress runs of the same workflow
 # and ignore the lint / 8 cards test type
 concurrency:
-  group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.config_file_path }}
+  group: ascend-nightly-${{ github.workflow_ref }}-${{ github.ref }}-${{ inputs.soc_version }}
   cancel-in-progress: true

 jobs:
@@ -69,7 +69,7 @@ jobs:
     # This is the runner with no NPU for k8s controller
     runs-on: ${{ inputs.runner }}
     container:
-      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
+      image: m.daocloud.io/quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
       env:
         KUBECONFIG: /tmp/kubeconfig
         KUBECTL: /root/.cache/.kube/kubectl
@@ -106,7 +106,7 @@ jobs:
           echo "${{ secrets.KUBECONFIG_B64 }}" | base64 -d > $KUBECONFIG

       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6.0.0

       - name: Prepare scripts
         run: |
@@ -115,8 +115,39 @@

       - name: Clear resources
         run: |
-          # pre clear the crd resources created by lws
-          kubectl delete leaderworkerset vllm -n "$NAMESPACE" --ignore-not-found
+          set -euo pipefail
+
+          CRD_NAME="${CRD_NAME:-vllm}"
+          TIMEOUT=${TIMEOUT:-120}
+          SLEEP_INTERVAL=2
+
+          echo "Deleting leaderworkerset [$CRD_NAME] in namespace [$NAMESPACE]..."
+          kubectl delete leaderworkerset "$CRD_NAME" -n "$NAMESPACE" --ignore-not-found
+
+          echo "Waiting for all pods starting with 'vllm' to be deleted..."
+          START_TIME=$(date +%s)
+
+          while true; do
+            NOW=$(date +%s)
+            ELAPSED=$((NOW - START_TIME))
+
+            if [[ $ELAPSED -ge $TIMEOUT ]]; then
+              echo "Timeout reached ($TIMEOUT seconds), some pods still exist:"
+              kubectl get pods -n "$NAMESPACE" | grep '^vllm' || true
+              exit 1
+            fi
+
+            PODS_EXIST=$(kubectl get pods -n "$NAMESPACE" -o jsonpath='{.items[*].metadata.name}' 2>/dev/null | tr ' ' '\n' | grep '^vllm' || true)
+
+            if [[ -z "$PODS_EXIST" ]]; then
+              echo "All vllm pods deleted."
+              break
+            else
+              echo "Waiting for pods to be deleted: $PODS_EXIST"
+              sleep $SLEEP_INTERVAL
+            fi
+          done
+
       - name: Launch cluster
         id: launcher
         run: |
@@ -164,19 +195,58 @@

       - name: Waiting for pod ready
         run: |
-          echo "waiting for Pod [$LEADER_POD] in namespace [$NAMESPACE] to Ready..."
+          POD_PREFIX="${POD_PREFIX:-vllm-0}"
+          SIZE="${{ inputs.size }}"
+          TIMEOUT=1200 # default timeout 20 minutes
+
+          echo "Waiting for Pods in namespace [$NAMESPACE] to become Running and Ready (timeout ${TIMEOUT}s)..."
+
+          START_TIME=$(date +%s)

           while true; do
-            # get pod status
-            READY_STATUS=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}')
+            NOW=$(date +%s)
+            ELAPSED=$((NOW - START_TIME))
+            if [[ $ELAPSED -ge $TIMEOUT ]]; then
+              echo "Timeout reached after ${ELAPSED}s"
+              echo "Dumping pod status for debugging:"
+              kubectl get pods -n "$NAMESPACE"
+              kubectl describe pod "$LEADER_POD" -n "$NAMESPACE"
+              exit 1
+            fi
+
+            # 1) check follower pods
+            ALL_FOLLOWERS_READY=true
+            for ((i=1; i<SIZE; i++)); do
+              POD="${POD_PREFIX}-${i}"
+              PHASE=$(kubectl get pod "$POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
+              READY=$(kubectl get pod "$POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}' 2>/dev/null)
+
+              echo "Follower [$POD] phase=$PHASE ready=$READY"

-            if [[ "$READY_STATUS" == "true" ]]; then
-              echo "Pod [$LEADER_POD] is Ready!"
+              if [[ "$PHASE" != "Running" || "$READY" != "true" ]]; then
+                echo "Follower [$POD] not Ready yet..."
+                ALL_FOLLOWERS_READY=false
+                break
+              fi
+            done
+
+            # 2) check leader pod
+            LEADER_PHASE=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
+            LEADER_READY=$(kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}' 2>/dev/null)
+
+            echo "Leader [$LEADER_POD] phase=$LEADER_PHASE ready=$LEADER_READY"
+
+            if [[ "$LEADER_PHASE" != "Running" || "$LEADER_READY" != "true" ]]; then
+              echo "Leader not Ready yet..."
+              ALL_FOLLOWERS_READY=false
+            fi
+
+            if [[ "$ALL_FOLLOWERS_READY" == "true" ]]; then
+              echo "All follower pods and leader pod are Running and Ready — continuing."
               break
-            else
-              echo "Pod [$LEADER_POD] not ready, waiting..."
-              sleep 3
             fi
+
+            sleep 2
           done

       - name: Stream logs
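
For debugging a run outside CI, the same kubectl probes that the workflow loops over can be issued by hand. A short sketch, assuming kubectl access to the target cluster and that NAMESPACE and LEADER_POD are exported in the shell (the workflow provides both from its own environment):

    # Phase and container readiness of the leader pod, as polled by the wait loop
    kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.phase}'
    kubectl get pod "$LEADER_POD" -n "$NAMESPACE" -o jsonpath='{.status.containerStatuses[*].ready}'

    # Any vllm-prefixed pods still present after the "Clear resources" teardown
    kubectl get pods -n "$NAMESPACE" -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n' | grep '^vllm' || true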

.github/workflows/_e2e_nightly_single_node.yaml

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ on:
       image:
         required: false
         type: string
-        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11"
+        default: "swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11"
       tests:
         required: true
         type: string

.github/workflows/_e2e_nightly_single_node_models.yaml

Lines changed: 14 additions & 10 deletions
@@ -59,7 +59,7 @@ jobs:
     name: ${{inputs.model_list}} accuracy test
     runs-on: ${{ inputs.runner }}
     container:
-      image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc1-910b-ubuntu22.04-py3.11
+      image: "${{ inputs.image }}"
       env:
         VLLM_USE_MODELSCOPE: True
         GHA_VLLM_ASCEND_VERSION: ${{ inputs.vllm-ascend }}
@@ -78,15 +78,15 @@
          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/

       - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6.0.0

       - name: Install system dependencies
         run: |
           apt-get -y install `cat packages.txt`
           apt-get -y install gcc g++ cmake libnuma-dev

       - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6.0.0
         with:
           repository: vllm-project/vllm
           ref: ${{ inputs.vllm }}
@@ -108,11 +108,14 @@
         if: ${{ inputs.runner == 'linux-aarch64-a2-4' && contains(inputs.model_list, 'Qwen3-Next-80B-A3B-Instruct') }}
         shell: bash -l {0}
         run: |
-          wget -q https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/Ascend-BiSheng-toolkit_aarch64.run -O /tmp/Ascend-BiSheng-toolkit_aarch64.run
-          chmod a+x /tmp/Ascend-BiSheng-toolkit_aarch64.run
-          /tmp/Ascend-BiSheng-toolkit_aarch64.run --install
-          . /usr/local/Ascend/8.3.RC1/bisheng_toolkit/set_env.sh
-          python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev20250914-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
+          . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
+          python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
+
+      - name: Install tensorflow (for Molmo-7B-D-0924)
+        if: ${{ inputs.runner == 'linux-aarch64-a2-1' && contains(inputs.model_list, 'Molmo-7B-D-0924') }}
+        shell: bash -l {0}
+        run: |
+          pip install tensorflow --no-cache-dir

       - name: Resolve vllm-ascend version
         run: |
@@ -132,7 +135,7 @@
           echo "GHA_VLLM_ASCEND_VERSION=$RESOLVED_VERSION" >> $GITHUB_ENV

       - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6.0.0
         with:
           repository: vllm-project/vllm-ascend
           path: ./vllm-ascend
@@ -175,6 +178,7 @@
         id: report
         env:
           VLLM_WORKER_MULTIPROC_METHOD: spawn
+          HF_DATASETS_OFFLINE: True
           VLLM_USE_MODELSCOPE: True
           VLLM_CI_RUNNER: ${{ inputs.runner }}
           VLLM_VERSION: ${{ env.GHA_VLLM_VERSION }}
@@ -225,4 +229,4 @@
           path: ./benchmarks/accuracy/
           if-no-files-found: warn
           retention-days: 90
-        overwrite: true
+        overwrite: true
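
The BiSheng step above now sources an already-installed toolkit environment instead of downloading and running an installer, and it pins a newer triton_ascend wheel. A rough local reproduction sketch, assuming an aarch64 Python 3.11 environment and the CANN 8.3.RC2 toolkit installed at the path used in the workflow (the pip show name is inferred from the wheel filename):

    set -e
    . /usr/local/Ascend/ascend-toolkit/8.3.RC2/bisheng_toolkit/set_env.sh
    python3 -m pip install "https://vllm-ascend.obs.cn-north-4.myhuaweicloud.com/vllm-ascend/triton_ascend-3.2.0.dev2025110717-cp311-cp311-manylinux_2_27_aarch64.whl"
    python3 -m pip show triton-ascend   # confirm the wheel was registered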

0 commit comments
