vllm-project
diff --git a/.github/Dockerfile.buildwheel b/.github/Dockerfile.buildwheel
Lines changed: 0 additions & 2 deletions
diff --git a/.github/Dockerfile.nightly.a2 b/.github/Dockerfile.nightly.a2
Lines changed: 0 additions & 1 deletion
diff --git a/.github/Dockerfile.nightly.a3 b/.github/Dockerfile.nightly.a3
Lines changed: 0 additions & 1 deletion
diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml
Lines changed: 8 additions & 12 deletions
diff --git a/.github/workflows/vllm_ascend_test_nightly_a2.yaml b/.github/workflows/vllm_ascend_test_nightly_a2.yaml
Lines changed: 0 additions & 6 deletions
diff --git a/.github/workflows/vllm_ascend_test_nightly_a3.yaml b/.github/workflows/vllm_ascend_test_nightly_a3.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/vllm_ascend_test_pr_full.yaml b/.github/workflows/vllm_ascend_test_pr_full.yaml
Lines changed: 4 additions & 0 deletions
diff --git a/.github/workflows/vllm_ascend_test_pr_light.yaml b/.github/workflows/vllm_ascend_test_pr_light.yaml
Lines changed: 4 additions & 3 deletions
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/Dockerfile b/Dockerfile
Lines changed: 0 additions & 2 deletions
@@ -17,12 +17,10 @@
 ARG PY_VERSION=3.11
 FROM quay.io/ascend/manylinux:8.3.rc2-910b-manylinux_2_28-py${PY_VERSION}
 
-ARG COMPILE_CUSTOM_KERNELS=1
 ARG SOC_VERSION="ascend910b1"
 
 # Define environments
 ENV DEBIAN_FRONTEND=noninteractive
-ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
 ENV SOC_VERSION=$SOC_VERSION
 RUN yum update -y && \
     yum install -y python3-pip git vim wget net-tools gcc gcc-c++ make cmake numactl-devel && \
 
@@ -23,7 +23,6 @@ ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git"
 
 # Define environments
 ENV DEBIAN_FRONTEND=noninteractive
-ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
 
 WORKDIR /workspace
 
 
@@ -23,7 +23,6 @@ ARG AIS_BENCH_URL="https://gitee.com/aisbench/benchmark.git"
 
 # Define environments
 ENV DEBIAN_FRONTEND=noninteractive
-ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
 
 WORKDIR /workspace
 
 
@@ -25,6 +25,7 @@ jobs:
       env:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True
+        TRANSFORMERS_OFFLINE: 1
     steps:
       - name: Check npu and CANN info
         run: |
@@ -74,9 +75,10 @@ jobs:
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
         if: ${{ inputs.type == 'light' }}
         run: |
-          # pytest -sv tests/e2e/singlecard/test_aclgraph.py
+          # pytest -sv tests/e2e/singlecard/test_aclgraph_accuracy.py
           # pytest -sv tests/e2e/singlecard/test_quantization.py
           pytest -sv tests/e2e/singlecard/test_vlm.py::test_multimodal_vl
+          pytest -sv tests/e2e/singlecard/pooling/test_classification.py::test_classify_correctness
 
       - name: Run e2e test
         env:
@@ -89,25 +91,21 @@ jobs:
           # the test separately.
 
           pytest -sv tests/e2e/singlecard/test_completion_with_prompt_embeds.py
-          pytest -sv tests/e2e/singlecard/test_aclgraph.py
+          pytest -sv tests/e2e/singlecard/test_aclgraph_accuracy.py
           pytest -sv tests/e2e/singlecard/test_aclgraph_mem.py
-          pytest -sv tests/e2e/singlecard/test_bge_model.py
           pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_embedding.py
-          # pytest -sv tests/e2e/singlecard/test_embedding_aclgraph.py
           pytest -sv tests/e2e/singlecard/test_guided_decoding.py
           # FIXME: LoRA does not work with torch 2.8; re-enable the test below once that is resolved.
           #pytest -sv tests/e2e/singlecard/test_ilama_lora.py
           pytest -sv tests/e2e/singlecard/test_profile_execute_duration.py
           pytest -sv tests/e2e/singlecard/test_quantization.py
           pytest -sv tests/e2e/singlecard/test_sampler.py
           pytest -sv tests/e2e/singlecard/test_vlm.py
-          pytest -sv tests/e2e/singlecard/multi-modal/test_internvl.py
           pytest -sv tests/e2e/singlecard/test_xlite.py
+          pytest -sv tests/e2e/singlecard/pooling/
 
           # ------------------------------------ v1 spec decode test ------------------------------------ #
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_correctness.py
-          pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_mtp_torchair_correctness.py
           pytest -sv tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py
 
   e2e-2-cards:
@@ -119,6 +117,7 @@ jobs:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True
         HCCL_BUFFSIZE: 1024
+        TRANSFORMERS_OFFLINE: 1
     steps:
       - name: Check npu and CANN info
         run: |
@@ -170,10 +169,6 @@ jobs:
         if: ${{ inputs.type == 'light' }}
         run: |
           pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
-          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_qwen3_moe_with_torchair
-          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_deepseekv2lite_with_torchair
-          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_deepseekv2lite_with_torchair_v1scheduler
-          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py::test_e2e_deepseekv2lite_with_nz
 
       - name: Run vllm-project/vllm-ascend test (full)
         env:
@@ -183,7 +178,6 @@ jobs:
         run: |
           pytest -sv tests/e2e/multicard/test_quantization.py
           pytest -sv tests/e2e/multicard/test_aclgraph_capture_replay.py
-          pytest -sv tests/e2e/multicard/test_torchair_graph_mode.py
           pytest -sv tests/e2e/multicard/test_full_graph_mode.py
           pytest -sv tests/e2e/multicard/test_data_parallel.py
           pytest -sv tests/e2e/multicard/test_expert_parallel.py
@@ -220,6 +214,7 @@ jobs:
       env:
         VLLM_LOGGING_LEVEL: ERROR
         VLLM_USE_MODELSCOPE: True
+        TRANSFORMERS_OFFLINE: 1
     steps:
       - name: Check npu and CANN info
         run: |
@@ -272,6 +267,7 @@ jobs:
         run: |
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_multistream_moe
           pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W4A8DYNAMIC
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_Kimi_K2_Thinking_W4A16
           # pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_TP2_WITH_EP
           # pytest -sv tests/e2e/multicard/test_qwen3_moe.py::test_models_distributed_Qwen3_MOE_W8A8_WITH_EP
           pytest -sv tests/e2e/multicard/test_data_parallel_tp2.py
 
@@ -82,7 +82,6 @@ jobs:
           - os: linux-aarch64-a2-1
             model_list:
               - Qwen3-8B
-              - Qwen2.5-VL-7B-Instruct
               - Qwen2-Audio-7B-Instruct
               - Qwen3-8B-W8A8
               - Qwen3-VL-8B-Instruct
@@ -91,7 +90,6 @@ jobs:
           - os: linux-aarch64-a2-1
             model_list:
               - ERNIE-4.5-21B-A3B-PT
-              - gemma-2-9b-it
               - gemma-3-4b-it
               - internlm-7b
               - InternVL3_5-8B-hf
@@ -101,7 +99,6 @@ jobs:
             model_list:
               - Qwen3-30B-A3B
               - Qwen3-VL-30B-A3B-Instruct
-              - DeepSeek-V2-Lite
               - Qwen3-30B-A3B-W8A8
           - os: linux-aarch64-a2-4
             model_list:
@@ -127,9 +124,6 @@ jobs:
           - name: multi-node-deepseek-dp
             config_file_path: DeepSeek-R1-W8A8-A2.yaml
             size: 2
-          - name: multi-node-deepseek-dp-torchair
-            config_file_path: DeepSeek-R1-W8A8-A2-torchair.yaml
-            size: 2
     uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
     with:
       soc_version: a2
 
@@ -53,7 +53,7 @@ jobs:
             config_file_path: DeepSeek-V3.yaml
             size: 2
           - name: multi-node-qwen3-dp
-            config_file_path: Qwen3-235B-A3B.yaml
+            config_file_path: Qwen3-235B-A22B.yaml
             size: 2
           - name: multi-node-dpsk-4node-pd
             config_file_path: DeepSeek-R1-W8A8.yaml
 
@@ -44,6 +44,10 @@ jobs:
       e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
       ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
     steps:
+      - name: Setup git proxy
+        run: |
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
       # NOTE: Do not bump the checkout version; newer versions have issues on self-hosted runners.
       - uses: actions/checkout@v4
       - uses: dorny/paths-filter@v3
 
@@ -49,6 +49,10 @@ jobs:
       e2e_tracker: ${{ steps.filter.outputs.e2e_tracker }}
       ut_tracker: ${{ steps.filter.outputs.ut_tracker }}
     steps:
+      - name: Setup git proxy
+        run: |
+          git config --global --add safe.directory "$GITHUB_WORKSPACE"
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
       # NOTE: Do not bump the checkout version; newer versions have issues on self-hosted runners.
       - uses: actions/checkout@v4
       - uses: dorny/paths-filter@v3
@@ -130,12 +134,9 @@ jobs:
         run: |
           export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/arm64-linux/devlib
           pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
-            --ignore tests/ut/torchair/models/test_torchair_deepseek_mtp.py \
-            --ignore tests/ut/torchair/models/test_torchair_deepseek_v2.py \
             --ignore tests/ut/model_loader/netloader/test_netloader_elastic.py \
             --ignore tests/ut/kv_connector/test_remote_prefill_lifecycle.py \
             --ignore tests/ut/kv_connector/test_remote_decode_lifecycle.py \
-            --ignore tests/ut/kv_connector/test_llmdatadist_connector.py \
             --ignore tests/ut/core/test_scheduler_dynamic_batch.py
 
       - name: Upload coverage to Codecov
 
@@ -12,7 +12,7 @@ repos:
     - id: codespell
       args: [
         --toml, pyproject.toml,
-        '--skip', 'tests/e2e/multicard/test_torchair_graph_mode.py,csrc/**,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml',
+        '--skip', 'csrc/**,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml',
         '-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,ArchType,AND,ND'
       ]
       additional_dependencies:
 
@@ -18,13 +18,11 @@
 FROM quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11
 
 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
-ARG COMPILE_CUSTOM_KERNELS=1
 ARG MOONCAKE_TAG="v0.3.7.post2"
 ARG SOC_VERSION="ascend910b1"
 
 # Define environments
 ENV DEBIAN_FRONTEND=noninteractive
-ENV COMPILE_CUSTOM_KERNELS=${COMPILE_CUSTOM_KERNELS}
 ENV SOC_VERSION=$SOC_VERSION
 
 WORKDIR /workspace
Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@ repos:`
`12`	`12`	`- id: codespell`
`13`	`13`	`args: [`
`14`	`14`	`--toml, pyproject.toml,`
`15`		`- '--skip', 'tests/e2e/multicard/test_torchair_graph_mode.py,csrc/**,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml',`
	`15`	`+ '--skip', 'csrc/**,tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**,.github/**,typos.toml',`
`16`	`16`	`'-L', 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,ArchType,AND,ND'`
`17`	`17`	`]`
`18`	`18`	`additional_dependencies:`