
Commit 452b4a0

cherry-pick CANN 8.3

Signed-off-by: wangxiyuan <[email protected]>
1 parent 7cc6208 commit 452b4a0

22 files changed (+182 −174 lines)

Dockerfile

Lines changed: 1 addition & 1 deletion

```diff
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.2.rc1-910b-ubuntu22.04-py3.11
+FROM quay.io/ascend/cann:8.3.rc1-910b-ubuntu22.04-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
```

Dockerfile.310p

Lines changed: 1 addition & 1 deletion

```diff
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.2.rc1-310p-ubuntu22.04-py3.11
+FROM quay.io/ascend/cann:8.3.rc1-310p-ubuntu22.04-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
```

Dockerfile.310p.openEuler

Lines changed: 1 addition & 1 deletion

```diff
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.2.rc1-310p-openeuler24.03-py3.11
+FROM quay.io/ascend/cann:8.3.rc1-310p-openeuler24.03-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
```

Dockerfile.a3

Lines changed: 1 addition & 1 deletion

```diff
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.2.rc1-a3-ubuntu22.04-py3.11
+FROM quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
```

Dockerfile.a3.openEuler

Lines changed: 1 addition & 1 deletion

```diff
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.2.rc1-a3-openeuler24.03-py3.11
+FROM quay.io/ascend/cann:8.3.rc1-a3-openeuler24.03-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
```

Dockerfile.openEuler

Lines changed: 1 addition & 1 deletion

```diff
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #

-FROM quay.io/ascend/cann:8.2.rc1-910b-openeuler24.03-py3.11
+FROM quay.io/ascend/cann:8.3.rc1-910b-openeuler24.03-py3.11

 ARG PIP_INDEX_URL="https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple"
 ARG COMPILE_CUSTOM_KERNELS=1
```

README.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -42,7 +42,7 @@ By using vLLM Ascend plugin, popular open-source models, including Transformer-l
 - OS: Linux
 - Software:
   * Python >= 3.9, < 3.12
-  * CANN >= 8.2.rc1 (Ascend HDK version refers to [here](https://www.hiascend.com/document/detail/zh/canncommercial/82RC1/releasenote/releasenote_0000.html))
+  * CANN >= 8.3.rc1 (Ascend HDK version refers to [here](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/releasenote/releasenote_0000.html))
   * PyTorch == 2.7.1, torch-npu == 2.7.1
   * vLLM (the same version as vllm-ascend)
```

README.zh.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -43,7 +43,7 @@ The vLLM Ascend plugin (`vllm-ascend`) is a community-maintained backend that lets vLLM run on Ascend NP
 - OS: Linux
 - Software:
   * Python >= 3.9, < 3.12
-  * CANN >= 8.2.rc1 (for the matching Ascend HDK version, see [here](https://www.hiascend.com/document/detail/zh/canncommercial/82RC1/releasenote/releasenote_0000.html))
+  * CANN >= 8.3.rc1 (for the matching Ascend HDK version, see [here](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/releasenote/releasenote_0000.html))
   * PyTorch == 2.7.1, torch-npu == 2.7.1
   * vLLM (same version as vllm-ascend)
```

docs/source/conf.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -75,7 +75,7 @@
     'pip_vllm_ascend_version': "0.11.0rc0",
     'pip_vllm_version': "0.11.0",
     # CANN image tag
-    'cann_image_tag': "8.2.rc1-910b-ubuntu22.04-py3.11",
+    'cann_image_tag': "8.3.rc1-910b-ubuntu22.04-py3.11",
     # vllm version in ci
     'ci_vllm_version': 'v0.11.0rc3',
 }
```
Lines changed: 99 additions & 0 deletions

# Multi Node Test

Multi-node CI is designed to test distributed scenarios for very large models, e.g. disaggregated prefill with multiple DP ranks across multiple nodes.

## How it works

The following picture shows the basic deployment view of the multi-node CI mechanism: how the GitHub Action interacts with [lws](https://lws.sigs.k8s.io/docs/overview/) (a kind of Kubernetes CRD resource).

![alt text](../../assets/deployment.png)

From the workflow perspective, we can see how the final test script is executed. The key pieces are [lws.yaml and run.sh](https://github.com/vllm-project/vllm-ascend/tree/main/tests/e2e/nightly/multi_node/scripts): the former defines how our k8s cluster is brought up, and the latter is the entry script run when each pod starts. Each node executes different logic according to the [LWS_WORKER_INDEX](https://lws.sigs.k8s.io/docs/reference/labels-annotations-and-environment-variables/) environment variable, so that multiple nodes can form a distributed cluster to perform tasks.
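To make the per-node dispatch idea concrete, here is a minimal illustrative sketch (not the actual run.sh logic; only the `LWS_WORKER_INDEX` variable name comes from the lws docs) of how a pod can pick its role from that variable:

```python
import os

# lws injects LWS_WORKER_INDEX into every pod of a replicated group;
# index 0 is conventionally the leader. Illustrative sketch only.
index = int(os.environ.get("LWS_WORKER_INDEX", "0"))

if index == 0:
    role = "leader"   # e.g. the node that exposes the API server
else:
    role = "worker"   # e.g. a node that only joins the distributed cluster

print(f"node {index} runs as: {role}")
```

The same pattern extends to any number of roles: the script reads one integer from the environment and branches, so every pod can run the identical entry script.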
![alt text](../../assets/workflow.png)

## How to contribute

1. Upload custom weights

If you need customized weights (for example, you quantized a W8A8 weight for DeepSeek-V3 and want it to run in CI), you are welcome to upload them to ModelScope's [vllm-ascend](https://www.modelscope.cn/organization/vllm-ascend) organization. If you do not have permission to upload, please contact @Potabk.

2. Add a config YAML

As the entrypoint script [run.sh](https://github.com/vllm-project/vllm-ascend/blob/0bf3f21a987aede366ec4629ad0ffec8e32fe90d/tests/e2e/nightly/multi_node/scripts/run.sh#L106) shows, on startup each k8s pod traverses all *.yaml files in the [config directory](https://github.com/vllm-project/vllm-ascend/tree/main/tests/e2e/nightly/multi_node/config/models), reading each one and executing according to its configuration. So all we need to do is add a YAML file like [DeepSeek-V3.yaml](https://github.com/vllm-project/vllm-ascend/blob/main/tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml).
Suppose you have **2 nodes** running a 1P1D setup (1 Prefiller + 1 Decoder). You might add a config file that looks like:

```yaml
test_name: "test DeepSeek-V3 disaggregated_prefill"
# the model being tested
model: "vllm-ascend/DeepSeek-V3-W8A8"
# how large the cluster is
num_nodes: 2
npu_per_node: 16
# add every env var you need here
env_common:
  VLLM_USE_MODELSCOPE: true
  OMP_PROC_BIND: false
  OMP_NUM_THREADS: 100
  HCCL_BUFFSIZE: 1024
  SERVER_PORT: 8080
disaggregated_prefill:
  enabled: true
  # node indices (a list) that meet all of these conditions:
  # - prefiller
  # - not headless (has an API server)
  prefiller_host_index: [0]
  # node indices (a list) that meet all of these conditions:
  # - decoder
  # - not headless (has an API server)
  decoder_host_index: [1]

# add each node's vllm serve CLI command, just as you would run it locally
deployment:
  - server_cmd: >
      vllm serve ...
  - server_cmd: >
      vllm serve ...
benchmarks:
  perf:
    # fill with performance test kwargs
  acc:
    # fill with accuracy test kwargs
```
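To clarify what the `disaggregated_prefill` fields mean for each node, here is a small illustrative Python sketch (not the actual run.sh parsing code) that mirrors the config above as a dict and maps a node index to the role it would play:

```python
# Illustrative only: mirrors the YAML config above as a Python dict.
# The real dispatch lives in run.sh; names below follow the YAML keys.
config = {
    "num_nodes": 2,
    "disaggregated_prefill": {
        "enabled": True,
        "prefiller_host_index": [0],  # prefiller nodes with an API server
        "decoder_host_index": [1],    # decoder nodes with an API server
    },
}

def node_role(cfg: dict, index: int) -> str:
    dp = cfg.get("disaggregated_prefill", {})
    if not dp.get("enabled"):
        return "worker"  # plain multi-node case, no prefill/decode split
    if index in dp.get("prefiller_host_index", []):
        return "prefiller"
    if index in dp.get("decoder_host_index", []):
        return "decoder"
    return "headless"  # joins the cluster but exposes no API server

print([node_role(config, i) for i in range(config["num_nodes"])])
```

With the 1P1D config above this yields one prefiller (node 0) and one decoder (node 1); any node index listed in neither list would be treated as headless.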
3. Add the case to the nightly workflow

Currently, the multi-node test workflow is defined in [vllm_ascend_test_nightly_a2/a3.yaml](https://github.com/vllm-project/vllm-ascend/blob/main/.github/workflows/vllm_ascend_test_nightly_a3.yaml):

```yaml
multi-node-tests:
  needs: single-node-tests
  if: always() && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
  strategy:
    fail-fast: false
    max-parallel: 1
    matrix:
      test_config:
        - name: multi-node-deepseek-pd
          config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-V3.yaml
          size: 2
        - name: multi-node-qwen3-dp
          config_file_path: tests/e2e/nightly/multi_node/config/models/Qwen3-235B-A3B.yaml
          size: 2
        - name: multi-node-dpsk-4node-pd
          config_file_path: tests/e2e/nightly/multi_node/config/models/DeepSeek-R1-W8A8.yaml
          size: 4
  uses: ./.github/workflows/_e2e_nightly_multi_node.yaml
  with:
    soc_version: a3
    image: m.daocloud.io/quay.io/ascend/cann:8.3.rc1-a3-ubuntu22.04-py3.11
    replicas: 1
    size: ${{ matrix.test_config.size }}
    config_file_path: ${{ matrix.test_config.config_file_path }}
```

The matrix above defines all the parameters required to add a multi-node use case. If you are adding a new use case, the parameters to pay attention to are `size` and `config_file_path`: the former is the number of nodes your case requires, and the latter is the path to the configuration file you completed in step 2.
