[Refactor] Add fia_v3 attention & remove other attention operators.

weijinqian_v1 · weijinqian_v1 · commit af59fa28e415 · 2025-11-27T17:10:42.000+08:00
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
diff --git a/vllm_ascend/attention/attention_v1.py b/vllm_ascend/attention/attention_v1.py
@@ -560,8 +560,9 @@ def forward(
         value = value.contiguous()
 
         if self.attn_type == AttentionType.ENCODER_ONLY:
-            output = self._forward_encode(query, key, value, attn_metadata,
+            attn_output = self._forward_encode(query, key, value, attn_metadata,
                                           output)
+            output[:num_tokens] = attn_output[:num_tokens]
             return output
 
         if len(kv_cache) > 1: