[Refactor] add fia_v3 attention & remove other attention operator.

weijinqian_v1 · weijinqian_v1 · commit 994d6d8968d6 · 2025-11-27T17:22:32.000+08:00
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
diff --git a/vllm_ascend/attention/attention_v1.py b/vllm_ascend/attention/attention_v1.py
@@ -560,8 +560,8 @@ def forward(
         value = value.contiguous()
 
         if self.attn_type == AttentionType.ENCODER_ONLY:
-            attn_output = self._forward_encode(query, key, value, attn_metadata,
-                                          output)
+            attn_output = self._forward_encode(query, key, value,
+                                               attn_metadata, output)
             output[:num_tokens] = attn_output[:num_tokens]
             return output