[Refactor] Add fia_v3 attention and remove the other attention operators.

weijinqian_v1 · weijinqian_v1 · commit 8bd9477ebf50 · 2025-11-27T14:58:10.000+08:00
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
@@ -896,8 +896,11 @@ def _make_attention_mask(self, seq_lens, position,
         if self.model_config.runner_type == "pooling" and self.model_config.pooler_config.pooling_type == "CLS":
             return self.attn_mask_builder.get_pooling_mask(self.device)
         # fia prefill situation.
-        if attn_state in [AscendAttentionState.PrefillNoCache, AscendAttentionState.PrefillCacheHit,
-                          AscendAttentionState.ChunkedPrefill]:
+        if attn_state in [
+                AscendAttentionState.PrefillNoCache,
+                AscendAttentionState.PrefillCacheHit,
+                AscendAttentionState.ChunkedPrefill
+        ]:
             return self.attn_mask_builder.get_splitfuse_attn_mask()
         # Decode-only situation.
         return None