1 parent f0876b5 commit 8169b7c
vllm_ascend/attention/sfa_v1.py
@@ -485,7 +485,8 @@ def get_and_maybe_dequant_weights(layer: LinearBase):
 
     def _v_up_proj(self, x):
         if x.dtype in [torch.float16, torch.bfloat16] \
-                and hasattr(torch.ops._C_ascend, "batch_matmul_transpose"):
+                and hasattr(torch.ops._C_ascend, "batch_matmul_transpose") \
+                and not self.enable_sfa_cp:
             x = x.view(-1, self.num_heads, self.kv_lora_rank)
             b, _, _ = x.shape
             res = torch.empty((b, self.num_heads, self.v_head_dim),
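For context, the change adds a third condition to the fast-path guard: the fused `batch_matmul_transpose` custom op is now skipped when `enable_sfa_cp` (SFA context parallelism) is on. Below is a minimal, self-contained sketch of that dispatch pattern. Only the three-part guard and the `view`/`empty` setup come from the diff; the weight shape, the out-parameter call signature of the custom op (the hunk ends before the call), and the einsum fallback are illustrative assumptions, not code from this commit.

import torch


class VUpProjSketch:
    """Sketch of the guarded fast-path dispatch in _v_up_proj."""

    def __init__(self, num_heads=8, kv_lora_rank=64, v_head_dim=32,
                 enable_sfa_cp=False):
        self.num_heads = num_heads
        self.kv_lora_rank = kv_lora_rank
        self.v_head_dim = v_head_dim
        self.enable_sfa_cp = enable_sfa_cp
        # Hypothetical per-head up-projection weight:
        # (num_heads, kv_lora_rank, v_head_dim).
        self.w_uv = torch.randn(num_heads, kv_lora_rank, v_head_dim)

    def _v_up_proj(self, x):
        # Three-part guard mirroring the commit: half-precision input,
        # the Ascend custom op is registered, and SFA context
        # parallelism is disabled.
        if x.dtype in [torch.float16, torch.bfloat16] \
                and hasattr(torch.ops._C_ascend, "batch_matmul_transpose") \
                and not self.enable_sfa_cp:
            x = x.view(-1, self.num_heads, self.kv_lora_rank)
            b, _, _ = x.shape
            res = torch.empty((b, self.num_heads, self.v_head_dim),
                              dtype=x.dtype, device=x.device)
            # Assumed out-parameter style call; the real signature is
            # not visible in this hunk.
            torch.ops._C_ascend.batch_matmul_transpose(x, self.w_uv, res)
            return res.view(b, -1)
        # Assumed fallback: plain per-head batched matmul.
        x = x.view(-1, self.num_heads, self.kv_lora_rank)
        out = torch.einsum("bhr,hrd->bhd", x, self.w_uv.to(x.dtype))
        return out.reshape(x.shape[0], -1)


# Without the Ascend extension, hasattr(...) is False, so the fallback
# runs; with enable_sfa_cp=True the fast path is skipped either way.
proj = VUpProjSketch(enable_sfa_cp=True)
print(proj._v_up_proj(torch.randn(4, 8 * 64)).shape)  # torch.Size([4, 256])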