1 parent f0876b5 commit 8169b7c
vllm_ascend/attention/sfa_v1.py
@@ -485,7 +485,8 @@ def get_and_maybe_dequant_weights(layer: LinearBase):
 
     def _v_up_proj(self, x):
         if x.dtype in [torch.float16, torch.bfloat16] \
-                and hasattr(torch.ops._C_ascend, "batch_matmul_transpose"):
+                and hasattr(torch.ops._C_ascend, "batch_matmul_transpose") \
+                and not self.enable_sfa_cp:
             x = x.view(-1, self.num_heads, self.kv_lora_rank)
             b, _, _ = x.shape
             res = torch.empty((b, self.num_heads, self.v_head_dim),
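For context, the change adds a third condition to the fast-path guard: the fused `batch_matmul_transpose` custom op is now skipped when `enable_sfa_cp` (SFA context parallelism) is on. Below is a minimal, self-contained sketch of that dispatch pattern. Only the three-part guard and the `view`/`empty` setup come from the diff; the weight shape, the out-parameter call signature of the custom op (the hunk ends before the call), and the einsum fallback are illustrative assumptions, not code from this commit.

import torch


class VUpProjSketch:
    """Sketch of the guarded fast-path dispatch in _v_up_proj."""

    def __init__(self, num_heads=8, kv_lora_rank=64, v_head_dim=32,
                 enable_sfa_cp=False):
        self.num_heads = num_heads
        self.kv_lora_rank = kv_lora_rank
        self.v_head_dim = v_head_dim
        self.enable_sfa_cp = enable_sfa_cp
        # Hypothetical per-head up-projection weight:
        # (num_heads, kv_lora_rank, v_head_dim).
        self.w_uv = torch.randn(num_heads, kv_lora_rank, v_head_dim)

    def _v_up_proj(self, x):
        # Three-part guard mirroring the commit: half-precision input,
        # the Ascend custom op is registered, and SFA context
        # parallelism is disabled.
        if x.dtype in [torch.float16, torch.bfloat16] \
                and hasattr(torch.ops._C_ascend, "batch_matmul_transpose") \
                and not self.enable_sfa_cp:
            x = x.view(-1, self.num_heads, self.kv_lora_rank)
            b, _, _ = x.shape
            res = torch.empty((b, self.num_heads, self.v_head_dim),
                              dtype=x.dtype, device=x.device)
            # Assumed out-parameter style call; the real signature is
            # not visible in this hunk.
            torch.ops._C_ascend.batch_matmul_transpose(x, self.w_uv, res)
            return res.view(b, -1)
        # Assumed fallback: plain per-head batched matmul.
        x = x.view(-1, self.num_heads, self.kv_lora_rank)
        out = torch.einsum("bhr,hrd->bhd", x, self.w_uv.to(x.dtype))
        return out.reshape(x.shape[0], -1)


# Without the Ascend extension, hasattr(...) is False, so the fallback
# runs; with enable_sfa_cp=True the fast path is skipped either way.
proj = VUpProjSketch(enable_sfa_cp=True)
print(proj._v_up_proj(torch.randn(4, 8 * 64)).shape)  # torch.Size([4, 256])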