We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e553424 · commit 684f254 (Copy full SHA for 684f254)
vllm/platforms/cuda.py
@@ -55,15 +55,15 @@ def _get_backend_priorities(
55
return [
56
AttentionBackendEnum.CUTLASS_MLA,
57
AttentionBackendEnum.FLASHINFER_MLA,
58
- AttentionBackendEnum.FLASHMLA,
59
AttentionBackendEnum.FLASH_ATTN_MLA,
+ AttentionBackendEnum.FLASHMLA,
60
AttentionBackendEnum.TRITON_MLA,
61
AttentionBackendEnum.FLASHMLA_SPARSE,
62
]
63
else:
64
65
66
67
68
69
0 commit comments