@@ -40,8 +40,11 @@
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
     from vllm.utils import FlexibleArgumentParser
-    if not vllm_version_is('0.12.0'):
-        from vllm.attention.selector import AttentionSelectorConfig
+    try:
+        if not vllm_version_is('0.12.0'):
+            from vllm.attention.selector import AttentionSelectorConfig
+    except ImportError:
+        pass
 else:
     ModelConfig = None
     VllmConfig = None
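
The hunk above wraps the version-gated import in try/except so that resolving `AttentionSelectorConfig` cannot raise `ImportError` on vllm builds where `vllm.attention.selector` does not export it. A minimal, self-contained sketch of the pattern follows; the `vllm_version_is` stub is illustrative (the real helper lives elsewhere in vllm-ascend), and only the guard itself mirrors the diff:

```python
# Sketch of the guarded TYPE_CHECKING import above. `vllm_version_is`
# is a stub for illustration; only the try/except guard mirrors the diff.
from typing import TYPE_CHECKING


def vllm_version_is(version: str) -> bool:
    """Illustrative stub for the real vllm-ascend version helper."""
    return version == "0.12.0"


if TYPE_CHECKING:
    try:
        # On newer vllm the class exists; on older checkouts the import
        # fails, so the except clause swallows the error instead of
        # breaking type checking.
        if not vllm_version_is('0.12.0'):
            from vllm.attention.selector import AttentionSelectorConfig
    except ImportError:
        pass
```

At runtime the block is skipped entirely (`TYPE_CHECKING` is `False`), so the guard only matters to tooling that evaluates or analyzes the import.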
@@ -362,11 +365,14 @@ def get_attn_backend_cls(cls, selected_backend, *args, **kwargs):
             use_mla = kwargs["attn_selector_config"].use_mla
             use_sparse = kwargs["attn_selector_config"].use_sparse
         else:
-            use_mla = kwargs.get("use_mla", args[4] if len(args) >= 5 else None)
-            use_sparse = kwargs.get("use_sparse", args[6] if len(args) >= 7 else None)
+            use_mla = kwargs.get("use_mla",
+                                 args[4] if len(args) >= 5 else None)
+            use_sparse = kwargs.get("use_sparse",
+                                    args[6] if len(args) >= 7 else None)
         backend_map = {
             (True, False): "vllm_ascend.attention.mla_v1.AscendMLABackend",
-            (False, False): "vllm_ascend.attention.attention_v1.AscendAttentionBackend",
+            (False, False):
+            "vllm_ascend.attention.attention_v1.AscendAttentionBackend",
             (True, True): "vllm_ascend.attention.sfa_v1.AscendSFABackend",
         }
         return backend_map[(use_mla, use_sparse)]
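
The second hunk reads the flags from the new `attn_selector_config` object when vllm passes one, falls back to the legacy keyword/positional signature otherwise, and dispatches through a tuple-keyed map. Below is a runnable sketch of that flow; the `AttnSelectorCfg` dataclass, the `in kwargs` check, and the simplified signature are assumptions for illustration, while the backend paths and the `(use_mla, use_sparse)` keys are taken from the diff:

```python
# Self-contained sketch of the dispatch in get_attn_backend_cls.
# AttnSelectorCfg stands in for vllm's AttentionSelectorConfig; the
# backend paths and tuple keys come from the diff.
from dataclasses import dataclass


@dataclass
class AttnSelectorCfg:
    use_mla: bool
    use_sparse: bool


def get_attn_backend_cls(*args, **kwargs) -> str:
    if "attn_selector_config" in kwargs:
        # Newer vllm passes a single config object.
        use_mla = kwargs["attn_selector_config"].use_mla
        use_sparse = kwargs["attn_selector_config"].use_sparse
    else:
        # Legacy signature: flags arrive as keywords or at fixed
        # positional slots (indices 4 and 6 in the diff).
        use_mla = kwargs.get("use_mla", args[4] if len(args) >= 5 else None)
        use_sparse = kwargs.get("use_sparse",
                                args[6] if len(args) >= 7 else None)
    backend_map = {
        (True, False): "vllm_ascend.attention.mla_v1.AscendMLABackend",
        (False, False):
        "vllm_ascend.attention.attention_v1.AscendAttentionBackend",
        (True, True): "vllm_ascend.attention.sfa_v1.AscendSFABackend",
    }
    # (False, True) has no entry, so sparse-without-MLA raises KeyError,
    # mirroring the diff's direct indexing.
    return backend_map[(use_mla, use_sparse)]


print(get_attn_backend_cls(attn_selector_config=AttnSelectorCfg(True, False)))
# -> vllm_ascend.attention.mla_v1.AscendMLABackend
```

The tuple-keyed dict keeps the mapping declarative: adding a new backend is one entry rather than another branch in a nested if/else.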