@@ -1,38 +1,25 @@
-from types import MappingProxyType
-from typing import Any, Callable, Dict, List, Mapping, Optional, Union
+from typing import Any, Callable, List, Optional, Union
 
 import torch
-from torch.nn.modules import Module
 import torch_npu
-from vllm.config import get_current_vllm_config
-from vllm.distributed import get_tensor_model_parallel_rank, get_tp_group
 from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEMethodBase,
                                                    FusedMoeWeightScaleSupported)
-from vllm.model_executor.layers.fused_moe.config import (FusedMoEConfig, FusedMoEQuantConfig,
-                                                          int4_w4a16_moe_quant_config,
-                                                          int8_w8a16_moe_quant_config,)
-from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
-                                                RowParallelLinear, UnquantizedLinearMethod)
+from vllm.model_executor.layers.fused_moe.config import (FusedMoEConfig, FusedMoEQuantConfig)
+from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase)
 from vllm.model_executor.layers.quantization import \
     QUANTIZATION_METHODS, register_quantization_config
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig, QuantizeMethodBase)
-from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod
 from vllm.model_executor.layers.quantization.utils.quant_utils import is_layer_skipped
 from vllm.model_executor.layers.quantization.awq import AWQLinearMethod
-from vllm.model_executor.layers.quantization.awq_marlin import AWQMarlinConfig, AWQMoEMethod
-from vllm.model_executor.layers.quantization.moe_wna16 import MoeWNA16Method
-from vllm.model_executor.layers.vocab_parallel_embedding import (
-    UnquantizedEmbeddingMethod, VocabParallelEmbedding)
-from vllm.model_executor.parameter import PerTensorScaleParameter
 from vllm.model_executor.utils import set_weight_attrs
 
-from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.ops.linear import AscendUnquantizedLinearMethod
 from vllm_ascend.utils import (AWQ_QUANTIZATION_METHOD)
 from vllm_ascend.ops.fused_moe.experts_selector import select_experts
 from vllm_ascend.ops.fused_moe.fused_moe import AscendUnquantizedFusedMoEMethod
 
+
 def remove_quantization_method():
     if AWQ_QUANTIZATION_METHOD in QUANTIZATION_METHODS:
         QUANTIZATION_METHODS.remove(AWQ_QUANTIZATION_METHOD)
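The hunk itself does not say why remove_quantization_method() is introduced, so the following is a hedged reading plus a minimal sketch: vLLM's register_quantization_config (imported in the diff above) rejects, as far as I know, a name that is already present in QUANTIZATION_METHODS, so the upstream "awq" entry has to be removed before an Ascend-specific config can be registered under the same name via AWQ_QUANTIZATION_METHOD. Only remove_quantization_method, register_quantization_config, QuantizationConfig and AWQ_QUANTIZATION_METHOD come from the diff; everything else, including the class name AscendAWQConfig and the decorator usage, is an assumption for illustration.

# Sketch only: assumed downstream usage, not part of this patch.
from vllm.model_executor.layers.quantization import register_quantization_config
from vllm.model_executor.layers.quantization.base_config import QuantizationConfig

from vllm_ascend.utils import AWQ_QUANTIZATION_METHOD

# Free the "awq" name that upstream vLLM registers by default;
# remove_quantization_method() is the helper defined in the diff above.
remove_quantization_method()


@register_quantization_config(AWQ_QUANTIZATION_METHOD)  # assumed decorator-style API
class AscendAWQConfig(QuantizationConfig):  # hypothetical name, skeleton only
    """Would map AWQ-quantized linear/MoE layers onto Ascend NPU kernels."""

Registering a skeleton like this succeeds at import time; the abstract QuantizationConfig methods only have to be implemented before the config is actually instantiated for a model.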
|