Skip to content

Commit 08441ba

Browse files
authored
Remove VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION (#4860)
VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION has been enabled by default for a long time. Let's remove it now. - vLLM version: v0.12.0 - vLLM main: vllm-project/vllm@ad32e3e Signed-off-by: wangxiyuan <[email protected]>
1 parent 37db084 commit 08441ba

File tree

2 files changed

+2
-14
lines changed

2 files changed

+2
-14
lines changed

vllm_ascend/envs.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,6 @@
9292
"VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE":
9393
lambda: int(
9494
os.getenv("VLLM_ASCEND_KV_CACHE_MEGABYTES_FLOATING_TOLERANCE", 64)),
95-
# Whether to enable the topk optimization. It's enabled by default. Please set to False if you hit any issue.
96-
# We'll remove this flag in the future once it's stable enough.
97-
"VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION":
98-
lambda: bool(
99-
int(os.getenv("VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION", '1'))),
10095
# Whether to enable mla_pa for deepseek mla decode, this flag will be removed after its available torch_npu is public accessible
10196
# and the mla_pa will be the default path of deepseek decode path.
10297
"VLLM_ASCEND_MLA_PA":

vllm_ascend/worker/model_runner_v1.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@
141141
from vllm_ascend.platform import NPUPlatform
142142
from vllm_ascend.sample.logits_processor import build_logitsprocs
143143
from vllm_ascend.sample.rejection_sampler import AscendRejectionSampler
144+
from vllm_ascend.sample.sampler import AscendSampler
144145
from vllm_ascend.spec_decode import get_spec_decode_method
145146
from vllm_ascend.spec_decode.eagle_proposer import EagleProposer
146147
from vllm_ascend.spec_decode.interface import SpecDcodeType
@@ -312,15 +313,7 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
312313
else:
313314
self.prefetch_stream = None
314315
self.dtype = self.model_config.dtype
315-
if envs_ascend.VLLM_ASCEND_ENABLE_TOPK_TOPP_OPTIMIZATION:
316-
# TODO: drop the env config to use ascend sampler by default
317-
from vllm_ascend.sample.sampler import AscendSampler
318-
319-
self.sampler = AscendSampler()
320-
else:
321-
from vllm.v1.sample.sampler import Sampler
322-
323-
self.sampler = Sampler()
316+
self.sampler = AscendSampler()
324317
self.reorder_batch_threshold: Optional[int] = None
325318

326319
# Lazy initialization, these will be set after __init__

0 commit comments

Comments
 (0)