We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d50a577 · commit 16cad8a · Copy full SHA for 16cad8a
vllm_ascend/sample/rejection_sampler.py
@@ -9,19 +9,10 @@
9
apply_sampling_constraints,
10
generate_uniform_probs)
11
from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
12
+from vllm.triton_utils import HAS_TRITON, triton
13
+from vllm.triton_utils import triton.language as tl
14
-<<<<<<< HEAD
-from vllm_ascend.utils import vllm_version_is
15
16
-if vllm_version_is("0.11.0"):
17
- from vllm.v1.sample.rejection_sampler import compute_probs
18
-else:
19
- from vllm.v1.sample.rejection_sampler import apply_sampling_constraints
20
-
21
-import triton.language as tl
22
23
-=======
24
->>>>>>> a1f142b... Drop 0.11.0 support (#4377)
25
PLACEHOLDER_TOKEN_ID = -1
26
GREEDY_TEMPERATURE = -1
27
# Maximum number of speculative draft tokens allowed per request in a single
0 commit comments