triton code vector core change rejection_sampler.py

yuxingcyx · yuxingcyx · commit 5afce44e3645 · 2025-12-11T14:58:54.000+08:00
Signed-off-by: yuxingcyx <yuxingchen.math@gmail.com>
diff --git a/vllm_ascend/sample/rejection_sampler.py b/vllm_ascend/sample/rejection_sampler.py
@@ -3,7 +3,6 @@
 
 import torch
 import torch.nn as nn
-import triton.runtime.driver as driver
 import vllm.v1.sample.rejection_sampler as rs
 from vllm.triton_utils import HAS_TRITON, tl, triton
 from vllm.v1.sample.metadata import SamplingMetadata
@@ -18,10 +17,18 @@
 # step. This value is chosen to be large enough to handle typical use cases.
 MAX_SPEC_LEN = 32
 
-device_properties = driver.active.utils.get_device_properties(torch.npu.current_device())
-vectorcore_num = device_properties['num_vectorcore']
+
+vectorcore_num = None
+device_properties = None
+
+
+if HAS_TRITON:
+    from triton.runtime import driver
+    device_properties = driver.active.utils.get_device_properties(torch.npu.current_device())
+    vectorcore_num = device_properties['num_vectorcore']
 #get vector core number in order for later tiling
 
+
 class AscendRejectionSampler(RejectionSampler, nn.Module):
     """
     The implementation strictly follows the algorithm described in