Skip to content

Commit 883f351

Browse files
committed
fix ut by triton torch_npu._inductor
Signed-off-by: Meihan-chen <[email protected]>
1 parent c7a42f8 commit 883f351

File tree

3 files changed

+13
-12
lines changed

3 files changed

+13
-12
lines changed

tests/e2e/nightly/ops/triton/test_rope.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
import gc
2+
import sys
3+
from unittest.mock import MagicMock
24

35
import pytest
46
import torch
57

8+
if 'torch_npu._inductor' not in sys.modules:
9+
sys.modules['torch_npu._inductor'] = MagicMock()
610
from vllm_ascend.ops.triton.rope import rope_forward_triton
711
from vllm_ascend.ops.triton.triton_utils import init_device_properties_triton
812

tests/ut/attention/test_sfa_v1.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1+
import sys
12
from unittest.mock import MagicMock
23

34
import torch
45
from vllm.v1.attention.backends.utils import AttentionCGSupport
56

67
from tests.ut.base import TestBase
78
from vllm_ascend.attention.attention_v1 import AscendAttentionState
9+
10+
if 'torch_npu._inductor' not in sys.modules:
11+
sys.modules['torch_npu._inductor'] = MagicMock()
12+
813
from vllm_ascend.attention.sfa_v1 import (AscendSFABackend, AscendSFAImpl,
914
AscendSFAMetadata,
1015
AscendSFAMetadataBuilder)

tests/ut/sample/test_rejection_sampler.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -128,18 +128,10 @@ def test_expand_batch_to_tokens(self):
128128
cu_num_tokens = torch.tensor([2, 5, 7])
129129
num_tokens = 7
130130

131-
with patch("vllm_ascend.sample.rejection_sampler.expand_pytorch"
132-
) as mock_kernel:
133-
expand_batch_to_tokens(x, cu_num_tokens, num_tokens)
134-
mock_kernel.assert_called_once()
135-
args = mock_kernel.call_args[0]
136-
assert (args[1] == x).all()
137-
assert (args[2] == cu_num_tokens).all()
138-
139-
# Run actual function
140-
result = expand_batch_to_tokens(x, cu_num_tokens, num_tokens)
141-
expected = torch.tensor([10, 10, 20, 20, 20, 30, 30])
142-
assert torch.equal(result, expected)
131+
with patch("vllm_ascend.sample.rejection_sampler.HAS_TRITON", False):
132+
result = expand_batch_to_tokens(x, cu_num_tokens, num_tokens)
133+
expected = torch.tensor([10, 10, 20, 20, 20, 30, 30])
134+
assert torch.equal(result, expected)
143135

144136
def test_sample_recovered_tokens_pytorch_ngram(self):
145137
"""Test recovered token sampling under n-gram mode"""

0 commit comments

Comments (0)