update comment

jesse996 · jesse996 · commit dd4c177cfffe · 2025-09-22T16:07:32.000+08:00
Signed-off-by: jesse <szxfml@gmail.com>
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
@@ -3065,9 +3065,9 @@ def _build_drafter_prepare_inputs_torchair_param(self):
     def _to_list(self, sampled_token_ids: torch.Tensor) -> list[list[int]]:
         # This is a short term mitigation for issue mentioned in
         # https://github.com/vllm-project/vllm/issues/22754.
-        # `tolist` would trigger a cuda wise stream sync, which
-        # would block other copy ops from other cuda streams.
-        # A cuda event sync would avoid such a situation. Since
+        # `tolist` would trigger an NPU-wide stream sync, which
+        # would block other copy ops from other NPU streams.
+        # An NPU event sync would avoid such a situation. Since
         # this is in the critical path of every single model
         # forward loop, this has caused perf issue for a disagg
         # setup.