Use torch.tensor(..., pin_memory=True) instead of torch.from_numpy(...).pin_memory()

Ronald1995 · Ronald1995 · commit dbc38d7fff90 · 2025-11-28T14:58:38.000+08:00
Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
@@ -2122,7 +2122,7 @@ def _calc_spec_decode_metadata(
                 cu_num_scheduled_tokens - num_sampled_tokens,
                 num_sampled_tokens)
             logits_indices_pcp += arange
-            logits_indices_pcp = torch.from_numpy(logits_indices_pcp).pin_memory().to(
+            logits_indices_pcp = torch.tensor(logits_indices_pcp, pin_memory=True).to(
                 self.device, non_blocking=True)
 
         # Compute the bonus logits indices.
@@ -2145,28 +2145,23 @@ def _calc_spec_decode_metadata(
 
         # TODO: Optimize the CPU -> NPU copy.
         cu_num_draft_tokens = (
-            torch.from_numpy(cu_num_draft_tokens)
-            .pin_memory()
+            torch.tensor(cu_num_draft_tokens, pin_memory=True)
             .to(self.device, non_blocking=True)
         )
         cu_num_sampled_tokens = (
-            torch.from_numpy(cu_num_sampled_tokens)
-            .pin_memory()
+            torch.tensor(cu_num_sampled_tokens, pin_memory=True)
             .to(self.device, non_blocking=True)
         )
         logits_indices = (
-            torch.from_numpy(logits_indices)
-            .pin_memory()
+            torch.tensor(logits_indices, pin_memory=True)
             .to(self.device, non_blocking=True)
         )
         target_logits_indices = (
-            torch.from_numpy(target_logits_indices)
-            .pin_memory()
+            torch.tensor(target_logits_indices, pin_memory=True)
             .to(self.device, non_blocking=True)
         )
         bonus_logits_indices = (
-            torch.from_numpy(bonus_logits_indices)
-            .pin_memory()
+            torch.tensor(bonus_logits_indices, pin_memory=True)
             .to(self.device, non_blocking=True)
         )