Fix synchronization error caused by calling tolist() on seq_lens

Ronald1995 · Ronald1995 · commit f2c8426e2eb8 · 2025-11-28T09:53:14.000+08:00
Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
diff --git a/vllm_ascend/spec_decode/mtp_proposer.py b/vllm_ascend/spec_decode/mtp_proposer.py
@@ -144,6 +144,9 @@ def __init__(
         self.arange = torch.arange(max_num_slots_for_arange,
                                    device=device,
                                    dtype=torch.int32)
+        self.arange_cpu = torch.arange(
+            max_num_slots_for_arange, device="cpu", dtype=torch.int32
+        )
 
         self.inputs_embeds = torch.zeros(
             (self.max_num_tokens, self.hidden_size),
@@ -814,7 +817,7 @@ def _propose(
             # When disable_padded_drafter_batch=False, it should not to be updating these params, maybe.
             if self.speculative_config.disable_padded_drafter_batch or \
                     aclgraph_runtime_mode != CUDAGraphMode.FULL:
-                attn_metadata_i.decode.actual_seq_lengths_q = attn_metadata_i.query_start_loc[
+                attn_metadata_i.decode.actual_seq_lengths_q = self.arange_cpu[
                     1:batch_size + 1].tolist()
                 if aclgraph_runtime_mode == CUDAGraphMode.FULL:
                     attn_metadata_i.decode.actual_seq_lengths_q = \