fix yapf error

Ronald1995 · Ronald1995 · commit d9a1b9cb5e14 · 2025-11-28T19:59:25.000+08:00
Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
diff --git a/vllm_ascend/attention/attention_v1.py b/vllm_ascend/attention/attention_v1.py
@@ -348,8 +348,8 @@ def build(
                              device=query_start_loc_cpu.device)
             ])
 
-        query_start_loc = query_start_loc_cpu.pin_memory().to(self.device,
-                                                 non_blocking=True)
+        query_start_loc = query_start_loc_cpu.pin_memory().to(
+            self.device, non_blocking=True)
 
         if get_ascend_device_type() == AscendDeviceType._310P:
             if attn_state == AscendAttentionState.PrefillNoCache:
diff --git a/vllm_ascend/attention/mla_v1.py b/vllm_ascend/attention/mla_v1.py
@@ -568,41 +568,41 @@ def build(
                     )
                     chunked_context_metadata = AscendMLAPrefillMetadata.ChunkedContextMetadata(
                         cu_seq_lens=cu_seq_lens_cpu.pin_memory().to(
-                            device, non_blocking=True
-                        ),
+                            device, non_blocking=True),
                         starts=local_chunk_starts.pin_memory().to(
-                            device, non_blocking=True
-                        ),
-                        seq_tot=padded_local_chunk_seq_lens.sum(dim=1).tolist(),
+                            device, non_blocking=True),
+                        seq_tot=padded_local_chunk_seq_lens.sum(
+                            dim=1).tolist(),
                         max_seq_lens=chunk_seq_lens.max(dim=1).values.tolist(),
                         chunk_seq_lens=chunk_seq_lens,
                         chunk_seq_lens_npu=chunk_seq_lens.npu(),
                         workspace=self.chunked_prefill_workspace,
-                        padded_chunk_seq_lens_npu=padded_local_chunk_seq_lens.npu(),
-                        padded_local_chunk_seq_lens=padded_local_chunk_seq_lens.tolist(),
-                        local_context_lens_allranks=local_context_lens_allranks.tolist(),
-                        padded_local_cu_seq_lens=padded_local_cu_chunk_seq_lens_cpu.pin_memory().to(
-                            device, non_blocking=True
-                        ),
+                        padded_chunk_seq_lens_npu=padded_local_chunk_seq_lens.
+                        npu(),
+                        padded_local_chunk_seq_lens=padded_local_chunk_seq_lens
+                        .tolist(),
+                        local_context_lens_allranks=local_context_lens_allranks
+                        .tolist(),
+                        padded_local_cu_seq_lens=
+                        padded_local_cu_chunk_seq_lens_cpu.pin_memory().to(
+                            device, non_blocking=True),
                         cu_seq_lens_lst=cu_seq_lens_cpu.tolist(),
                         chunk_size=padded_local_max_context_chunk_across_ranks,
                     )
                 else:
                     chunked_context_metadata = (
                         AscendMLAPrefillMetadata.ChunkedContextMetadata(
                             cu_seq_lens=cu_seq_lens_cpu.pin_memory().to(
-                                device, non_blocking=True
-                            ),
+                                device, non_blocking=True),
                             starts=chunk_starts.pin_memory().to(
-                                device, non_blocking=True
-                            ),
+                                device, non_blocking=True),
                             seq_tot=chunk_seq_lens.sum(dim=1).tolist(),
-                            max_seq_lens=chunk_seq_lens.max(dim=1).values.tolist(),
+                            max_seq_lens=chunk_seq_lens.max(
+                                dim=1).values.tolist(),
                             chunk_seq_lens=chunk_seq_lens,
                             chunk_seq_lens_npu=chunk_seq_lens.npu(),
                             workspace=self.chunked_prefill_workspace,
-                        )
-                    )
+                        ))
             prefill_input_positions = input_positions[tokens_start:]
             cos = self.cos_cache[
                 prefill_input_positions].unsqueeze(  # type: ignore
@@ -634,7 +634,8 @@ def build(
             cos = common_attn_metadata.cos
             sin = common_attn_metadata.sin
             # Notice that num_decodes != num_decode_tokens in SpecDecoding Scenario
-            actual_seq_lengths_q = query_start_loc_cpu[1:num_decodes + 1].tolist()
+            actual_seq_lengths_q = query_start_loc_cpu[1:num_decodes +
+                                                       1].tolist()
             max_seq_lens = seq_lens[:num_decodes].max().item()
             seq_lens = seq_lens[:num_decodes]
             input_positions = input_positions[:num_decode_tokens]
diff --git a/vllm_ascend/spec_decode/mtp_proposer.py b/vllm_ascend/spec_decode/mtp_proposer.py
@@ -144,9 +144,9 @@ def __init__(
         self.arange = torch.arange(max_num_slots_for_arange,
                                    device=device,
                                    dtype=torch.int32)
-        self.arange_cpu = torch.arange(
-            max_num_slots_for_arange, device="cpu", dtype=torch.int32
-        )
+        self.arange_cpu = torch.arange(max_num_slots_for_arange,
+                                       device="cpu",
+                                       dtype=torch.int32)
 
         self.inputs_embeds = torch.zeros(
             (self.max_num_tokens, self.hidden_size),
@@ -346,7 +346,8 @@ def generate_token_ids(self,
                     self.runner.discard_request_indices.gpu,
                     self.runner.num_discarded_requests
                 )
-            self._copy_valid_sampled_token_count(next_token_ids, valid_sampled_tokens_count)
+            self._copy_valid_sampled_token_count(next_token_ids,
+                                                 valid_sampled_tokens_count)
 
         req_scheduled_tokens = scheduler_output.num_scheduled_tokens
         if self.pcp_size > 1:
@@ -426,24 +427,28 @@ def generate_token_ids(self,
         )
 
         return draft_token_ids
-    
+
     def _copy_valid_sampled_token_count(
-        self, next_token_ids: torch.Tensor, valid_sampled_tokens_count: torch.Tensor
-    ) -> None:
+            self, next_token_ids: torch.Tensor,
+            valid_sampled_tokens_count: torch.Tensor) -> None:
         if self.runner.valid_sampled_token_count_event is not None:
             default_stream = torch.npu.current_stream()
             # initialize a new stream to overlap the copy operation with
             # prepare_input of draft model.
-            with torch.npu.stream(self.runner.valid_sampled_token_count_copy_stream):
+            with torch.npu.stream(
+                    self.runner.valid_sampled_token_count_copy_stream):
                 self.runner.valid_sampled_token_count_copy_stream.wait_stream(
-                    default_stream
-                )  # type: ignore
-                self.runner.valid_sampled_token_count_cpu[
-                    : valid_sampled_tokens_count.shape[0]
-                ].copy_(valid_sampled_tokens_count, non_blocking=True)
+                    default_stream)  # type: ignore
+                self.runner.valid_sampled_token_count_cpu[:
+                                                          valid_sampled_tokens_count
+                                                          .shape[0]].copy_(
+                                                              valid_sampled_tokens_count,
+                                                              non_blocking=True
+                                                          )
                 self.runner.valid_sampled_token_count_event.record()
 
-            self.runner.input_batch.prev_sampled_token_ids = next_token_ids.unsqueeze(1)
+            self.runner.input_batch.prev_sampled_token_ids = next_token_ids.unsqueeze(
+                1)
 
     def _init_mtp_model(self):
         architecture = self.vllm_config.model_config.architecture
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py