Commit bac4043

fix
Signed-off-by: MengqingCao <[email protected]>
1 parent 7702e6b commit bac4043

File tree

3 files changed, +9 -11 lines changed

vllm_ascend/patch/platform/__init__.py

Lines changed: 1 addition & 1 deletion

@@ -19,8 +19,8 @@
 import vllm_ascend.patch.platform.patch_config  # noqa
 import vllm_ascend.patch.platform.patch_distributed  # noqa
 import vllm_ascend.patch.platform.patch_mamba_config  # noqa
-import vllm_ascend.patch.platform.patch_sched_yield  # noqa
 import vllm_ascend.patch.platform.patch_mtp_predictor  # noqa
+import vllm_ascend.patch.platform.patch_sched_yield  # noqa
 
 if os.getenv("DYNAMIC_EPLB", "false") == "true" or os.getenv(
         "EXPERT_MAP_RECORD", "false") == "true":

vllm_ascend/patch/platform/patch_mtp_predictor.py

Lines changed: 6 additions & 9 deletions

@@ -2,12 +2,10 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import torch
-
 import vllm
-
+from vllm.compilation.decorators import support_torch_compile
 from vllm.config import VllmConfig
 from vllm.model_executor.models.deepseek_mtp import DeepSeekMTP
-from vllm.compilation.decorators import support_torch_compile
 
 
 def forward(
@@ -27,15 +25,15 @@ def forward(
     previous_hidden_states = self.hnorm(previous_hidden_states)
 
     hidden_states = self.eh_proj(
-        torch.cat([inputs_embeds, previous_hidden_states], dim=-1)
-    )
+        torch.cat([inputs_embeds, previous_hidden_states], dim=-1))
 
-    hidden_states, residual = self.mtp_block(
-        positions=positions, hidden_states=hidden_states, residual=None
-    )
+    hidden_states, residual = self.mtp_block(positions=positions,
+                                             hidden_states=hidden_states,
+                                             residual=None)
     hidden_states = residual + hidden_states
     return hidden_states
 
+
 # Patch this only for aclgraph support, as this is not support in vLLM 0.11.0
 @support_torch_compile
 class AscendDeepSeekMTP(DeepSeekMTP):
@@ -45,4 +43,3 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
 
 vllm.model_executor.models.deepseek_mtp.DeepSeekMultiTokenPredictorLayer.forward = forward
-vllm.model_executor.models.deepseek_mtp.DeepSeekMTP = AscendDeepSeekMTP
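
With the removed last line, the patch module still replaces DeepSeekMultiTokenPredictorLayer.forward at import time but no longer rebinds vllm.model_executor.models.deepseek_mtp.DeepSeekMTP; callers that want the @support_torch_compile-wrapped class now import AscendDeepSeekMTP explicitly (see mtp_proposer.py below). A rough, self-contained sketch of the decorate-a-subclass pattern; compile_friendly is a placeholder for a decorator such as support_torch_compile, whose real behaviour is not shown in this commit.

from typing import TypeVar

T = TypeVar("T", bound=type)


def compile_friendly(cls: T) -> T:
    # Placeholder for a class decorator like vLLM's support_torch_compile:
    # it marks the class so a runner knows it may be captured/compiled.
    cls._compile_enabled = True
    return cls


class UpstreamMTP:
    """Stand-in for the upstream DeepSeekMTP model class."""

    def forward(self, x: int) -> int:
        return x * 2


@compile_friendly
class AscendUpstreamMTP(UpstreamMTP):
    """Same behaviour, but carries the compile marker for the Ascend backend."""


assert AscendUpstreamMTP._compile_enabled
assert AscendUpstreamMTP().forward(3) == 6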

vllm_ascend/spec_decode/mtp_proposer.py

Lines changed: 2 additions & 1 deletion

@@ -11,14 +11,15 @@
 from vllm.model_executor.model_loader import get_model_loader
 from vllm.model_executor.model_loader.utils import (
     process_weights_after_loading, set_default_torch_dtype)
-from vllm.model_executor.models.deepseek_mtp import DeepSeekMTP
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.sample.metadata import SamplingMetadata
 from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
 
 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.ascend_forward_context import set_ascend_forward_context
 from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
+from vllm_ascend.patch.platform.patch_mtp_predictor import \
+    AscendDeepSeekMTP as DeepSeekMTP
 from vllm_ascend.spec_decode.interface import Proposer, SpecDcodeType
 from vllm_ascend.torchair.models.torchair_deepseek_mtp import \
     TorchairDeepSeekMTP
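
The aliased import added above is what lets the rest of mtp_proposer.py stay untouched: every existing reference to the name DeepSeekMTP now resolves to the Ascend subclass rather than the upstream class. A self-contained illustration of that aliasing trick; the class names are stand-ins, not the real vLLM classes.

class GenericMTP:
    """Stand-in for the upstream model class originally imported here."""

    backend = "generic"


class AscendMTP(GenericMTP):
    """Stand-in for the Ascend-specific subclass defined in the patch module."""

    backend = "ascend"


# Equivalent of "from ... import AscendDeepSeekMTP as DeepSeekMTP": the local
# name that unchanged code refers to now points at the platform subclass.
DeepSeekMTP = AscendMTP

assert DeepSeekMTP().backend == "ascend"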
