Skip to content

Commit 1c96296

Browse files
committed
quick fix for mtp in full graph mode
Signed-off-by: chenbaixuan <[email protected]>
1 parent 39ed4ee commit 1c96296

File tree

2 files changed

+4
-17
lines changed

2 files changed

+4
-17
lines changed

vllm_ascend/eplb/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def get_log2phy_map(self, layer_id):
3636
return self.layers[str(layer_id)].mtp_block.mlp.experts.get_log2phy_map()
3737

3838

39-
def get_all_expert_map(self, num_moe_layers):
39+
def get_all_expert_map(self, num_moe_layers=None):
4040
if not isinstance(self, DeepSeekMultiTokenPredictor):
4141
all_loads = []
4242
num_dense_layers = self.num_dense_layers if hasattr(

vllm_ascend/worker/model_runner_v1.py

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@
6262
from vllm.model_executor.models.interfaces import (SupportsMultiModal,
6363
supports_mrope,
6464
supports_transcription)
65-
from vllm.model_executor.models.deepseek_mtp import DeepSeekMTP
6665
from vllm.model_executor.models.interfaces_base import (
6766
VllmModelForPooling, is_pooling_model, is_text_generation_model)
6867
from vllm.multimodal import MULTIMODAL_REGISTRY
@@ -3130,17 +3129,9 @@ def _dummy_pooler_run(
31303129
def eplb_warmup(self):
31313130
if self.dynamic_eplb and not self.is_eplb_warmuped:
31323131
self.is_eplb_warmuped = True
3133-
mtp_instance: Optional[DeepSeekMTP] = None
3134-
if self.speculative_config and self.speculative_config.method == 'deepseek_mtp':
3135-
assert isinstance(self.drafter, MtpProposer) and isinstance(self.drafter.model, DeepSeekMTP)
3136-
mtp_instance=self.drafter.model
3137-
self.eplb_adaptor = VllmEplbAdaptor(
3138-
model=self.model,
3139-
mtp_instance=mtp_instance,
3140-
num_mtp_layers=mtp_instance.model.num_mtp_layers
3141-
)
3132+
self.eplb_adaptor = VllmEplbAdaptor(model=self.model)
31423133
self.eplb_loader.set_adator(self.eplb_adaptor)
3143-
self.eplb_updator.set_adaptor(self.eplb_adaptor, mtp_instance.model.num_mtp_layers)
3134+
self.eplb_updator.set_adaptor(self.eplb_adaptor)
31443135
self.eplb_updator.warm_up_eplb()
31453136

31463137
def load_model(self) -> None:
@@ -3149,7 +3140,7 @@ def load_model(self) -> None:
31493140
with DeviceMemoryProfiler() as m: # noqa: SIM117
31503141
self.model = get_model(vllm_config=self.vllm_config)
31513142
if self.dynamic_eplb:
3152-
model_register(self.model, self.model_config)
3143+
model_register(self.model, self.model_config)
31533144
if is_310p():
31543145
from vllm.model_executor.layers.linear import (
31553146
MergedColumnParallelLinear, QKVParallelLinear,
@@ -3163,10 +3154,6 @@ def load_model(self) -> None:
31633154
if self.drafter:
31643155
logger.info("Loading drafter model...")
31653156
self.drafter.load_model(self.model)
3166-
if self.speculative_config and self.speculative_config.method == 'deepseek_mtp':
3167-
assert isinstance(self.drafter, MtpProposer) and isinstance(self.drafter.model, DeepSeekMTP)
3168-
mtp_instance=self.drafter.model
3169-
model_register(mtp_instance.model, self.vllm_config)
31703157
if self.drafter.name == SpecDcodeType.EAGLE3:
31713158
self.model.set_aux_hidden_state_layers(
31723159
self.model.get_eagle3_aux_hidden_state_layers())

0 commit comments

Comments
 (0)