
Commit 14a9068

fix pangu
Signed-off-by: wangli <[email protected]>
1 parent af56a36 commit 14a9068

File tree (2 files changed: +3 −10 lines)

vllm_ascend/torchair/models/torchair_pangu_moe.py
vllm_ascend/worker/model_runner_v1.py

vllm_ascend/torchair/models/torchair_pangu_moe.py

Lines changed: 3 additions & 9 deletions
@@ -540,8 +540,7 @@ def __init__(
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
-        rope_theta: float = 10000,
-        rope_scaling: Optional[Dict[str, Any]] = None,
+        rope_parameters: Dict[str, Any],
         max_position_embeddings: int = 8192,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
@@ -567,7 +566,6 @@ def __init__(
         self.q_size = self.num_heads * self.head_dim
         self.kv_size = self.num_kv_heads * self.head_dim
         self.scaling = self.head_dim**-0.5
-        self.rope_theta = rope_theta
         self.max_position_embeddings = max_position_embeddings
 
         self.qkv_proj = QKVParallelLinear(
@@ -601,8 +599,7 @@ def __init__(
             self.head_dim,
             rotary_dim=self.head_dim,
             max_position=max_position_embeddings,
-            base=rope_theta,
-            rope_scaling=rope_scaling,
+            rope_parameters=rope_parameters,
         )
         self.attn = Attention(
             self.num_heads,
@@ -655,17 +652,14 @@ def __init__(
     ) -> None:
         super().__init__()
         self.hidden_size = config.hidden_size
-        rope_theta = getattr(config, "rope_theta", 10000)
-        rope_scaling = getattr(config, "rope_scaling", None)
         max_position_embeddings = getattr(config, "max_position_embeddings",
                                           8192)
 
         self.self_attn = PanguProMoEAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
             num_kv_heads=config.num_key_value_heads,
-            rope_theta=rope_theta,
-            rope_scaling=rope_scaling,
+            rope_parameters=config.rope_parameters,
             max_position_embeddings=max_position_embeddings,
             cache_config=cache_config,
             quant_config=quant_config,
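
The functional change in this file is a calling-convention update: the attention layer no longer takes rope_theta and rope_scaling as separate arguments but a single rope_parameters dict read from config.rope_parameters and forwarded to the rotary-embedding constructor. Below is a minimal sketch of that consolidation, assuming illustrative key names; it is not the vLLM API, and the real dict contents come from the model config rather than this commit.

# Sketch only: shows the two-arguments-to-one-dict consolidation, not the vLLM API.
from typing import Any, Dict, Optional


def rope_settings_old(rope_theta: float = 10000,
                      rope_scaling: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    # Old call site: theta and scaling travel as two separate arguments.
    return {"base": rope_theta, "scaling": rope_scaling}


def rope_settings_new(rope_parameters: Dict[str, Any]) -> Dict[str, Any]:
    # New call site: one dict, taken directly from config.rope_parameters.
    # The key names below are assumptions for illustration.
    scaling = {k: v for k, v in rope_parameters.items() if k != "rope_theta"}
    return {"base": rope_parameters.get("rope_theta", 10000),
            "scaling": scaling or None}


# Equivalent results for a hypothetical config carrying only a theta value:
assert rope_settings_old(10000) == rope_settings_new({"rope_theta": 10000})

The second file's change below is unrelated cleanup: it drops a leftover debug log line from _dummy_run.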

vllm_ascend/worker/model_runner_v1.py

Lines changed: 0 additions & 1 deletion
@@ -2872,7 +2872,6 @@ def _dummy_run(
         assert aclgraph_runtime_mode is None or aclgraph_runtime_mode in {
             CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL
         }
-        logger.debug(f"aclgraph runtime : {aclgraph_runtime_mode}, ")
         # In multi-DP scenarios, there may be situations where all DP groups are executing dummy runs.
         # If sequence parallelism is enabled, it is essential to ensure that num_tokens is divisible by tp_size.
         if self.use_aclgraph and enable_sp(self.vllm_config):
