
Commit 14a9068

fix pangu
Signed-off-by: wangli <[email protected]>
1 parent af56a36 commit 14a9068

File tree (2 files changed: +3 −10 lines)

vllm_ascend/torchair/models/torchair_pangu_moe.py
vllm_ascend/worker/model_runner_v1.py

vllm_ascend/torchair/models/torchair_pangu_moe.py

Lines changed: 3 additions & 9 deletions
@@ -540,8 +540,7 @@ def __init__(
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
-        rope_theta: float = 10000,
-        rope_scaling: Optional[Dict[str, Any]] = None,
+        rope_parameters: Dict[str, Any],
         max_position_embeddings: int = 8192,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
@@ -567,7 +566,6 @@ def __init__(
         self.q_size = self.num_heads * self.head_dim
         self.kv_size = self.num_kv_heads * self.head_dim
         self.scaling = self.head_dim**-0.5
-        self.rope_theta = rope_theta
         self.max_position_embeddings = max_position_embeddings
 
         self.qkv_proj = QKVParallelLinear(
@@ -601,8 +599,7 @@ def __init__(
             self.head_dim,
             rotary_dim=self.head_dim,
             max_position=max_position_embeddings,
-            base=rope_theta,
-            rope_scaling=rope_scaling,
+            rope_parameters=rope_parameters,
         )
         self.attn = Attention(
             self.num_heads,
@@ -655,17 +652,14 @@ def __init__(
     ) -> None:
         super().__init__()
         self.hidden_size = config.hidden_size
-        rope_theta = getattr(config, "rope_theta", 10000)
-        rope_scaling = getattr(config, "rope_scaling", None)
         max_position_embeddings = getattr(config, "max_position_embeddings",
                                           8192)
 
         self.self_attn = PanguProMoEAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
             num_kv_heads=config.num_key_value_heads,
-            rope_theta=rope_theta,
-            rope_scaling=rope_scaling,
+            rope_parameters=config.rope_parameters,
             max_position_embeddings=max_position_embeddings,
             cache_config=cache_config,
             quant_config=quant_config,
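
The functional change in this file is a calling-convention update: the attention layer no longer takes rope_theta and rope_scaling as separate arguments but a single rope_parameters dict read from config.rope_parameters and forwarded to the rotary-embedding constructor. Below is a minimal sketch of that consolidation, assuming illustrative key names; it is not the vLLM API, and the real dict contents come from the model config rather than this commit.

# Sketch only: shows the two-arguments-to-one-dict consolidation, not the vLLM API.
from typing import Any, Dict, Optional


def rope_settings_old(rope_theta: float = 10000,
                      rope_scaling: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    # Old call site: theta and scaling travel as two separate arguments.
    return {"base": rope_theta, "scaling": rope_scaling}


def rope_settings_new(rope_parameters: Dict[str, Any]) -> Dict[str, Any]:
    # New call site: one dict, taken directly from config.rope_parameters.
    # The key names below are assumptions for illustration.
    scaling = {k: v for k, v in rope_parameters.items() if k != "rope_theta"}
    return {"base": rope_parameters.get("rope_theta", 10000),
            "scaling": scaling or None}


# Equivalent results for a hypothetical config carrying only a theta value:
assert rope_settings_old(10000) == rope_settings_new({"rope_theta": 10000})

The second file's change below is unrelated cleanup: it drops a leftover debug log line from _dummy_run.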

vllm_ascend/worker/model_runner_v1.py

Lines changed: 0 additions & 1 deletion
@@ -2872,7 +2872,6 @@ def _dummy_run(
         assert aclgraph_runtime_mode is None or aclgraph_runtime_mode in {
             CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL
         }
-        logger.debug(f"aclgraph runtime : {aclgraph_runtime_mode}, ")
         # In multi-DP scenarios, there may be situations where all DP groups are executing dummy runs.
         # If sequence parallelism is enabled, it is essential to ensure that num_tokens is divisible by tp_size.
         if self.use_aclgraph and enable_sp(self.vllm_config):
