
Commit e0eb802

Optimize code
Signed-off-by: zzhxx <[email protected]>
1 parent e137319 commit e0eb802

2 files changed (+8, -12 lines)


vllm_ascend/ascend_config.py

Lines changed: 0 additions & 6 deletions
@@ -137,12 +137,6 @@ def __init__(self, vllm_config):
         from vllm_ascend.utils import get_flashcomm2_config_and_validate
         self.flashcomm2_oproj_tensor_parallel_size, self.flashcomm2_oproj_shared = get_flashcomm2_config_and_validate(
             self, vllm_config)
-        if self.flashcomm2_oproj_shared:
-            if self.flashcomm2_oproj_tensor_parallel_size == 0:
-                raise AssertionError(
-                    "flashcomm2_oproj_shared must be enabled with flashcomm2_oproj_tensor_parallel_size > 0"
-                )
-            logger.info("Enable Flashcomm2 with flashcomm2_oproj_shared")


 class TorchairGraphConfig:
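After this change, the config constructor simply stores whatever the helper returns; the shared-o_proj consistency check and its log line no longer live in __init__. A minimal sketch of the remaining flow, with everything unrelated to FLASHCOMM2 omitted (the class name below is a hypothetical stand-in, not the project's real AscendConfig):

# Hypothetical, trimmed-down sketch; it only mirrors the FLASHCOMM2-related
# lines that this commit keeps in the constructor.
class AscendConfigSketch:
    def __init__(self, vllm_config):
        from vllm_ascend.utils import get_flashcomm2_config_and_validate
        # Both values now come back from the helper; __init__ itself no
        # longer raises or logs for the shared-o_proj case.
        (self.flashcomm2_oproj_tensor_parallel_size,
         self.flashcomm2_oproj_shared) = get_flashcomm2_config_and_validate(
             self, vllm_config)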

vllm_ascend/utils.py

Lines changed: 8 additions & 6 deletions
@@ -941,18 +941,22 @@ def flashcomm2_enable() -> bool:
     return envs_ascend.VLLM_ASCEND_FLASHCOMM2_PARALLEL_SIZE > 0


+def flashcomm2_o_shared_enabled() -> bool:
+    return envs_ascend.VLLM_ASCEND_ENABLE_FLASHCOMM2_OSHARED > 0
+
+
 def get_flashcomm2_config_and_validate(ascend_config, vllm_config):
     flashcomm2_oproj_tp_size = envs_ascend.VLLM_ASCEND_FLASHCOMM2_PARALLEL_SIZE
     global_tp_size = vllm_config.parallel_config.tensor_parallel_size
-    flashcomm2_oproj_shared = envs_ascend.VLLM_ASCEND_ENABLE_FLASHCOMM2_OSHARED
+    flashcomm2_oproj_shared = flashcomm2_o_shared_enabled()

     if not flashcomm2_enable():
         flashcomm2_oproj_shared = False
         logger.info("FLASHCOMM2 not enable.")
         return flashcomm2_oproj_tp_size, flashcomm2_oproj_shared

     logger.info(
-        f"Enable FLASHCOMM2 with flashcomm2_oproj_tensor_parallel_size={flashcomm2_oproj_tp_size} and global_tp_size={global_tp_size}"
+        f"Enable FLASHCOMM2 with flashcomm2_oproj_tensor_parallel_size = {flashcomm2_oproj_tp_size} and oproj_shared_enabled = {flashcomm2_oproj_shared}"
     )
     if not envs_ascend.VLLM_ASCEND_ENABLE_FLASHCOMM1:
         logger.warning_once(
@@ -979,14 +983,12 @@ def get_flashcomm2_config_and_validate(ascend_config, vllm_config):
             "FLASHCOMM2 primarily targets P-scenario deployments, "
             "with additional support for hybrid deployment scenarios. "
             "It is not applicable in D-scenario environments.")
+    if flashcomm2_oproj_shared:
+        logger.info("Enable FLASHCOMM2 with oproj_shared.")

     return flashcomm2_oproj_tp_size, flashcomm2_oproj_shared


-def flashcomm2_o_shared_enabled() -> bool:
-    return get_ascend_config().flashcomm2_oproj_shared
-
-
 def get_flashcomm2_reorgnized_batch_ids(global_tp_size) -> list[list[int]]:
     # Reorganize batch_ids so that, after the all2all and reduce-scatter operation, each batch_id corresponds to the rank_id within the DP domain.
     # For example, when DP = [0, 1, 2, ..., 15] and flashcomm2_oproj_tensor_parallel_size = 2,
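The net effect on the shared-o_proj flag: it is now read from VLLM_ASCEND_ENABLE_FLASHCOMM2_OSHARED through the new flashcomm2_o_shared_enabled() helper, and it is forced off whenever FLASHCOMM2 itself is disabled. A minimal self-contained sketch of that decision flow follows; the function and parameter names are illustrative stand-ins, not part of vllm_ascend:

# Illustrative stand-alone mirror of the early-exit logic in the diff above;
# it does not import vllm_ascend and the names are hypothetical.
def resolve_flashcomm2(oproj_tp_size: int, oproj_shared_env: int) -> tuple[int, bool]:
    flashcomm2_enabled = oproj_tp_size > 0      # mirrors flashcomm2_enable()
    oproj_shared = oproj_shared_env > 0         # mirrors flashcomm2_o_shared_enabled()
    if not flashcomm2_enabled:
        # FLASHCOMM2 disabled: the shared-o_proj request is dropped.
        oproj_shared = False
    return oproj_tp_size, oproj_shared


if __name__ == "__main__":
    # FLASHCOMM2 off: the shared flag is ignored even if the env var is set.
    assert resolve_flashcomm2(0, 1) == (0, False)
    # FLASHCOMM2 on, shared o_proj requested.
    assert resolve_flashcomm2(2, 1) == (2, True)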
