Skip to content

Commit 0d58dcf

Browse files
majiayu000 and claude
committed
refactor: move EPLB num_redundant_experts computation to ParallelConfig
Address reviewer feedback:
- Move num_redundant_experts computation logic from verify_with_parallel_config to a dedicated method compute_eplb_num_redundant_experts in ParallelConfig
- Call the computation method in VllmConfig.__post_init__ before verification
- Update tests to verify actual computed values instead of replicating the formula
- Add tests for the EPLB-disabled case and explicit value preservation
- Fix nested if statement lint warning (SIM102)

Signed-off-by: lif <[email protected]>

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>
Signed-off-by: lif <[email protected]>
1 parent 2f8f71a commit 0d58dcf

File tree

4 files changed

+133
-58
lines changed

4 files changed

+133
-58
lines changed

tests/test_config.py

Lines changed: 84 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,28 +1089,91 @@ def test_eplb_num_redundant_experts_default():
10891089
)
10901090

10911091

1092-
@pytest.mark.parametrize("num_experts,ep_size,expected", [
1093-
(8, 8, 0), # Divisible: 8 % 8 = 0
1094-
(8, 16, 8), # ep_size > experts: (16 - 8%16) % 16 = 8
1095-
(8, 6, 4), # Non-divisible: (6 - 8%6) % 6 = (6-2)%6 = 4
1096-
(16, 8, 0), # Divisible: 16 % 8 = 0
1097-
(10, 8, 6), # Non-divisible: (8 - 10%8) % 8 = (8-2)%8 = 6
1098-
(7, 4, 1), # Non-divisible: (4 - 7%4) % 4 = (4-3)%4 = 1
1099-
(1, 4, 3), # Single expert: (4 - 1%4) % 4 = 3
1100-
])
1101-
def test_eplb_num_redundant_experts_auto_computation(num_experts, ep_size, expected):
1102-
"""Test the formula: (ep_size - num_experts % ep_size) % ep_size.
1103-
1104-
This ensures (num_logical_experts + num_redundant_experts) is divisible
1105-
by ep_size, supporting non-standard ep_size values.
1092+
@pytest.mark.parametrize(
1093+
"num_experts,tp_size,dp_size,expected",
1094+
[
1095+
(8, 8, 1, 0), # ep_size=8, divisible: 8 % 8 = 0
1096+
(8, 8, 2, 8), # ep_size=16, ep_size > experts: need 8 redundant
1097+
(8, 2, 3, 4), # ep_size=6, non-divisible: need 4 redundant
1098+
(16, 4, 2, 0), # ep_size=8, divisible: 16 % 8 = 0
1099+
(10, 4, 2, 6), # ep_size=8, non-divisible: need 6 redundant
1100+
(7, 2, 2, 1), # ep_size=4, non-divisible: need 1 redundant
1101+
(1, 2, 2, 3), # ep_size=4, single expert: need 3 redundant
1102+
],
1103+
)
1104+
def test_eplb_num_redundant_experts_auto_computation(
1105+
num_experts, tp_size, dp_size, expected
1106+
):
1107+
"""Test that num_redundant_experts is correctly computed by ParallelConfig.
1108+
1109+
The computation ensures (num_logical_experts + num_redundant_experts)
1110+
is divisible by ep_size (= tp_size * dp_size).
11061111
"""
1107-
# Compute using the actual formula from model.py
1108-
result = (ep_size - num_experts % ep_size) % ep_size
1109-
assert result == expected, (
1110-
f"Formula failed for experts={num_experts}, ep_size={ep_size}: "
1111-
f"got {result}, expected {expected}"
1112+
from vllm.config.parallel import ParallelConfig
1113+
1114+
parallel_config = ParallelConfig(
1115+
tensor_parallel_size=tp_size,
1116+
data_parallel_size=dp_size,
1117+
enable_expert_parallel=True,
1118+
enable_eplb=True,
1119+
)
1120+
# num_redundant_experts should be None before computation
1121+
assert parallel_config.eplb_config.num_redundant_experts is None
1122+
1123+
# Call the computation method
1124+
parallel_config.compute_eplb_num_redundant_experts(num_experts)
1125+
1126+
# Verify the computed value matches expected
1127+
assert parallel_config.eplb_config.num_redundant_experts == expected, (
1128+
f"Expected num_redundant_experts={expected} for "
1129+
f"num_experts={num_experts}, ep_size={tp_size * dp_size}, "
1130+
f"got {parallel_config.eplb_config.num_redundant_experts}"
11121131
)
11131132
# Verify divisibility constraint
1114-
assert (num_experts + result) % ep_size == 0, (
1115-
f"Divisibility check failed: ({num_experts} + {result}) % {ep_size} != 0"
1133+
ep_size = tp_size * dp_size
1134+
total = num_experts + parallel_config.eplb_config.num_redundant_experts
1135+
assert total % ep_size == 0, (
1136+
f"Divisibility check failed: ({num_experts} + "
1137+
f"{parallel_config.eplb_config.num_redundant_experts}) % {ep_size} != 0"
1138+
)
1139+
1140+
1141+
def test_eplb_num_redundant_experts_disabled():
1142+
"""Test that num_redundant_experts defaults to 0 when EPLB is disabled."""
1143+
from vllm.config.parallel import ParallelConfig
1144+
1145+
parallel_config = ParallelConfig(
1146+
tensor_parallel_size=2,
1147+
data_parallel_size=1,
1148+
enable_expert_parallel=False,
1149+
enable_eplb=False,
11161150
)
1151+
# num_redundant_experts should be None before computation
1152+
assert parallel_config.eplb_config.num_redundant_experts is None
1153+
1154+
# Call the computation method
1155+
parallel_config.compute_eplb_num_redundant_experts(num_logical_experts=8)
1156+
1157+
# When EPLB is disabled, should default to 0
1158+
assert parallel_config.eplb_config.num_redundant_experts == 0
1159+
1160+
1161+
def test_eplb_num_redundant_experts_explicit_value_preserved():
1162+
"""Test that explicitly set num_redundant_experts is not overwritten."""
1163+
from vllm.config.parallel import EPLBConfig, ParallelConfig
1164+
1165+
parallel_config = ParallelConfig(
1166+
tensor_parallel_size=4,
1167+
data_parallel_size=2,
1168+
enable_expert_parallel=True,
1169+
enable_eplb=True,
1170+
eplb_config=EPLBConfig(num_redundant_experts=10),
1171+
)
1172+
# num_redundant_experts is explicitly set
1173+
assert parallel_config.eplb_config.num_redundant_experts == 10
1174+
1175+
# Call the computation method - should not override
1176+
parallel_config.compute_eplb_num_redundant_experts(num_logical_experts=8)
1177+
1178+
# Should still be the explicit value
1179+
assert parallel_config.eplb_config.num_redundant_experts == 10

vllm/config/model.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,31 +1040,6 @@ def verify_with_parallel_config(
10401040
if parallel_config.enable_expert_parallel:
10411041
self._verify_with_expert_parallelism()
10421042

1043-
# Compute num_redundant_experts if not specified
1044-
if parallel_config.eplb_config.num_redundant_experts is None:
1045-
if parallel_config.enable_eplb:
1046-
num_logical_experts = self.get_num_experts()
1047-
# EP size is TP * DP for EPLB
1048-
ep_size = (
1049-
parallel_config.tensor_parallel_size
1050-
* parallel_config.data_parallel_size
1051-
)
1052-
# Ensure (num_logical_experts + num_redundant_experts) is
1053-
# divisible by ep_size, supporting non-standard ep_size values
1054-
min_redundant = (ep_size - num_logical_experts % ep_size) % ep_size
1055-
parallel_config.eplb_config.num_redundant_experts = min_redundant
1056-
logger.info(
1057-
"EPLB num_redundant_experts not specified, "
1058-
"defaulting to minimum valid value: %d "
1059-
"(num_logical_experts=%d, ep_size=%d)",
1060-
min_redundant,
1061-
num_logical_experts,
1062-
ep_size,
1063-
)
1064-
else:
1065-
# EPLB disabled, default to 0
1066-
parallel_config.eplb_config.num_redundant_experts = 0
1067-
10681043
pipeline_parallel_size = parallel_config.pipeline_parallel_size
10691044
if pipeline_parallel_size > 1 and not self.registry.is_pp_supported_model(
10701045
self.architectures, self

vllm/config/parallel.py

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -322,21 +322,53 @@ def _validate_parallel_config(self) -> Self:
322322
f"got {self.eplb_config.num_redundant_experts}"
323323
)
324324

325-
if not self.enable_eplb:
326-
# When EPLB is disabled, num_redundant_experts must be None or 0
327-
if (
328-
self.eplb_config.num_redundant_experts is not None
329-
and self.eplb_config.num_redundant_experts != 0
330-
):
331-
raise ValueError(
332-
"num_redundant_experts is set to "
333-
f"{self.eplb_config.num_redundant_experts} but EPLB is not "
334-
"enabled. Either enable EPLB or unset "
335-
"num_redundant_experts."
336-
)
325+
# When EPLB is disabled, num_redundant_experts must be None or 0
326+
if (
327+
not self.enable_eplb
328+
and self.eplb_config.num_redundant_experts is not None
329+
and self.eplb_config.num_redundant_experts != 0
330+
):
331+
raise ValueError(
332+
"num_redundant_experts is set to "
333+
f"{self.eplb_config.num_redundant_experts} but EPLB is not "
334+
"enabled. Either enable EPLB or unset "
335+
"num_redundant_experts."
336+
)
337337

338338
return self
339339

340+
def compute_eplb_num_redundant_experts(self, num_logical_experts: int) -> None:
341+
"""Compute and set num_redundant_experts if not explicitly specified.
342+
343+
This method should be called after ParallelConfig is initialized and
344+
when the number of logical experts is known (from ModelConfig).
345+
346+
Args:
347+
num_logical_experts: The number of logical experts from the model.
348+
"""
349+
if self.eplb_config.num_redundant_experts is not None:
350+
# Already explicitly set, don't override
351+
return
352+
353+
if self.enable_eplb:
354+
# EP size is TP * DP for EPLB
355+
ep_size = self.tensor_parallel_size * self.data_parallel_size
356+
# Ensure (num_logical_experts + num_redundant_experts) is
357+
# divisible by ep_size, supporting non-standard ep_size values
358+
min_redundant = (ep_size - num_logical_experts % ep_size) % ep_size
359+
self.eplb_config.num_redundant_experts = min_redundant
360+
logger.info(
361+
"EPLB num_redundant_experts not specified, "
362+
"defaulting to minimum valid value: %d "
363+
"(num_logical_experts=%d, ep_size=%d)",
364+
min_redundant,
365+
num_logical_experts,
366+
ep_size,
367+
)
368+
else:
369+
# EPLB disabled, default to 0
370+
self.eplb_config.num_redundant_experts = 0
371+
340372
@property
341373
def world_size_across_dp(self) -> int:
342374
"""world_size_across_dp is TPxPPxDP, it is the size of the world

vllm/config/vllm.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,11 @@ def __post_init__(self):
519519
self.try_verify_and_update_config()
520520

521521
if self.model_config is not None:
522+
# Compute EPLB num_redundant_experts before verification
523+
num_experts = self.model_config.get_num_experts()
524+
if num_experts is not None:
525+
self.parallel_config.compute_eplb_num_redundant_experts(num_experts)
526+
522527
self.model_config.verify_with_parallel_config(self.parallel_config)
523528
self.model_config.verify_dual_chunk_attention_config(self.load_config)
524529

0 commit comments

Comments (0)