
Commit efdf56b

[Feature] Default EPLB num_redundant_experts to minimum valid value
When EPLB is enabled but num_redundant_experts is not specified, automatically
compute and use the minimum valid value based on:

- Number of logical experts in the model
- Expert parallel size (TP * DP)

The minimum valid value ensures at least 1 local physical expert per rank:

    min_redundant = max(0, ep_size - num_logical_experts)

This reduces friction when enabling EPLB for the first time and allows the same
configuration to work across multiple EP sizes.

Changes:

- EPLBConfig.num_redundant_experts now defaults to None instead of 0
- ModelConfig.verify_with_parallel_config() computes the minimum value when
  num_redundant_experts is None and EPLB is enabled
- Added validation that num_redundant_experts must be non-negative

Fixes #30075
1 parent ace34e3 commit efdf56b
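
For illustration only (not part of the commit): a minimal sketch of the minimum-value rule described above, using a hypothetical helper name, parameter names, and expert counts.

def min_redundant_experts(num_logical_experts: int, tp_size: int, dp_size: int) -> int:
    # EPLB treats the expert-parallel size as TP * DP (per the commit message).
    ep_size = tp_size * dp_size
    # Smallest value such that (num_logical + num_redundant) / ep_size >= 1.
    return max(0, ep_size - num_logical_experts)

# Hypothetical examples:
assert min_redundant_experts(64, tp_size=8, dp_size=8) == 0    # 64 experts cover 64 ranks
assert min_redundant_experts(64, tp_size=16, dp_size=8) == 64  # 128 ranks need 64 extra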

3 files changed: +70 -3 lines changed


tests/test_config.py

Lines changed: 33 additions & 0 deletions
@@ -1050,3 +1050,36 @@ def test_scheduler_config_init():
     with pytest.raises(AttributeError):
         # InitVar does not become an attribute
         print(SchedulerConfig.default_factory().max_model_len)
+
+
+def test_eplb_num_redundant_experts_default():
+    """Test that num_redundant_experts defaults to None and can be set."""
+    from vllm.config.parallel import EPLBConfig, ParallelConfig
+
+    # Test default is None
+    eplb_config = EPLBConfig()
+    assert eplb_config.num_redundant_experts is None
+
+    # Test explicit value
+    eplb_config_explicit = EPLBConfig(num_redundant_experts=4)
+    assert eplb_config_explicit.num_redundant_experts == 4
+
+    # Test validation for negative value
+    with pytest.raises(ValueError, match="non-negative"):
+        EPLBConfig(num_redundant_experts=-1)
+
+    # Test ParallelConfig validation - EPLB disabled with None is OK
+    parallel_config = ParallelConfig(
+        enable_eplb=False,
+        enable_expert_parallel=False,
+    )
+    # Should not raise - None is allowed when EPLB is disabled
+    assert parallel_config.eplb_config.num_redundant_experts is None
+
+    # Test ParallelConfig validation - EPLB disabled with non-zero value
+    with pytest.raises(ValueError, match="EPLB is not enabled"):
+        ParallelConfig(
+            enable_eplb=False,
+            enable_expert_parallel=False,
+            eplb_config=EPLBConfig(num_redundant_experts=4),
+        )
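
To exercise just the new default outside the suite, here is a quick sketch mirroring the test above (the EPLBConfig import path is taken from the diff; the rest is a local check):

from vllm.config.parallel import EPLBConfig

cfg = EPLBConfig()                         # num_redundant_experts left unset
assert cfg.num_redundant_experts is None   # resolved later during verification

cfg = EPLBConfig(num_redundant_experts=4)  # explicit values are still honored
assert cfg.num_redundant_experts == 4

The new test itself should be runnable with the usual pytest selection, presumably something like: pytest tests/test_config.py -k eplb_num_redundant_experts_default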

vllm/config/model.py

Lines changed: 21 additions & 0 deletions
@@ -1023,6 +1023,27 @@ def verify_with_parallel_config(
         if parallel_config.enable_expert_parallel:
             self._verify_with_expert_parallelism()
 
+        # Compute num_redundant_experts if not specified
+        if parallel_config.eplb_config.num_redundant_experts is None:
+            if parallel_config.enable_eplb:
+                num_logical_experts = self.get_num_experts()
+                # EP size is TP * DP for EPLB
+                ep_size = (parallel_config.tensor_parallel_size
+                           * parallel_config.data_parallel_size)
+                # Minimum value ensures at least 1 local physical expert per rank:
+                # (num_logical_experts + num_redundant_experts) / ep_size >= 1
+                min_redundant = max(0, ep_size - num_logical_experts)
+                parallel_config.eplb_config.num_redundant_experts = min_redundant
+                logger.info(
+                    "EPLB num_redundant_experts not specified, "
+                    "defaulting to minimum valid value: %d "
+                    "(num_logical_experts=%d, ep_size=%d)",
+                    min_redundant, num_logical_experts, ep_size
+                )
+            else:
+                # EPLB disabled, default to 0
+                parallel_config.eplb_config.num_redundant_experts = 0
+
         pipeline_parallel_size = parallel_config.pipeline_parallel_size
         if pipeline_parallel_size > 1 and not self.registry.is_pp_supported_model(
             self.architectures, self
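
The in-line comment in the hunk above states the key invariant; as a standalone arithmetic sketch (with hypothetical numbers, not taken from the commit), the computed minimum guarantees at least one local physical expert per rank:

# Hypothetical: 60 logical experts spread over TP=8 * DP=8 = 64 ranks.
num_logical_experts = 60
ep_size = 8 * 8
min_redundant = max(0, ep_size - num_logical_experts)  # -> 4
num_physical = num_logical_experts + min_redundant      # -> 64
assert num_physical // ep_size >= 1  # every rank holds at least one physical expert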

vllm/config/parallel.py

Lines changed: 16 additions & 3 deletions
@@ -53,8 +53,19 @@ class EPLBConfig:
     of the last `lb_window_size` steps will be used for rearranging experts.
     """
 
-    num_redundant_experts: int = Field(default=0, ge=0)
-    """Number of redundant experts to use for expert parallelism."""
+    num_redundant_experts: int | None = None
+    """Number of redundant experts to use for expert parallelism.
+    If None (default), the minimum valid value will be computed automatically
+    based on the number of logical experts and the expert parallel size."""
+
+    @model_validator(mode="after")
+    def _validate_num_redundant_experts(self) -> Self:
+        if self.num_redundant_experts is not None and self.num_redundant_experts < 0:
+            raise ValueError(
+                "num_redundant_experts must be non-negative, "
+                f"got {self.num_redundant_experts}"
+            )
+        return self
 
     log_balancedness: bool = False
     """
@@ -309,7 +320,9 @@ def _validate_parallel_config(self) -> Self:
                     f"TP={self.tensor_parallel_size},DP={self.data_parallel_size}."
                 )
         else:
-            if self.eplb_config.num_redundant_experts != 0:
+            # When EPLB is disabled, num_redundant_experts must be None or 0
+            if (self.eplb_config.num_redundant_experts is not None
+                    and self.eplb_config.num_redundant_experts != 0):
                 raise ValueError(
                     "num_redundant_experts is set to "
                     f"{self.eplb_config.num_redundant_experts} but EPLB is not "
