
Commit efdf56b

[Feature] Default EPLB num_redundant_experts to minimum valid value
When EPLB is enabled but num_redundant_experts is not specified, automatically
compute and use the minimum valid value based on:

- Number of logical experts in the model
- Expert parallel size (TP * DP)

The minimum valid value ensures at least 1 local physical expert per rank:

    min_redundant = max(0, ep_size - num_logical_experts)

This reduces friction when enabling EPLB for the first time and allows the same
configuration to work across multiple EP sizes.

Changes:

- EPLBConfig.num_redundant_experts now defaults to None instead of 0
- ModelConfig.verify_with_parallel_config() computes the minimum value when
  num_redundant_experts is None and EPLB is enabled
- Added validation that num_redundant_experts must be non-negative

Fixes #30075
1 parent ace34e3 commit efdf56b
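
For illustration only (not part of the commit): a minimal sketch of the minimum-value rule described above, using a hypothetical helper name, parameter names, and expert counts.

def min_redundant_experts(num_logical_experts: int, tp_size: int, dp_size: int) -> int:
    # EPLB treats the expert-parallel size as TP * DP (per the commit message).
    ep_size = tp_size * dp_size
    # Smallest value such that (num_logical + num_redundant) / ep_size >= 1.
    return max(0, ep_size - num_logical_experts)

# Hypothetical examples:
assert min_redundant_experts(64, tp_size=8, dp_size=8) == 0    # 64 experts cover 64 ranks
assert min_redundant_experts(64, tp_size=16, dp_size=8) == 64  # 128 ranks need 64 extra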

3 files changed: +70 -3 lines changed


tests/test_config.py

Lines changed: 33 additions & 0 deletions
@@ -1050,3 +1050,36 @@ def test_scheduler_config_init():
     with pytest.raises(AttributeError):
         # InitVar does not become an attribute
         print(SchedulerConfig.default_factory().max_model_len)
+
+
+def test_eplb_num_redundant_experts_default():
+    """Test that num_redundant_experts defaults to None and can be set."""
+    from vllm.config.parallel import EPLBConfig, ParallelConfig
+
+    # Test default is None
+    eplb_config = EPLBConfig()
+    assert eplb_config.num_redundant_experts is None
+
+    # Test explicit value
+    eplb_config_explicit = EPLBConfig(num_redundant_experts=4)
+    assert eplb_config_explicit.num_redundant_experts == 4
+
+    # Test validation for negative value
+    with pytest.raises(ValueError, match="non-negative"):
+        EPLBConfig(num_redundant_experts=-1)
+
+    # Test ParallelConfig validation - EPLB disabled with None is OK
+    parallel_config = ParallelConfig(
+        enable_eplb=False,
+        enable_expert_parallel=False,
+    )
+    # Should not raise - None is allowed when EPLB is disabled
+    assert parallel_config.eplb_config.num_redundant_experts is None
+
+    # Test ParallelConfig validation - EPLB disabled with non-zero value
+    with pytest.raises(ValueError, match="EPLB is not enabled"):
+        ParallelConfig(
+            enable_eplb=False,
+            enable_expert_parallel=False,
+            eplb_config=EPLBConfig(num_redundant_experts=4),
+        )
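
To exercise just the new default outside the suite, here is a quick sketch mirroring the test above (the EPLBConfig import path is taken from the diff; the rest is a local check):

from vllm.config.parallel import EPLBConfig

cfg = EPLBConfig()                         # num_redundant_experts left unset
assert cfg.num_redundant_experts is None   # resolved later during verification

cfg = EPLBConfig(num_redundant_experts=4)  # explicit values are still honored
assert cfg.num_redundant_experts == 4

The new test itself should be runnable with the usual pytest selection, presumably something like: pytest tests/test_config.py -k eplb_num_redundant_experts_default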

vllm/config/model.py

Lines changed: 21 additions & 0 deletions
@@ -1023,6 +1023,27 @@ def verify_with_parallel_config(
         if parallel_config.enable_expert_parallel:
             self._verify_with_expert_parallelism()
 
+        # Compute num_redundant_experts if not specified
+        if parallel_config.eplb_config.num_redundant_experts is None:
+            if parallel_config.enable_eplb:
+                num_logical_experts = self.get_num_experts()
+                # EP size is TP * DP for EPLB
+                ep_size = (parallel_config.tensor_parallel_size
+                           * parallel_config.data_parallel_size)
+                # Minimum value ensures at least 1 local physical expert per rank:
+                # (num_logical_experts + num_redundant_experts) / ep_size >= 1
+                min_redundant = max(0, ep_size - num_logical_experts)
+                parallel_config.eplb_config.num_redundant_experts = min_redundant
+                logger.info(
+                    "EPLB num_redundant_experts not specified, "
+                    "defaulting to minimum valid value: %d "
+                    "(num_logical_experts=%d, ep_size=%d)",
+                    min_redundant, num_logical_experts, ep_size
+                )
+            else:
+                # EPLB disabled, default to 0
+                parallel_config.eplb_config.num_redundant_experts = 0
+
         pipeline_parallel_size = parallel_config.pipeline_parallel_size
         if pipeline_parallel_size > 1 and not self.registry.is_pp_supported_model(
             self.architectures, self
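
The in-line comment in the hunk above states the key invariant; as a standalone arithmetic sketch (with hypothetical numbers, not taken from the commit), the computed minimum guarantees at least one local physical expert per rank:

# Hypothetical: 60 logical experts spread over TP=8 * DP=8 = 64 ranks.
num_logical_experts = 60
ep_size = 8 * 8
min_redundant = max(0, ep_size - num_logical_experts)  # -> 4
num_physical = num_logical_experts + min_redundant      # -> 64
assert num_physical // ep_size >= 1  # every rank holds at least one physical expert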

vllm/config/parallel.py

Lines changed: 16 additions & 3 deletions
@@ -53,8 +53,19 @@ class EPLBConfig:
     of the last `lb_window_size` steps will be used for rearranging experts.
     """
 
-    num_redundant_experts: int = Field(default=0, ge=0)
-    """Number of redundant experts to use for expert parallelism."""
+    num_redundant_experts: int | None = None
+    """Number of redundant experts to use for expert parallelism.
+    If None (default), the minimum valid value will be computed automatically
+    based on the number of logical experts and the expert parallel size."""
+
+    @model_validator(mode="after")
+    def _validate_num_redundant_experts(self) -> Self:
+        if self.num_redundant_experts is not None and self.num_redundant_experts < 0:
+            raise ValueError(
+                "num_redundant_experts must be non-negative, "
+                f"got {self.num_redundant_experts}"
+            )
+        return self
 
     log_balancedness: bool = False
     """
@@ -309,7 +320,9 @@ def _validate_parallel_config(self) -> Self:
                     f"TP={self.tensor_parallel_size},DP={self.data_parallel_size}."
                 )
         else:
-            if self.eplb_config.num_redundant_experts != 0:
+            # When EPLB is disabled, num_redundant_experts must be None or 0
+            if (self.eplb_config.num_redundant_experts is not None
+                    and self.eplb_config.num_redundant_experts != 0):
                 raise ValueError(
                     "num_redundant_experts is set to "
                     f"{self.eplb_config.num_redundant_experts} but EPLB is not "
