Skip to content

Commit 96ea0e0

Browse files
[EPLB] Add log Info for moe_load Imbalance Ratio (#4482)
### What this PR does / why we need it?

Add log output for the MoE load (`moe_load`) imbalance ratio.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

- vLLM version: v0.12.0
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.12.0

---------

Signed-off-by: daishixun <[email protected]>
Co-authored-by: weijinqian0 <[email protected]>
1 parent a433f32 commit 96ea0e0

File tree

1 file changed

+41
-0
lines changed

1 file changed

+41
-0
lines changed

vllm_ascend/eplb/eplb_updator.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def __init__(self, ascend_config, loader, eplb_process: EplbProcess,
3434
self.eplb_loader = loader
3535
self.eplb_process = eplb_process
3636
self.shared_dict = self.eplb_process.shared_dict
37+
self.moe_imbalance_dict: dict[int, float] = {}
3738

3839
def set_adaptor(self, adaptor):
3940
self.adaptor = adaptor
@@ -173,8 +174,48 @@ def compute_and_set_moe_load(self, is_clear=False):
173174
logger.debug(
174175
f"[ModelRunner] Updated shared_dict['moe_load'] shape={moe_load.shape}"
175176
)
177+
178+
if dist.is_initialized() and dist.get_rank() == 0:
179+
self.compute_moe_imbalance(moe_load)
180+
self.summarize_moe_imbalance()
181+
176182
return moe_load
177183

184+
def compute_moe_imbalance(self, moe_load: torch.Tensor):
    """Rebuild ``self.moe_imbalance_dict`` with one load ratio per layer.

    ``moe_load`` is summed over its last dimension, moved to the CPU and
    cast to float. For every index along dimension 0 of that result the
    ratio ``max / (mean + 1e-6)`` is computed, logged at debug level as
    "PAR", and stored under that index.

    Args:
        moe_load: Load tensor; presumably laid out as
            (layers, cards, experts) -- TODO confirm against the caller.
    """
    # Small constant added to the mean so an all-zero layer cannot divide by 0.
    eps = 1e-6

    # Previous results are dropped before any new value is computed.
    imbalance_by_layer = self.moe_imbalance_dict
    imbalance_by_layer.clear()

    summed_load = moe_load.sum(dim=-1).cpu().float()
    num_layers = summed_load.size(0)

    for layer_idx in range(num_layers):
        row = summed_load[layer_idx]
        peak = row.max().item()
        average = row.mean().item()
        moe_load_imbalance = peak / (average + eps)

        logger.debug(f"[ModelRunner][MOE_load_stats][Layer {layer_idx}] "
                     f"PAR={moe_load_imbalance:.4f}")

        imbalance_by_layer[layer_idx] = moe_load_imbalance
202+
203+
def summarize_moe_imbalance(self):
    """Log the mean, max and min of the values in ``self.moe_imbalance_dict``.

    When the dict is empty a "No data available." message is logged
    instead. Both messages are emitted at info level; nothing is returned.
    """
    ratios = list(self.moe_imbalance_dict.values())

    if ratios:
        min_imbalance = min(ratios)
        max_imbalance = max(ratios)
        avg_imbalance = sum(ratios) / len(ratios)

        logger.info(
            f"[ModelRunner][MOE_load_stats] Peak-to-Average-Ratio: "
            f"Mean={avg_imbalance:.4f}, Max={max_imbalance:.4f}, Min={min_imbalance:.4f}"
        )
    else:
        # Nothing has been recorded, so there is nothing to aggregate.
        logger.info("[MOE_load_stats] No data available.")
218+
178219
def warm_up_eplb(self):
179220

180221
self.get_init_expert_map()

0 commit comments

Comments
 (0)