
Commit 6102024

eplb-fix-dev

Signed-off-by: Che Ruan <[email protected]>
1 parent 6391f06

5 files changed: +12 −18 lines

vllm_ascend/eplb/core/eplb_device_transfer_loader.py

Lines changed: 0 additions & 4 deletions
@@ -50,10 +50,6 @@ def generate_expert_d2d_transfer_task(self, expert_send_info,
             )
             return
 
-        # If neither send nor receive task is needed for this layer on this rank, return
-        if not (expert_send_info or expert_recv_info):
-            return
-
         self.updated_expert_map = updated_expert_map
 
         self.layer_id = layer_id
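
The early return removed above caused a subtle issue: a rank with no send or receive work for a layer still needs to record the updated expert map and layer id, otherwise its bookkeeping goes stale relative to ranks that did transfer weights. A minimal standalone sketch of that behavior (the class below is a hypothetical stand-in, not the repository's loader):

class TransferTaskSketch:
    """Hypothetical stand-in for the device transfer loader."""

    def __init__(self):
        self.updated_expert_map = None
        self.layer_id = None

    def generate_task(self, expert_send_info, expert_recv_info,
                      updated_expert_map, layer_id):
        # The deleted early return lived here:
        #   if not (expert_send_info or expert_recv_info):
        #       return   # skipped the bookkeeping below
        self.updated_expert_map = updated_expert_map
        self.layer_id = layer_id

task = TransferTaskSketch()
task.generate_task([], [], {0: 3}, layer_id=5)
assert task.updated_expert_map == {0: 3}  # recorded even with no transfers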

vllm_ascend/ops/expert_load_balancer.py

Lines changed: 2 additions & 2 deletions
@@ -48,7 +48,7 @@ def generate_index_dicts(self, tensor_2d):
 
     def generate_expert_placement_map(self):
         expert_placement_map = torch.full(
-            (self.layers_num, self.ranks_num, self.global_expert_num),
+            (self.layers_num, self.ranks_num, self.num_experts),
             -1,
             dtype=torch.int32,
         )
@@ -71,7 +71,7 @@ def generate_log2phy_expert_map(self, layer_id):
                 result_dict[key] = []
             result_dict[key].append(idx)
 
-        log2phy_map = torch.full((self.ranks_num, self.global_expert_num),
+        log2phy_map = torch.full((self.ranks_num, self.num_experts),
                                  -1,
                                  dtype=torch.int32)
         for rank in range(self.ranks_num):
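
Both tensors above are now sized by the same attribute after the rename, presumably so that redundant (replicated) experts are counted consistently; self.num_experts is taken here to be the total physical expert count. A toy shape check under that assumption, with made-up sizes:

import torch

layers_num, ranks_num, num_experts = 2, 4, 16  # made-up sizes

# -1 acts as the "no expert placed here" sentinel in both maps.
expert_placement_map = torch.full(
    (layers_num, ranks_num, num_experts), -1, dtype=torch.int32)
log2phy_map = torch.full((ranks_num, num_experts), -1, dtype=torch.int32)

print(expert_placement_map.shape)  # torch.Size([2, 4, 16])
print(log2phy_map.shape)           # torch.Size([4, 16])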

vllm_ascend/ops/moe/moe_mlp.py

Lines changed: 4 additions & 1 deletion
@@ -105,14 +105,17 @@ def quant_apply_mlp(hidden_states: torch.Tensor,
         group_list=group_list,
         output_dtype=torch.int32)[0]
     # act_fn: swiglu
+    group_diff = torch.diff(group_list)
+    new_group = torch.cat([group_list[0].unsqueeze(0), group_diff],
+                          dim=0)
     hidden_states, swiglu_out_scale = torch_npu.npu_dequant_swiglu_quant(
         x=hidden_states,
         weight_scale=w1_scale,
         activation_scale=pertoken_scale,
         bias=None,
         quant_scale=None,
         quant_offset=None,
-        group_index=group_list,
+        group_index=new_group,
         activate_left=True,
         quant_mode=1,
     )
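
The two added lines convert group_list from cumulative offsets into per-group token counts before handing it to the swiglu kernel, which assumes group_list arrives as a running sum (as the preceding grouped matmul uses it). The arithmetic on a toy tensor:

import torch

group_list = torch.tensor([3, 7, 12])  # cumulative token offsets per expert
group_diff = torch.diff(group_list)    # tensor([4, 5])
new_group = torch.cat([group_list[0].unsqueeze(0), group_diff], dim=0)
print(new_group)                       # tensor([3, 4, 5]) -> per-expert counts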

vllm_ascend/ops/moe/token_dispatcher.py

Lines changed: 5 additions & 10 deletions
@@ -122,18 +122,14 @@ def get_dispatch_mc2_kwargs(
         expert_map: torch.Tensor,
         global_redundant_expert_num: int = 0,
     ):
-        if self.with_quant:
-            quant_mode = 2
-            moe_expert_num = len(expert_map)
-        else:
-            quant_mode = 0
-            moe_expert_num = len(expert_map)
+        quant_mode = 2 if self.with_quant else 0
+        self.moe_expert_num = len(expert_map) + global_redundant_expert_num
         kwargs_mc2 = {
             "x": hidden_states,
             "expert_ids": topk_ids,
             "expert_shard_type": 0,
             "shared_expert_rank_num": 0,
-            "moe_expert_num": moe_expert_num,
+            "moe_expert_num": self.moe_expert_num,
             "global_bs": 0,
             "expert_token_nums_type": 0,
         }
@@ -229,15 +225,14 @@ def get_combine_mc_kwargs(self, hidden_states: torch.Tensor):
         assert self.topk_weights is not None
         assert self.topk_ids is not None
         assert self.output is not None
-        moe_expert_num = len(self.expert_map)
         # moeCombine
         kwargs_mc2 = {
             "expand_x": hidden_states,
             "expert_ids": self.topk_ids,
             "expert_scales": self.topk_weights.to(torch.float32),
             "expert_shard_type": 0,
             "shared_expert_rank_num": 0,
-            "moe_expert_num": moe_expert_num,
+            "moe_expert_num": self.moe_expert_num,
             "global_bs": 0,
         }
         if self.with_quant:
@@ -359,7 +354,7 @@ def token_dispatch(self,
             hidden_states = hidden_states * \
                 topk_weights.to(hidden_states.dtype)
         if expert_map is not None:
-            global_num_experts = len(expert_map)
+            global_num_experts = len(expert_map) + global_redundant_expert_num
             mask = (expert_map[topk_ids] != -1)
             self.topk_weights = topk_weights * mask
             first_expert_idx = get_ep_group(
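
All three hunks make the same correction: with expert-parallel load balancing, redundant replicas mean the physical expert count exceeds len(expert_map), so global_redundant_expert_num must be added wherever a total is passed to the MC2 kernels. Storing the total as self.moe_expert_num during dispatch also lets the combine step reuse the exact same value instead of recomputing it. Toy numbers:

import torch

expert_map = torch.arange(16)       # 16 logical experts
global_redundant_expert_num = 4     # 4 extra physical replicas (made up)

moe_expert_num = len(expert_map) + global_redundant_expert_num
print(moe_expert_num)               # 20 -> what dispatch and combine must agree on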

vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 1 addition & 1 deletion
@@ -250,7 +250,7 @@ def apply(
         return moe_comm_method.fused_experts(
             hidden_states=x,
             w1=layer.w13_weight,
-            w1_scale=layer.w13_weight_scale_fp32,
+            w1_scale=layer.w13_weight_scale.to(torch.float32),
             w2=layer.w2_weight,
             w2_scale=layer.w2_weight_scale,
             topk_weights=topk_weights,
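
This last hunk swaps a precomputed layer.w13_weight_scale_fp32 attribute for an on-the-fly cast of layer.w13_weight_scale; assuming the old attribute was just a cached float32 copy, the result is identical with one fewer tensor kept alive. Equivalent in isolation:

import torch

w13_weight_scale = torch.rand(8, dtype=torch.bfloat16)  # stand-in scale tensor
w1_scale = w13_weight_scale.to(torch.float32)           # cast at the call site
assert w1_scale.dtype == torch.float32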
