1 parent 89dd326 commit 0d2b18d
vllm_ascend/ops/fused_moe/moe_mlp.py
@@ -127,14 +127,17 @@ def quant_apply_mlp(hidden_states: torch.Tensor,
     if quantized_hidden_states is not None:
         dispose_tensor(quantized_hidden_states)
     # act_fn: swiglu
+    group_diff = torch.diff(group_list, dim=0)
+    new_group = torch.cat([group_list[0].unsqueeze(0), group_diff],
+                          dim=0)
     hidden_states, swiglu_out_scale = torch_npu.npu_dequant_swiglu_quant(
         x=hidden_states,
         weight_scale=w1_scale[0],
         activation_scale=pertoken_scale,
         bias=None,
         quant_scale=None,
         quant_offset=None,
-        group_index=group_list,
+        group_index=new_group,
         activate_left=True,
         quant_mode=1,
     )
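For context on the change above: the added lines appear to convert a cumulative group_list (running token offsets per expert group) into per-group token counts before passing it as group_index. torch.diff recovers each group's size from consecutive offsets, and torch.cat re-attaches group_list[0], which is itself the first group's size. A minimal standalone sketch of that conversion (plain PyTorch, tensor values are illustrative; no NPU or torch_npu required):

import torch

# Hypothetical cumulative group list: groups cover tokens [0:3), [3:7), [7:12).
group_list = torch.tensor([3, 7, 12])

# Differencing consecutive offsets yields the sizes of the second and
# later groups...
group_diff = torch.diff(group_list, dim=0)  # tensor([4, 5])

# ...and the first offset is re-attached as the first group's size.
new_group = torch.cat([group_list[0].unsqueeze(0), group_diff], dim=0)

print(new_group)  # tensor([3, 4, 5]) -- per-group token counts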