File tree: 1 file changed, +2 -3 lines changed
vllm_ascend/ops/fused_moe: 1 file changed, +2 -3 lines changed
Columns: original file line number | diff line number | diff line change
@@ -127,16 +127,14 @@ def quant_apply_mlp(hidden_states: torch.Tensor,
127 127    if quantized_hidden_states is not None:
128 128        dispose_tensor(quantized_hidden_states)
129 129    # act_fn: swiglu
130     -  group_diff = torch.diff(group_list)
131     -  new_group = torch.cat([group_diff[0].unsqueeze(0), group_diff], dim=0)
132 130    hidden_states, swiglu_out_scale = torch_npu.npu_dequant_swiglu_quant(
133 131        x=hidden_states,
134 132        weight_scale=w1_scale,
135 133        activation_scale=pertoken_scale,
136 134        bias=None,
137 135        quant_scale=None,
138 136        quant_offset=None,
139     -      group_index=new_group,
    137 +      group_index=group_list,
140 138        activate_left=True,
141 139        quant_mode=1,
142 140    )
@@ -298,3 +296,4 @@ def unified_apply_mlp(hidden_states: torch.Tensor,
298 296        group_list_type=group_list_type,
299 297        topk_scales=topk_scales,
300 298        need_trans=need_trans)
    299 +
You can’t perform that action at this time.
0 commit comments