@@ -122,10 +122,7 @@ FORCEINLINE void copy_x(BLASLONG n, IFLOAT *src, IFLOAT *dest, BLASLONG inc_src)
122122FORCEINLINE void copy_y_beta (BLASLONG n , FLOAT * src , FLOAT * dest , BLASLONG inc_src , FLOAT beta )
123123{
124124 if (beta == (FLOAT )0 ) {
125- for (BLASLONG i = 0 ; i < n ; i ++ ) {
126- * dest ++ = (FLOAT )0 ;
127- src += inc_src ;
128- }
125+ memset (dest , 0 , n * sizeof (FLOAT ));
129126 } else if (beta == (FLOAT )1 ) {
130127 for (BLASLONG i = 0 ; i < n ; i ++ ) {
131128 * dest ++ = * src ;
@@ -139,13 +136,18 @@ FORCEINLINE void copy_y_beta(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_s
139136 }
140137}
141138
139+ FORCEINLINE void move_y (BLASLONG n , FLOAT * src , FLOAT * dest , BLASLONG inc_dest )
140+ {
141+ for (BLASLONG i = 0 ; i < n ; i ++ ) {
142+ * dest = * src ++ ;
143+ dest += inc_dest ;
144+ }
145+ }
146+
142147FORCEINLINE void copy_y (BLASLONG n , FLOAT * src , FLOAT * dest , BLASLONG inc_src , FLOAT beta )
143148{
144149 if (beta == (FLOAT )0 ) {
145- for (BLASLONG i = 0 ; i < n ; i ++ ) {
146- * dest = * src ++ ;
147- dest += inc_src ;
148- }
150+ move_y (n , src , dest , inc_src );
149151 } else if (beta == (FLOAT )1 ) {
150152 for (BLASLONG i = 0 ; i < n ; i ++ ) {
151153 * dest += * src ++ ;
@@ -159,14 +161,6 @@ FORCEINLINE void copy_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src, F
159161 }
160162}
161163
162- FORCEINLINE void move_y (BLASLONG n , FLOAT * src , FLOAT * dest , BLASLONG inc_dest )
163- {
164- for (BLASLONG i = 0 ; i < n ; i ++ ) {
165- * dest = * src ++ ;
166- dest += inc_dest ;
167- }
168- }
169-
170164static void BF16GEMV_N_beta (BLASLONG n , FLOAT * output_vector , FLOAT * input_vector , FLOAT beta )
171165{
172166 if (beta == (FLOAT )0 ) {
0 commit comments