1+ /*******************************************************************************
2+ Copyright (c) 2015, The OpenBLAS Project
3+ All rights reserved.
4+ Redistribution and use in source and binary forms, with or without
5+ modification, are permitted provided that the following conditions are
6+ met:
7+ 1. Redistributions of source code must retain the above copyright
8+ notice, this list of conditions and the following disclaimer.
9+ 2. Redistributions in binary form must reproduce the above copyright
10+ notice, this list of conditions and the following disclaimer in
11+ the documentation and/or other materials provided with the
12+ distribution.
13+ 3. Neither the name of the OpenBLAS project nor the names of
14+ its contributors may be used to endorse or promote products
15+ derived from this software without specific prior written permission.
16+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+ ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+ *******************************************************************************/
27+ #include "common.h"
28+ #include <arm_sve.h>
29+
/*
 * Precision-dependent aliases for the SVE intrinsics used in this file.
 * Building with DOUBLE defined selects the 64-bit element variants;
 * otherwise the 32-bit element variants are used.
 *
 *   SVE_TYPE    - floating-point vector type
 *   SVE_ZERO    - vector with 0.0 broadcast to every lane
 *   SVE_WHILELT - intrinsic that builds a "while less-than" predicate
 *   SVE_ALL     - predicate with every lane active
 *   SVE_WIDTH   - number of elements held by one vector
 */
#if defined(DOUBLE)
#define SVE_TYPE    svfloat64_t
#define SVE_ZERO    svdup_f64(0.0)
#define SVE_WHILELT svwhilelt_b64
#define SVE_ALL     svptrue_b64()
#define SVE_WIDTH   svcntd()
#else
#define SVE_TYPE    svfloat32_t
#define SVE_ZERO    svdup_f32(0.0)
#define SVE_WHILELT svwhilelt_b32
#define SVE_ALL     svptrue_b32()
#define SVE_WIDTH   svcntw()
#endif
43+
44+ static int rot_kernel_sve (BLASLONG n , FLOAT * x , FLOAT * y , FLOAT c , FLOAT s )
45+ {
46+ for (BLASLONG i = 0 ; i < n ; i += SVE_WIDTH )
47+ {
48+ svbool_t pg = SVE_WHILELT ((uint64_t )i , (uint64_t )n );
49+ SVE_TYPE x_vec = svld1 (pg , & x [i ]);
50+ SVE_TYPE y_vec = svld1 (pg , & y [i ]);
51+ SVE_TYPE cx_vec = svmul_z (pg , x_vec , c );
52+ SVE_TYPE sy_vec = svmul_z (pg , y_vec , s );
53+ SVE_TYPE sx_vec = svmul_z (pg , x_vec , s );
54+ SVE_TYPE cy_vec = svmul_z (pg , y_vec , c );
55+ svst1 (pg , & x [i ], svadd_z (pg , cx_vec , sy_vec ));
56+ svst1 (pg , & y [i ], svsub_z (pg , cy_vec , sx_vec ));
57+ }
58+ return (0 );
59+ }
0 commit comments