Skip to content

Commit 9c39e96

Browse files
committed
mips64: Fixed MSA optimization bugs for zgemv and cgemv
1 parent 4c03ed4 commit 9c39e96

File tree

2 files changed

+62
-62
lines changed

2 files changed

+62
-62
lines changed

kernel/mips/cgemv_t_msa.c

Lines changed: 41 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -31,27 +31,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3131
#undef OP0
3232
#undef OP1
3333
#undef OP2
34+
#undef OP3
35+
#undef OP4
36+
#undef OP5
37+
38+
#if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ))
39+
#define OP0 -=
40+
#define OP1 +=
41+
#define OP2 +=
42+
#else
43+
#define OP0 +=
44+
#define OP1 +=
45+
#define OP2 -=
46+
#endif
3447

35-
#if !defined(CONJ)
36-
#if !defined(XCONJ)
37-
#define OP0 -=
38-
#define OP1 +=
39-
#define OP2 +=
40-
#else
41-
#define OP0 +=
42-
#define OP1 +=
43-
#define OP2 -=
44-
#endif
48+
#if !defined(XCONJ)
49+
#define OP3 -=
50+
#define OP4 +=
51+
#define OP5 +=
4552
#else
46-
#if !defined(XCONJ)
47-
#define OP0 +=
48-
#define OP1 -=
49-
#define OP2 +=
50-
#else
51-
#define OP0 -=
52-
#define OP1 -=
53-
#define OP2 -=
54-
#endif
53+
#define OP3 +=
54+
#define OP4 -=
55+
#define OP5 +=
5556
#endif
5657

5758
#define CGEMV_T_8x4() \
@@ -268,22 +269,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
268269
res3i = y[3 * inc_y2 + 1]; \
269270
\
270271
res0r += alphar * temp0r; \
271-
res0r OP0 alphai * temp0i; \
272+
res0r OP3 alphai * temp0i; \
272273
res1r += alphar * temp1r; \
273-
res1r OP0 alphai * temp1i; \
274+
res1r OP3 alphai * temp1i; \
274275
res2r += alphar * temp2r; \
275-
res2r OP0 alphai * temp2i; \
276+
res2r OP3 alphai * temp2i; \
276277
res3r += alphar * temp3r; \
277-
res3r OP0 alphai * temp3i; \
278+
res3r OP3 alphai * temp3i; \
278279
\
279-
res0i OP1 alphar * temp0i; \
280-
res0i OP2 alphai * temp0r; \
281-
res1i OP1 alphar * temp1i; \
282-
res1i OP2 alphai * temp1r; \
283-
res2i OP1 alphar * temp2i; \
284-
res2i OP2 alphai * temp2r; \
285-
res3i OP1 alphar * temp3i; \
286-
res3i OP2 alphai * temp3r; \
280+
res0i OP4 alphar * temp0i; \
281+
res0i OP5 alphai * temp0r; \
282+
res1i OP4 alphar * temp1i; \
283+
res1i OP5 alphai * temp1r; \
284+
res2i OP4 alphar * temp2i; \
285+
res2i OP5 alphai * temp2r; \
286+
res3i OP4 alphar * temp3i; \
287+
res3i OP5 alphai * temp3r; \
287288
\
288289
y[0 * inc_y2] = res0r; \
289290
y[1 * inc_y2] = res1r; \
@@ -303,14 +304,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
303304
res1i = y[1 * inc_y2 + 1]; \
304305
\
305306
res0r += alphar * temp0r; \
306-
res0r OP0 alphai * temp0i; \
307+
res0r OP3 alphai * temp0i; \
307308
res1r += alphar * temp1r; \
308-
res1r OP0 alphai * temp1i; \
309+
res1r OP3 alphai * temp1i; \
309310
\
310-
res0i OP1 alphar * temp0i; \
311-
res0i OP2 alphai * temp0r; \
312-
res1i OP1 alphar * temp1i; \
313-
res1i OP2 alphai * temp1r; \
311+
res0i OP4 alphar * temp0i; \
312+
res0i OP5 alphai * temp0r; \
313+
res1i OP4 alphar * temp1i; \
314+
res1i OP5 alphai * temp1r; \
314315
\
315316
y[0 * inc_y2] = res0r; \
316317
y[1 * inc_y2] = res1r; \
@@ -324,10 +325,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
324325
res0i = y[0 * inc_y2 + 1]; \
325326
\
326327
res0r += alphar * temp0r; \
327-
res0r OP0 alphai * temp0i; \
328+
res0r OP3 alphai * temp0i; \
328329
\
329-
res0i OP1 alphar * temp0i; \
330-
res0i OP2 alphai * temp0r; \
330+
res0i OP4 alphar * temp0i; \
331+
res0i OP5 alphai * temp0r; \
331332
\
332333
y[0 * inc_y2] = res0r; \
333334
y[0 * inc_y2 + 1] = res0i; \

kernel/mips/zgemv_t_msa.c

Lines changed: 21 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -33,27 +33,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3333
#undef OP2
3434
#undef OP3
3535
#undef OP4
36+
#undef OP5
3637

37-
#if !defined(CONJ)
38-
#if !defined(XCONJ)
39-
#define OP0 -=
40-
#define OP1 +=
41-
#define OP2 +=
42-
#else
43-
#define OP0 +=
44-
#define OP1 +=
45-
#define OP2 -=
46-
#endif
38+
#if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ))
39+
#define OP0 -=
40+
#define OP1 +=
41+
#define OP2 +=
4742
#else
48-
#if !defined(XCONJ)
49-
#define OP0 +=
50-
#define OP1 -=
51-
#define OP2 +=
52-
#else
53-
#define OP0 -=
54-
#define OP1 -=
55-
#define OP2 -=
56-
#endif
43+
#define OP0 +=
44+
#define OP1 +=
45+
#define OP2 -=
46+
#endif
47+
48+
#if !defined(XCONJ)
49+
#define OP3 -=
50+
#define OP4 +=
51+
#define OP5 +=
52+
#else
53+
#define OP3 +=
54+
#define OP4 -=
55+
#define OP5 +=
5756
#endif
5857

5958
#define ZGEMV_T_8x1() \
@@ -124,10 +123,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
124123
res0i = y[0 * inc_y2 + 1]; \
125124
\
126125
res0r += alphar * temp0r; \
127-
res0r OP0 alphai * temp0i; \
126+
res0r OP3 alphai * temp0i; \
128127
\
129-
res0i OP1 alphar * temp0i; \
130-
res0i OP2 alphai * temp0r; \
128+
res0i OP4 alphar * temp0i; \
129+
res0i OP5 alphai * temp0r; \
131130
\
132131
y[0 * inc_y2] = res0r; \
133132
y[0 * inc_y2 + 1] = res0i; \

0 commit comments

Comments
 (0)