Skip to content

Commit 28eeef5

Browse files
authored
Merge pull request #5538 from CheryDan/riscv/rot
Optimize ZROT_RVV for the unit-stride case (inc_x = inc_y = 1)
2 parents a51a1b8 + 98a8230 commit 28eeef5

File tree

1 file changed

+9
-23
lines changed

1 file changed

+9
-23
lines changed

kernel/riscv64/zrot_rvv.c

Lines changed: 9 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -97,33 +97,19 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
97 97
}
98 98
else if(inc_x == 1 && inc_y == 1) {
99 99

100-
for (size_t vl; n > 0; n -= vl, x += vl*2, y += vl*2) {
100+
n *= 2;
101+
for (size_t vl; n > 0; n -= vl, x += vl, y += vl) {
101 102
vl = VSETVL(n);
102 103

103-
vxx2 = VLSEG_FLOAT(x, vl);
104-
vyx2 = VLSEG_FLOAT(y, vl);
105-
106-
vx0 = VGET_VX2(vxx2, 0);
107-
vx1 = VGET_VX2(vxx2, 1);
108-
vy0 = VGET_VX2(vyx2, 0);
109-
vy1 = VGET_VX2(vyx2, 1);
110-
104+
vx0 = VLEV_FLOAT(x, vl);
105+
vy0 = VLEV_FLOAT(y, vl);
111 106
vt0 = VFMULVF_FLOAT(vx0, c, vl);
112-
vt0 = VFMACCVF_FLOAT(vt0, s, vy0, vl);
113-
vt1 = VFMULVF_FLOAT(vx1, c, vl);
114-
vt1 = VFMACCVF_FLOAT(vt1, s, vy1, vl);
115-
vy0 = VFMULVF_FLOAT(vy0, c, vl);
116-
vy0 = VFNMSACVF_FLOAT(vy0, s, vx0, vl);
117-
vy1 = VFMULVF_FLOAT(vy1, c, vl);
118-
vy1 = VFNMSACVF_FLOAT(vy1, s, vx1, vl);
107+
vx1 = VFMACCVF_FLOAT(vt0, s, vy0, vl);
119 108

120-
vtx2 = VSET_VX2(vtx2, 0, vt0);
121-
vtx2 = VSET_VX2(vtx2, 1, vt1);
122-
vyx2 = VSET_VX2(vyx2, 0, vy0);
123-
vyx2 = VSET_VX2(vyx2, 1, vy1);
124-
125-
VSSEG_FLOAT(x, vtx2, vl);
126-
VSSEG_FLOAT(y, vyx2, vl);
109+
vt1 = VFMULVF_FLOAT(vy0, c, vl);
110+
vy1 = VFNMSACVF_FLOAT(vt1, s, vx0, vl);
111+
VSEV_FLOAT(x, vx1, vl);
112+
VSEV_FLOAT(y, vy1, vl);
127 113
}
128 114

129 115
} else if (inc_x == 1){

0 commit comments

Comments
 (0)