Skip to content

Commit 1334191

Browse files
author
Hamlin Li
committed
8334474: RISC-V: verify perf of ExpandBits/CompressBits (rvv)
Reviewed-by: fyang, rehn, luhenry
1 parent e29b0ed commit 1334191

File tree

5 files changed: +1 -262 lines changed

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

Lines changed: 0 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -2339,83 +2339,6 @@ void C2_MacroAssembler::signum_fp_v(VectorRegister dst, VectorRegister one, Basi
23392339
vfsgnj_vv(dst, one, dst, v0_t);
23402340
}
23412341

2342-
void C2_MacroAssembler::compress_bits_v(Register dst, Register src, Register mask, bool is_long) {
2343-
Assembler::SEW sew = is_long ? Assembler::e64 : Assembler::e32;
2344-
// intrinsic is enabled when MaxVectorSize >= 16
2345-
Assembler::LMUL lmul = is_long ? Assembler::m4 : Assembler::m2;
2346-
long len = is_long ? 64 : 32;
2347-
2348-
// load the src data(in bits) to be compressed.
2349-
vsetivli(x0, 1, sew, Assembler::m1);
2350-
vmv_s_x(v0, src);
2351-
// reset the src data(in bytes) to zero.
2352-
mv(t0, len);
2353-
vsetvli(x0, t0, Assembler::e8, lmul);
2354-
vmv_v_i(v4, 0);
2355-
// convert the src data from bits to bytes.
2356-
vmerge_vim(v4, v4, 1); // v0 as the implicit mask register
2357-
// reset the dst data(in bytes) to zero.
2358-
vmv_v_i(v8, 0);
2359-
// load the mask data(in bits).
2360-
vsetivli(x0, 1, sew, Assembler::m1);
2361-
vmv_s_x(v0, mask);
2362-
// compress the src data(in bytes) to dst(in bytes).
2363-
vsetvli(x0, t0, Assembler::e8, lmul);
2364-
vcompress_vm(v8, v4, v0);
2365-
// convert the dst data from bytes to bits.
2366-
vmseq_vi(v0, v8, 1);
2367-
// store result back.
2368-
vsetivli(x0, 1, sew, Assembler::m1);
2369-
vmv_x_s(dst, v0);
2370-
}
2371-
2372-
void C2_MacroAssembler::compress_bits_i_v(Register dst, Register src, Register mask) {
2373-
compress_bits_v(dst, src, mask, /* is_long */ false);
2374-
}
2375-
2376-
void C2_MacroAssembler::compress_bits_l_v(Register dst, Register src, Register mask) {
2377-
compress_bits_v(dst, src, mask, /* is_long */ true);
2378-
}
2379-
2380-
void C2_MacroAssembler::expand_bits_v(Register dst, Register src, Register mask, bool is_long) {
2381-
Assembler::SEW sew = is_long ? Assembler::e64 : Assembler::e32;
2382-
// intrinsic is enabled when MaxVectorSize >= 16
2383-
Assembler::LMUL lmul = is_long ? Assembler::m4 : Assembler::m2;
2384-
long len = is_long ? 64 : 32;
2385-
2386-
// load the src data(in bits) to be expanded.
2387-
vsetivli(x0, 1, sew, Assembler::m1);
2388-
vmv_s_x(v0, src);
2389-
// reset the src data(in bytes) to zero.
2390-
mv(t0, len);
2391-
vsetvli(x0, t0, Assembler::e8, lmul);
2392-
vmv_v_i(v4, 0);
2393-
// convert the src data from bits to bytes.
2394-
vmerge_vim(v4, v4, 1); // v0 as implicit mask register
2395-
// reset the dst data(in bytes) to zero.
2396-
vmv_v_i(v12, 0);
2397-
// load the mask data(in bits).
2398-
vsetivli(x0, 1, sew, Assembler::m1);
2399-
vmv_s_x(v0, mask);
2400-
// expand the src data(in bytes) to dst(in bytes).
2401-
vsetvli(x0, t0, Assembler::e8, lmul);
2402-
viota_m(v8, v0);
2403-
vrgather_vv(v12, v4, v8, VectorMask::v0_t); // v0 as implicit mask register
2404-
// convert the dst data from bytes to bits.
2405-
vmseq_vi(v0, v12, 1);
2406-
// store result back.
2407-
vsetivli(x0, 1, sew, Assembler::m1);
2408-
vmv_x_s(dst, v0);
2409-
}
2410-
2411-
void C2_MacroAssembler::expand_bits_i_v(Register dst, Register src, Register mask) {
2412-
expand_bits_v(dst, src, mask, /* is_long */ false);
2413-
}
2414-
2415-
void C2_MacroAssembler::expand_bits_l_v(Register dst, Register src, Register mask) {
2416-
expand_bits_v(dst, src, mask, /* is_long */ true);
2417-
}
2418-
24192342
// j.l.Math.round(float)
24202343
// Returns the closest int to the argument, with ties rounding to positive infinity.
24212344
// We need to handle 3 special cases defined by java api spec:

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,6 @@
3939
VectorRegister vrs,
4040
bool is_latin, Label& DONE, Assembler::LMUL lmul);
4141

42-
void compress_bits_v(Register dst, Register src, Register mask, bool is_long);
43-
void expand_bits_v(Register dst, Register src, Register mask, bool is_long);
44-
4542
public:
4643
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
4744
void fast_lock(Register object, Register box,
@@ -184,13 +181,6 @@
184181

185182
// intrinsic methods implemented by rvv instructions
186183

187-
// compress bits, i.e. j.l.Integer/Long::compress.
188-
void compress_bits_i_v(Register dst, Register src, Register mask);
189-
void compress_bits_l_v(Register dst, Register src, Register mask);
190-
// expand bits, i.e. j.l.Integer/Long::expand.
191-
void expand_bits_i_v(Register dst, Register src, Register mask);
192-
void expand_bits_l_v(Register dst, Register src, Register mask);
193-
194184
void java_round_float_v(VectorRegister dst, VectorRegister src, FloatRegister ftmp, BasicType bt, uint vector_length);
195185
void java_round_double_v(VectorRegister dst, VectorRegister src, FloatRegister ftmp, BasicType bt, uint vector_length);
196186

src/hotspot/cpu/riscv/riscv.ad

Lines changed: 0 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -942,26 +942,6 @@ reg_class v11_reg(
942942
V11, V11_H, V11_J, V11_K
943943
);
944944

945-
// class for vector register v12
946-
reg_class v12_reg(
947-
V12, V12_H, V12_J, V12_K
948-
);
949-
950-
// class for vector register v13
951-
reg_class v13_reg(
952-
V13, V13_H, V13_J, V13_K
953-
);
954-
955-
// class for vector register v14
956-
reg_class v14_reg(
957-
V14, V14_H, V14_J, V14_K
958-
);
959-
960-
// class for vector register v15
961-
reg_class v15_reg(
962-
V15, V15_H, V15_J, V15_K
963-
);
964-
965945
// class for condition codes
966946
reg_class reg_flags(RFLAGS);
967947

@@ -1896,9 +1876,6 @@ bool Matcher::match_rule_supported(int opcode) {
18961876
}
18971877
break;
18981878

1899-
case Op_ExpandBits: // fall through
1900-
case Op_CompressBits: // fall through
1901-
guarantee(UseRVV == (MaxVectorSize >= 16), "UseRVV and MaxVectorSize not matched");
19021879
case Op_StrCompressedCopy: // fall through
19031880
case Op_StrInflatedCopy: // fall through
19041881
case Op_CountPositives: // fall through
@@ -3541,46 +3518,6 @@ operand vReg_V11()
35413518
interface(REG_INTER);
35423519
%}
35433520

3544-
operand vReg_V12()
3545-
%{
3546-
constraint(ALLOC_IN_RC(v12_reg));
3547-
match(VecA);
3548-
match(vReg);
3549-
op_cost(0);
3550-
format %{ %}
3551-
interface(REG_INTER);
3552-
%}
3553-
3554-
operand vReg_V13()
3555-
%{
3556-
constraint(ALLOC_IN_RC(v13_reg));
3557-
match(VecA);
3558-
match(vReg);
3559-
op_cost(0);
3560-
format %{ %}
3561-
interface(REG_INTER);
3562-
%}
3563-
3564-
operand vReg_V14()
3565-
%{
3566-
constraint(ALLOC_IN_RC(v14_reg));
3567-
match(VecA);
3568-
match(vReg);
3569-
op_cost(0);
3570-
format %{ %}
3571-
interface(REG_INTER);
3572-
%}
3573-
3574-
operand vReg_V15()
3575-
%{
3576-
constraint(ALLOC_IN_RC(v15_reg));
3577-
match(VecA);
3578-
match(vReg);
3579-
op_cost(0);
3580-
format %{ %}
3581-
interface(REG_INTER);
3582-
%}
3583-
35843521
operand vRegMask()
35853522
%{
35863523
constraint(ALLOC_IN_RC(vmask_reg));

src/hotspot/cpu/riscv/riscv_v.ad

Lines changed: 0 additions & 110 deletions
Original file line number | Diff line number | Diff line change
@@ -3843,116 +3843,6 @@ instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy,
38433843
ins_pipe(pipe_class_memory);
38443844
%}
38453845

3846-
// CompressBits of Long & Integer
3847-
3848-
instruct compressBitsI(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask, vRegMask_V0 v0,
3849-
vReg_V4 v4, vReg_V5 v5, vReg_V8 v8, vReg_V9 v9) %{
3850-
match(Set dst (CompressBits src mask));
3851-
effect(TEMP v0, TEMP v4, TEMP v5, TEMP v8, TEMP v9);
3852-
format %{ "vsetivli x0, 1, e32, m1, tu, mu\t#@compressBitsI\n\t"
3853-
"vmv.s.x $v0, $src\n\t"
3854-
"mv t0, 32\n\t"
3855-
"vsetvli x0, t0, e8, m2, tu, mu\n\t"
3856-
"vmv.v.i $v4, 0\n\t"
3857-
"vmerge.vim $v4, $v4, 1, $v0\n\t"
3858-
"vmv.v.i $v8, 0\n\t"
3859-
"vsetivli x0, 1, e32, m1, tu, mu\n\t"
3860-
"vmv.s.x $v0, $mask\n\t"
3861-
"vsetvli x0, t0, e8, m2, tu, mu\n\t"
3862-
"vcompress.vm $v8, $v4, $v0\n\t"
3863-
"vmseq.vi $v0, $v8, 1\n\t"
3864-
"vsetivli x0, 1, e32, m1, tu, mu\n\t"
3865-
"vmv.x.s $dst, $v0\t#@compressBitsI\n\t"
3866-
%}
3867-
ins_encode %{
3868-
__ compress_bits_i_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
3869-
%}
3870-
ins_pipe(pipe_slow);
3871-
%}
3872-
3873-
instruct compressBitsL(iRegLNoSp dst, iRegL src, iRegL mask, vRegMask_V0 v0,
3874-
vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
3875-
vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11) %{
3876-
match(Set dst (CompressBits src mask));
3877-
effect(TEMP v0, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9, TEMP v10, TEMP v11);
3878-
format %{ "vsetivli x0, 1, e64, m1, tu, mu\t#@compressBitsL\n\t"
3879-
"vmv.s.x $v0, $src\n\t"
3880-
"mv t0, 64\n\t"
3881-
"vsetvli x0, t0, e8, m4, tu, mu\n\t"
3882-
"vmv.v.i $v4, 0\n\t"
3883-
"vmerge.vim $v4, $v4, 1, $v0\n\t"
3884-
"vmv.v.i $v8, 0\n\t"
3885-
"vsetivli x0, 1, e64, m1, tu, mu\n\t"
3886-
"vmv.s.x $v0, $mask\n\t"
3887-
"vsetvli x0, t0, e8, m4, tu, mu\n\t"
3888-
"vcompress.vm $v8, $v4, $v0\n\t"
3889-
"vmseq.vi $v0, $v8, 1\n\t"
3890-
"vsetivli x0, 1, e64, m1, tu, mu\n\t"
3891-
"vmv.x.s $dst, $v0\t#@compressBitsL\n\t"
3892-
%}
3893-
ins_encode %{
3894-
__ compress_bits_l_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
3895-
%}
3896-
ins_pipe(pipe_slow);
3897-
%}
3898-
3899-
// ExpandBits of Long & Integer
3900-
3901-
instruct expandBitsI(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask, vRegMask_V0 v0,
3902-
vReg_V4 v4, vReg_V5 v5, vReg_V8 v8, vReg_V9 v9, vReg_V12 v12, vReg_V13 v13) %{
3903-
match(Set dst (ExpandBits src mask));
3904-
effect(TEMP v0, TEMP v4, TEMP v5, TEMP v8, TEMP v9, TEMP v12, TEMP v13);
3905-
format %{ "vsetivli x0, 1, e32, m1, tu, mu\t#@expandBitsI\n\t"
3906-
"vmv.s.x $v0, $src\n\t"
3907-
"mv t0, 32\n\t"
3908-
"vsetvli x0, t0, e8, m2, tu, mu\n\t"
3909-
"vmv.v.i $v4, 0\n\t"
3910-
"vmerge.vim $v4, $v4, 1, $v0\n\t"
3911-
"vmv.v.i $v12, 0\n\t"
3912-
"vsetivli x0, 1, e32, m1, tu, mu\n\t"
3913-
"vmv.s.x $v0, $mask\n\t"
3914-
"vsetvli x0, t0, e8, m2, tu, mu\n\t"
3915-
"viota.m $v8, $v0\n\t"
3916-
"vrgather.vv $v12, $v4, $v8, $v0.t\n\t"
3917-
"vmseq.vi $v0, $v12, 1\n\t"
3918-
"vsetivli x0, 1, e32, m1, tu, mu\n\t"
3919-
"vmv.x.s $dst, $v0\t#@expandBitsI\n\t"
3920-
%}
3921-
ins_encode %{
3922-
__ expand_bits_i_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
3923-
%}
3924-
ins_pipe(pipe_slow);
3925-
%}
3926-
3927-
instruct expandBitsL(iRegLNoSp dst, iRegL src, iRegL mask, vRegMask_V0 v0,
3928-
vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
3929-
vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11,
3930-
vReg_V12 v12, vReg_V13 v13, vReg_V14 v14, vReg_V15 v15) %{
3931-
match(Set dst (ExpandBits src mask));
3932-
effect(TEMP v0, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9, TEMP v10, TEMP v11,
3933-
TEMP v12, TEMP v13, TEMP v14, TEMP v15);
3934-
format %{ "vsetivli x0, 1, e64, m1, tu, mu\t#@expandBitsL\n\t"
3935-
"vmv.s.x $v0, $src\n\t"
3936-
"mv t0, 64\n\t"
3937-
"vsetvli x0, t0, e8, m4, tu, mu\n\t"
3938-
"vmv.v.i $v4, 0\n\t"
3939-
"vmerge.vim $v4, $v4, 1, $v0\n\t"
3940-
"vmv.v.i $v12, 0\n\t"
3941-
"vsetivli x0, 1, e64, m1, tu, mu\n\t"
3942-
"vmv.s.x $v0, $mask\n\t"
3943-
"vsetvli x0, t0, e8, m4, tu, mu\n\t"
3944-
"viota.m $v8, $v0\n\t"
3945-
"vrgather.vv $v12, $v4, $v8, $v0.t\n\t"
3946-
"vmseq.vi $v0, $v12, 1\n\t"
3947-
"vsetivli x0, 1, e64, m1, tu, mu\n\t"
3948-
"vmv.x.s $dst, $v0\t#@expandBitsL\n\t"
3949-
%}
3950-
ins_encode %{
3951-
__ expand_bits_l_v(as_Register($dst$$reg), as_Register($src$$reg), as_Register($mask$$reg));
3952-
%}
3953-
ins_pipe(pipe_slow);
3954-
%}
3955-
39563846
// Vector Load Const
39573847
instruct vloadcon(vReg dst, immI0 src) %{
39583848
match(Set dst (VectorLoadConst src));

test/hotspot/jtreg/compiler/intrinsics/TestBitShuffleOpers.java

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,7 @@
3030
* @requires (((os.arch=="x86" | os.arch=="amd64" | os.arch=="x86_64") &
3131
* (vm.cpu.features ~= ".*bmi2.*" & vm.cpu.features ~= ".*bmi1.*" &
3232
* vm.cpu.features ~= ".*sse2.*")) |
33-
* (os.arch=="aarch64" & vm.cpu.features ~= ".*svebitperm.*") |
34-
* (os.arch=="riscv64" & vm.cpu.features ~= ".*rvv.*"))
33+
* (os.arch=="aarch64" & vm.cpu.features ~= ".*svebitperm.*"))
3534
* @library /test/lib /
3635
* @run driver compiler.intrinsics.TestBitShuffleOpers
3736
*/

0 commit comments

Comments
 (0)