|
1 | 1 | // |
2 | | -// Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. |
| 2 | +// Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved. |
3 | 3 | // Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved. |
4 | 4 | // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
5 | 5 | // |
@@ -2296,6 +2296,26 @@ bool Matcher::match_rule_supported(int opcode) { |
2296 | 2296 | return false; |
2297 | 2297 | } |
2298 | 2298 | break; |
| 2299 | + case Op_FmaHF: |
| 2300 | + // UseFMA flag also needs to be checked along with FEAT_FP16 |
| 2301 | + if (!UseFMA || !is_feat_fp16_supported()) { |
| 2302 | + return false; |
| 2303 | + } |
| 2304 | + break; |
| 2305 | + case Op_AddHF: |
| 2306 | + case Op_SubHF: |
| 2307 | + case Op_MulHF: |
| 2308 | + case Op_DivHF: |
| 2309 | + case Op_MinHF: |
| 2310 | + case Op_MaxHF: |
| 2311 | + case Op_SqrtHF: |
| 2312 | + // Half-precision floating point scalar operations require FEAT_FP16 |
| 2313 | + // to be available. FEAT_FP16 is enabled if both "fphp" and "asimdhp" |
| 2314 | + // features are supported. |
| 2315 | + if (!is_feat_fp16_supported()) { |
| 2316 | + return false; |
| 2317 | + } |
| 2318 | + break; |
2299 | 2319 | } |
2300 | 2320 |
|
2301 | 2321 | return true; // Per default match rules are supported. |
@@ -4599,6 +4619,15 @@ operand immF0() |
4599 | 4619 | interface(CONST_INTER); |
4600 | 4620 | %} |
4601 | 4621 |
|
| 4622 | +// Half Float (FP16) Immediate: matches a ConH constant node, with operand cost 0. |
| 4623 | +operand immH() |
| 4624 | +%{ |
| 4625 | + match(ConH); |
| 4626 | + op_cost(0); |
| 4627 | + format %{ %} |
| 4628 | + interface(CONST_INTER); |
| 4629 | +%} |
| 4630 | + |
4602 | 4631 | // |
4603 | 4632 | operand immFPacked() |
4604 | 4633 | %{ |
@@ -6942,6 +6971,21 @@ instruct loadConD(vRegD dst, immD con) %{ |
6942 | 6971 | ins_pipe(fp_load_constant_d); |
6943 | 6972 | %} |
6944 | 6973 |
|
| 6974 | +// Load Half Float Constant |
| 6975 | +// Emits "ldrs", which loads a 32-bit word from the constant table into the |
| 6976 | +// 32-bit register $dst. Only the bottom 16 bits carry the half float value; |
| 6977 | +// the top 16 bits are zero. |
| 6978 | +instruct loadConH(vRegF dst, immH con) %{ |
| 6979 | + match(Set dst con); |
| 6980 | + format %{ |
| 6981 | + "ldrs $dst, [$constantaddress]\t# load from constant table: half float=$con\n\t" |
| 6982 | + %} |
| 6983 | + ins_encode %{ |
| 6984 | + __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con)); |
| 6985 | + %} |
| 6986 | + ins_pipe(fp_load_constant_s); |
| 6987 | +%} |
| 6988 | + |
6945 | 6989 | // Store Instructions |
6946 | 6990 |
|
6947 | 6991 | // Store Byte |
@@ -13634,6 +13678,17 @@ instruct bits_reverse_L(iRegLNoSp dst, iRegL src) |
13634 | 13678 | // ============================================================================ |
13635 | 13679 | // Floating Point Arithmetic Instructions |
13636 | 13680 |
|
| | +// Half-precision float add: matches AddHF and emits "faddh $dst, $src1, $src2". |
| 13681 | +instruct addHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ |
| 13682 | + match(Set dst (AddHF src1 src2)); |
| 13683 | + format %{ "faddh $dst, $src1, $src2" %} |
| 13684 | + ins_encode %{ |
| 13685 | + __ faddh($dst$$FloatRegister, |
| 13686 | + $src1$$FloatRegister, |
| 13687 | + $src2$$FloatRegister); |
| 13688 | + %} |
| 13689 | + ins_pipe(fp_dop_reg_reg_s); |
| 13690 | +%} |
| 13691 | + |
13637 | 13692 | instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ |
13638 | 13693 | match(Set dst (AddF src1 src2)); |
13639 | 13694 |
|
@@ -13664,6 +13719,17 @@ instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ |
13664 | 13719 | ins_pipe(fp_dop_reg_reg_d); |
13665 | 13720 | %} |
13666 | 13721 |
|
| | +// Half-precision float subtract: matches SubHF and emits "fsubh $dst, $src1, $src2". |
| 13722 | +instruct subHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ |
| 13723 | + match(Set dst (SubHF src1 src2)); |
| 13724 | + format %{ "fsubh $dst, $src1, $src2" %} |
| 13725 | + ins_encode %{ |
| 13726 | + __ fsubh($dst$$FloatRegister, |
| 13727 | + $src1$$FloatRegister, |
| 13728 | + $src2$$FloatRegister); |
| 13729 | + %} |
| 13730 | + ins_pipe(fp_dop_reg_reg_s); |
| 13731 | +%} |
| 13732 | + |
13667 | 13733 | instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ |
13668 | 13734 | match(Set dst (SubF src1 src2)); |
13669 | 13735 |
|
@@ -13694,6 +13760,17 @@ instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ |
13694 | 13760 | ins_pipe(fp_dop_reg_reg_d); |
13695 | 13761 | %} |
13696 | 13762 |
|
| | +// Half-precision float multiply: matches MulHF and emits "fmulh $dst, $src1, $src2". |
| 13763 | +instruct mulHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ |
| 13764 | + match(Set dst (MulHF src1 src2)); |
| 13765 | + format %{ "fmulh $dst, $src1, $src2" %} |
| 13766 | + ins_encode %{ |
| 13767 | + __ fmulh($dst$$FloatRegister, |
| 13768 | + $src1$$FloatRegister, |
| 13769 | + $src2$$FloatRegister); |
| 13770 | + %} |
| 13771 | + ins_pipe(fp_dop_reg_reg_s); |
| 13772 | +%} |
| 13773 | + |
13697 | 13774 | instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ |
13698 | 13775 | match(Set dst (MulF src1 src2)); |
13699 | 13776 |
|
@@ -13724,6 +13801,20 @@ instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ |
13724 | 13801 | ins_pipe(fp_dop_reg_reg_d); |
13725 | 13802 | %} |
13726 | 13803 |
|
| 13804 | +// src1 * src2 + src3 (half-precision float): matches FmaHF src3 (Binary src1 src2) and emits "fmaddh"; the encoding asserts UseFMA. |
| 13805 | +instruct maddHF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ |
| 13806 | + match(Set dst (FmaHF src3 (Binary src1 src2))); |
| 13807 | + format %{ "fmaddh $dst, $src1, $src2, $src3" %} |
| 13808 | + ins_encode %{ |
| 13809 | + assert(UseFMA, "Needs FMA instructions support."); |
| 13810 | + __ fmaddh($dst$$FloatRegister, |
| 13811 | + $src1$$FloatRegister, |
| 13812 | + $src2$$FloatRegister, |
| 13813 | + $src3$$FloatRegister); |
| 13814 | + %} |
| 13815 | + ins_pipe(pipe_class_default); |
| 13816 | +%} |
| 13817 | + |
13727 | 13818 | // src1 * src2 + src3 |
13728 | 13819 | instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{ |
13729 | 13820 | match(Set dst (FmaF src3 (Binary src1 src2))); |
@@ -13865,6 +13956,29 @@ instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zer |
13865 | 13956 | ins_pipe(pipe_class_default); |
13866 | 13957 | %} |
13867 | 13958 |
|
| 13959 | +// Math.max(HH)H (half-precision float): matches MaxHF and emits "fmaxh $dst, $src1, $src2". |
| 13960 | +instruct maxHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ |
| 13961 | + match(Set dst (MaxHF src1 src2)); |
| 13962 | + format %{ "fmaxh $dst, $src1, $src2" %} |
| 13963 | + ins_encode %{ |
| 13964 | + __ fmaxh($dst$$FloatRegister, |
| 13965 | + $src1$$FloatRegister, |
| 13966 | + $src2$$FloatRegister); |
| 13967 | + %} |
| 13968 | + ins_pipe(fp_dop_reg_reg_s); |
| 13969 | +%} |
| 13970 | + |
| 13971 | +// Math.min(HH)H (half-precision float): matches MinHF and emits "fminh $dst, $src1, $src2". |
| 13972 | +instruct minHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ |
| 13973 | + match(Set dst (MinHF src1 src2)); |
| 13974 | + format %{ "fminh $dst, $src1, $src2" %} |
| 13975 | + ins_encode %{ |
| 13976 | + __ fminh($dst$$FloatRegister, |
| 13977 | + $src1$$FloatRegister, |
| 13978 | + $src2$$FloatRegister); |
| 13979 | + %} |
| 13980 | + ins_pipe(fp_dop_reg_reg_s); |
| 13981 | +%} |
13868 | 13982 |
|
13869 | 13983 | // Math.max(FF)F |
13870 | 13984 | instruct maxF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ |
@@ -13922,6 +14036,16 @@ instruct minD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{ |
13922 | 14036 | ins_pipe(fp_dop_reg_reg_d); |
13923 | 14037 | %} |
13924 | 14038 |
|
| | +// Half-precision float divide: matches DivHF and emits "fdivh $dst, $src1, $src2". |
| 14039 | +instruct divHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ |
| 14040 | + match(Set dst (DivHF src1 src2)); |
| 14041 | + format %{ "fdivh $dst, $src1, $src2" %} |
| 14042 | + ins_encode %{ |
| 14043 | + __ fdivh($dst$$FloatRegister, |
| 14044 | + $src1$$FloatRegister, |
| 14045 | + $src2$$FloatRegister); |
| 14046 | + %} |
| 14047 | + ins_pipe(fp_div_s); |
| 14048 | +%} |
13925 | 14049 |
|
13926 | 14050 | instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{ |
13927 | 14051 | match(Set dst (DivF src1 src2)); |
@@ -14095,6 +14219,16 @@ instruct sqrtF_reg(vRegF dst, vRegF src) %{ |
14095 | 14219 | ins_pipe(fp_div_d); |
14096 | 14220 | %} |
14097 | 14221 |
|
| | +// Half-precision float square root: matches SqrtHF and emits "fsqrth $dst, $src". |
| 14222 | +instruct sqrtHF_reg(vRegF dst, vRegF src) %{ |
| 14223 | + match(Set dst (SqrtHF src)); |
| 14224 | + format %{ "fsqrth $dst, $src" %} |
| 14225 | + ins_encode %{ |
| 14226 | + __ fsqrth($dst$$FloatRegister, |
| 14227 | + $src$$FloatRegister); |
| 14228 | + %} |
| 14229 | + ins_pipe(fp_div_s); |
| 14230 | +%} |
| 14231 | + |
14098 | 14232 | // Math.rint, floor, ceil |
14099 | 14233 | instruct roundD_reg(vRegD dst, vRegD src, immI rmode) %{ |
14100 | 14234 | match(Set dst (RoundDoubleMode src rmode)); |
@@ -17144,6 +17278,64 @@ instruct expandBitsL_memcon(iRegINoSp dst, memory8 mem, immL mask, |
17144 | 17278 | ins_pipe(pipe_slow); |
17145 | 17279 | %} |
17146 | 17280 |
|
| 17281 | +//----------------------------- Reinterpret ---------------------------------- |
| 17282 | +// Reinterpret a half-precision float value held in a floating point register as a value in a general purpose register (emitted as smov from element 0 of $src, element size H) |
| 17283 | +instruct reinterpretHF2S(iRegINoSp dst, vRegF src) %{ |
| 17284 | + match(Set dst (ReinterpretHF2S src)); |
| 17285 | + format %{ "reinterpretHF2S $dst, $src" %} |
| 17286 | + ins_encode %{ |
| 17287 | + __ smov($dst$$Register, $src$$FloatRegister, __ H, 0); |
| 17288 | + %} |
| 17289 | + ins_pipe(pipe_slow); |
| 17290 | +%} |
| 17291 | + |
| 17292 | +// Reinterpret a half-precision float value held in a general purpose register as a value in a floating point register (emitted as mov into element 0 of $dst, element size H) |
| 17293 | +instruct reinterpretS2HF(vRegF dst, iRegINoSp src) %{ |
| 17294 | + match(Set dst (ReinterpretS2HF src)); |
| 17295 | + format %{ "reinterpretS2HF $dst, $src" %} |
| 17296 | + ins_encode %{ |
| 17297 | + __ mov($dst$$FloatRegister, __ H, 0, $src$$Register); |
| 17298 | + %} |
| 17299 | + ins_pipe(pipe_slow); |
| 17300 | +%} |
| 17301 | + |
| 17302 | +// Matches ReinterpretS2HF (ConvF2HF src) as one instruction. Without this optimization, the pattern would result in the following |
| 17303 | +// instructions (the first two are for ConvF2HF and the last instruction is for ReinterpretS2HF) - |
| 17304 | +// fcvt $tmp1_fpr, $src_fpr // Convert float to half-precision float |
| 17305 | +// mov $tmp2_gpr, $tmp1_fpr // Move half-precision float in FPR to a GPR |
| 17306 | +// mov $dst_fpr, $tmp2_gpr // Move the result from a GPR to an FPR |
| 17307 | +// The move from FPR to GPR in ConvF2HF and the move from GPR to FPR in ReinterpretS2HF |
| 17308 | +// cancel each other out and can be omitted in this pattern, resulting in - |
| 17309 | +// fcvtsh $dst, $src // Convert float to half-precision float; source and result are both FPRs |
| 17310 | +instruct convF2HFAndS2HF(vRegF dst, vRegF src) |
| 17311 | +%{ |
| 17312 | + match(Set dst (ReinterpretS2HF (ConvF2HF src))); |
| 17313 | + format %{ "convF2HFAndS2HF $dst, $src" %} |
| 17314 | + ins_encode %{ |
| 17315 | + __ fcvtsh($dst$$FloatRegister, $src$$FloatRegister); |
| 17316 | + %} |
| 17317 | + ins_pipe(pipe_slow); |
| 17318 | +%} |
| 17319 | + |
| 17320 | +// Matches ConvHF2F (ReinterpretHF2S src) as one instruction. Without this optimization, the pattern would result in the following |
| 17321 | +// instructions (the first one is for ReinterpretHF2S and the last two are for ConvHF2F) - |
| 17322 | +// mov $tmp1_gpr, $src_fpr // Move the half-precision float from an FPR to a GPR |
| 17323 | +// mov $tmp2_fpr, $tmp1_gpr // Move the same value from GPR to an FPR |
| 17324 | +// fcvt $dst_fpr, $tmp2_fpr // Convert the half-precision float to 32-bit float |
| 17325 | +// The move from FPR to GPR in ReinterpretHF2S and the move from GPR to FPR in ConvHF2F |
| 17326 | +// can be omitted because the input (src) is already in an FPR, as the fcvths instruction requires, |
| 17327 | +// resulting in - |
| 17328 | +// fcvths $dst, $src // Convert half-precision float to a 32-bit float; source and result are both FPRs |
| 17329 | +instruct convHF2SAndHF2F(vRegF dst, vRegF src) |
| 17330 | +%{ |
| 17331 | + match(Set dst (ConvHF2F (ReinterpretHF2S src))); |
| 17332 | + format %{ "convHF2SAndHF2F $dst, $src" %} |
| 17333 | + ins_encode %{ |
| 17334 | + __ fcvths($dst$$FloatRegister, $src$$FloatRegister); |
| 17335 | + %} |
| 17336 | + ins_pipe(pipe_slow); |
| 17337 | +%} |
| 17338 | + |
17147 | 17339 | // ============================================================================ |
17148 | 17340 | // This name is KNOWN by the ADLC and cannot be changed. |
17149 | 17341 | // The ADLC forces a 'TypeRawPtr::BOTTOM' output type |
|
0 commit comments