Skip to content

Commit befc071

Browse files
author
Datadog Syncup Service
committed
Merge branch 'upstream-master'
2 parents 261a10e + 66358fa commit befc071

File tree

183 files changed

+5253
-1587
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

183 files changed

+5253
-1587
lines changed

make/Docs.gmk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ JAVADOC_DISABLED_DOCLINT_PACKAGES := org.w3c.* javax.smartcardio
9898
JAVADOC_OPTIONS := -use -keywords -notimestamp \
9999
-serialwarn -encoding ISO-8859-1 -docencoding UTF-8 -breakiterator \
100100
-splitIndex --system none -javafx --expand-requires transitive \
101-
--override-methods=summary
101+
--override-methods=summary --syntax-highlight
102102

103103
# The reference options must stay stable to allow for comparisons across the
104104
# development cycle.

src/hotspot/cpu/aarch64/aarch64.ad

Lines changed: 193 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//
2-
// Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
2+
// Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
33
// Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved.
44
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
//
@@ -2296,6 +2296,26 @@ bool Matcher::match_rule_supported(int opcode) {
22962296
return false;
22972297
}
22982298
break;
2299+
case Op_FmaHF:
2300+
// FmaHF is supported only when the UseFMA flag is set and FEAT_FP16 is available
2301+
if (!UseFMA || !is_feat_fp16_supported()) {
2302+
return false;
2303+
}
2304+
break;
2305+
case Op_AddHF:
2306+
case Op_SubHF:
2307+
case Op_MulHF:
2308+
case Op_DivHF:
2309+
case Op_MinHF:
2310+
case Op_MaxHF:
2311+
case Op_SqrtHF:
2312+
// Half-precision floating point scalar operations require FEAT_FP16
2313+
// to be available. FEAT_FP16 is enabled if both "fphp" and "asimdhp"
2314+
// features are supported.
2315+
if (!is_feat_fp16_supported()) {
2316+
return false;
2317+
}
2318+
break;
22992319
}
23002320

23012321
return true; // Per default match rules are supported.
@@ -4599,6 +4619,15 @@ operand immF0()
45994619
interface(CONST_INTER);
46004620
%}
46014621

4622+
// Half Float (FP16) Immediate
4623+
operand immH()
4624+
%{
4625+
match(ConH);
4626+
op_cost(0);
4627+
format %{ %}
4628+
interface(CONST_INTER);
4629+
%}
4630+
46024631
//
46034632
operand immFPacked()
46044633
%{
@@ -6942,6 +6971,21 @@ instruct loadConD(vRegD dst, immD con) %{
69426971
ins_pipe(fp_load_constant_d);
69436972
%}
69446973

6974+
// Load Half Float Constant
6975+
// The "ldr" instruction loads a 32-bit word from the constant pool into a
6976+
// 32-bit register, but only the bottom 16 bits are populated and the top
6977+
// 16 bits are zero.
6978+
instruct loadConH(vRegF dst, immH con) %{
6979+
match(Set dst con);
6980+
format %{
6981+
"ldrs $dst, [$constantaddress]\t# load from constant table: half float=$con\n\t"
6982+
%}
6983+
ins_encode %{
6984+
__ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
6985+
%}
6986+
ins_pipe(fp_load_constant_s);
6987+
%}
6988+
69456989
// Store Instructions
69466990

69476991
// Store Byte
@@ -13634,6 +13678,17 @@ instruct bits_reverse_L(iRegLNoSp dst, iRegL src)
1363413678
// ============================================================================
1363513679
// Floating Point Arithmetic Instructions
1363613680

13681+
instruct addHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13682+
match(Set dst (AddHF src1 src2));
13683+
format %{ "faddh $dst, $src1, $src2" %}
13684+
ins_encode %{
13685+
__ faddh($dst$$FloatRegister,
13686+
$src1$$FloatRegister,
13687+
$src2$$FloatRegister);
13688+
%}
13689+
ins_pipe(fp_dop_reg_reg_s);
13690+
%}
13691+
1363713692
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
1363813693
match(Set dst (AddF src1 src2));
1363913694

@@ -13664,6 +13719,17 @@ instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
1366413719
ins_pipe(fp_dop_reg_reg_d);
1366513720
%}
1366613721

13722+
instruct subHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13723+
match(Set dst (SubHF src1 src2));
13724+
format %{ "fsubh $dst, $src1, $src2" %}
13725+
ins_encode %{
13726+
__ fsubh($dst$$FloatRegister,
13727+
$src1$$FloatRegister,
13728+
$src2$$FloatRegister);
13729+
%}
13730+
ins_pipe(fp_dop_reg_reg_s);
13731+
%}
13732+
1366713733
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
1366813734
match(Set dst (SubF src1 src2));
1366913735

@@ -13694,6 +13760,17 @@ instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
1369413760
ins_pipe(fp_dop_reg_reg_d);
1369513761
%}
1369613762

13763+
instruct mulHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13764+
match(Set dst (MulHF src1 src2));
13765+
format %{ "fmulh $dst, $src1, $src2" %}
13766+
ins_encode %{
13767+
__ fmulh($dst$$FloatRegister,
13768+
$src1$$FloatRegister,
13769+
$src2$$FloatRegister);
13770+
%}
13771+
ins_pipe(fp_dop_reg_reg_s);
13772+
%}
13773+
1369713774
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
1369813775
match(Set dst (MulF src1 src2));
1369913776

@@ -13724,6 +13801,20 @@ instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
1372413801
ins_pipe(fp_dop_reg_reg_d);
1372513802
%}
1372613803

13804+
// src1 * src2 + src3 (half-precision float)
13805+
instruct maddHF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
13806+
match(Set dst (FmaHF src3 (Binary src1 src2)));
13807+
format %{ "fmaddh $dst, $src1, $src2, $src3" %}
13808+
ins_encode %{
13809+
assert(UseFMA, "Needs FMA instructions support.");
13810+
__ fmaddh($dst$$FloatRegister,
13811+
$src1$$FloatRegister,
13812+
$src2$$FloatRegister,
13813+
$src3$$FloatRegister);
13814+
%}
13815+
ins_pipe(pipe_class_default);
13816+
%}
13817+
1372713818
// src1 * src2 + src3
1372813819
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
1372913820
match(Set dst (FmaF src3 (Binary src1 src2)));
@@ -13865,6 +13956,29 @@ instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zer
1386513956
ins_pipe(pipe_class_default);
1386613957
%}
1386713958

13959+
// Math.max(HH)H (half-precision float)
13960+
instruct maxHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13961+
match(Set dst (MaxHF src1 src2));
13962+
format %{ "fmaxh $dst, $src1, $src2" %}
13963+
ins_encode %{
13964+
__ fmaxh($dst$$FloatRegister,
13965+
$src1$$FloatRegister,
13966+
$src2$$FloatRegister);
13967+
%}
13968+
ins_pipe(fp_dop_reg_reg_s);
13969+
%}
13970+
13971+
// Math.min(HH)H (half-precision float)
13972+
instruct minHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
13973+
match(Set dst (MinHF src1 src2));
13974+
format %{ "fminh $dst, $src1, $src2" %}
13975+
ins_encode %{
13976+
__ fminh($dst$$FloatRegister,
13977+
$src1$$FloatRegister,
13978+
$src2$$FloatRegister);
13979+
%}
13980+
ins_pipe(fp_dop_reg_reg_s);
13981+
%}
1386813982

1386913983
// Math.max(FF)F
1387013984
instruct maxF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
@@ -13922,6 +14036,16 @@ instruct minD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
1392214036
ins_pipe(fp_dop_reg_reg_d);
1392314037
%}
1392414038

14039+
instruct divHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
14040+
match(Set dst (DivHF src1 src2));
14041+
format %{ "fdivh $dst, $src1, $src2" %}
14042+
ins_encode %{
14043+
__ fdivh($dst$$FloatRegister,
14044+
$src1$$FloatRegister,
14045+
$src2$$FloatRegister);
14046+
%}
14047+
ins_pipe(fp_div_s);
14048+
%}
1392514049

1392614050
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
1392714051
match(Set dst (DivF src1 src2));
@@ -14095,6 +14219,16 @@ instruct sqrtF_reg(vRegF dst, vRegF src) %{
1409514219
ins_pipe(fp_div_d);
1409614220
%}
1409714221

14222+
instruct sqrtHF_reg(vRegF dst, vRegF src) %{
14223+
match(Set dst (SqrtHF src));
14224+
format %{ "fsqrth $dst, $src" %}
14225+
ins_encode %{
14226+
__ fsqrth($dst$$FloatRegister,
14227+
$src$$FloatRegister);
14228+
%}
14229+
ins_pipe(fp_div_s);
14230+
%}
14231+
1409814232
// Math.rint, floor, ceil
1409914233
instruct roundD_reg(vRegD dst, vRegD src, immI rmode) %{
1410014234
match(Set dst (RoundDoubleMode src rmode));
@@ -17144,6 +17278,64 @@ instruct expandBitsL_memcon(iRegINoSp dst, memory8 mem, immL mask,
1714417278
ins_pipe(pipe_slow);
1714517279
%}
1714617280

17281+
//----------------------------- Reinterpret ----------------------------------
17282+
// Reinterpret a half-precision float value in a floating point register to a general purpose register
17283+
instruct reinterpretHF2S(iRegINoSp dst, vRegF src) %{
17284+
match(Set dst (ReinterpretHF2S src));
17285+
format %{ "reinterpretHF2S $dst, $src" %}
17286+
ins_encode %{
17287+
__ smov($dst$$Register, $src$$FloatRegister, __ H, 0);
17288+
%}
17289+
ins_pipe(pipe_slow);
17290+
%}
17291+
17292+
// Reinterpret a half-precision float value in a general purpose register to a floating point register
17293+
instruct reinterpretS2HF(vRegF dst, iRegINoSp src) %{
17294+
match(Set dst (ReinterpretS2HF src));
17295+
format %{ "reinterpretS2HF $dst, $src" %}
17296+
ins_encode %{
17297+
__ mov($dst$$FloatRegister, __ H, 0, $src$$Register);
17298+
%}
17299+
ins_pipe(pipe_slow);
17300+
%}
17301+
17302+
// Without this optimization, ReinterpretS2HF (ConvF2HF src) would result in the following
17303+
// instructions (the first two are for ConvF2HF and the last instruction is for ReinterpretS2HF) -
17304+
// fcvt $tmp1_fpr, $src_fpr // Convert float to half-precision float
17305+
// mov $tmp2_gpr, $tmp1_fpr // Move half-precision float in FPR to a GPR
17306+
// mov $dst_fpr, $tmp2_gpr // Move the result from a GPR to an FPR
17307+
// The move from FPR to GPR in ConvF2HF and the move from GPR to FPR in ReinterpretS2HF
17308+
// can be omitted in this pattern, resulting in -
17309+
// fcvt $dst, $src // Convert float to half-precision float
17310+
instruct convF2HFAndS2HF(vRegF dst, vRegF src)
17311+
%{
17312+
match(Set dst (ReinterpretS2HF (ConvF2HF src)));
17313+
format %{ "convF2HFAndS2HF $dst, $src" %}
17314+
ins_encode %{
17315+
__ fcvtsh($dst$$FloatRegister, $src$$FloatRegister);
17316+
%}
17317+
ins_pipe(pipe_slow);
17318+
%}
17319+
17320+
// Without this optimization, ConvHF2F (ReinterpretHF2S src) would result in the following
17321+
// instructions (the first one is for ReinterpretHF2S and the last two are for ConvHF2F) -
17322+
// mov $tmp1_gpr, $src_fpr // Move the half-precision float from an FPR to a GPR
17323+
// mov $tmp2_fpr, $tmp1_gpr // Move the same value from GPR to an FPR
17324+
// fcvt $dst_fpr, $tmp2_fpr // Convert the half-precision float to 32-bit float
17325+
// The move from FPR to GPR in ReinterpretHF2S and the move from GPR to FPR in ConvHF2F
17326+
// can be omitted because the input (src) is already in an FPR, as required by the fcvths instruction,
17327+
// resulting in -
17328+
// fcvt $dst, $src // Convert half-precision float to a 32-bit float
17329+
instruct convHF2SAndHF2F(vRegF dst, vRegF src)
17330+
%{
17331+
match(Set dst (ConvHF2F (ReinterpretHF2S src)));
17332+
format %{ "convHF2SAndHF2F $dst, $src" %}
17333+
ins_encode %{
17334+
__ fcvths($dst$$FloatRegister, $src$$FloatRegister);
17335+
%}
17336+
ins_pipe(pipe_slow);
17337+
%}
17338+
1714717339
// ============================================================================
1714817340
// This name is KNOWN by the ADLC and cannot be changed.
1714917341
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type

src/hotspot/cpu/aarch64/assembler_aarch64.hpp

Lines changed: 57 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,6 +2032,8 @@ void mvnw(Register Rd, Register Rm,
20322032
INSN(fsqrtd, 0b01, 0b000011);
20332033
INSN(fcvtd, 0b01, 0b000100); // Double-precision to single-precision
20342034

2035+
INSN(fsqrth, 0b11, 0b000011); // Half-precision sqrt
2036+
20352037
private:
20362038
void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
20372039
FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
@@ -2059,37 +2061,68 @@ void mvnw(Register Rd, Register Rm,
20592061
#undef INSN
20602062

20612063
// Floating-point data-processing (2 source)
2062-
void data_processing(unsigned op31, unsigned type, unsigned opcode,
2064+
void data_processing(unsigned op31, unsigned type, unsigned opcode, unsigned op21,
20632065
FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {
20642066
starti;
20652067
f(op31, 31, 29);
20662068
f(0b11110, 28, 24);
2067-
f(type, 23, 22), f(1, 21), f(opcode, 15, 10);
2069+
f(type, 23, 22), f(op21, 21), f(opcode, 15, 10);
20682070
rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
20692071
}
20702072

2071-
#define INSN(NAME, op31, type, opcode) \
2073+
#define INSN(NAME, op31, type, opcode, op21) \
20722074
void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { \
2073-
data_processing(op31, type, opcode, Vd, Vn, Vm); \
2074-
}
2075-
2076-
INSN(fabds, 0b011, 0b10, 0b110101);
2077-
INSN(fmuls, 0b000, 0b00, 0b000010);
2078-
INSN(fdivs, 0b000, 0b00, 0b000110);
2079-
INSN(fadds, 0b000, 0b00, 0b001010);
2080-
INSN(fsubs, 0b000, 0b00, 0b001110);
2081-
INSN(fmaxs, 0b000, 0b00, 0b010010);
2082-
INSN(fmins, 0b000, 0b00, 0b010110);
2083-
INSN(fnmuls, 0b000, 0b00, 0b100010);
2084-
2085-
INSN(fabdd, 0b011, 0b11, 0b110101);
2086-
INSN(fmuld, 0b000, 0b01, 0b000010);
2087-
INSN(fdivd, 0b000, 0b01, 0b000110);
2088-
INSN(faddd, 0b000, 0b01, 0b001010);
2089-
INSN(fsubd, 0b000, 0b01, 0b001110);
2090-
INSN(fmaxd, 0b000, 0b01, 0b010010);
2091-
INSN(fmind, 0b000, 0b01, 0b010110);
2092-
INSN(fnmuld, 0b000, 0b01, 0b100010);
2075+
data_processing(op31, type, opcode, op21, Vd, Vn, Vm); \
2076+
}
2077+
2078+
INSN(fmuls, 0b000, 0b00, 0b000010, 0b1);
2079+
INSN(fdivs, 0b000, 0b00, 0b000110, 0b1);
2080+
INSN(fadds, 0b000, 0b00, 0b001010, 0b1);
2081+
INSN(fsubs, 0b000, 0b00, 0b001110, 0b1);
2082+
INSN(fmaxs, 0b000, 0b00, 0b010010, 0b1);
2083+
INSN(fmins, 0b000, 0b00, 0b010110, 0b1);
2084+
INSN(fnmuls, 0b000, 0b00, 0b100010, 0b1);
2085+
2086+
INSN(fmuld, 0b000, 0b01, 0b000010, 0b1);
2087+
INSN(fdivd, 0b000, 0b01, 0b000110, 0b1);
2088+
INSN(faddd, 0b000, 0b01, 0b001010, 0b1);
2089+
INSN(fsubd, 0b000, 0b01, 0b001110, 0b1);
2090+
INSN(fmaxd, 0b000, 0b01, 0b010010, 0b1);
2091+
INSN(fmind, 0b000, 0b01, 0b010110, 0b1);
2092+
INSN(fnmuld, 0b000, 0b01, 0b100010, 0b1);
2093+
2094+
// Half-precision floating-point instructions
2095+
INSN(fmulh, 0b000, 0b11, 0b000010, 0b1);
2096+
INSN(fdivh, 0b000, 0b11, 0b000110, 0b1);
2097+
INSN(faddh, 0b000, 0b11, 0b001010, 0b1);
2098+
INSN(fsubh, 0b000, 0b11, 0b001110, 0b1);
2099+
INSN(fmaxh, 0b000, 0b11, 0b010010, 0b1);
2100+
INSN(fminh, 0b000, 0b11, 0b010110, 0b1);
2101+
INSN(fnmulh, 0b000, 0b11, 0b100010, 0b1);
2102+
#undef INSN
2103+
2104+
// Advanced SIMD scalar three same
2105+
#define INSN(NAME, U, size, opcode) \
2106+
void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { \
2107+
starti; \
2108+
f(0b01, 31, 30), f(U, 29), f(0b11110, 28, 24), f(size, 23, 22), f(1, 21); \
2109+
rf(Vm, 16), f(opcode, 15, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0); \
2110+
}
2111+
2112+
INSN(fabds, 0b1, 0b10, 0b11010); // Floating-point Absolute Difference (single-precision)
2113+
INSN(fabdd, 0b1, 0b11, 0b11010); // Floating-point Absolute Difference (double-precision)
2114+
2115+
#undef INSN
2116+
2117+
// Advanced SIMD scalar three same FP16
2118+
#define INSN(NAME, U, a, opcode) \
2119+
void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) { \
2120+
starti; \
2121+
f(0b01, 31, 30), f(U, 29), f(0b11110, 28, 24), f(a, 23), f(0b10, 22, 21); \
2122+
rf(Vm, 16), f(0b00, 15, 14), f(opcode, 13, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0); \
2123+
}
2124+
2125+
INSN(fabdh, 0b1, 0b1, 0b010); // Floating-point Absolute Difference (half-precision float)
20932126

20942127
#undef INSN
20952128

@@ -2120,6 +2153,7 @@ void mvnw(Register Rd, Register Rm,
21202153
INSN(fnmaddd, 0b000, 0b01, 1, 0);
21212154
INSN(fnmsub, 0b000, 0b01, 1, 1);
21222155

2156+
INSN(fmaddh, 0b000, 0b11, 0, 0); // half-precision fused multiply-add (scalar)
21232157
#undef INSN
21242158

21252159
// Floating-point conditional select

0 commit comments

Comments
 (0)