DataDog
diff --git a/make/Docs.gmk b/make/Docs.gmk
Lines changed: 1 addition & 1 deletion
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
Lines changed: 193 additions & 1 deletion
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
Lines changed: 57 additions & 23 deletions
@@ -98,7 +98,7 @@ JAVADOC_DISABLED_DOCLINT_PACKAGES := org.w3c.* javax.smartcardio
 JAVADOC_OPTIONS := -use -keywords -notimestamp \
     -serialwarn -encoding ISO-8859-1 -docencoding UTF-8 -breakiterator \
     -splitIndex --system none -javafx --expand-requires transitive \
-    --override-methods=summary
+    --override-methods=summary --syntax-highlight
 
 # The reference options must stay stable to allow for comparisons across the
 # development cycle.
 
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
 // Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
@@ -2296,6 +2296,26 @@ bool Matcher::match_rule_supported(int opcode) {
         return false;
       }
       break;
+    case Op_FmaHF:
+      // UseFMA flag also needs to be checked along with FEAT_FP16
+      if (!UseFMA || !is_feat_fp16_supported()) {
+        return false;
+      }
+      break;
+    case Op_AddHF:
+    case Op_SubHF:
+    case Op_MulHF:
+    case Op_DivHF:
+    case Op_MinHF:
+    case Op_MaxHF:
+    case Op_SqrtHF:
+      // Half-precision floating point scalar operations require FEAT_FP16
+      // to be available. FEAT_FP16 is enabled if both "fphp" and "asimdhp"
+      // features are supported.
+      if (!is_feat_fp16_supported()) {
+        return false;
+      }
+      break;
   }
 
   return true; // Per default match rules are supported.
@@ -4599,6 +4619,15 @@ operand immF0()
   interface(CONST_INTER);
 %}
 
+// Half Float (FP16) Immediate: constant operand that matches a ConH node
+operand immH()
+%{
+  match(ConH);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 //
 operand immFPacked()
 %{
@@ -6942,6 +6971,21 @@ instruct loadConD(vRegD dst, immD con) %{
   ins_pipe(fp_load_constant_d);
 %}
 
+// Load Half Float (FP16) Constant
+// The "ldrs" instruction loads a 32-bit word from the constant table into a
+// 32-bit register, but only the bottom half is populated with the half-float
+// value and the top 16 bits are zero.
+instruct loadConH(vRegF dst, immH con) %{
+  match(Set dst con);
+  format %{
+    "ldrs $dst, [$constantaddress]\t# load from constant table: half float=$con\n\t"
+  %}
+  ins_encode %{
+    __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));  // source address is the constant-table entry for the immH operand
+  %}
+  ins_pipe(fp_load_constant_s);
+%}
+
 // Store Instructions
 
 // Store Byte
@@ -13634,6 +13678,17 @@ instruct bits_reverse_L(iRegLNoSp dst, iRegL src)
 // ============================================================================
 // Floating Point Arithmetic Instructions
 
+instruct addHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (AddHF src1 src2));  // half-precision (FP16) scalar add; all three operands use the vRegF operand class
+  format %{ "faddh $dst, $src1, $src2" %}
+  ins_encode %{
+    __ faddh($dst$$FloatRegister,
+             $src1$$FloatRegister,
+             $src2$$FloatRegister);
+  %}
+  ins_pipe(fp_dop_reg_reg_s);  // NOTE(review): presumably reuses the single-precision pipe class for FP16 - confirm
+%}
+
 instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
   match(Set dst (AddF src1 src2));
 
@@ -13664,6 +13719,17 @@ instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
   ins_pipe(fp_dop_reg_reg_d);
 %}
 
+instruct subHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (SubHF src1 src2));  // half-precision (FP16) scalar subtract; emitted as fsubh dst, src1, src2
+  format %{ "fsubh $dst, $src1, $src2" %}
+  ins_encode %{
+    __ fsubh($dst$$FloatRegister,
+             $src1$$FloatRegister,
+             $src2$$FloatRegister);
+  %}
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
 instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
   match(Set dst (SubF src1 src2));
 
@@ -13694,6 +13760,17 @@ instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
   ins_pipe(fp_dop_reg_reg_d);
 %}
 
+instruct mulHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (MulHF src1 src2));  // half-precision (FP16) scalar multiply; emitted as fmulh dst, src1, src2
+  format %{ "fmulh $dst, $src1, $src2" %}
+  ins_encode %{
+    __ fmulh($dst$$FloatRegister,
+             $src1$$FloatRegister,
+             $src2$$FloatRegister);
+  %}
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
 instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
   match(Set dst (MulF src1 src2));
 
@@ -13724,6 +13801,20 @@ instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
   ins_pipe(fp_dop_reg_reg_d);
 %}
 
+// Fused multiply-add: src1 * src2 + src3 (half-precision float)
+instruct maddHF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
+  match(Set dst (FmaHF src3 (Binary src1 src2)));  // the addend src3 is the first FmaHF input; the two factors are wrapped in a Binary node
+  format %{ "fmaddh $dst, $src1, $src2, $src3" %}
+  ins_encode %{
+    assert(UseFMA, "Needs FMA instructions support.");  // match_rule_supported() already rejects Op_FmaHF when UseFMA is off
+    __ fmaddh($dst$$FloatRegister,
+              $src1$$FloatRegister,
+              $src2$$FloatRegister,
+              $src3$$FloatRegister);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // src1 * src2 + src3
 instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
   match(Set dst (FmaF src3 (Binary src1 src2)));
@@ -13865,6 +13956,29 @@ instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zer
   ins_pipe(pipe_class_default);
 %}
 
+// Math.max(HH)H (half-precision float): maps a MaxHF node to a single fmaxh
+instruct maxHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (MaxHF src1 src2));
+  format %{ "fmaxh $dst, $src1, $src2" %}
+  ins_encode %{
+    __ fmaxh($dst$$FloatRegister,
+             $src1$$FloatRegister,
+             $src2$$FloatRegister);
+  %}
+  ins_pipe(fp_dop_reg_reg_s);
+%}
+
+// Math.min(HH)H (half-precision float): maps a MinHF node to a single fminh
+instruct minHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (MinHF src1 src2));
+  format %{ "fminh $dst, $src1, $src2" %}
+  ins_encode %{
+    __ fminh($dst$$FloatRegister,
+             $src1$$FloatRegister,
+             $src2$$FloatRegister);
+  %}
+  ins_pipe(fp_dop_reg_reg_s);
+%}
 
 // Math.max(FF)F
 instruct maxF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
@@ -13922,6 +14036,16 @@ instruct minD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
   ins_pipe(fp_dop_reg_reg_d);
 %}
 
+instruct divHF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
+  match(Set dst (DivHF src1  src2));  // half-precision (FP16) scalar divide; emitted as fdivh dst, src1, src2
+  format %{ "fdivh $dst, $src1, $src2" %}
+  ins_encode %{
+    __ fdivh($dst$$FloatRegister,
+             $src1$$FloatRegister,
+             $src2$$FloatRegister);
+  %}
+  ins_pipe(fp_div_s);
+%}
 
 instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
   match(Set dst (DivF src1  src2));
@@ -14095,6 +14219,16 @@ instruct sqrtF_reg(vRegF dst, vRegF src) %{
   ins_pipe(fp_div_d);
 %}
 
+instruct sqrtHF_reg(vRegF dst, vRegF src) %{
+  match(Set dst (SqrtHF src));  // half-precision (FP16) scalar square root; emitted as fsqrth dst, src
+  format %{ "fsqrth $dst, $src" %}
+  ins_encode %{
+    __ fsqrth($dst$$FloatRegister,
+              $src$$FloatRegister);
+  %}
+  ins_pipe(fp_div_s);  // same pipe class as divHF_reg_reg
+%}
+
 // Math.rint, floor, ceil
 instruct roundD_reg(vRegD dst, vRegD src, immI rmode) %{
   match(Set dst (RoundDoubleMode src rmode));
@@ -17144,6 +17278,64 @@ instruct expandBitsL_memcon(iRegINoSp dst, memory8 mem, immL mask,
   ins_pipe(pipe_slow);
 %}
 
+//----------------------------- Reinterpret ----------------------------------
+// Reinterpret a half-precision float value in a floating point register to a general purpose register
+instruct reinterpretHF2S(iRegINoSp dst, vRegF src) %{
+  match(Set dst (ReinterpretHF2S src));
+  format %{ "reinterpretHF2S $dst, $src" %}
+  ins_encode %{
+    __ smov($dst$$Register, $src$$FloatRegister, __ H, 0);  // moves the H-sized element at lane 0 into the GPR; SMOV is the signed (sign-extending) element move in the Arm ISA
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// Reinterpret a half-precision float value in a general purpose register to a floating point register
+instruct reinterpretS2HF(vRegF dst, iRegINoSp src) %{
+  match(Set dst (ReinterpretS2HF src));
+  format %{ "reinterpretS2HF $dst, $src" %}
+  ins_encode %{
+    __ mov($dst$$FloatRegister, __ H, 0, $src$$Register);  // writes the GPR value into the H-sized element at lane 0 of dst
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// Without this optimization, ReinterpretS2HF (ConvF2HF src) would result in the following
+// instructions (the first two are for ConvF2HF and the last instruction is for ReinterpretS2HF) -
+// fcvt $tmp1_fpr, $src_fpr    // Convert float to half-precision float
+// mov  $tmp2_gpr, $tmp1_fpr   // Move half-precision float in FPR to a GPR
+// mov  $dst_fpr,  $tmp2_gpr   // Move the result from a GPR to an FPR
+// The move from FPR to GPR in ConvF2HF and the move from GPR to FPR in ReinterpretS2HF
+// can be omitted in this pattern, resulting in -
+// fcvt $dst, $src  // Convert float to half-precision float
+instruct convF2HFAndS2HF(vRegF dst, vRegF src)
+%{
+  match(Set dst (ReinterpretS2HF (ConvF2HF src)));  // fuses the two-node subtree so the value never leaves the FP registers
+  format %{ "convF2HFAndS2HF $dst, $src" %}
+  ins_encode %{
+    __ fcvtsh($dst$$FloatRegister, $src$$FloatRegister);  // the "fcvt" of the comment above: float source, half-precision result
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// Without this optimization, ConvHF2F (ReinterpretHF2S src) would result in the following
+// instructions (the first one is for ReinterpretHF2S and the last two are for ConvHF2F) -
+// mov  $tmp1_gpr, $src_fpr  // Move the half-precision float from an FPR to a GPR
+// mov  $tmp2_fpr, $tmp1_gpr // Move the same value from GPR to an FPR
+// fcvt $dst_fpr,  $tmp2_fpr // Convert the half-precision float to 32-bit float
+// The move from FPR to GPR in ReinterpretHF2S and the move from GPR to FPR in ConvHF2F
+// can be omitted as the input (src) is already in an FPR required for the fcvths instruction
+// resulting in -
+// fcvt $dst, $src  // Convert half-precision float to a 32-bit float
+instruct convHF2SAndHF2F(vRegF dst, vRegF src)
+%{
+  match(Set dst (ConvHF2F (ReinterpretHF2S src)));  // fuses the two-node subtree so the value never leaves the FP registers
+  format %{ "convHF2SAndHF2F $dst, $src" %}
+  ins_encode %{
+    __ fcvths($dst$$FloatRegister, $src$$FloatRegister);  // the "fcvt" of the comment above: half-precision source, float result
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // ============================================================================
 // This name is KNOWN by the ADLC and cannot be changed.
 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
 
@@ -2032,6 +2032,8 @@ void mvnw(Register Rd, Register Rm,
   INSN(fsqrtd, 0b01, 0b000011);
   INSN(fcvtd,  0b01, 0b000100);   // Double-precision to single-precision
 
+  INSN(fsqrth, 0b11, 0b000011);   // Half-precision sqrt
+
 private:
   void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
                            FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
@@ -2059,37 +2061,68 @@ void mvnw(Register Rd, Register Rm,
 #undef INSN
 
   // Floating-point data-processing (2 source)
-  void data_processing(unsigned op31, unsigned type, unsigned opcode,
+  void data_processing(unsigned op31, unsigned type, unsigned opcode, unsigned op21,
                        FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {
     starti;
     f(op31, 31, 29);
     f(0b11110, 28, 24);
-    f(type, 23, 22), f(1, 21), f(opcode, 15, 10);
+    f(type, 23, 22), f(op21, 21), f(opcode, 15, 10);  // bit 21 now comes from the new op21 parameter instead of the literal 1
     rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
   }
 
-#define INSN(NAME, op31, type, opcode)                  \
+#define INSN(NAME, op31, type, opcode, op21)                            \
   void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {     \
-    data_processing(op31, type, opcode, Vd, Vn, Vm);    \
-  }
-
-  INSN(fabds,  0b011, 0b10, 0b110101);
-  INSN(fmuls,  0b000, 0b00, 0b000010);
-  INSN(fdivs,  0b000, 0b00, 0b000110);
-  INSN(fadds,  0b000, 0b00, 0b001010);
-  INSN(fsubs,  0b000, 0b00, 0b001110);
-  INSN(fmaxs,  0b000, 0b00, 0b010010);
-  INSN(fmins,  0b000, 0b00, 0b010110);
-  INSN(fnmuls, 0b000, 0b00, 0b100010);
-
-  INSN(fabdd,  0b011, 0b11, 0b110101);
-  INSN(fmuld,  0b000, 0b01, 0b000010);
-  INSN(fdivd,  0b000, 0b01, 0b000110);
-  INSN(faddd,  0b000, 0b01, 0b001010);
-  INSN(fsubd,  0b000, 0b01, 0b001110);
-  INSN(fmaxd,  0b000, 0b01, 0b010010);
-  INSN(fmind,  0b000, 0b01, 0b010110);
-  INSN(fnmuld, 0b000, 0b01, 0b100010);
+    data_processing(op31, type, opcode, op21, Vd, Vn, Vm);              \
+  }
+
+  INSN(fmuls,  0b000, 0b00, 0b000010, 0b1);  // single-precision group: type = 0b00
+  INSN(fdivs,  0b000, 0b00, 0b000110, 0b1);
+  INSN(fadds,  0b000, 0b00, 0b001010, 0b1);
+  INSN(fsubs,  0b000, 0b00, 0b001110, 0b1);
+  INSN(fmaxs,  0b000, 0b00, 0b010010, 0b1);
+  INSN(fmins,  0b000, 0b00, 0b010110, 0b1);
+  INSN(fnmuls, 0b000, 0b00, 0b100010, 0b1);
+
+  INSN(fmuld,  0b000, 0b01, 0b000010, 0b1);  // double-precision group: type = 0b01
+  INSN(fdivd,  0b000, 0b01, 0b000110, 0b1);
+  INSN(faddd,  0b000, 0b01, 0b001010, 0b1);
+  INSN(fsubd,  0b000, 0b01, 0b001110, 0b1);
+  INSN(fmaxd,  0b000, 0b01, 0b010010, 0b1);
+  INSN(fmind,  0b000, 0b01, 0b010110, 0b1);
+  INSN(fnmuld, 0b000, 0b01, 0b100010, 0b1);
+
+  // Half-precision floating-point instructions (type = 0b11; op21 is 0b1 as in the groups above)
+  INSN(fmulh,  0b000, 0b11, 0b000010, 0b1);
+  INSN(fdivh,  0b000, 0b11, 0b000110, 0b1);
+  INSN(faddh,  0b000, 0b11, 0b001010, 0b1);
+  INSN(fsubh,  0b000, 0b11, 0b001110, 0b1);
+  INSN(fmaxh,  0b000, 0b11, 0b010010, 0b1);
+  INSN(fminh,  0b000, 0b11, 0b010110, 0b1);
+  INSN(fnmulh, 0b000, 0b11, 0b100010, 0b1);
+#undef INSN  // fabds/fabdd are no longer generated by this data_processing-based macro
+
+// Advanced SIMD scalar three same: U -> bit 29, size -> bits 23:22, opcode -> bits 15:11; bits 21 and 10 are fixed to 1
+#define INSN(NAME, U, size, opcode)                                                     \
+  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {                     \
+    starti;                                                                             \
+    f(0b01, 31, 30), f(U, 29), f(0b11110, 28, 24), f(size, 23, 22), f(1, 21);           \
+    rf(Vm, 16), f(opcode, 15, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);                      \
+  }
+
+  INSN(fabds, 0b1, 0b10, 0b11010); // Floating-point Absolute Difference (single-precision)
+  INSN(fabdd, 0b1, 0b11, 0b11010); // Floating-point Absolute Difference (double-precision)
+
+#undef INSN
+
+// Advanced SIMD scalar three same FP16: U -> bit 29, a -> bit 23, opcode -> bits 13:11; bits 22:21 = 0b10, 15:14 = 0b00, 10 = 1
+#define INSN(NAME, U, a, opcode)                                                       \
+  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {                    \
+    starti;                                                                            \
+    f(0b01, 31, 30), f(U, 29), f(0b11110, 28, 24), f(a, 23), f(0b10, 22, 21);          \
+    rf(Vm, 16), f(0b00, 15, 14), f(opcode, 13, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);    \
+  }
+
+  INSN(fabdh, 0b1, 0b1, 0b010); // Floating-point Absolute Difference (half-precision float)
 
 #undef INSN
 
@@ -2120,6 +2153,7 @@ void mvnw(Register Rd, Register Rm,
   INSN(fnmaddd, 0b000, 0b01, 1, 0);
   INSN(fnmsub,  0b000, 0b01, 1, 1);
 
+  INSN(fmaddh,  0b000, 0b11, 0, 0);  // half-precision fused multiply-add (scalar)
 #undef INSN
 
    // Floating-point conditional select