Skip to content

Commit ba713fc

Browse files
author
git apple-llvm automerger
committed
Merge commit 'a587ccd87d6f' from llvm.org/main into next
2 parents: 81d1bfd + a587ccd; commit: ba713fc

19 files changed

+906
-646
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Lines changed: 22 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3510,6 +3510,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
35103510
SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
35113511
SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1));
35123512
SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2));
3513+
SDNodeFlags Flags = N->getFlags();
35133514
SDLoc dl(N);
35143515

35153516
// Promote to the larger FP type.
@@ -3518,9 +3519,28 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
35183519
Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1);
35193520
Op2 = DAG.getNode(PromotionOpcode, dl, NVT, Op2);
35203521

3521-
SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2);
3522+
SDValue Res;
3523+
if (OVT == MVT::f16) {
3524+
// If f16 fma is not natively supported, the value must be promoted to an
3525+
// f64 (and not to f32!) to prevent double rounding issues.
3526+
SDValue A64 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Op0, Flags);
3527+
SDValue B64 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Op1, Flags);
3528+
SDValue C64 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Op2, Flags);
3529+
3530+
// Prefer a wide FMA node if available; otherwise expand to mul+add.
3531+
SDValue WideRes;
3532+
if (TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), MVT::f64)) {
3533+
WideRes = DAG.getNode(ISD::FMA, dl, MVT::f64, A64, B64, C64, Flags);
3534+
} else {
3535+
SDValue Mul = DAG.getNode(ISD::FMUL, dl, MVT::f64, A64, B64, Flags);
3536+
WideRes = DAG.getNode(ISD::FADD, dl, MVT::f64, Mul, C64, Flags);
3537+
}
35223538

3523-
// Convert back to FP16 as an integer.
3539+
return DAG.getNode(GetPromotionOpcode(MVT::f64, OVT), dl, MVT::i16,
3540+
WideRes);
3541+
}
3542+
3543+
Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2, Flags);
35243544
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
35253545
}
35263546

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1041,6 +1041,11 @@ void TargetLoweringBase::initActions() {
10411041
}
10421042
}
10431043

1044+
// If f16 fma is not natively supported, the value must be promoted to an f64
1045+
// (and not to f32!) to prevent double rounding issues.
1046+
AddPromotedToType(ISD::FMA, MVT::f16, MVT::f64);
1047+
AddPromotedToType(ISD::STRICT_FMA, MVT::f16, MVT::f64);
1048+
10441049
// Set default actions for various operations.
10451050
for (MVT VT : MVT::all_valuetypes()) {
10461051
// Default all indexed load / store to expand.

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -570,6 +570,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
570570
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, XLenVT, Custom);
571571
}
572572

573+
if (!Subtarget.hasStdExtD()) {
574+
// FIXME: handle f16 fma when f64 is not legal. Using an f32 fma
575+
// instruction runs into double rounding issues, so this is wrong.
576+
// Normally we'd use an f64 fma, but without the D extension the f64 type
577+
// is not legal. This should probably be a libcall.
578+
AddPromotedToType(ISD::FMA, MVT::f16, MVT::f32);
579+
AddPromotedToType(ISD::STRICT_FMA, MVT::f16, MVT::f32);
580+
}
581+
573582
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
574583

575584
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);

llvm/test/CodeGen/AArch64/f16-instructions.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1378,11 +1378,11 @@ define half @test_log2(half %a) #0 {
13781378
define half @test_fma(half %a, half %b, half %c) #0 {
13791379
; CHECK-CVT-SD-LABEL: test_fma:
13801380
; CHECK-CVT-SD: // %bb.0:
1381-
; CHECK-CVT-SD-NEXT: fcvt s2, h2
1382-
; CHECK-CVT-SD-NEXT: fcvt s1, h1
1383-
; CHECK-CVT-SD-NEXT: fcvt s0, h0
1384-
; CHECK-CVT-SD-NEXT: fmadd s0, s0, s1, s2
1385-
; CHECK-CVT-SD-NEXT: fcvt h0, s0
1381+
; CHECK-CVT-SD-NEXT: fcvt d2, h2
1382+
; CHECK-CVT-SD-NEXT: fcvt d1, h1
1383+
; CHECK-CVT-SD-NEXT: fcvt d0, h0
1384+
; CHECK-CVT-SD-NEXT: fmadd d0, d0, d1, d2
1385+
; CHECK-CVT-SD-NEXT: fcvt h0, d0
13861386
; CHECK-CVT-SD-NEXT: ret
13871387
;
13881388
; CHECK-FP16-LABEL: test_fma:

0 commit comments

Comments
 (0)