Skip to content

Commit 3a15454

Browse files
committed
[CIR][AMDGPU] Add lowering for amdgcn div fmas builtins
1 parent 43283fa commit 3a15454

File tree

3 files changed

+52
-1
lines changed

3 files changed

+52
-1
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,16 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
116116
}
117117
case AMDGPU::BI__builtin_amdgcn_div_fmas:
118118
case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
119-
llvm_unreachable("div_fmas_* NYI");
119+
mlir::Value src0 = emitScalarExpr(expr->getArg(0));
120+
mlir::Value src1 = emitScalarExpr(expr->getArg(1));
121+
mlir::Value src2 = emitScalarExpr(expr->getArg(2));
122+
mlir::Value src3 = emitScalarExpr(expr->getArg(3));
123+
mlir::Value result =
124+
LLVMIntrinsicCallOp::create(builder, getLoc(expr->getExprLoc()),
125+
builder.getStringAttr("amdgcn.div.fmas"),
126+
src0.getType(), {src0, src1, src2, src3})
127+
.getResult();
128+
return result;
120129
}
121130
case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
122131
case AMDGPU::BI__builtin_amdgcn_mov_dpp8:

clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -255,3 +255,23 @@ __device__ void test_div_scale_f32_with_ptr(float* out, int* flagout, bool* flag
255255
{
256256
*out = __builtin_amdgcn_div_scalef(a, b, true, flag);
257257
}
258+
259+
// CIR-LABEL: @_Z17test_div_fmas_f32Pdfffi
260+
// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.float, !cir.float, !cir.float, !cir.bool) -> !cir.float
261+
// LLVM: define{{.*}} void @_Z17test_div_fmas_f32Pdfffi
262+
// LLVM: call float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}})
263+
// OGCG: define{{.*}} void @_Z17test_div_fmas_f32Pdfffi
264+
// OGCG: call {{.*}} float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}})
265+
__device__ void test_div_fmas_f32(double* out, float a, float b, float c, int d) {
266+
*out = __builtin_amdgcn_div_fmasf(a, b, c, d);
267+
}
268+
269+
// CIR-LABEL: @_Z17test_div_fmas_f64Pddddi
270+
// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.double, !cir.double, !cir.double, !cir.bool) -> !cir.double
271+
// LLVM: define{{.*}} void @_Z17test_div_fmas_f64Pddddi
272+
// LLVM: call double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}})
273+
// OGCG: define{{.*}} void @_Z17test_div_fmas_f64Pddddi
274+
// OGCG: call {{.*}} double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}})
275+
__device__ void test_div_fmas_f64(double* out, double a, double b, double c, int d) {
276+
*out = __builtin_amdgcn_div_fmas(a, b, c, d);
277+
}

clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -266,3 +266,25 @@ void test_div_scale_f32_generic_ptr(global float* out, global int* flagout, floa
266266
{
267267
*out = __builtin_amdgcn_div_scalef(a, b, true, flag);
268268
}
269+
270+
// CIR-LABEL: @test_div_fmas_f32
271+
// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.float, !cir.float, !cir.float, !cir.bool) -> !cir.float
272+
// LLVM: define{{.*}} void @test_div_fmas_f32
273+
// LLVM: call float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}})
274+
// OGCG: define{{.*}} void @test_div_fmas_f32
275+
// OGCG: call float @llvm.amdgcn.div.fmas.f32(float %{{.+}}, float %{{.+}}, float %{{.+}}, i1 %{{.*}})
276+
void test_div_fmas_f32(global float* out, float a, float b, float c, int d)
277+
{
278+
*out = __builtin_amdgcn_div_fmasf(a, b, c, d);
279+
}
280+
281+
// CIR-LABEL: @test_div_fmas_f64
282+
// CIR: cir.llvm.intrinsic "amdgcn.div.fmas" {{.*}} : (!cir.double, !cir.double, !cir.double, !cir.bool) -> !cir.double
283+
// LLVM: define{{.*}} void @test_div_fmas_f64
284+
// LLVM: call double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}})
285+
// OGCG: define{{.*}} void @test_div_fmas_f64
286+
// OGCG: call double @llvm.amdgcn.div.fmas.f64(double %{{.+}}, double %{{.+}}, double %{{.+}}, i1 %{{.*}})
287+
void test_div_fmas_f64(global double* out, double a, double b, double c, int d)
288+
{
289+
*out = __builtin_amdgcn_div_fmas(a, b, c, d);
290+
}

0 commit comments

Comments
 (0)