diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp index 26ef3e25e50f..21586acde738 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp @@ -98,9 +98,17 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId, llvm_unreachable("mov_dpp_* NYI"); } case AMDGPU::BI__builtin_amdgcn_permlane16: - case AMDGPU::BI__builtin_amdgcn_permlanex16: + case AMDGPU::BI__builtin_amdgcn_permlanex16: { + llvm::StringRef intrinsicName = + builtinId == AMDGPU::BI__builtin_amdgcn_permlane16 + ? "amdgcn.permlane16" + : "amdgcn.permlanex16"; + return emitBuiltinWithOneOverloadedType<6>(expr, intrinsicName) + .getScalarVal(); + } case AMDGPU::BI__builtin_amdgcn_permlane64: { - llvm_unreachable("permlane_* NYI"); + return emitBuiltinWithOneOverloadedType<1>(expr, "amdgcn.permlane64") + .getScalarVal(); } case AMDGPU::BI__builtin_amdgcn_readlane: case AMDGPU::BI__builtin_amdgcn_readfirstlane: { diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-gfx10.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-gfx10.hip new file mode 100644 index 000000000000..ccfe83381cbb --- /dev/null +++ b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-gfx10.hip @@ -0,0 +1,63 @@ +#include "../Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1011 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1011 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1010 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1011 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1012 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z15test_permlane16Pjjjjj +// CIR: cir.llvm.intrinsic "amdgcn.permlane16" {{.*}} : (!u32i, !u32i, !u32i, !u32i, !cir.bool, !cir.bool) -> !u32i +// LLVM: define{{.*}} void @_Z15test_permlane16Pjjjjj +// LLVM: call i32 @llvm.amdgcn.permlane16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false) +// OGCG: define{{.*}} void @_Z15test_permlane16Pjjjjj +// OGCG: call i32 @llvm.amdgcn.permlane16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false) +__device__ void test_permlane16(unsigned int* out, unsigned int a, unsigned int b, unsigned int c, unsigned int d) { + *out = __builtin_amdgcn_permlane16(a, b, c, d, 0, 0); +} + + +// CIR-LABEL: @_Z16test_permlanex16Pjjjjj +// CIR: cir.llvm.intrinsic "amdgcn.permlanex16" {{.*}} : (!u32i, !u32i, !u32i, !u32i, !cir.bool, !cir.bool) -> !u32i +// LLVM: define{{.*}} void @_Z16test_permlanex16Pjjjjj +// LLVM: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false) +// OGCG: define{{.*}} void @_Z16test_permlanex16Pjjjjj +// OGCG: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false) +__device__ void test_permlanex16(unsigned int* out, unsigned int a, unsigned int b, unsigned int c, unsigned int d) { + *out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0); +} diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-gfx11.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-gfx11.hip new file mode 100644 index 000000000000..383db2e0c374 --- /dev/null +++ b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn-gfx11.hip @@ -0,0 +1,108 @@ +#include "../Inputs/cuda.h" + +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1100 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1101 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1102 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1103 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1150 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1151 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1152 -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1101 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1102 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1103 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1150 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1151 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1152 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ +// RUN: -target-cpu gfx1153 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1101 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1102 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1103 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1150 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1151 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1152 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ +// RUN: -target-cpu gfx1153 -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +// CIR-LABEL: @_Z15test_permlane64Pjj +// CIR: cir.llvm.intrinsic "amdgcn.permlane64" {{.*}} : (!u32i) -> !u32i +// LLVM: define{{.*}} void @_Z15test_permlane64Pjj +// LLVM: call i32 @llvm.amdgcn.permlane64.i32(i32 %{{.*}}) +// OGCG: define{{.*}} void @_Z15test_permlane64Pjj +// OGCG: call i32 @llvm.amdgcn.permlane64.i32(i32 %{{.*}}) +__device__ void test_permlane64(unsigned int* out, unsigned int a) { + *out = __builtin_amdgcn_permlane64(a); +} diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip index 778a25c27476..127355d5b2fc 100644 --- a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip +++ b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip @@ -2,15 +2,15 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ -// RUN: -fcuda-is-device -emit-cir %s -o %t.cir +// RUN: -target-cpu tahiti -fcuda-is-device -emit-cir %s -o %t.cir // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir \ -// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t.ll // RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \ -// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll +// RUN: -target-cpu tahiti -fcuda-is-device -emit-llvm %s -o %t.ll // RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s //===----------------------------------------------------------------------===// diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-gfx10.cl b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-gfx10.cl new file mode 100644 index 000000000000..49f51eb55b11 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-gfx10.cl @@ -0,0 +1,65 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1010 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1011 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1012 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1010 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1011 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1012 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1010 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1011 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1012 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +typedef unsigned int uint; +typedef unsigned long ulong; + +// CIR-LABEL: @test_permlane16 +// CIR: cir.llvm.intrinsic "amdgcn.permlane16" {{.*}} : (!u32i, !u32i, !u32i, !u32i, !cir.bool, !cir.bool) -> !u32i +// LLVM: define{{.*}} void @test_permlane16 +// LLVM: call i32 @llvm.amdgcn.permlane16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false) +// OGCG: define{{.*}} void @test_permlane16 +// OGCG: call i32 @llvm.amdgcn.permlane16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false) +void test_permlane16(global uint* out, uint a, uint b, uint c, uint d) { + *out = __builtin_amdgcn_permlane16(a, b, c, d, 0, 0); +} + +// CIR-LABEL: @test_permlanex16 +// CIR: cir.llvm.intrinsic "amdgcn.permlanex16" {{.*}} : (!u32i, !u32i, !u32i, !u32i, !cir.bool, !cir.bool) -> !u32i +// LLVM: define{{.*}} void @test_permlanex16 +// LLVM: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false) +// OGCG: define{{.*}} void @test_permlanex16 +// OGCG: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false) +void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) { + *out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0); +} diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-gfx11.cl new file mode 100644 index 000000000000..6a429573d025 --- /dev/null +++ b/clang/test/CIR/CodeGen/OpenCL/builtins-amdgcn-gfx11.cl @@ -0,0 +1,115 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1100 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1101 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1102 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1103 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1150 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1151 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1152 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1153 -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1100 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1101 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1102 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1103 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1150 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1151 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1152 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 -fclangir \ +// RUN: -target-cpu gfx1153 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1100 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1101 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1102 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1103 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1150 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1151 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1152 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -cl-std=CL2.0 \ +// RUN: -target-cpu gfx1153 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +//===----------------------------------------------------------------------===// +// Test AMDGPU builtins +//===----------------------------------------------------------------------===// + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +typedef unsigned int uint; +typedef unsigned long ulong; + +// CIR-LABEL: @test_permlanex16 +// CIR: cir.llvm.intrinsic "amdgcn.permlanex16" {{.*}} : (!u32i, !u32i, !u32i, !u32i, !cir.bool, !cir.bool) -> !u32i +// LLVM: define{{.*}} void @test_permlanex16 +// LLVM: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false) +// OGCG: define{{.*}} void @test_permlanex16 +// OGCG: call i32 @llvm.amdgcn.permlanex16.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i1 false, i1 false) +void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) { + *out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0); +}