Skip to content

Commit a105239

Browse files
committed
Add OG codegen checks for builtin lowering aswell
1 parent d8d2482 commit a105239

File tree

2 files changed

+90
-3
lines changed

2 files changed

+90
-3
lines changed

clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,20 @@
99
// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll
1010
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
1111

12+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 \
13+
// RUN: -fcuda-is-device -emit-llvm %s -o %t.ll
14+
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
15+
1216
//===----------------------------------------------------------------------===//
13-
// Test AMDGPU built-in functions
17+
// Test AMDGPU builtins
1418
//===----------------------------------------------------------------------===//
1519

1620
// CIR-LABEL: @_Z28test_wave_reduce_add_u32_i32Pi
1721
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u32i, !s32i) -> !u32i
1822
// LLVM: define{{.*}} void @_Z28test_wave_reduce_add_u32_i32Pii(
1923
// LLVM: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 0)
24+
// OGCG: define{{.*}} void @_Z28test_wave_reduce_add_u32_i32Pii(
25+
// OGCG: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 0)
2026
__device__ void test_wave_reduce_add_u32_i32(int* out, int in) {
2127
*out = __builtin_amdgcn_wave_reduce_add_u32(in, 0);
2228
}
@@ -25,6 +31,8 @@ __device__ void test_wave_reduce_add_u32_i32(int* out, int in) {
2531
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u64i, !s32i) -> !u64i
2632
// LLVM: define{{.*}} void @_Z28test_wave_reduce_add_u64_i64Pll(
2733
// LLVM: call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %{{.*}}, i32 0)
34+
// OGCG: define{{.*}} void @_Z28test_wave_reduce_add_u64_i64Pll(
35+
// OGCG: call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 %{{.*}}, i32 0)
2836
__device__ void test_wave_reduce_add_u64_i64(long* out, long in) {
2937
*out = __builtin_amdgcn_wave_reduce_add_u64(in, 0);
3038
}
@@ -33,6 +41,8 @@ __device__ void test_wave_reduce_add_u64_i64(long* out, long in) {
3341
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.sub" {{.*}} : (!u32i, !s32i) -> !u32i
3442
// LLVM: define{{.*}} void @_Z28test_wave_reduce_sub_u32_i32Pii(
3543
// LLVM: call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %{{.*}}, i32 0)
44+
// OGCG: define{{.*}} void @_Z28test_wave_reduce_sub_u32_i32Pii(
45+
// OGCG: call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 %{{.*}}, i32 0)
3646
__device__ void test_wave_reduce_sub_u32_i32(int* out, int in) {
3747
*out = __builtin_amdgcn_wave_reduce_sub_u32(in, 0);
3848
}
@@ -41,6 +51,8 @@ __device__ void test_wave_reduce_sub_u32_i32(int* out, int in) {
4151
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.sub" {{.*}} : (!u64i, !s32i) -> !u64i
4252
// LLVM: define{{.*}} void @_Z28test_wave_reduce_sub_u64_i64Pll(
4353
// LLVM: call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %{{.*}}, i32 0)
54+
// OGCG: define{{.*}} void @_Z28test_wave_reduce_sub_u64_i64Pll(
55+
// OGCG: call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 %{{.*}}, i32 0)
4456
__device__ void test_wave_reduce_sub_u64_i64(long* out, long in) {
4557
*out = __builtin_amdgcn_wave_reduce_sub_u64(in, 0);
4658
}
@@ -49,6 +61,8 @@ __device__ void test_wave_reduce_sub_u64_i64(long* out, long in) {
4961
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.min" {{.*}} : (!s32i, !s32i) -> !s32i
5062
// LLVM: define{{.*}} void @_Z29test_wave_reduce_min_i32_signPii(
5163
// LLVM: call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 %{{.*}}, i32 0)
64+
// OGCG: define{{.*}} void @_Z29test_wave_reduce_min_i32_signPii(
65+
// OGCG: call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 %{{.*}}, i32 0)
5266
__device__ void test_wave_reduce_min_i32_sign(int* out, int in) {
5367
*out = __builtin_amdgcn_wave_reduce_min_i32(in, 0);
5468
}
@@ -57,6 +71,8 @@ __device__ void test_wave_reduce_min_i32_sign(int* out, int in) {
5771
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umin" {{.*}} : (!u32i, !s32i) -> !u32i
5872
// LLVM: define{{.*}} void @_Z31test_wave_reduce_min_u32_unsignPjj(
5973
// LLVM: call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %{{.*}}, i32 0)
74+
// OGCG: define{{.*}} void @_Z31test_wave_reduce_min_u32_unsignPjj(
75+
// OGCG: call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %{{.*}}, i32 0)
6076
__device__ void test_wave_reduce_min_u32_unsign(unsigned int* out, unsigned int in) {
6177
*out = __builtin_amdgcn_wave_reduce_min_u32(in, 0);
6278
}
@@ -65,6 +81,8 @@ __device__ void test_wave_reduce_min_u32_unsign(unsigned int* out, unsigned int
6581
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.min" {{.*}} : (!s64i, !s32i) -> !s64i
6682
// LLVM: define{{.*}} void @_Z29test_wave_reduce_min_i64_signPll(
6783
// LLVM: call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 %{{.*}}, i32 0)
84+
// OGCG: define{{.*}} void @_Z29test_wave_reduce_min_i64_signPll(
85+
// OGCG: call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 %{{.*}}, i32 0)
6886
__device__ void test_wave_reduce_min_i64_sign(long* out, long in) {
6987
*out = __builtin_amdgcn_wave_reduce_min_i64(in, 0);
7088
}
@@ -73,6 +91,8 @@ __device__ void test_wave_reduce_min_i64_sign(long* out, long in) {
7391
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umin" {{.*}} : (!u64i, !s32i) -> !u64i
7492
// LLVM: define{{.*}} void @_Z31test_wave_reduce_min_u64_unsignPmm(
7593
// LLVM: call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 %{{.*}}, i32 0)
94+
// OGCG: define{{.*}} void @_Z31test_wave_reduce_min_u64_unsignPmm(
95+
// OGCG: call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 %{{.*}}, i32 0)
7696
__device__ void test_wave_reduce_min_u64_unsign(unsigned long* out, unsigned long in) {
7797
*out = __builtin_amdgcn_wave_reduce_min_u64(in, 0);
7898
}
@@ -81,6 +101,8 @@ __device__ void test_wave_reduce_min_u64_unsign(unsigned long* out, unsigned lon
81101
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.max" {{.*}} : (!s32i, !s32i) -> !s32i
82102
// LLVM: define{{.*}} void @_Z29test_wave_reduce_max_i32_signPii(
83103
// LLVM: call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 %{{.*}}, i32 0)
104+
// OGCG: define{{.*}} void @_Z29test_wave_reduce_max_i32_signPii(
105+
// OGCG: call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 %{{.*}}, i32 0)
84106
__device__ void test_wave_reduce_max_i32_sign(int* out, int in) {
85107
*out = __builtin_amdgcn_wave_reduce_max_i32(in, 0);
86108
}
@@ -89,6 +111,8 @@ __device__ void test_wave_reduce_max_i32_sign(int* out, int in) {
89111
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umax" {{.*}} : (!u32i, !s32i) -> !u32i
90112
// LLVM: define{{.*}} void @_Z31test_wave_reduce_max_u32_unsignPjj(
91113
// LLVM: call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 %{{.*}}, i32 0)
114+
// OGCG: define{{.*}} void @_Z31test_wave_reduce_max_u32_unsignPjj(
115+
// OGCG: call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 %{{.*}}, i32 0)
92116
__device__ void test_wave_reduce_max_u32_unsign(unsigned int* out, unsigned int in) {
93117
*out = __builtin_amdgcn_wave_reduce_max_u32(in, 0);
94118
}
@@ -97,6 +121,8 @@ __device__ void test_wave_reduce_max_u32_unsign(unsigned int* out, unsigned int
97121
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.max" {{.*}} : (!s64i, !s32i) -> !s64i
98122
// LLVM: define{{.*}} void @_Z29test_wave_reduce_max_i64_signPll(
99123
// LLVM: call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 %{{.*}}, i32 0)
124+
// OGCG: define{{.*}} void @_Z29test_wave_reduce_max_i64_signPll(
125+
// OGCG: call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 %{{.*}}, i32 0)
100126
__device__ void test_wave_reduce_max_i64_sign(long* out, long in) {
101127
*out = __builtin_amdgcn_wave_reduce_max_i64(in, 0);
102128
}
@@ -105,6 +131,8 @@ __device__ void test_wave_reduce_max_i64_sign(long* out, long in) {
105131
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.umax" {{.*}} : (!u64i, !s32i) -> !u64i
106132
// LLVM: define{{.*}} void @_Z31test_wave_reduce_max_u64_unsignPmm(
107133
// LLVM: call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 %{{.*}}, i32 0)
134+
// OGCG: define{{.*}} void @_Z31test_wave_reduce_max_u64_unsignPmm(
135+
// OGCG: call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 %{{.*}}, i32 0)
108136
__device__ void test_wave_reduce_max_u64_unsign(unsigned long* out, unsigned long in) {
109137
*out = __builtin_amdgcn_wave_reduce_max_u64(in, 0);
110138
}
@@ -113,6 +141,8 @@ __device__ void test_wave_reduce_max_u64_unsign(unsigned long* out, unsigned lon
113141
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.and" {{.*}} : (!s32i, !s32i) -> !s32i
114142
// LLVM: define{{.*}} void @_Z28test_wave_reduce_and_b32_i32Pii(
115143
// LLVM: call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 %{{.*}}, i32 0)
144+
// OGCG: define{{.*}} void @_Z28test_wave_reduce_and_b32_i32Pii(
145+
// OGCG: call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 %{{.*}}, i32 0)
116146
__device__ void test_wave_reduce_and_b32_i32(int* out, int in) {
117147
*out = __builtin_amdgcn_wave_reduce_and_b32(in, 0);
118148
}
@@ -121,6 +151,8 @@ __device__ void test_wave_reduce_and_b32_i32(int* out, int in) {
121151
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.and" {{.*}} : (!s64i, !s32i) -> !s64i
122152
// LLVM: define{{.*}} void @_Z28test_wave_reduce_and_b64_i64Pll(
123153
// LLVM: call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 %{{.*}}, i32 0)
154+
// OGCG: define{{.*}} void @_Z28test_wave_reduce_and_b64_i64Pll(
155+
// OGCG: call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 %{{.*}}, i32 0)
124156
__device__ void test_wave_reduce_and_b64_i64(long* out, long in) {
125157
*out = __builtin_amdgcn_wave_reduce_and_b64(in, 0);
126158
}
@@ -129,6 +161,8 @@ __device__ void test_wave_reduce_and_b64_i64(long* out, long in) {
129161
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.or" {{.*}} : (!s32i, !s32i) -> !s32i
130162
// LLVM: define{{.*}} void @_Z27test_wave_reduce_or_b32_i32Pii(
131163
// LLVM: call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 %{{.*}}, i32 0)
164+
// OGCG: define{{.*}} void @_Z27test_wave_reduce_or_b32_i32Pii(
165+
// OGCG: call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 %{{.*}}, i32 0)
132166
__device__ void test_wave_reduce_or_b32_i32(int* out, int in) {
133167
*out = __builtin_amdgcn_wave_reduce_or_b32(in, 0);
134168
}
@@ -137,6 +171,8 @@ __device__ void test_wave_reduce_or_b32_i32(int* out, int in) {
137171
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.or" {{.*}} : (!s64i, !s32i) -> !s64i
138172
// LLVM: define{{.*}} void @_Z27test_wave_reduce_or_b64_i64Pll(
139173
// LLVM: call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 %{{.*}}, i32 0)
174+
// OGCG: define{{.*}} void @_Z27test_wave_reduce_or_b64_i64Pll(
175+
// OGCG: call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 %{{.*}}, i32 0)
140176
__device__ void test_wave_reduce_or_b64_i64(long* out, long in) {
141177
*out = __builtin_amdgcn_wave_reduce_or_b64(in, 0);
142178
}
@@ -145,6 +181,8 @@ __device__ void test_wave_reduce_or_b64_i64(long* out, long in) {
145181
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.xor" {{.*}} : (!s32i, !s32i) -> !s32i
146182
// LLVM: define{{.*}} void @_Z28test_wave_reduce_xor_b32_i32Pii(
147183
// LLVM: call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %{{.*}}, i32 0)
184+
// OGCG: define{{.*}} void @_Z28test_wave_reduce_xor_b32_i32Pii(
185+
// OGCG: call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 %{{.*}}, i32 0)
148186
__device__ void test_wave_reduce_xor_b32_i32(int* out, int in) {
149187
*out = __builtin_amdgcn_wave_reduce_xor_b32(in, 0);
150188
}
@@ -153,6 +191,8 @@ __device__ void test_wave_reduce_xor_b32_i32(int* out, int in) {
153191
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.xor" {{.*}} : (!s64i, !s32i) -> !s64i
154192
// LLVM: define{{.*}} void @_Z28test_wave_reduce_xor_b64_i64Pll(
155193
// LLVM: call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %{{.*}}, i32 0)
194+
// OGCG: define{{.*}} void @_Z28test_wave_reduce_xor_b64_i64Pll(
195+
// OGCG: call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 %{{.*}}, i32 0)
156196
__device__ void test_wave_reduce_xor_b64_i64(long* out, long in) {
157197
*out = __builtin_amdgcn_wave_reduce_xor_b64(in, 0);
158198
}
@@ -162,6 +202,8 @@ __device__ void test_wave_reduce_xor_b64_i64(long* out, long in) {
162202
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u32i, !s32i) -> !u32i
163203
// LLVM: define{{.*}} void @_Z38test_wave_reduce_add_u32_iterative_i32Pii(
164204
// LLVM: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 1)
205+
// OGCG: define{{.*}} void @_Z38test_wave_reduce_add_u32_iterative_i32Pii(
206+
// OGCG: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 1)
165207
__device__ void test_wave_reduce_add_u32_iterative_i32(int* out, int in) {
166208
*out = __builtin_amdgcn_wave_reduce_add_u32(in, 1);
167209
}
@@ -171,6 +213,8 @@ __device__ void test_wave_reduce_add_u32_iterative_i32(int* out, int in) {
171213
// CIR: cir.llvm.intrinsic "amdgcn.wave.reduce.add" {{.*}} : (!u32i, !s32i) -> !u32i
172214
// LLVM: define{{.*}} void @_Z32test_wave_reduce_add_u32_dpp_i32Pii(
173215
// LLVM: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 2)
216+
// OGCG: define{{.*}} void @_Z32test_wave_reduce_add_u32_dpp_i32Pii(
217+
// OGCG: call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 %{{.*}}, i32 2)
174218
__device__ void test_wave_reduce_add_u32_dpp_i32(int* out, int in) {
175219
*out = __builtin_amdgcn_wave_reduce_add_u32(in, 2);
176220
}

0 commit comments

Comments
 (0)