@@ -39,6 +39,10 @@ using MyAllocator
3939 = mallocMC::Allocator<Acc, CreationPolicies::FlatterScatter<>, DistributionPolicies::Noop,
4040 OOMPolicies::ReturnNull, ReservePoolPolicies::AlpakaBuf<Acc>,
4141 AlignmentPolicies::Shrink<>>;
42+ using MyAllocatorHandle =
43+ typename std::remove_cvref_t <decltype (std::declval<MyAllocator>().getAllocatorHandle())>;
44+ static constexpr std::uint32_t ALLOCATION_SIZE = 16U ;
45+ static constexpr std::size_t HEAP_SIZE = 1024U * 1024U * 1024U ;
4246
4347namespace kitgenbench ::Actions {
4448 [[maybe_unused]] static constexpr int MALLOC = 1 ;
@@ -48,11 +52,8 @@ namespace kitgenbench::Actions {
4852auto makeExecutionDetails () {
4953 auto const platformAcc = alpaka::Platform<Acc>{};
5054 auto const dev = alpaka::getDevByIdx (platformAcc, 0 );
51- #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
52- cudaDeviceSetLimit (cudaLimitMallocHeapSize, 1024U * 1024U * 1024U );
53- #endif
5455 uint32_t const numThreadsPerBlock = 256U ;
55- uint32_t const numThreads = 4U * numThreadsPerBlock;
56+ uint32_t const numThreads = 16U * numThreadsPerBlock;
5657 auto workdiv = [numThreads, numThreadsPerBlock]() -> alpaka::WorkDivMembers<Dim, Idx> {
5758 if constexpr (std::is_same_v<alpaka::AccToTag<Acc>, alpaka::TagCpuSerial>) {
5859 return {{1U }, {1U }, {numThreads}};
@@ -64,8 +65,6 @@ auto makeExecutionDetails() {
6465 return kitgenbench::ExecutionDetails<Acc, decltype (dev)>{workdiv, dev};
6566}
6667
67- static constexpr std::uint32_t ALLOCATION_SIZE = 16U ;
68-
6968// Reasons for the check to yield the result it yielded.
7069// `completed` means that the check completed. The result can still be true/false depending on
7170// whether the obtained value was actually correct. `notApplicable` means that the checks were
@@ -209,6 +208,13 @@ template <typename T> struct NoStoreProvider {
209208 nlohmann::json generateReport () { return {}; }
210209};
211210
211+ template <typename T, typename ... T_Resource> struct ResourceProvider {
212+ std::tuple<T_Resource...> resources{};
213+ ALPAKA_FN_ACC T load (auto const ) { return {resources}; }
214+ ALPAKA_FN_ACC void store (auto const &, T&&, auto const ) {}
215+ nlohmann::json generateReport () { return {}; }
216+ };
217+
212218template <typename T> struct AccumulateResultsProvider {
213219 T result{};
214220 ALPAKA_FN_ACC T load (auto const ) { return {}; }
@@ -229,6 +235,9 @@ template <typename T> struct AcumulateChecksProvider {
229235
230236namespace setups {
231237 struct SingleSizeMallocRecipe {
238+ ALPAKA_FN_ACC SingleSizeMallocRecipe (std::tuple<MyAllocatorHandle> handleInTuple)
239+ : handle{std::get<0 >(handleInTuple)} {}
240+ MyAllocatorHandle handle;
232241 static constexpr std::uint32_t allocationSize{ALLOCATION_SIZE};
233242 static constexpr std::uint32_t numAllocations{256U };
234243 std::array<std::byte*, numAllocations> pointers{{}};
@@ -239,7 +248,7 @@ namespace setups {
239248 return std::make_tuple (+kitgenbench::Actions::STOP,
240249 Payload (std::span<std::byte, allocationSize>{
241250 static_cast <std::byte*>(nullptr ), allocationSize}));
242- pointers[counter] = static_cast <std::byte*>(malloc (allocationSize));
251+ pointers[counter] = static_cast <std::byte*>(handle. malloc (acc, allocationSize));
243252 auto result = std::make_tuple (
244253 +kitgenbench::Actions::MALLOC,
245254 Payload (std::span<std::byte, allocationSize>(pointers[counter], allocationSize)));
@@ -252,19 +261,25 @@ namespace setups {
252261
253262 template <typename TAcc, typename TDev> struct InstructionDetails {
254263 struct DevicePackage {
255- NoStoreProvider <SingleSizeMallocRecipe> recipes{};
264+ ResourceProvider <SingleSizeMallocRecipe, MyAllocatorHandle > recipes{};
256265 AccumulateResultsProvider<SimpleSumLogger<AccTag>> loggers{};
257266 AcumulateChecksProvider<IotaReductionChecker> checkers{};
267+
268+ DevicePackage (MyAllocatorHandle handle) : recipes{handle} {};
258269 };
259270
260- DevicePackage hostData{} ;
271+ DevicePackage hostData;
261272 alpaka::Buf<TDev, DevicePackage, alpaka::Dim<TAcc>, alpaka::Idx<TAcc>> devicePackageBuffer;
262273
263- InstructionDetails (TDev const & device)
264- : devicePackageBuffer(alpaka::allocBuf<DevicePackage, Idx>(device, 1U )) {};
274+ InstructionDetails (TDev const & device, auto allocatorHandle)
275+ : hostData(allocatorHandle),
276+ devicePackageBuffer (alpaka::allocBuf<DevicePackage, Idx>(device, 1U )) {};
265277
266278 auto sendTo ([[maybe_unused]] TDev const & device, auto & queue) {
267- alpaka::memset (queue, devicePackageBuffer, 0U );
279+ auto const platformHost = alpaka::PlatformCpu{};
280+ auto const devHost = getDevByIdx (platformHost, 0 );
281+ auto view = alpaka::createView (devHost, &hostData, 1U );
282+ alpaka::memcpy (queue, devicePackageBuffer, view);
268283 return reinterpret_cast <DevicePackage*>(alpaka::getPtrNative (devicePackageBuffer));
269284 }
270285 auto retrieveFrom ([[maybe_unused]] TDev const & device, auto & queue) {
@@ -281,14 +296,15 @@ namespace setups {
281296 }
282297 };
283298
284- template <typename TAcc, typename TDev> auto makeInstructionDetails (TDev const & device) {
285- return InstructionDetails<TAcc, TDev>(device);
299+ template <typename TAcc, typename TDev>
300+ auto makeInstructionDetails (TDev const & device, auto allocatorHandle) {
301+ return InstructionDetails<TAcc, TDev>(device, allocatorHandle);
286302 }
287303
288- auto composeSetup () {
304+ auto composeSetup (auto allocatorHandle ) {
289305 auto execution = makeExecutionDetails ();
290306 return setup::composeSetup (" Non trivial" , execution,
291- makeInstructionDetails<Acc>(execution.device ), {});
307+ makeInstructionDetails<Acc>(execution.device , allocatorHandle ), {});
292308 }
293309} // namespace setups
294310
@@ -314,9 +330,16 @@ json composeReport(json const& metadata, json const& benchmarkReports) {
314330
315331void output (json const & report) { std::cout << report << std::endl; }
316332
333+ auto setupAllocator (auto const & device, auto HEAP_SIZE) {
334+ auto queue = alpaka::Queue<Acc, alpaka::Blocking>(device);
335+ return MyAllocator (device, queue, HEAP_SIZE);
336+ }
337+
317338auto main () -> int {
318339 auto metadata = gatherMetadata ();
319- auto setup = setups::composeSetup ();
340+ auto execution = makeExecutionDetails ();
341+ MyAllocator allocator = setupAllocator (execution.device , HEAP_SIZE);
342+ auto setup = setups::composeSetup (allocator.getAllocatorHandle ());
320343 auto benchmarkReports = runBenchmarks (setup);
321344 auto report = composeReport (metadata, benchmarkReports);
322345 output (report);
0 commit comments