Skip to content

Commit b3ab8b3

Browse files
committed
Crude multi-setup-runtime
1 parent 26465a2 commit b3ab8b3

File tree

1 file changed

+45
-25
lines changed
  • examples/multi-setup-runtime/source

1 file changed

+45
-25
lines changed

examples/multi-setup-runtime/source/main.cpp

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <alpaka/workdiv/WorkDivMembers.hpp>
77
#include <cstdint>
88
#include <limits>
9+
#include <sstream>
910
#include <tuple>
1011
#include <utility>
1112
#include <variant>
@@ -57,14 +58,12 @@ auto makeExecutionDetails() {
5758
return kitgenbench::ExecutionDetails<Acc, decltype(dev)>{workdiv, dev};
5859
}
5960

60-
static constexpr std::uint32_t ALLOCATION_SIZE = 16U;
61-
6261
// Reasons for the check to yield the result it yielded.
6362
// `completed` means that the check completed. The result can still be true/false depending on
6463
// whether the obtained value was actually correct. `notApplicable` means that the checks were
6564
// skipped. `nullpointer` means that a null pointer was given, so the checks couldn't run at all.
6665
enum class Reason { completed, notApplicable, nullpointer };
67-
using Payload = std::variant<std::span<std::byte, ALLOCATION_SIZE>, std::pair<bool, Reason>>;
66+
using Payload = std::variant<std::span<std::byte>, std::pair<bool, Reason>>;
6867

6968
template <typename TAccTag> struct SimpleSumLogger {
7069
using Clock = DeviceClock<TAccTag>;
@@ -161,10 +160,9 @@ constexpr auto isSpan(T<TType, TExtent>) {
161160
return IsSpan<T, TType, TExtent>{};
162161
}
163162

164-
template <typename TNew, typename TOld, std::size_t TExtent>
165-
constexpr auto convertDataType(std::span<TOld, TExtent>& range) {
166-
return std::span<TNew, TExtent * sizeof(TOld) / sizeof(TNew)>(
167-
reinterpret_cast<TNew*>(range.data()), range.size());
163+
template <typename TNew, typename TOld> constexpr auto convertDataType(std::span<TOld>& range) {
164+
return std::span<TNew>(reinterpret_cast<TNew*>(range.data()),
165+
range.size() * sizeof(TOld) / sizeof(TNew));
168166
}
169167

170168
struct IotaReductionChecker {
@@ -211,6 +209,14 @@ template <typename T> struct AccumulateResultsProvider {
211209
nlohmann::json generateReport() { return result.generateReport(); }
212210
};
213211

212+
template <typename T, typename U> struct ArgumentStoringProvider {
213+
U argument{};
214+
ALPAKA_FN_ACC T load([[maybe_unused]] auto const threadIndex) { return {argument}; }
215+
ALPAKA_FN_ACC void store([[maybe_unused]] const auto& acc, [[maybe_unused]] T&& instance,
216+
auto const) {}
217+
nlohmann::json generateReport() { return {}; }
218+
};
219+
214220
template <typename T> struct AcumulateChecksProvider {
215221
T result{};
216222
ALPAKA_FN_ACC T load(auto const threadIndex) { return {threadIndex}; }
@@ -222,20 +228,20 @@ template <typename T> struct AcumulateChecksProvider {
222228

223229
namespace setups {
224230
struct SingleSizeMallocRecipe {
225-
static constexpr std::uint32_t allocationSize{ALLOCATION_SIZE};
231+
std::uint32_t allocationSize;
226232
static constexpr std::uint32_t numAllocations{256U};
227233
std::array<std::byte*, numAllocations> pointers{{}};
228234
std::uint32_t counter{0U};
229235

230236
ALPAKA_FN_ACC auto next([[maybe_unused]] const auto& acc) {
231237
if (counter >= numAllocations)
232-
return std::make_tuple(+kitgenbench::Actions::STOP,
233-
Payload(std::span<std::byte, allocationSize>{
234-
static_cast<std::byte*>(nullptr), allocationSize}));
238+
return std::make_tuple(
239+
+kitgenbench::Actions::STOP,
240+
Payload(std::span<std::byte>{static_cast<std::byte*>(nullptr), allocationSize}));
235241
pointers[counter] = static_cast<std::byte*>(malloc(allocationSize));
236-
auto result = std::make_tuple(
237-
+kitgenbench::Actions::MALLOC,
238-
Payload(std::span<std::byte, allocationSize>(pointers[counter], allocationSize)));
242+
auto result
243+
= std::make_tuple(+kitgenbench::Actions::MALLOC,
244+
Payload(std::span<std::byte>(pointers[counter], allocationSize)));
239245
counter++;
240246
return result;
241247
}
@@ -245,19 +251,24 @@ namespace setups {
245251

246252
template <typename TAcc, typename TDev> struct InstructionDetails {
247253
struct DevicePackage {
248-
NoStoreProvider<SingleSizeMallocRecipe> recipes{};
254+
ArgumentStoringProvider<SingleSizeMallocRecipe, uint32_t> recipes{};
249255
AccumulateResultsProvider<SimpleSumLogger<AccTag>> loggers{};
250256
AcumulateChecksProvider<IotaReductionChecker> checkers{};
257+
258+
DevicePackage(auto size) : recipes{size} {}
251259
};
252260

253261
DevicePackage hostData{};
254262
alpaka::Buf<TDev, DevicePackage, alpaka::Dim<TAcc>, alpaka::Idx<TAcc>> devicePackageBuffer;
255263

256-
InstructionDetails(TDev const& device)
257-
: devicePackageBuffer(alpaka::allocBuf<DevicePackage, Idx>(device, 1U)) {};
264+
InstructionDetails(TDev const& device, uint32_t size)
265+
: hostData(size), devicePackageBuffer(alpaka::allocBuf<DevicePackage, Idx>(device, 1U)) {};
258266

259267
auto sendTo([[maybe_unused]] TDev const& device, auto& queue) {
260-
alpaka::memset(queue, devicePackageBuffer, 0U);
268+
auto const platformHost = alpaka::PlatformCpu{};
269+
auto const devHost = getDevByIdx(platformHost, 0);
270+
auto view = alpaka::createView(devHost, &hostData, 1U);
271+
alpaka::memcpy(queue, devicePackageBuffer, view);
261272
return reinterpret_cast<DevicePackage*>(alpaka::getPtrNative(devicePackageBuffer));
262273
}
263274
auto retrieveFrom([[maybe_unused]] TDev const& device, auto& queue) {
@@ -274,14 +285,16 @@ namespace setups {
274285
}
275286
};
276287

277-
template <typename TAcc, typename TDev> auto makeInstructionDetails(TDev const& device) {
278-
return InstructionDetails<TAcc, TDev>(device);
288+
template <typename TAcc, typename TDev>
289+
auto makeInstructionDetails(TDev const& device, uint32_t size) {
290+
return InstructionDetails<TAcc, TDev>(device, size);
279291
}
280292

281-
auto composeSetup() {
293+
auto composeSetup(uint32_t size) {
282294
auto execution = makeExecutionDetails();
283-
return setup::composeSetup("Non trivial", execution,
284-
makeInstructionDetails<Acc>(execution.device), {});
295+
return setup::composeSetup((std::stringstream{} << "Allocation size: " << size).str(),
296+
execution, makeInstructionDetails<Acc>(execution.device, size),
297+
{{"allocation size", size}});
285298
}
286299
} // namespace setups
287300

@@ -309,9 +322,16 @@ void output(json const& report) { std::cout << report << std::endl; }
309322

310323
auto main() -> int {
311324
auto metadata = gatherMetadata();
312-
auto setup = setups::composeSetup();
313-
auto benchmarkReports = runBenchmarks(setup);
325+
json benchmarkReports = json::object();
326+
auto allocationSizes = std::to_array({16U, 32U, 64U, 128U, 256U, 512U, 1024U});
327+
for (auto const size : allocationSizes) {
328+
auto setup = setups::composeSetup(size);
329+
// CAUTION: Each merge overwrites the outermost "total runtime" entry, so its reported value is wrong.
330+
benchmarkReports.merge_patch(runBenchmarks(setup));
331+
}
314332
auto report = composeReport(metadata, benchmarkReports);
333+
// Hot fix: Remove the "total runtime" entry, which was overwritten rather than merged.
334+
report["benchmarks"].erase("total runtime [ms]");
315335
output(report);
316336
return EXIT_SUCCESS;
317337
}

0 commit comments

Comments
 (0)