Skip to content

Commit

Permalink
feat(raja): add new host to device and device to host timers
Browse files Browse the repository at this point in the history
  • Loading branch information
pranav-sivaraman committed Jan 28, 2024
1 parent d5f58d5 commit d72fdb7
Showing 1 changed file with 31 additions and 7 deletions.
38 changes: 31 additions & 7 deletions src/raja/fasten.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,22 @@ template <size_t PPWI> class IMPL_CLS final : public Bude<PPWI> {
});
}

template <typename T> static void *registerAllocation(const std::vector<T> &xs) {
auto &rm = umpire::ResourceManager::getInstance();
auto host_alloc = rm.getAllocator("HOST");
auto strategy = host_alloc.getAllocationStrategy();
// Not allowed to create a record without casting away the const
auto host_data = const_cast<void*>(static_cast<const void*>(std::data(xs)));
umpire::util::AllocationRecord record{host_data, sizeof(T) * std::size(xs), strategy};
rm.registerAllocation(host_data, record);
return host_data;
}

// Returns a buffer obtained from allocate<T>(size) that holds a copy of `xs`.
// NOTE(review): this listing is a diff rendered without +/- markers, so the body
// below mixes two versions of the function; `data` is declared twice and the
// span cannot compile as shown. Confirm against the real file which lines are live.
template <typename T> static T *allocate(const std::vector<T> &xs) {
// Presumably the pre-change body: allocate, then element-wise std::copy from the vector.
auto data = allocate<T>(xs.size());
std::copy(xs.begin(), xs.end(), data);
// Presumably the post-change body: register the vector's storage with Umpire,
// allocate the destination, then let the ResourceManager perform the copy.
auto &rm = umpire::ResourceManager::getInstance();
auto host_data = registerAllocation(xs);
auto data = allocate<T>(std::size(xs));
// No size is passed; presumably the amount copied comes from the record
// created by registerAllocation — TODO confirm against Umpire's copy() contract.
rm.copy(data, host_data);
return data;
}

Expand All @@ -241,7 +254,11 @@ template <size_t PPWI> class IMPL_CLS final : public Bude<PPWI> {
#ifndef RAJA_TARGET_GPU
auto alloc = rm.getAllocator("HOST");
#else
#ifdef BUDE_MANAGED_ALLOC
auto alloc = rm.getAllocator("UM");
#else
auto alloc = rm.getAllocator("DEVICE");
#endif
#endif
return static_cast<T *>(alloc.allocate(sizeof(T) * size));
}
Expand Down Expand Up @@ -289,8 +306,9 @@ template <size_t PPWI> class IMPL_CLS final : public Bude<PPWI> {
[[nodiscard]] Sample fasten(const Params &p, size_t wgsize, size_t device) const override {

Sample sample(PPWI, wgsize, p.nposes());

auto contextStart = now();
auto &rm = umpire::ResourceManager::getInstance();

auto hostToDeviceStart = now();

auto protein = allocate(p.protein);
auto ligand = allocate(p.ligand);
Expand All @@ -305,8 +323,10 @@ template <size_t PPWI> class IMPL_CLS final : public Bude<PPWI> {

synchronise();

auto contextEnd = now();
sample.contextTime = {contextStart, contextEnd};
auto host_energies = registerAllocation(sample.energies);

auto hostToDeviceEnd = now();
sample.hostToDevice = {hostToDeviceStart, hostToDeviceEnd};

for (size_t i = 0; i < p.iterations + p.warmupIterations; ++i) {
auto kernelStart = now();
Expand All @@ -318,7 +338,11 @@ template <size_t PPWI> class IMPL_CLS final : public Bude<PPWI> {
sample.kernelTimes.emplace_back(kernelStart, kernelEnd);
}

std::copy(results, results + p.nposes(), sample.energies.begin());
auto deviceToHostStart = now();
rm.copy(host_energies, results);

auto deviceToHostEnd = now();
sample.deviceToHost = {deviceToHostStart, deviceToHostEnd};

deallocate(protein);
deallocate(ligand);
Expand Down

0 comments on commit d72fdb7

Please sign in to comment.