Skip to content

Commit

Permalink
[E2E][CUDA][HIP][level_zero] Make P2P tests portable. (#14095)
Browse files Browse the repository at this point in the history
This enables the P2P tests on the HIP and Level Zero backends.

Now that CUDA also supports multi-device SYCL contexts, P2P programming
should be fully portable across these backends.

---------

Signed-off-by: JackAKirk <jack.kirk@codeplay.com>
  • Loading branch information
JackAKirk authored Jun 19, 2024
1 parent 7928588 commit ab6c0f5
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 45 deletions.
17 changes: 4 additions & 13 deletions sycl/test-e2e/USM/P2P/p2p_access.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// REQUIRES: cuda
// RUN: %{build} -o %t.out
// RUN: %if cuda %{ %{run} %t.out %}
// REQUIRES: cuda || hip || level_zero
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include <cassert>
#include <sycl/detail/core.hpp>
Expand All @@ -10,17 +10,8 @@ using namespace sycl;

int main() {

// Note that this code will largely be removed: it is temporary due to the
// temporary lack of multiple devices per sycl context in the Nvidia backend.
// A portable implementation, using a single gpu platform, should be possible
// once the Nvidia context issues are resolved.
////////////////////////////////////////////////////////////////////////
std::vector<sycl::device> Devs;
for (const auto &plt : sycl::platform::get_platforms()) {
auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu);

if (plt.get_backend() == sycl::backend::ext_oneapi_cuda)
Devs.push_back(plt.get_devices()[0]);
}
if (Devs.size() < 2) {
std::cout << "Cannot test P2P capabilities, at least two devices are "
"required, exiting."
Expand Down
29 changes: 10 additions & 19 deletions sycl/test-e2e/USM/P2P/p2p_atomics.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// REQUIRES: cuda
// RUN: %if any-device-is-cuda %{ %{build} -Xsycl-target-backend --cuda-gpu-arch=sm_61 -o %t.out %}
// RUN: %if cuda %{ %{run} %t.out %}
// REQUIRES: cuda || hip || level_zero
// RUN: %{build} %if any-device-is-cuda %{ -Xsycl-target-backend --cuda-gpu-arch=sm_61 %} -o %t.out
// RUN: %{run} %t.out

#include <cassert>
#include <numeric>
Expand All @@ -18,17 +18,8 @@ constexpr size_t N = 512;

int main() {

// Note that this code will largely be removed: it is temporary due to the
// temporary lack of multiple devices per sycl context in the Nvidia backend.
// A portable implementation, using a single gpu platform, should be possible
// once the Nvidia context issues are resolved.
////////////////////////////////////////////////////////////////////////
std::vector<sycl::device> Devs;
for (const auto &plt : sycl::platform::get_platforms()) {
auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu);

if (plt.get_backend() == sycl::backend::ext_oneapi_cuda)
Devs.push_back(plt.get_devices()[0]);
}
if (Devs.size() < 2) {
std::cout << "Cannot test P2P capabilities, at least two devices are "
"required, exiting."
Expand All @@ -51,26 +42,26 @@ int main() {
// Enables Devs[1] to access Devs[0] memory.
Devs[1].ext_oneapi_enable_peer_access(Devs[0]);

std::vector<double> input(N);
std::vector<int> input(N);
std::iota(input.begin(), input.end(), 0);

double h_sum = 0.;
int h_sum = 0.;
for (const auto &value : input) {
h_sum += value;
}

double *d_sum = malloc_shared<double>(1, Queues[0]);
double *d_in = malloc_device<double>(N, Queues[0]);
int *d_sum = malloc_shared<int>(1, Queues[0]);
int *d_in = malloc_device<int>(N, Queues[0]);

Queues[0].memcpy(d_in, &input[0], N * sizeof(double));
Queues[0].memcpy(d_in, &input[0], N * sizeof(int));
Queues[0].wait();

range global_range{N};

*d_sum = 0.;
Queues[1].submit([&](handler &h) {
h.parallel_for<class peer_atomic>(global_range, [=](id<1> i) {
sycl::atomic_ref<double, sycl::memory_order::relaxed,
sycl::atomic_ref<int, sycl::memory_order::relaxed,
sycl::memory_scope::system,
access::address_space::global_space>(*d_sum) += d_in[i];
});
Expand Down
17 changes: 4 additions & 13 deletions sycl/test-e2e/USM/P2P/p2p_copy.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// REQUIRES: cuda
// RUN: %{build} -o %t.out
// RUN: %if cuda %{ %{run} %t.out %}
// REQUIRES: cuda || hip || level_zero
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

#include <cassert>
#include <numeric>
Expand All @@ -15,17 +15,8 @@ constexpr int N = 100;

int main() {

// Note that this code will largely be removed: it is temporary due to the
// temporary lack of multiple devices per sycl context in the Nvidia backend.
// A portable implementation, using a single gpu platform, should be possible
// once the Nvidia context issues are resolved.
////////////////////////////////////////////////////////////////////////
std::vector<sycl::device> Devs;
for (const auto &plt : sycl::platform::get_platforms()) {
auto Devs = platform(gpu_selector_v).get_devices(info::device_type::gpu);

if (plt.get_backend() == sycl::backend::ext_oneapi_cuda)
Devs.push_back(plt.get_devices()[0]);
}
if (Devs.size() < 2) {
std::cout << "Cannot test P2P capabilities, at least two devices are "
"required, exiting."
Expand Down

0 comments on commit ab6c0f5

Please sign in to comment.