Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL] convert sycl::device to sycl::device* for better handling #504

Merged
merged 6 commits into from
Nov 5, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/struct_mv/_hypre_struct_mv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1253,7 +1253,7 @@ ReductionBoxLoopforall( LOOP_BODY loop_body,
hypre_HandleComputeStream(hypre_handle())->submit([&] (sycl::handler& cgh)
{
sycl::accessor sum_acc(sum_buf, cgh, sycl::read_write);
cgh.parallel_for(sycl::nd_range<1>(gDim*bDim, bDim), sycl::ONEAPI::reduction(sum_acc, sycl::ONEAPI::plus<>()), loop_body);
cgh.parallel_for(sycl::nd_range<1>(gDim*bDim, bDim), sycl::ext::oneapi::reduction(sum_acc, std::plus<>()), loop_body);
}).wait_and_throw();
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/struct_mv/boxloop_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ ReductionBoxLoopforall( LOOP_BODY loop_body,
hypre_HandleComputeStream(hypre_handle())->submit([&] (sycl::handler& cgh)
{
sycl::accessor sum_acc(sum_buf, cgh, sycl::read_write);
cgh.parallel_for(sycl::nd_range<1>(gDim*bDim, bDim), sycl::ONEAPI::reduction(sum_acc, sycl::ONEAPI::plus<>()), loop_body);
cgh.parallel_for(sycl::nd_range<1>(gDim*bDim, bDim), sycl::ext::oneapi::reduction(sum_acc, std::plus<>()), loop_body);
}).wait_and_throw();
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/utilities/_hypre_utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ struct hypre_DeviceData
hypre_device_allocator device_allocator;
#endif
#if defined(HYPRE_USING_SYCL)
sycl::device device;
sycl::device* device;
#else
HYPRE_Int device;
#endif
Expand Down
13 changes: 7 additions & 6 deletions src/utilities/device_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -968,14 +968,14 @@ hypre_DeviceDataStream(hypre_DeviceData *data, HYPRE_Int i)
catch (sycl::exception const& ex)
{
std::cout << "Caught asynchronous SYCL exception:" << std::endl
<< ex.what() << ", OpenCL code: " << ex.get_cl_code() << std::endl;
<< ex.what() << ", SYCL code: " << ex.code() << std::endl;
}
}
};

sycl::device syclDev = data->device;
sycl::context syclctxt = sycl::context(syclDev, sycl_asynchandler);
stream = new sycl::queue(syclctxt, syclDev, sycl::property_list{sycl::property::queue::in_order{}});
sycl::device* syclDev = data->device;
sycl::context syclctxt = sycl::context(*syclDev, sycl_asynchandler);
stream = new sycl::queue(syclctxt, *syclDev, sycl::property_list{sycl::property::queue::in_order{}});
data->streams[i] = stream;
}
#endif
Expand Down Expand Up @@ -1023,7 +1023,7 @@ sycl::queue*
hypre_DeviceDataComputeStream(hypre_DeviceData *data)
{
return hypre_DeviceDataStream(data,
hypre_DeviceDataComputeStreamNum(data));
hypre_DeviceDataComputeStreamNum(data));
}

#if defined(HYPRE_USING_CURAND)
Expand Down Expand Up @@ -1233,7 +1233,7 @@ hypre_DeviceDataCreate()

#if defined(HYPRE_USING_SYCL)
/* WM: does the default selector get a GPU if available? Having trouble with getting the device on frank, so temporarily just passing the default selector */
hypre_DeviceDataDevice(data) = sycl::device(sycl::default_selector{});
hypre_DeviceDataDevice(data) = nullptr;
#else
hypre_DeviceDataDevice(data) = 0;
#endif
Expand Down Expand Up @@ -1490,6 +1490,7 @@ hypre_bind_device( HYPRE_Int myid,

/* get number of devices on this node */
hypre_GetDeviceCount(&nDevices);
/* TODO: ABB might need to look into this since nDevices are overwritten by 1 */
nDevices = 1;

/* set device */
Expand Down
2 changes: 1 addition & 1 deletion src/utilities/device_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ struct hypre_DeviceData
hypre_device_allocator device_allocator;
#endif
#if defined(HYPRE_USING_SYCL)
sycl::device device;
sycl::device* device;
#else
HYPRE_Int device;
#endif
Expand Down
71 changes: 51 additions & 20 deletions src/utilities/general.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,43 @@ hypre_SetDevice(hypre_int device_id, hypre_Handle *hypre_handle_)
#endif

#if defined(HYPRE_USING_SYCL)
/* sycl device set at construction of hypre_DeviceData object */
HYPRE_Int nDevices=0;
sycl::platform platform(sycl::gpu_selector{});
auto gpu_devices = platform.get_devices(sycl::info::device_type::gpu);
for (int i = 0; i < gpu_devices.size(); i++) {
if(gpu_devices[i].get_info<sycl::info::device::partition_max_sub_devices>() > 0) {
auto subDevicesDomainNuma = gpu_devices[i].create_sub_devices<sycl::info::partition_property::partition_by_affinity_domain>(sycl::info::partition_affinity_domain::numa);
nDevices += subDevicesDomainNuma.size();
}
else {
nDevices++;
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code above looks like it replicates the new implementation in hypre_GetDeviceCount(). Can we just call that here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


if (device_id > nDevices) {
hypre_printf("ERROR: SYCL device-ID exceed the number of devices on-node... \n");
}

HYPRE_Int local_nDevices=0;
for (int i = 0; i < gpu_devices.size(); i++) {
// multi-tile GPUs
if (gpu_devices[i].get_info<sycl::info::device::partition_max_sub_devices>() > 0) {
auto subDevicesDomainNuma = gpu_devices[i].create_sub_devices<sycl::info::partition_property::partition_by_affinity_domain>(sycl::info::partition_affinity_domain::numa);
for (auto &tile : subDevicesDomainNuma) {
if (local_nDevices == device_id) {
hypre_HandleDevice(hypre_handle_) = &tile;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So does this consider each tile to be a separate device? What is the effect of this vs. setting the environment variable that Brian had mentioned: export ZE_AFFINITY_MASK=0.0? Does this do the same thing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does the same but just generalizes. ZE_AFFINITY_MASK=0.0 is designed to be set for device ID 0 and tile 0 and not to be a scalable solution.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds good. Is there ever a case where you would want to treat all tiles as a single device? Either way, I think I'm happy using this for now at least.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Treating a device as 2 separate tiles (explicit scaling) and device as a whole (implicit scaling) are two options. Since majority of the Apps interacting with Hypre are exploring explicit mode, hence the separate tiles. But I want to add implicit scaling too. I will add macros like the following under HYPRE_USING_SYCL for a user to choose either explicit or implicit.

For e.g., HYPRE_SYCL_EXPLICIT and HYPRE_SYCL_IMPLICIT

}
local_nDevices++;
}
}
// single-tile GPUs
else {
if (local_nDevices == device_id) {
hypre_HandleDevice(hypre_handle_) = &(gpu_devices[i]);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you set hypre_HandleDevice(hypre_handle_) here, you need to make sure it is not overwritten below (line 143). Also, if you want to use hyrpe_SetDevice(), you need to do corresponding implementation for hypre_GetDevice(), which still has no implementation for sycl.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, hypre_GetDevice() method is still undefined for SYCL. I guess #if defined(HYPRE_USING_GPU) && !defined(HYPRE_USING_SYCL) fixes the resetting the value.

}
local_nDevices++;
}
}
#elif defined(HYPRE_USING_GPU)
if (hypre_handle_)
{
Expand Down Expand Up @@ -152,25 +188,20 @@ hypre_GetDeviceCount(hypre_int *device_count)
#endif

#if defined(HYPRE_USING_SYCL)
/* WM: todo - doesn't work on frank... commenting out */
/* sycl::platform platform(sycl::gpu_selector{}); */
/* auto const& gpu_devices = platform.get_devices(); */
/* for (int i = 0; i < gpu_devices.size(); i++) */
/* { */
/* if (gpu_devices[i].is_gpu()) */
/* { */
/* if(gpu_devices[i].get_info<sycl::info::device::partition_max_sub_devices>() > 0) */
/* { */
/* auto subDevicesDomainNuma = gpu_devices[i].create_sub_devices<sycl::info::partition_property::partition_by_affinity_domain>( */
/* sycl::info::partition_affinity_domain::numa); */
/* (*device_count) += subDevicesDomainNuma.size(); */
/* } */
/* else */
/* { */
/* (*device_count)++; */
/* } */
/* } */
/* } */
sycl::platform platform(sycl::gpu_selector{});
auto const& gpu_devices = platform.get_devices(sycl::info::device_type::gpu);
for (int i = 0; i < gpu_devices.size(); i++)
{
if(gpu_devices[i].get_info<sycl::info::device::partition_max_sub_devices>() > 0)
{
auto subDevicesDomainNuma = gpu_devices[i].create_sub_devices<sycl::info::partition_property::partition_by_affinity_domain>(sycl::info::partition_affinity_domain::numa);
(*device_count) += subDevicesDomainNuma.size();
}
else
{
(*device_count)++;
}
}
#endif

return hypre_error_flag;
Expand Down