Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][L0] Add memory access hint to piKernelSetArgMemObj #9752

Merged
merged 11 commits into from
Jun 28, 2023
39 changes: 33 additions & 6 deletions sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,13 @@
// 12.30 Added PI_EXT_INTEL_DEVICE_INFO_MEM_CHANNEL_SUPPORT device info query.
// 12.31 Added PI_EXT_CODEPLAY_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP device
// info query.
// 12.32 Removed backwards compatibility of piextQueueCreateWithNativeHandle and
// 13.32 Removed backwards compatibility of piextQueueCreateWithNativeHandle and
// piextQueueGetNativeHandle
// 14.33 Added new parameter (memory object properties) to
// piextKernelSetArgMemObj

#define _PI_H_VERSION_MAJOR 13
#define _PI_H_VERSION_MINOR 32
#define _PI_H_VERSION_MAJOR 14
#define _PI_H_VERSION_MINOR 33

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -1709,13 +1711,38 @@ __SYCL_EXPORT pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
const pi_event *event_wait_list,
pi_event *event);

#ifndef PI_BIT
#define PI_BIT(_i) (1 << _i)
#endif // PI_BIT

typedef enum {
PI_ACCESS_READ_WRITE = PI_BIT(0),
PI_ACCESS_WRITE_ONLY = PI_BIT(1),
PI_ACCESS_READ_ONLY = PI_BIT(2)
} _pi_mem_obj_access;
using pi_mem_obj_access = _pi_mem_obj_access;
typedef uint32_t pi_mem_access_flag;

typedef enum {
PI_KERNEL_ARG_MEM_OBJ_ACCESS = 27,
PI_ENUM_FORCE_UINT32 = 0x7fffffff
} _pi_mem_obj_property_type;
using pi_mem_obj_property_type = _pi_mem_obj_property_type;

typedef struct {
pi_mem_obj_property_type type;
void *pNext;
pi_mem_access_flag mem_access;
} _pi_mem_obj_property;
using pi_mem_obj_property = _pi_mem_obj_property;

// Extension to allow backends to process a PI memory object before adding it
// as an argument for a kernel.
// Note: This is needed by the CUDA backend to extract the device pointer to
// the memory as the kernels uses it rather than the PI object itself.
__SYCL_EXPORT pi_result piextKernelSetArgMemObj(pi_kernel kernel,
pi_uint32 arg_index,
const pi_mem *arg_value);
__SYCL_EXPORT pi_result piextKernelSetArgMemObj(
pi_kernel kernel, pi_uint32 arg_index,
const pi_mem_obj_property *arg_properties, const pi_mem *arg_value);
Comment on lines +1743 to +1745
Copy link
Contributor

@smaslov-intel smaslov-intel Jun 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please bump the major.minor PI version at the top of this file

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done 11aeaec


// Extension to allow backends to process a PI sampler object before adding it
// as an argument for a kernel.
Expand Down
3 changes: 2 additions & 1 deletion sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1409,7 +1409,8 @@ pi_result piKernelSetArg(pi_kernel, pi_uint32, size_t, const void *) {
DIE_NO_IMPLEMENTATION;
}

pi_result piextKernelSetArgMemObj(pi_kernel, pi_uint32, const pi_mem *) {
pi_result piextKernelSetArgMemObj(pi_kernel, pi_uint32,
const pi_mem_obj_property *, const pi_mem *) {
DIE_NO_IMPLEMENTATION;
}

Expand Down
9 changes: 6 additions & 3 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2007,9 +2007,10 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
sycl::detail::pi::assertion(
hipDeviceGetPCIBusId(AddressBuffer, AddressBufferSize, device->get()) ==
hipSuccess);
// A typical PCI address is 12 bytes + \0: "1234:67:90.2", but the HIP API is not
// guaranteed to use this format. In practice, it uses this format, at least
// in 5.3-5.5. To be on the safe side, we make sure the terminating \0 is set.
// A typical PCI address is 12 bytes + \0: "1234:67:90.2", but the HIP API
// is not guaranteed to use this format. In practice, it uses this format,
// at least in 5.3-5.5. To be on the safe side, we make sure the terminating
// \0 is set.
AddressBuffer[AddressBufferSize - 1] = '\0';
sycl::detail::pi::assertion(strnlen(AddressBuffer, AddressBufferSize) > 0);
return getInfoArray(strnlen(AddressBuffer, AddressBufferSize - 1) + 1,
Expand Down Expand Up @@ -2961,7 +2962,9 @@ pi_result hip_piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index,
}

pi_result hip_piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index,
const pi_mem_obj_property *arg_properties,
const pi_mem *arg_value) {
std::ignore = arg_properties;

assert(kernel != nullptr);
assert(arg_value != nullptr);
Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,9 +370,10 @@ pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex, size_t ArgSize,

// Special version of piKernelSetArg to accept pi_mem.
pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
const pi_mem_obj_property *ArgProperties,
const pi_mem *ArgValue) {

return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgValue);
return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties,
ArgValue);
}

// Special version of piKernelSetArg to accept pi_sampler.
Expand Down
2 changes: 2 additions & 0 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,9 @@ pi_result piSamplerCreate(pi_context context,
}

pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index,
const pi_mem_obj_property *arg_properties,
const pi_mem *arg_value) {
std::ignore = arg_properties;
return cast<pi_result>(
clSetKernelArg(cast<cl_kernel>(kernel), cast<cl_uint>(arg_index),
sizeof(arg_value), cast<const cl_mem *>(arg_value)));
Expand Down
46 changes: 35 additions & 11 deletions sycl/plugins/unified_runtime/pi2ur.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2003,8 +2003,10 @@ inline pi_result piextGetDeviceFunctionPointer(pi_device Device,
}

// Special version of piKernelSetArg to accept pi_mem.
inline pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
const pi_mem *ArgValue) {
inline pi_result
piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
const pi_mem_obj_property *ArgProperties,
const pi_mem *ArgValue) {

// TODO: the better way would probably be to add a new PI API for
// extracting native PI object from PI handle, and have SYCL
Expand All @@ -2017,21 +2019,43 @@ inline pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex,
if (ArgValue)
UrMemory = reinterpret_cast<ur_mem_handle_t>(*ArgValue);

ur_kernel_arg_mem_obj_properties_t Properties{};

// We don't yet know the device where this kernel will next be run on.
// Thus we can't know the actual memory allocation that needs to be used.
// Remember the memory object being used as an argument for this kernel
// to process it later when the device is known (at the kernel enqueue).
//
// TODO: for now we have to conservatively assume the access as read-write.
// Improve that by passing SYCL buffer accessor type into
// piextKernelSetArgMemObj.
//

ur_kernel_handle_t UrKernel = reinterpret_cast<ur_kernel_handle_t>(Kernel);
HANDLE_ERRORS(
urKernelSetArgMemObj(UrKernel, ArgIndex, &Properties, UrMemory));
// the only applicable type, just ignore anything else
if (ArgProperties && ArgProperties->type == PI_KERNEL_ARG_MEM_OBJ_ACCESS) {
// following structure layout checks to be replaced with
// std::is_layout_compatible after move to C++20
static_assert(sizeof(pi_mem_obj_property) ==
sizeof(ur_kernel_arg_mem_obj_properties_t));
static_assert(sizeof(pi_mem_obj_property::type) ==
sizeof(ur_kernel_arg_mem_obj_properties_t::stype));
static_assert(sizeof(pi_mem_obj_property::pNext) ==
sizeof(ur_kernel_arg_mem_obj_properties_t::pNext));
static_assert(sizeof(pi_mem_obj_property::mem_access) ==
sizeof(ur_kernel_arg_mem_obj_properties_t::memoryAccess));

static_assert(uint32_t(PI_ACCESS_READ_WRITE) ==
uint32_t(UR_MEM_FLAG_READ_WRITE));
static_assert(uint32_t(PI_ACCESS_READ_ONLY) ==
uint32_t(UR_MEM_FLAG_READ_ONLY));
static_assert(uint32_t(PI_ACCESS_WRITE_ONLY) ==
uint32_t(UR_MEM_FLAG_WRITE_ONLY));
static_assert(uint32_t(PI_KERNEL_ARG_MEM_OBJ_ACCESS) ==
uint32_t(UR_STRUCTURE_TYPE_KERNEL_ARG_MEM_OBJ_PROPERTIES));

const ur_kernel_arg_mem_obj_properties_t *UrMemProperties =
reinterpret_cast<const ur_kernel_arg_mem_obj_properties_t *>(
ArgProperties);
HANDLE_ERRORS(
urKernelSetArgMemObj(UrKernel, ArgIndex, UrMemProperties, UrMemory));
} else {
HANDLE_ERRORS(urKernelSetArgMemObj(UrKernel, ArgIndex, nullptr, UrMemory));
}

return PI_SUCCESS;
}

Expand Down
9 changes: 5 additions & 4 deletions sycl/plugins/unified_runtime/pi_unified_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,12 @@ __SYCL_EXPORT pi_result piKernelCreate(pi_program Program,
}

// Special version of piKernelSetArg to accept pi_mem.
__SYCL_EXPORT pi_result piextKernelSetArgMemObj(pi_kernel Kernel,
pi_uint32 ArgIndex,
const pi_mem *ArgValue) {
__SYCL_EXPORT pi_result piextKernelSetArgMemObj(
pi_kernel Kernel, pi_uint32 ArgIndex,
const pi_mem_obj_property *ArgProperties, const pi_mem *ArgValue) {

return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgValue);
return pi2ur::piextKernelSetArgMemObj(Kernel, ArgIndex, ArgProperties,
ArgValue);
}

__SYCL_EXPORT pi_result piKernelSetArg(pi_kernel Kernel, pi_uint32 ArgIndex,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -699,9 +699,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj(

ur_mem_handle_t_ *UrMem = ur_cast<ur_mem_handle_t_ *>(ArgValue);

ur_mem_handle_t_::access_mode_t UrAccessMode = ur_mem_handle_t_::read_write;
if (Properties) {
switch (Properties->memoryAccess) {
case UR_MEM_FLAG_READ_WRITE:
UrAccessMode = ur_mem_handle_t_::read_write;
break;
case UR_MEM_FLAG_WRITE_ONLY:
UrAccessMode = ur_mem_handle_t_::write_only;
break;
case UR_MEM_FLAG_READ_ONLY:
UrAccessMode = ur_mem_handle_t_::read_only;
break;
default:
return UR_RESULT_ERROR_INVALID_ARGUMENT;
}
}
auto Arg = UrMem ? UrMem : nullptr;
Kernel->PendingArguments.push_back(
{ArgIndex, sizeof(void *), Arg, ur_mem_handle_t_::read_write});
{ArgIndex, sizeof(void *), Arg, UrAccessMode});

return UR_RESULT_SUCCESS;
}
Expand Down
25 changes: 22 additions & 3 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2178,6 +2178,18 @@ static void ReverseRangeDimensionsForKernel(NDRDescT &NDR) {
}
}

pi_mem_obj_access AccessModeToPi(access::mode AccessorMode) {
switch (AccessorMode) {
case access::mode::read:
return PI_ACCESS_READ_ONLY;
case access::mode::write:
case access::mode::discard_write:
return PI_ACCESS_WRITE_ONLY;
default:
return PI_ACCESS_READ_WRITE;
}
}

static pi_result SetKernelParamsAndLaunch(
const QueueImplPtr &Queue, std::vector<ArgDesc> &Args,
const std::shared_ptr<device_image_impl> &DeviceImageImpl,
Expand Down Expand Up @@ -2212,8 +2224,11 @@ static pi_result SetKernelParamsAndLaunch(
Plugin->call<PiApiKind::piKernelSetArg>(
Kernel, NextTrueIndex, sizeof(sycl::detail::pi::PiMem), &MemArg);
} else {
pi_mem_obj_property MemObjData{};
MemObjData.mem_access = AccessModeToPi(Req->MAccessMode);
MemObjData.type = PI_KERNEL_ARG_MEM_OBJ_ACCESS;
Plugin->call<PiApiKind::piextKernelSetArgMemObj>(Kernel, NextTrueIndex,
&MemArg);
&MemObjData, &MemArg);
}
break;
}
Expand Down Expand Up @@ -2250,8 +2265,12 @@ static pi_result SetKernelParamsAndLaunch(
// Avoid taking an address of nullptr
sycl::detail::pi::PiMem *SpecConstsBufferArg =
SpecConstsBuffer ? &SpecConstsBuffer : nullptr;
Plugin->call<PiApiKind::piextKernelSetArgMemObj>(Kernel, NextTrueIndex,
SpecConstsBufferArg);

pi_mem_obj_property MemObjData{};
MemObjData.mem_access = PI_ACCESS_READ_ONLY;
MemObjData.type = PI_KERNEL_ARG_MEM_OBJ_ACCESS;
Plugin->call<PiApiKind::piextKernelSetArgMemObj>(
Kernel, NextTrueIndex, &MemObjData, SpecConstsBufferArg);
break;
}
case kernel_param_kind_t::kind_invalid:
Expand Down
1 change: 1 addition & 0 deletions sycl/unittests/buffer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ add_sycl_unittest(BufferTests OBJECT
Image.cpp
BufferDestructionCheck.cpp
MemChannel.cpp
KernelArgMemObj.cpp
)
Loading