diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index e4345fe5769d1..81f05d4d04f52 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -350,7 +350,9 @@ typedef enum { PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE, PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, - PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE + PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE, + // The number of registers used by the compiled kernel (device specific) + PI_KERNEL_GROUP_INFO_NUM_REGS = 0x10112 } _pi_kernel_group_info; typedef enum { diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index cc74ed463dd3e..9348ce670aaff 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -250,6 +250,7 @@ enum class kernel_device_specific : cl_kernel_work_group_info { preferred_work_group_size_multiple = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, private_mem_size = CL_KERNEL_PRIVATE_MEM_SIZE, + ext_codeplay_num_regs = PI_KERNEL_GROUP_INFO_NUM_REGS, max_sub_group_size = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, max_num_sub_groups = CL_KERNEL_MAX_NUM_SUB_GROUPS, compile_num_sub_groups = CL_KERNEL_COMPILE_NUM_SUB_GROUPS, diff --git a/sycl/include/CL/sycl/info/kernel_device_specific_traits.def b/sycl/include/CL/sycl/info/kernel_device_specific_traits.def index 8ff826fd95ac4..90496dd78d461 100644 --- a/sycl/include/CL/sycl/info/kernel_device_specific_traits.def +++ b/sycl/include/CL/sycl/info/kernel_device_specific_traits.def @@ -4,6 +4,7 @@ __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, global_work_size, cl::sycl::ran __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, preferred_work_group_size_multiple, size_t) __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, private_mem_size, cl_ulong) +__SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, ext_codeplay_num_regs, uint32_t) __SYCL_PARAM_TRAITS_SPEC(kernel_device_specific, work_group_size, size_t) __SYCL_PARAM_TRAITS_SPEC_WITH_INPUT(kernel_device_specific, max_sub_group_size, uint32_t, cl::sycl::range<3>) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 4305df5ee6a7f..2dafcebd1abe7 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -2552,6 +2552,14 @@ pi_result cuda_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(bytes)); } + case PI_KERNEL_GROUP_INFO_NUM_REGS: { + int numRegs = 0; + cl::sycl::detail::pi::assertion( + cuFuncGetAttribute(&numRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, + kernel->get()) == CUDA_SUCCESS); + return getInfo(param_value_size, param_value, param_value_size_ret, + pi_uint32(numRegs)); + } default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index be0872469da9a..e62bea93dff73 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -3136,6 +3136,12 @@ pi_result hip_piKernelGetGroupInfo(pi_kernel kernel, pi_device device, return getInfo(param_value_size, param_value, param_value_size_ret, pi_uint64(bytes)); } + case PI_KERNEL_GROUP_INFO_NUM_REGS: { + cl::sycl::detail::pi::die("PI_KERNEL_GROUP_INFO_NUM_REGS in " + "piKernelGetGroupInfo not implemented\n"); + return {}; + } + default: __SYCL_PI_HANDLE_UNKNOWN_PARAM_NAME(param_name); } diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 969f83c2dea37..382c8229ea071 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -4270,6 +4270,11 @@ pi_result piKernelGetGroupInfo(pi_kernel Kernel, pi_device Device, } case PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE: return ReturnValue(pi_uint32{Kernel->ZeKernelProperties->privateMemSize}); + case PI_KERNEL_GROUP_INFO_NUM_REGS: { + die("PI_KERNEL_GROUP_INFO_NUM_REGS in piKernelGetGroupInfo not " + "implemented\n"); + break; + } default: zePrint("Unknown ParamName in piKernelGetGroupInfo: ParamName=%d(0x%x)\n", ParamName, ParamName); diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 8f6da7d7ae82d..54a889a713751 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -747,6 +747,26 @@ pi_result piKernelCreate(pi_program program, const char *kernel_name, return ret_err; } +pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, + pi_kernel_group_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + if (kernel == nullptr) { + return PI_INVALID_KERNEL; + } + + switch (param_name) { + case PI_KERNEL_GROUP_INFO_NUM_REGS: + return PI_INVALID_VALUE; + default: + cl_int result = clGetKernelWorkGroupInfo( + cast(kernel), cast(device), + cast(param_name), param_value_size, + param_value, param_value_size_ret); + return static_cast(result); + } +} + pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name, size_t input_value_size, @@ -1371,7 +1391,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piKernelCreate, piKernelCreate) _PI_CL(piKernelSetArg, clSetKernelArg) _PI_CL(piKernelGetInfo, clGetKernelInfo) - _PI_CL(piKernelGetGroupInfo, clGetKernelWorkGroupInfo) + _PI_CL(piKernelGetGroupInfo, piKernelGetGroupInfo) _PI_CL(piKernelGetSubGroupInfo, piKernelGetSubGroupInfo) _PI_CL(piKernelRetain, clRetainKernel) _PI_CL(piKernelRelease, clReleaseKernel) diff --git a/sycl/source/detail/kernel_info.hpp b/sycl/source/detail/kernel_info.hpp index 85f46e00637e3..6ada88952cf1d 100644 --- a/sycl/source/detail/kernel_info.hpp +++ b/sycl/source/detail/kernel_info.hpp @@ -71,6 +71,9 @@ struct IsWorkGroupInfo< template <> struct IsWorkGroupInfo : std::true_type {}; +template <> +struct IsWorkGroupInfo + : std::true_type {}; template struct get_kernel_device_specific_info { @@ -143,6 +146,13 @@ inline cl_ulong get_kernel_device_specific_info_host< return 0; } +template <> +inline uint32_t get_kernel_device_specific_info_host< + info::kernel_device_specific::ext_codeplay_num_regs>( + const cl::sycl::device &) { + return 0; +} + template <> inline uint32_t get_kernel_device_specific_info_host< info::kernel_device_specific::max_num_sub_groups>( diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 23f7a3992bf60..caf4f72b48adb 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -13,6 +13,7 @@ piDevicesGet piEnqueueMemBufferMap piEventCreate piKernelCreate +piKernelGetGroupInfo piKernelGetSubGroupInfo piKernelSetExecInfo piMemBufferCreate diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 910763574c4a4..fca6b0c4c8a3d 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4229,6 +4229,7 @@ _ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4532EEENS3_12par _ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4533EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE _ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4537EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE _ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE4538EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE +_ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE65810EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceE _ZNK2cl4sycl6kernel8get_infoILNS0_4info22kernel_device_specificE8243EEENS3_12param_traitsIS4_XT_EE11return_typeERKNS0_6deviceENS6_10input_typeE _ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4496EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6kernel8get_infoILNS0_4info6kernelE4497EEENS3_12param_traitsIS4_XT_EE11return_typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 4bd4b6d687679..85ca1a6ef59b4 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -49,6 +49,7 @@ ??$get_info@$0BABB@@device@sycl@cl@@QEBA_KXZ ??$get_info@$0BABBA@@device@sycl@cl@@QEBA_NXZ ??$get_info@$0BABBB@@device@sycl@cl@@QEBA?AV?$vector@W4memory_order@sycl@cl@@V?$allocator@W4memory_order@sycl@cl@@@std@@@std@@XZ +??$get_info@$0BABBC@@kernel@sycl@cl@@QEBAIAEBVdevice@12@@Z ??$get_info@$0BABC@@device@sycl@cl@@QEBA_KXZ ??$get_info@$0BABD@@device@sycl@cl@@QEBA_KXZ ??$get_info@$0BABE@@device@sycl@cl@@QEBA_KXZ