Skip to content

Commit

Permalink
[Testing] Spec clarifications and testing updates for kernel
Browse files Browse the repository at this point in the history
As well as some additional tests, some additions to the spec were
made to clarify error conditions:
* Several information queries were updated to provide a default of
  0 (like OpenCL).
* `UR_RESULT_ERROR_INVALID_KERNEL_ARG` added for enqueues where
  a parameter has not been specified.
* The OpenCL adapter now handles invalid kernel args correctly.
  • Loading branch information
RossBrunton committed May 22, 2024
1 parent 3c188c9 commit e2ffea6
Show file tree
Hide file tree
Showing 21 changed files with 347 additions and 10 deletions.
7 changes: 5 additions & 2 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -4772,7 +4772,8 @@ typedef enum ur_kernel_group_info_t {
UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE = 0, ///< [size_t[3]] Return Work Group maximum global size
UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE = 1, ///< [size_t] Return maximum Work Group size
UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE = 2, ///< [size_t[3]] Return Work Group size required by the source code, such
///< as __attribute__((required_work_group_size(X,Y,Z))
///< as __attribute__((required_work_group_size(X,Y,Z)), or (0, 0, 0) if
///< unspecified
UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = 3, ///< [size_t] Return local memory required by the Kernel
UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = 4, ///< [size_t] Return preferred multiple of Work Group size for launch
UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = 5, ///< [size_t] Return minimum amount of private memory in bytes used by each
Expand All @@ -4788,7 +4789,8 @@ typedef enum ur_kernel_group_info_t {
typedef enum ur_kernel_sub_group_info_t {
UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE = 0, ///< [uint32_t] Return maximum SubGroup size
UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS = 1, ///< [uint32_t] Return maximum number of SubGroup
UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS = 2, ///< [uint32_t] Return number of SubGroup required by the source code
UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS = 2, ///< [uint32_t] Return number of SubGroup required by the source code or 0
///< if unspecified
UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL = 3, ///< [uint32_t] Return SubGroup size required by Intel
/// @cond
UR_KERNEL_SUB_GROUP_INFO_FORCE_UINT32 = 0x7fffffff
Expand Down Expand Up @@ -5989,6 +5991,7 @@ urEventSetCallback(
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
/// - ::UR_RESULT_ERROR_INVALID_VALUE
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values have not been specified."
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
UR_APIEXPORT ur_result_t UR_APICALL
Expand Down
2 changes: 2 additions & 0 deletions scripts/core/enqueue.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ returns:
- $X_RESULT_ERROR_INVALID_WORK_DIMENSION
- $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
- $X_RESULT_ERROR_INVALID_VALUE
- $X_RESULT_ERROR_INVALID_KERNEL_ARGS
- "The kernel argument values have not been specified."
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
- $X_RESULT_ERROR_OUT_OF_RESOURCES
--- #--------------------------------------------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions scripts/core/kernel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ etors:
- name: WORK_GROUP_SIZE
desc: "[size_t] Return maximum Work Group size"
- name: COMPILE_WORK_GROUP_SIZE
desc: "[size_t[3]] Return Work Group size required by the source code, such as __attribute__((required_work_group_size(X,Y,Z))"
desc: "[size_t[3]] Return Work Group size required by the source code, such as __attribute__((required_work_group_size(X,Y,Z)), or (0, 0, 0) if unspecified"
- name: LOCAL_MEM_SIZE
desc: "[size_t] Return local memory required by the Kernel"
- name: PREFERRED_WORK_GROUP_SIZE_MULTIPLE
Expand All @@ -154,7 +154,7 @@ etors:
- name: MAX_NUM_SUB_GROUPS
desc: "[uint32_t] Return maximum number of SubGroup"
- name: COMPILE_NUM_SUB_GROUPS
desc: "[uint32_t] Return number of SubGroup required by the source code"
desc: "[uint32_t] Return number of SubGroup required by the source code or 0 if unspecified"
- name: SUB_GROUP_SIZE_INTEL
desc: "[uint32_t] Return SubGroup size required by Intel"
--- #--------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions source/adapters/opencl/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ ur_result_t mapCLErrorToUR(cl_int Result) {
return UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS;
case CL_DEVICE_NOT_AVAILABLE:
return UR_RESULT_ERROR_DEVICE_NOT_AVAILABLE;
case CL_INVALID_KERNEL_ARGS:
return UR_RESULT_ERROR_INVALID_KERNEL_ARGS;
default:
return UR_RESULT_ERROR_UNKNOWN;
}
Expand Down
1 change: 1 addition & 0 deletions source/loader/ur_libapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4768,6 +4768,7 @@ ur_result_t UR_APICALL urEventSetCallback(
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
/// - ::UR_RESULT_ERROR_INVALID_VALUE
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values have not been specified."
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
ur_result_t UR_APICALL urEnqueueKernelLaunch(
Expand Down
1 change: 1 addition & 0 deletions source/ur_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4040,6 +4040,7 @@ ur_result_t UR_APICALL urEventSetCallback(
/// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
/// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
/// - ::UR_RESULT_ERROR_INVALID_VALUE
/// - ::UR_RESULT_ERROR_INVALID_KERNEL_ARGS - "The kernel argument values have not been specified."
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
ur_result_t UR_APICALL urEnqueueKernelLaunch(
Expand Down
1 change: 1 addition & 0 deletions test/conformance/device/device_adapter_native_cpu.match
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
urDeviceCreateWithNativeHandleTest.InvalidNullHandlePlatform
urDeviceCreateWithNativeHandleTest.InvalidNullPointerDevice
{{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime
urDeviceGetInfoSingleTest.MaxWorkGroupSizeIsNonzero
{{OPT}}urDeviceSelectBinaryTest.Success
urDeviceGetInfoTest.Success/UR_DEVICE_INFO_DEVICE_ID
urDeviceGetInfoTest.Success/UR_DEVICE_INFO_MEMORY_CLOCK_RATE
Expand Down
39 changes: 33 additions & 6 deletions test/conformance/device/urDeviceGetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <array>
#include <map>
#include <uur/fixtures.h>

Expand Down Expand Up @@ -242,6 +243,12 @@ INSTANTIATE_TEST_SUITE_P(
return ss.str();
});

struct urDeviceGetInfoSingleTest : uur::urAllDevicesTest {
void SetUp() override {
UUR_RETURN_ON_FATAL_FAILURE(uur::urAllDevicesTest::SetUp());
}
};

bool doesReturnArray(ur_device_info_t info_type) {
if (info_type == UR_DEVICE_INFO_SUPPORTED_PARTITIONS ||
info_type == UR_DEVICE_INFO_PARTITION_TYPE) {
Expand Down Expand Up @@ -284,15 +291,15 @@ TEST_P(urDeviceGetInfoTest, Success) {
}
}

TEST_P(urDeviceGetInfoTest, InvalidNullHandleDevice) {
TEST_F(urDeviceGetInfoSingleTest, InvalidNullHandleDevice) {
ur_device_type_t device_type;
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE,
urDeviceGetInfo(nullptr, UR_DEVICE_INFO_TYPE,
sizeof(ur_device_type_t), &device_type,
nullptr));
}

TEST_P(urDeviceGetInfoTest, InvalidEnumerationInfoType) {
TEST_F(urDeviceGetInfoSingleTest, InvalidEnumerationInfoType) {
for (auto device : devices) {
ur_device_type_t device_type;
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_ENUMERATION,
Expand All @@ -302,7 +309,7 @@ TEST_P(urDeviceGetInfoTest, InvalidEnumerationInfoType) {
}
}

TEST_P(urDeviceGetInfoTest, InvalidSizePropSize) {
TEST_F(urDeviceGetInfoSingleTest, InvalidSizePropSize) {
for (auto device : devices) {
ur_device_type_t device_type;
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_SIZE,
Expand All @@ -311,7 +318,7 @@ TEST_P(urDeviceGetInfoTest, InvalidSizePropSize) {
}
}

TEST_P(urDeviceGetInfoTest, InvalidSizePropSizeSmall) {
TEST_F(urDeviceGetInfoSingleTest, InvalidSizePropSizeSmall) {
for (auto device : devices) {
ur_device_type_t device_type;
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_SIZE,
Expand All @@ -321,7 +328,7 @@ TEST_P(urDeviceGetInfoTest, InvalidSizePropSizeSmall) {
}
}

TEST_P(urDeviceGetInfoTest, InvalidNullPointerPropValue) {
TEST_F(urDeviceGetInfoSingleTest, InvalidNullPointerPropValue) {
for (auto device : devices) {
ur_device_type_t device_type;
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER,
Expand All @@ -331,10 +338,30 @@ TEST_P(urDeviceGetInfoTest, InvalidNullPointerPropValue) {
}
}

TEST_P(urDeviceGetInfoTest, InvalidNullPointerPropSizeRet) {
TEST_F(urDeviceGetInfoSingleTest, InvalidNullPointerPropSizeRet) {
for (auto device : devices) {
ASSERT_EQ_RESULT(
UR_RESULT_ERROR_INVALID_NULL_POINTER,
urDeviceGetInfo(device, UR_DEVICE_INFO_TYPE, 0, nullptr, nullptr));
}
}

TEST_F(urDeviceGetInfoSingleTest, MaxWorkGroupSizeIsNonzero) {
for (auto device : devices) {
size_t max_global_size;

ASSERT_SUCCESS(
urDeviceGetInfo(device, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
sizeof(size_t), &max_global_size, nullptr));
ASSERT_NE(max_global_size, 0);

std::array<size_t, 3> max_work_group_sizes;
ASSERT_SUCCESS(urDeviceGetInfo(device,
UR_DEVICE_INFO_MAX_WORK_GROUPS_3D,
sizeof(max_work_group_sizes),
max_work_group_sizes.data(), nullptr));
for (size_t i = 0; i < 3; i++) {
ASSERT_NE(max_work_group_sizes[i], 0);
}
}
}
10 changes: 10 additions & 0 deletions test/conformance/device_code/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,13 @@ macro(add_device_binary SOURCE_FILE)
continue()
endif()

# HIP doesn't seem to provide the symbol
# `_ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E11FixedSgSize` which
# causes a build failure here
if(${TRIPLE} MATCHES "amd" AND ${KERNEL_NAME} MATCHES "subgroup")
continue()
endif()

add_custom_command(OUTPUT "${BIN_PATH}"
COMMAND ${UR_DPCXX} -fsycl -fsycl-targets=${TRIPLE} -fsycl-device-code-split=off
${AMD_TARGET_BACKEND} ${AMD_OFFLOAD_ARCH} ${AMD_NOGPULIB}
Expand Down Expand Up @@ -139,6 +146,9 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/saxpy_usm.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/indexers_usm.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/build_failure.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fixed_wg_size.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/standard_types.cpp)
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/subgroup.cpp)

set(KERNEL_HEADER ${UR_CONFORMANCE_DEVICE_BINARIES_DIR}/kernel_entry_points.h)
add_custom_command(OUTPUT ${KERNEL_HEADER}
Expand Down
27 changes: 27 additions & 0 deletions test/conformance/device_code/fixed_wg_size.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (C) 2024 Intel Corporation
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
// See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <sycl/sycl.hpp>

struct KernelFunctor {
void operator()(sycl::nd_item<3>) const {}
void operator()(sycl::item<3>) const {}

auto get(sycl::ext::oneapi::experimental::properties_tag) {
return sycl::ext::oneapi::experimental::properties{
sycl::ext::oneapi::experimental::work_group_size<4, 4, 4>};
}
};

int main() {
sycl::queue myQueue;
myQueue.submit([&](sycl::handler &cgh) {
cgh.parallel_for<class FixedWgSize>(sycl::range<3>(8, 8, 8),
KernelFunctor{});
});

myQueue.wait();
return 0;
}
42 changes: 42 additions & 0 deletions test/conformance/device_code/standard_types.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright (C) 2024 Intel Corporation
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
// See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <stdint.h>
#include <sycl/sycl.hpp>

struct Struct {
uint32_t a;
uint32_t b;
};

int main() {
sycl::queue deviceQueue;
sycl::range<1> numOfItems{1};

uint32_t output = 0;

volatile bool test_bool = true;
volatile uint8_t test_u8 = 2;
volatile uint32_t test_u32 = 3;
volatile uint64_t test_u64 = 5;
Struct test_struct{7, 5};
volatile float test_float = 11;

{
sycl::buffer output_buff(&output, sycl::range(1));
deviceQueue.submit([&](sycl::handler &cgh) {
sycl::accessor acc{output_buff, cgh, sycl::read_write};
auto kern = [=](sycl::id<1> id) {
acc[id] = 100 + (test_bool ? 1 : 0) * test_u8 * test_u32 *
test_u64 * test_struct.a *
static_cast<uint32_t>(test_float);
};
cgh.parallel_for<class Foo>(numOfItems, kern);
});
deviceQueue.wait();
}

return output == 2410;
}
35 changes: 35 additions & 0 deletions test/conformance/device_code/subgroup.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (C) 2024 Intel Corporation
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
// See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <sycl/sycl.hpp>

struct KernelFunctor {
sycl::accessor<size_t, 1, sycl::access_mode::write> Acc;

KernelFunctor(sycl::accessor<size_t, 1, sycl::access_mode::write> Acc)
: Acc(Acc) {}

void operator()(sycl::nd_item<1> NdItem) const {
auto SG = NdItem.get_sub_group();
if (NdItem.get_global_linear_id() == 0) {
Acc[0] = SG.get_local_linear_range();
}
}
};

int main() {
sycl::queue myQueue;
size_t output = 0;
sycl::buffer output_buff(&output, sycl::range(1));

myQueue.submit([&](sycl::handler &cgh) {
sycl::accessor acc{output_buff, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<class FixedSgSize>(sycl::nd_range<1>(8, 2),
KernelFunctor{acc});
});

myQueue.wait();
return 0;
}
3 changes: 3 additions & 0 deletions test/conformance/enqueue/enqueue_adapter_cuda.match
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
urEnqueueKernelLaunchTest.InvalidKernelArgs/NVIDIA_CUDA_BACKEND___{{.*}}_
urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/NVIDIA_CUDA_BACKEND___{{.*}}_
urEnqueueKernelLaunchKernelSubGroupTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_
{{OPT}}urEnqueueKernelLaunchWithVirtualMemory.Success/NVIDIA_CUDA_BACKEND___{{.*}}_
{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}_
{{OPT}}urEnqueueMemBufferFillTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___size__256__patternSize__256
Expand Down
7 changes: 7 additions & 0 deletions test/conformance/enqueue/enqueue_adapter_native_cpu.match
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@
{{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkDimension/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkGroupSize/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchTest.InvalidKernelArgs/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.SuccessWithExplicitLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchKernelStandardTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU_
{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___1D_1
{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___1D_31
{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU___1D_1027
Expand Down
2 changes: 2 additions & 0 deletions test/conformance/enqueue/enqueue_adapter_opencl.match
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,6 @@
{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/Intel_R__OpenCL___{{.*}}
{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/Intel_R__OpenCL___{{.*}}
{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__OpenCL___{{.*}}
urEnqueueKernelLaunchKernelWgSizeTest.Success/Intel_R__OpenCL___{{.*}}_
urEnqueueKernelLaunchKernelSubGroupTest.Success/Intel_R__OpenCL___{{.*}}_
{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/Intel_R__OpenCL___{{.*}}_UsePoolEnabled
Loading

0 comments on commit e2ffea6

Please sign in to comment.