From bf49ae554633fe577071d49fcee71d27e23cefe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 4 Sep 2024 15:34:52 +0100 Subject: [PATCH] Fix mistakes in tests --- include/ur_api.h | 2 +- scripts/core/exp-command-buffer.yml | 2 +- source/adapters/cuda/command_buffer.cpp | 2 +- source/adapters/level_zero/context.cpp | 2 +- source/adapters/level_zero/v2/api.cpp | 1 + source/loader/layers/validation/ur_valddi.cpp | 4 - source/loader/ur_libapi.cpp | 2 +- source/ur_api.cpp | 2 +- .../exp_command_buffer_adapter_hip.match | 4 + ...command_buffer_adapter_level_zero_v2.match | 32 -- .../update/kernel_handle_update.cpp | 6 +- .../usm_fill_kernel_update.cpp | 357 ------------------ .../usm_saxpy_kernel_update.cpp | 354 ----------------- .../exp_enqueue_native/CMakeLists.txt | 8 +- 14 files changed, 18 insertions(+), 760 deletions(-) delete mode 100644 test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp delete mode 100644 test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp diff --git a/include/ur_api.h b/include/ur_api.h index f67ce01303..7979da57bf 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -8393,7 +8393,7 @@ urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `phKernelAlternatives == NULL && numKernelAlternatives > 0` /// + `phKernelAlternatives != NULL && numKernelAlternatives == 0` -/// + `phKernelAlternatives` contains `hKernel` +/// + If `phKernelAlternatives` contains `hKernel` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 5ebb3ddeb9..d3f5a95bc8 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -341,7 +341,7 @@ returns: - $X_RESULT_ERROR_INVALID_VALUE: - "`phKernelAlternatives == NULL && numKernelAlternatives > 0`" - "`phKernelAlternatives != NULL && numKernelAlternatives == 0`" - - "`phKernelAlternatives` contains `hKernel`" + - "If `phKernelAlternatives` contains `hKernel`" - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 90d2e17862..0a6f0015e8 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -879,7 +879,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; } - if (auto NewWorkDim = pUpdateKernelLaunch->newWorkDim) { + if (pUpdateKernelLaunch->newWorkDim) { // Error If Local size and not global size if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 452189d038..0b54968d67 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -801,7 +801,7 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( ze_result_t ZeResult = ZE_CALL_NOCHECK(zeFenceQueryStatus, (it->second.ZeFence)); - if (ZeResult == ZE_RESULT_SUCCESS) { + if (ZeResult _ == ZE_RESULT_SUCCESS) { std::vector EventListToCleanup; Queue->resetCommandList(it, false, EventListToCleanup); CleanupEventListFromResetCmdList(EventListToCleanup, diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp index dc52874364..ce3651c6a3 100644 --- a/source/adapters/level_zero/v2/api.cpp +++ b/source/adapters/level_zero/v2/api.cpp @@ -507,6 +507,7 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numKernelAlternatives, ur_kernel_handle_t *phKernelAlternatives, uint32_t numSyncPointsInWaitList, const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 1c6dbb1392..b05194bef1 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8105,10 +8105,6 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_INVALID_VALUE; } - if (phKernelAlternatives` contains `hKernel) { - return UR_RESULT_ERROR_INVALID_VALUE; - } - if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; } diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 89e4a3788c..36e61ba09d 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7527,7 +7527,7 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `phKernelAlternatives == NULL && numKernelAlternatives > 0` /// + `phKernelAlternatives != NULL && numKernelAlternatives == 0` -/// + `phKernelAlternatives` contains `hKernel` +/// + If `phKernelAlternatives` contains `hKernel` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 967121dd31..0babfaf8ae 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6383,7 +6383,7 @@ ur_result_t UR_APICALL urCommandBufferFinalizeExp( /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `phKernelAlternatives == NULL && numKernelAlternatives > 0` /// + `phKernelAlternatives != NULL && numKernelAlternatives == 0` -/// + `phKernelAlternatives` contains `hKernel` +/// + If `phKernelAlternatives` contains `hKernel` /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP /// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match index e69de29bb2..a39a452d04 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match @@ -0,0 +1,4 @@ +urCommandBufferKernelHandleUpdateTest.Success/AMD_HIP_BACKEND___{{.*}}_ +urCommandBufferKernelHandleUpdateTest.UpdateAgain/AMD_HIP_BACKEND___{{.*}}_ +urCommandBufferKernelHandleUpdateTest.KernelAlternativeNotRegistered/AMD_HIP_BACKEND___{{.*}}_ +urCommandBufferKernelHandleUpdateTest.RegisterInvalidKernelAlternative/AMD_HIP_BACKEND___{{.*}}_ diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match index f997810ca5..8a8eff0cf5 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match @@ -1,34 +1,7 @@ -BufferFillCommandTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.UpdateGlobalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.SeparateUpdateCalls/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.OverrideUpdate/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferFillCommandTest.OverrideArgList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMFillCommandTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMFillCommandTest.UpdateBeforeEnqueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMMultipleFillCommandTest.UpdateAllKernels/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -BufferSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMMultiSaxpyKernelTest.UpdateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -USMMultiSaxpyKernelTest.UpdateWithoutBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -NDRangeUpdateTest.Update3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -NDRangeUpdateTest.Update2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -NDRangeUpdateTest.Update1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -NDRangeUpdateTest.Invalid/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferReleaseExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferReleaseExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.ReleaseCmdBufBeforeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.ReleaseCmdBufMultipleHandles/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferReleaseCommandExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferRetainExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferRetainExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferRetainCommandExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferRetainCommandExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.NotFinalizedCommandBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.NotUpdatableCommandBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.GlobalLocalSizeMistach/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.ImplToUserDefinedLocalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -InvalidUpdateTest.UserToImplDefinedLocalSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferCommandsTest.urCommandBufferAppendUSMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferCommandsTest.urCommandBufferAppendUSMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ @@ -55,8 +28,3 @@ urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Z urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 -urCommandBufferKernelHandleUpdateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.UpdateAgain/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.KernelAlternativeNotRegistered/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urCommandBufferKernelHandleUpdateTest.RegisterInvalidKernelAlternative/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ - diff --git a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp index 4aac942231..a533786917 100644 --- a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp +++ b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp @@ -7,9 +7,6 @@ #include "uur/raii.h" #include -// Tests that it is possible to update the kernel handle of a command-buffer node. -// This test launches a Saxpy kernel using a command-buffer and then updates the -// node with a completely different kernel that does a fill 2D operation. struct TestKernel { TestKernel(std::string Name, ur_platform_handle_t Platform, @@ -247,6 +244,9 @@ struct urCommandBufferKernelHandleUpdateTest UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urCommandBufferKernelHandleUpdateTest); +/* Tests that it is possible to update the kernel handle of a command-buffer node. + * This test launches a Saxpy kernel using a command-buffer and then updates the + * node with a completely different kernel that does a fill 2D operation. */ TEST_P(urCommandBufferKernelHandleUpdateTest, Success) { std::vector KernelAlternatives = { diff --git a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp deleted file mode 100644 index 6a86f30cea..0000000000 --- a/test/conformance/exp_command_buffer/usm_fill_kernel_update.cpp +++ /dev/null @@ -1,357 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -// See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "fixtures.h" -#include - -// Test that updating a command-buffer with a single kernel command -// taking USM arguments works correctly. -struct USMFillCommandTest - : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { - void SetUp() override { - program_name = "fill_usm"; - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::SetUp()); - - ur_device_usm_access_capability_flags_t shared_usm_flags; - ASSERT_SUCCESS( - uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); - if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { - GTEST_SKIP() << "Shared USM is not supported."; - } - - // Allocate USM pointer to fill - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &shared_ptr)); - ASSERT_NE(shared_ptr, nullptr); - std::memset(shared_ptr, 0, allocation_size); - - // Index 0 is output - ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, shared_ptr)); - // Index 1 is input scalar - ASSERT_SUCCESS( - urKernelSetArgValue(kernel, 1, sizeof(val), nullptr, &val)); - - // Append kernel command to command-buffer and close command-buffer - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handle)); - ASSERT_NE(command_handle, nullptr); - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - } - - void Validate(uint32_t *pointer, size_t length, uint32_t val) { - for (size_t i = 0; i < length; i++) { - ASSERT_EQ(pointer[i], val); - } - } - - void TearDown() override { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); - } - - if (new_shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr)); - } - - if (command_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); - } - - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::TearDown()); - } - - static constexpr uint32_t val = 42; - static constexpr size_t local_size = 4; - static constexpr size_t global_size = 32; - static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; - static constexpr size_t allocation_size = sizeof(val) * global_size; - void *shared_ptr = nullptr; - void *new_shared_ptr = nullptr; - ur_exp_command_buffer_command_handle_t command_handle = nullptr; -}; - -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMFillCommandTest); - -// Test using a different global size to fill and larger USM output buffer -TEST_P(USMFillCommandTest, UpdateParameters) { - // Run command-buffer prior to update an verify output - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - Validate((uint32_t *)shared_ptr, global_size, val); - - // Allocate a new USM pointer of larger size if feature is supported. - size_t new_global_size = global_size * 2; - const size_t new_allocation_size = sizeof(val) * new_global_size; - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - new_allocation_size, &new_shared_ptr)); - ASSERT_NE(new_shared_ptr, nullptr); - std::memset(new_shared_ptr, 0, new_allocation_size); - - // Set new USM pointer as kernel output at index 0 - ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 0, // argIndex - nullptr, // pProperties - &new_shared_ptr, // pArgValue - }; - - // Set new value to use for fill at kernel index 1 - uint32_t new_val = 33; - ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(new_val), // argSize - nullptr, // pProperties - &new_val, // hArgValue - }; - - size_t new_local_size = local_size; - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 1, // numNewPointerArgs - 1, // numNewValueArgs - static_cast(n_dimensions), // newWorkDim - nullptr, // pNewMemObjArgList - &new_output_desc, // pNewPointerArgList - &new_input_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - &new_global_size, // pNewGlobalWorkSize - &new_local_size, // pNewLocalWorkSize - }; - - // Update kernel and enqueue command-buffer again - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - Validate((uint32_t *)new_shared_ptr, new_global_size, new_val); -} - -// Test updating a command-buffer which hasn't been enqueued yet -TEST_P(USMFillCommandTest, UpdateBeforeEnqueue) { - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &new_shared_ptr)); - ASSERT_NE(new_shared_ptr, nullptr); - std::memset(new_shared_ptr, 0, allocation_size); - - // Set new USM pointer as kernel output at index 0 - ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 0, // argIndex - nullptr, // pProperties - &new_shared_ptr, // pArgValue - }; - - // Set new value to use for fill at kernel index 1 - uint32_t new_val = 33; - ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(new_val), // argSize - nullptr, // pProperties - &new_val, // hArgValue - }; - - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 1, // numNewPointerArgs - 1, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - &new_output_desc, // pNewPointerArgList - &new_input_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Update kernel and enqueue command-buffer - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - Validate((uint32_t *)new_shared_ptr, global_size, new_val); -} - -// Test updating a command-buffer with multiple USM fill kernel commands -struct USMMultipleFillCommandTest - : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { - void SetUp() override { - program_name = "fill_usm"; - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::SetUp()); - - ur_device_usm_access_capability_flags_t shared_usm_flags; - ASSERT_SUCCESS( - uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); - if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { - GTEST_SKIP() << "Shared USM is not supported."; - } - - // Create a single USM allocation which will be used by all kernels - // by accessing at pointer offsets - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &shared_ptr)); - ASSERT_NE(shared_ptr, nullptr); - std::memset(shared_ptr, 0, allocation_size); - - // Append multiple kernel commands to command-buffer - for (size_t k = 0; k < num_kernels; k++) { - // Calculate offset into output allocation, and set as - // kernel output. - void *offset_ptr = (uint32_t *)shared_ptr + (k * elements); - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 0, nullptr, offset_ptr)); - - // Each kernel has a unique fill value - uint32_t fill_val = val + k; - ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(fill_val), - nullptr, &fill_val)); - - // Append kernel and store returned handle - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &elements, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handles[k])); - ASSERT_NE(command_handles[k], nullptr); - } - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - } - - void Validate(uint32_t *pointer, size_t length, uint32_t val) { - for (size_t i = 0; i < length; i++) { - ASSERT_EQ(pointer[i], val); - } - } - - void TearDown() override { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); - } - - if (new_shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, new_shared_ptr)); - } - - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::TearDown()); - } - - static constexpr uint32_t val = 42; - static constexpr size_t local_size = 4; - static constexpr size_t global_size = 64; - static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; - static constexpr size_t allocation_size = sizeof(val) * global_size; - static constexpr size_t num_kernels = 8; - static constexpr size_t elements = global_size / num_kernels; - - void *shared_ptr = nullptr; - void *new_shared_ptr = nullptr; - std::array - command_handles; -}; - -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMMultipleFillCommandTest); - -// Test updating all the kernels commands in the command-buffer -TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { - // Run command-buffer prior to update an verify output - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - uint32_t *output = (uint32_t *)shared_ptr; - for (size_t i = 0; i < global_size; i++) { - const uint32_t expected = val + (i / elements); - ASSERT_EQ(expected, output[i]); - } - - // Create a new USM allocation to update kernel outputs to - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &new_shared_ptr)); - ASSERT_NE(new_shared_ptr, nullptr); - std::memset(new_shared_ptr, 0, allocation_size); - - // Update each kernel in the command-buffer. - uint32_t new_val = 33; - for (size_t k = 0; k < num_kernels; k++) { - // Update output pointer to an offset into new USM allocation - void *offset_ptr = (uint32_t *)new_shared_ptr + (k * elements); - ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 0, // argIndex - nullptr, // pProperties - &offset_ptr, // pArgValue - }; - - // Update fill value - uint32_t new_fill_val = new_val + k; - ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(int), // argSize - nullptr, // pProperties - &new_fill_val, // hArgValue - }; - - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 1, // numNewPointerArgs - 1, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - &new_output_desc, // pNewPointerArgList - &new_input_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handles[k], - &update_desc)); - } - - // Update kernel and enqueue command-buffer again - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - uint32_t *updated_output = (uint32_t *)new_shared_ptr; - for (size_t i = 0; i < global_size; i++) { - uint32_t expected = new_val + (i / elements); - ASSERT_EQ(expected, updated_output[i]) << i; - } -} diff --git a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp deleted file mode 100644 index ea32f7e046..0000000000 --- a/test/conformance/exp_command_buffer/usm_saxpy_kernel_update.cpp +++ /dev/null @@ -1,354 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -// See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "fixtures.h" -#include - -// Test that updating a command-buffer with a single kernel command -// taking USM & scalar arguments works correctly. - -struct USMSaxpyKernelTestBase - : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { - virtual void SetUp() override { - program_name = "saxpy_usm"; - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::SetUp()); - - ur_device_usm_access_capability_flags_t shared_usm_flags; - ASSERT_SUCCESS( - uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); - if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) { - GTEST_SKIP() << "Shared USM is not supported."; - } - - const size_t allocation_size = sizeof(uint32_t) * global_size; - for (auto &shared_ptr : shared_ptrs) { - ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr, - allocation_size, &shared_ptr)); - ASSERT_NE(shared_ptr, nullptr); - - std::vector pattern(allocation_size); - uur::generateMemFillPattern(pattern); - std::memcpy(shared_ptr, pattern.data(), allocation_size); - } - - // Index 0 is output - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 0, nullptr, shared_ptrs[0])); - // Index 1 is A - ASSERT_SUCCESS(urKernelSetArgValue(kernel, 1, sizeof(A), nullptr, &A)); - // Index 2 is X - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 2, nullptr, shared_ptrs[1])); - // Index 3 is Y - ASSERT_SUCCESS( - urKernelSetArgPointer(kernel, 3, nullptr, shared_ptrs[2])); - } - - void Validate(uint32_t *output, uint32_t *X, uint32_t *Y, uint32_t A, - size_t length) { - for (size_t i = 0; i < length; i++) { - uint32_t result = A * X[i] + Y[i]; - ASSERT_EQ(result, output[i]); - } - } - - virtual void TearDown() override { - for (auto &shared_ptr : shared_ptrs) { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(context, shared_ptr)); - } - } - - UUR_RETURN_ON_FATAL_FAILURE( - urUpdatableCommandBufferExpExecutionTest::TearDown()); - } - - static constexpr size_t local_size = 4; - static constexpr size_t global_size = 32; - static constexpr size_t global_offset = 0; - static constexpr size_t n_dimensions = 1; - static constexpr uint32_t A = 42; - std::array shared_ptrs = {nullptr, nullptr, nullptr, nullptr}; -}; - -struct USMSaxpyKernelTest : USMSaxpyKernelTestBase { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::SetUp()); - - // Append kernel command to command-buffer and close command-buffer - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handle)); - ASSERT_NE(command_handle, nullptr); - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - } - - void TearDown() override { - if (command_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(command_handle)); - } - - UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::TearDown()); - } - - ur_exp_command_buffer_command_handle_t command_handle = nullptr; -}; - -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMSaxpyKernelTest); - -TEST_P(USMSaxpyKernelTest, UpdateParameters) { - // Run command-buffer prior to update an verify output - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - uint32_t *output = (uint32_t *)shared_ptrs[0]; - uint32_t *X = (uint32_t *)shared_ptrs[1]; - uint32_t *Y = (uint32_t *)shared_ptrs[2]; - Validate(output, X, Y, A, global_size); - - // Update inputs - ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; - - // New X at index 2 - new_input_descs[0] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 2, // argIndex - nullptr, // pProperties - &shared_ptrs[3], // pArgValue - }; - - // New Y at index 3 - new_input_descs[1] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 3, // argIndex - nullptr, // pProperties - &shared_ptrs[4], // pArgValue - }; - - // New A at index 1 - uint32_t new_A = 33; - ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(new_A), // argSize - nullptr, // pProperties - &new_A, // hArgValue - }; - - // Update kernel inputs - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 2, // numNewPointerArgs - 1, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - new_input_descs, // pNewPointerArgList - &new_A_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Update kernel and enqueue command-buffer again - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - uint32_t *new_output = (uint32_t *)shared_ptrs[0]; - uint32_t *new_X = (uint32_t *)shared_ptrs[3]; - uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; - Validate(new_output, new_X, new_Y, new_A, global_size); -} - -struct USMMultiSaxpyKernelTest : USMSaxpyKernelTestBase { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::SetUp()); - - // Append kernel command to command-buffer and close command-buffer - for (unsigned node = 0; node < nodes; node++) { - ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( - updatable_cmd_buf_handle, kernel, n_dimensions, &global_offset, - &global_size, &local_size, 0, nullptr, 0, nullptr, nullptr, - &command_handles[node])); - ASSERT_NE(command_handles[node], nullptr); - } - - ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); - } - - void TearDown() override { - for (auto &handle : command_handles) { - if (handle) { - EXPECT_SUCCESS(urCommandBufferReleaseCommandExp(handle)); - } - } - UUR_RETURN_ON_FATAL_FAILURE(USMSaxpyKernelTestBase::TearDown()); - } - - static constexpr size_t nodes = 1024; - static constexpr uint32_t A = 42; - std::array command_handles{}; -}; - -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(USMMultiSaxpyKernelTest); - -TEST_P(USMMultiSaxpyKernelTest, UpdateParameters) { - // Run command-buffer prior to update an verify output - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - uint32_t *output = (uint32_t *)shared_ptrs[0]; - uint32_t *X = (uint32_t *)shared_ptrs[1]; - uint32_t *Y = (uint32_t *)shared_ptrs[2]; - Validate(output, X, Y, A, global_size); - - // Update inputs - ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; - - // New X at index 2 - new_input_descs[0] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 2, // argIndex - nullptr, // pProperties - &shared_ptrs[3], // pArgValue - }; - - // New Y at index 3 - new_input_descs[1] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 3, // argIndex - nullptr, // pProperties - &shared_ptrs[4], // pArgValue - }; - - // New A at index 1 - uint32_t new_A = 33; - ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(new_A), // argSize - nullptr, // pProperties - &new_A, // hArgValue - }; - - // Update kernel inputs - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 2, // numNewPointerArgs - 1, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - new_input_descs, // pNewPointerArgList - &new_A_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Update kernel and enqueue command-buffer again - for (auto &handle : command_handles) { - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(handle, &update_desc)); - } - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - uint32_t *new_output = (uint32_t *)shared_ptrs[0]; - uint32_t *new_X = (uint32_t *)shared_ptrs[3]; - uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; - Validate(new_output, new_X, new_Y, new_A, global_size); -} - -TEST_P(USMMultiSaxpyKernelTest, UpdateWithoutBlocking) { - // Prepare new inputs - ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; - - // New X at index 2 - new_input_descs[0] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 2, // argIndex - nullptr, // pProperties - &shared_ptrs[3], // pArgValue - }; - - // New Y at index 3 - new_input_descs[1] = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype - nullptr, // pNext - 3, // argIndex - nullptr, // pProperties - &shared_ptrs[4], // pArgValue - }; - - // New A at index 1 - uint32_t new_A = 33; - ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype - nullptr, // pNext - 1, // argIndex - sizeof(new_A), // argSize - nullptr, // pProperties - &new_A, // hArgValue - }; - - // Update kernel inputs - ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { - UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype - nullptr, // pNext - kernel, //hNewKernel - 0, // numNewMemObjArgs - 2, // numNewPointerArgs - 1, // numNewValueArgs - 0, // newWorkDim - nullptr, // pNewMemObjArgList - new_input_descs, // pNewPointerArgList - &new_A_desc, // pNewValueArgList - nullptr, // pNewGlobalWorkOffset - nullptr, // pNewGlobalWorkSize - nullptr, // pNewLocalWorkSize - }; - - // Run command-buffer prior to update without doing a blocking wait after - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - - // Update kernel and enqueue command-buffer again - for (auto &handle : command_handles) { - ASSERT_SUCCESS( - urCommandBufferUpdateKernelLaunchExp(handle, &update_desc)); - } - ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, - nullptr, nullptr)); - ASSERT_SUCCESS(urQueueFinish(queue)); - - // Verify that update occurred correctly - uint32_t *new_output = (uint32_t *)shared_ptrs[0]; - uint32_t *new_X = (uint32_t *)shared_ptrs[3]; - uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; - Validate(new_output, new_X, new_Y, new_A, global_size); -} diff --git a/test/conformance/exp_enqueue_native/CMakeLists.txt b/test/conformance/exp_enqueue_native/CMakeLists.txt index 64f885fb94..403d3caa3c 100644 --- a/test/conformance/exp_enqueue_native/CMakeLists.txt +++ b/test/conformance/exp_enqueue_native/CMakeLists.txt @@ -5,12 +5,12 @@ if (UR_BUILD_ADAPTER_CUDA) add_conformance_test_with_kernels_environment( - exp_enqueue_native - enqueue_native_cuda.cpp + exp_enqueue_native + enqueue_native_cuda.cpp ) target_include_directories(test-exp_enqueue_native PRIVATE - ${PROJECT_SOURCE_DIR}/source - ${PROJECT_SOURCE_DIR}/source/adapters/cuda + ${PROJECT_SOURCE_DIR}/source + ${PROJECT_SOURCE_DIR}/source/adapters/cuda ) target_link_libraries(test-exp_enqueue_native PRIVATE cudadrv) endif()