From 685973d0da70baf6ef405b5c8e4573a839e150c9 Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Thu, 5 Sep 2024 15:59:27 -0700 Subject: [PATCH] [L0] Refactor to remove default constructor inits - Remove all default argument values from function and constructor declarations so that every call site must set each option and flag explicitly; this guards against error-prone code changes. Signed-off-by: Neil R. Spruit --- source/adapters/level_zero/command_buffer.cpp | 49 ++++++++++++------- source/adapters/level_zero/context.cpp | 8 +-- source/adapters/level_zero/context.hpp | 4 +- source/adapters/level_zero/event.cpp | 43 ++++++++++------ source/adapters/level_zero/event.hpp | 9 ++-- source/adapters/level_zero/image.cpp | 2 +- source/adapters/level_zero/kernel.cpp | 10 ++-- source/adapters/level_zero/memory.cpp | 40 +++++++++------ source/adapters/level_zero/queue.cpp | 33 ++++++++----- source/adapters/level_zero/queue.hpp | 14 +++--- 10 files changed, 130 insertions(+), 82 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 8341d8f68e..774e82f57a 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -146,8 +146,10 @@ ur_result_t createSyncPointAndGetZeEvents( UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList, ZeEventList)); ur_event_handle_t LaunchEvent; - UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, HostVisible, - &LaunchEvent, false, !CommandBuffer->IsProfilingEnabled)); + UR_CALL(EventCreate(CommandBuffer->Context, nullptr /*Queue*/, + false /*IsMultiDevice*/, HostVisible, &LaunchEvent, + false /*CounterBasedEventEnabled*/, + !CommandBuffer->IsProfilingEnabled)); LaunchEvent->CommandType = CommandType; ZeLaunchEvent = LaunchEvent->ZeEvent; @@ -325,22 +327,26 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() { // Release additional signal and wait events used by command_buffer if (SignalEvent) { - 
CleanupCompletedEvent(SignalEvent, false); + CleanupCompletedEvent(SignalEvent, false /*QueueLocked*/, + false /*SetEventCompleted*/); urEventReleaseInternal(SignalEvent); } if (WaitEvent) { - CleanupCompletedEvent(WaitEvent, false); + CleanupCompletedEvent(WaitEvent, false /*QueueLocked*/, + false /*SetEventCompleted*/); urEventReleaseInternal(WaitEvent); } if (AllResetEvent) { - CleanupCompletedEvent(AllResetEvent, false); + CleanupCompletedEvent(AllResetEvent, false /*QueueLocked*/, + false /*SetEventCompleted*/); urEventReleaseInternal(AllResetEvent); } // Release events added to the command_buffer for (auto &Sync : SyncPoints) { auto &Event = Sync.second; - CleanupCompletedEvent(Event, false); + CleanupCompletedEvent(Event, false /*QueueLocked*/, + false /*SetEventCompleted*/); urEventReleaseInternal(Event); } @@ -514,12 +520,15 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, ur_event_handle_t WaitEvent; ur_event_handle_t AllResetEvent; - UR_CALL(EventCreate(Context, nullptr, false, false, &SignalEvent, false, - !EnableProfiling)); - UR_CALL(EventCreate(Context, nullptr, false, false, &WaitEvent, false, - !EnableProfiling)); - UR_CALL(EventCreate(Context, nullptr, false, false, &AllResetEvent, false, - !EnableProfiling)); + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + false /*HostVisible*/, &SignalEvent, + false /*CounterBasedEventEnabled*/, !EnableProfiling)); + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + false /*HostVisible*/, &WaitEvent, + false /*CounterBasedEventEnabled*/, !EnableProfiling)); + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + false /*HostVisible*/, &AllResetEvent, + false /*CounterBasedEventEnabled*/, !EnableProfiling)); std::vector PrecondEvents = {WaitEvent->ZeEvent, AllResetEvent->ZeEvent}; @@ -1151,14 +1160,15 @@ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer, // when `EventWaitList` 
dependencies are complete. ur_command_list_ptr_t WaitCommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( - Queue, WaitCommandList, false, NumEventsInWaitList, EventWaitList, - false)); + Queue, WaitCommandList, false /*UseCopyEngine*/, NumEventsInWaitList, + EventWaitList, false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); ZE2UR_CALL(zeCommandListAppendBarrier, (WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent, CommandBuffer->WaitEvent->WaitList.Length, CommandBuffer->WaitEvent->WaitList.ZeEventList)); - Queue->executeCommandList(WaitCommandList, false, false); + Queue->executeCommandList(WaitCommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/); MustSignalWaitEvent = false; } } @@ -1270,9 +1280,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // Create a command-list to signal the Event on completion ur_command_list_ptr_t SignalCommandList{}; - UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList, - false, NumEventsInWaitList, - EventWaitList, false)); + UR_CALL(Queue->Context->getAvailableCommandList( + Queue, SignalCommandList, false /*UseCopyEngine*/, NumEventsInWaitList, + EventWaitList, false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); // Reset the wait-event for the UR command-buffer that is signaled when its // submission dependencies have been satisfied. @@ -1287,7 +1297,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( // parameter with signal command-list completing. 
UR_CALL(createUserEvent(CommandBuffer, Queue, SignalCommandList, Event)); - UR_CALL(Queue->executeCommandList(SignalCommandList, false, false)); + UR_CALL(Queue->executeCommandList(SignalCommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 54aa15d71e..c0b8b24ffa 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -767,9 +767,11 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( CommandList = Queue->CommandListMap .emplace(ZeCommandList, - ur_command_list_info_t(ZeFence, true, false, - ZeCommandQueue, ZeQueueDesc, - Queue->useCompletionBatching())) + ur_command_list_info_t( + ZeFence, true, false, ZeCommandQueue, ZeQueueDesc, + Queue->useCompletionBatching(), true /*CanReuse */, + ZeCommandListIt->second.InOrderList, + ZeCommandListIt->second.IsImmediate)) .first; } ZeCommandListCache.erase(ZeCommandListIt); diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 0e3f5e7884..5d2e4300a1 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -299,8 +299,8 @@ struct ur_context_handle_t_ : _ur_object { ur_result_t getAvailableCommandList( ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList, bool UseCopyEngine, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, bool AllowBatching = false, - ze_command_queue_handle_t *ForcedCmdQueue = nullptr); + const ur_event_handle_t *EventWaitList, bool AllowBatching, + ze_command_queue_handle_t *ForcedCmdQueue); // Checks if Device is covered by this context. // For that the Device or its root devices need to be in the context. 
diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 0668cc185e..bec123e4db 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -85,7 +85,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( // Get a new command list to be used on this call ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList)); + Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, + false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; @@ -106,7 +107,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( // Execute command list asynchronously as the event will be used // to track down its completion. - return Queue->executeCommandList(CommandList); + return Queue->executeCommandList(CommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/); } { @@ -258,12 +260,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( ur_command_list_ptr_t CmdList; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CmdList, false /*UseCopyEngine=*/, NumEventsInWaitList, - EventWaitList, OkToBatch)); + EventWaitList, OkToBatch, nullptr /*ForcedCmdQueue*/)); // Insert the barrier into the command-list and execute. UR_CALL(insertBarrierIntoCmdList(CmdList, TmpWaitList, *Event, IsInternal)); - UR_CALL(Queue->executeCommandList(CmdList, false, OkToBatch)); + UR_CALL( + Queue->executeCommandList(CmdList, false /*IsBlocking*/, OkToBatch)); // Because of the dependency between commands in the in-order queue we don't // need to keep track of any active barriers if we have in-order queue. 
@@ -328,7 +331,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( ur_command_list_ptr_t CmdList; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CmdList, false /*UseCopyEngine=*/, NumEventsInWaitList, - EventWaitList, OkToBatch)); + EventWaitList, OkToBatch, nullptr /*ForcedCmdQueue*/)); CmdLists.push_back(CmdList); } @@ -377,7 +380,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( // Only batch if the matching CmdList is already open. OkToBatch = CommandBatch.OpenCommandList == CmdList; - UR_CALL(Queue->executeCommandList(CmdList, false, OkToBatch)); + UR_CALL( + Queue->executeCommandList(CmdList, false /*IsBlocking*/, OkToBatch)); } UR_CALL(Queue->ActiveBarriers.clear()); @@ -687,7 +691,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - /* AllowBatching */ false)); + /* AllowBatching */ false, nullptr /*ForcedCmdQueue*/)); UR_CALL(createEventAndAssociateQueue( Queue, OutEvent, UR_COMMAND_TIMESTAMP_RECORDING_EXP, CommandList, @@ -710,7 +714,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( (*OutEvent)->WaitList.ZeEventList)); UR_CALL( - Queue->executeCommandList(CommandList, Blocking, /* OkToBatch */ false)); + Queue->executeCommandList(CommandList, Blocking, false /* OkToBatch */)); return UR_RESULT_SUCCESS; } @@ -738,7 +742,8 @@ ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( ur_command_list_ptr_t CommandList{}; UR_CALL(UrQueue->Context->getAvailableCommandList( - UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch)) + UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch, + nullptr /*ForcedCmdQueue*/)) // Create a "proxy" host-visible event. 
UR_CALL(createEventAndAssociateQueue( @@ -756,7 +761,8 @@ ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( ZE2UR_CALL(zeCommandListAppendSignalEvent, (CommandList->first, HostVisibleEvent->ZeEvent)); - UR_CALL(UrQueue->executeCommandList(CommandList, false, OkToBatch)) + UR_CALL(UrQueue->executeCommandList(CommandList, false /*IsBlocking*/, + OkToBatch)) this->IsCreatingHostProxyEvent = false; } @@ -835,7 +841,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventWait( else { // NOTE: we are cleaning up after the event here to free resources // sooner in case run-time is not calling urEventRelease soon enough. - CleanupCompletedEvent(reinterpret_cast(Event)); + CleanupCompletedEvent(reinterpret_cast(Event), + false /*QueueLocked*/, + false /*SetEventCompleted*/); // For the case when we have out-of-order queue or regular command // lists its more efficient to check fences so put the queue in the // set to cleanup later. @@ -903,7 +911,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate( ur_event_handle_t *Event ///< [out] pointer to the handle of the event object created. 
) { - UR_CALL(EventCreate(Context, nullptr, false, true, Event)); + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + true /*HostVisible*/, Event, + false /*CounterBasedEventEnabled*/, + false /*ForceDisableProfiling*/)); (*Event)->RefCountExternal++; if (!(*Event)->CounterBasedEventsEnabled) @@ -922,7 +933,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( // we dont have urEventCreate, so use this check for now to know that // the call comes from urEventCreate() if (reinterpret_cast(NativeEvent) == nullptr) { - UR_CALL(EventCreate(Context, nullptr, false, true, Event)); + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + true /*HostVisible*/, Event, + false /*CounterBasedEventEnabled*/, + false /*ForceDisableProfiling*/)); (*Event)->RefCountExternal++; if (!(*Event)->CounterBasedEventsEnabled) @@ -1497,7 +1511,8 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( // This prevents a potential deadlock with recursive // event locks. 
UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CommandList, false, 0, nullptr, true)); + Queue, CommandList, false /*UseCopyEngine*/, 0, nullptr, + true /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); } std::shared_lock Lock(EventList[I]->Mutex); diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index 2d1f536e4e..67452b02e1 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -32,8 +32,8 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event); ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool IsMultiDevice, bool HostVisible, ur_event_handle_t *RetEvent, - bool CounterBasedEventEnabled = false, - bool ForceDisableProfiling = false); + bool CounterBasedEventEnabled, + bool ForceDisableProfiling); } // extern "C" // This is an experimental option that allows to disable caching of events in @@ -273,9 +273,8 @@ template <> ze_result_t zeHostSynchronize(ze_command_queue_handle_t Handle); // the event, updates the last command event in the queue and cleans up all dep // events of the event. // If the caller locks queue mutex then it must pass 'true' to QueueLocked. 
-ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, - bool QueueLocked = false, - bool SetEventCompleted = false); +ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, + bool SetEventCompleted); // Get value of device scope events env var setting or default setting static const EventsScope DeviceEventsSetting = [] { diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index c4623f314c..60e3d074dc 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -801,7 +801,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( ur_command_list_ptr_t CommandList{}; UR_CALL(hQueue->Context->getAvailableCommandList( hQueue, CommandList, UseCopyEngine, numEventsInWaitList, phEventWaitList, - OkToBatch)); + OkToBatch, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index ac942c173e..e57e98669e 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -136,7 +136,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - true /* AllowBatching */)); + true /* AllowBatching */, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent{}; @@ -199,7 +199,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( // Execute command list asynchronously, as the event will be used // to track down its completion. 
- UR_CALL(Queue->executeCommandList(CommandList, false, true)); + UR_CALL(Queue->executeCommandList(CommandList, false /*IsBlocking*/, + true /*OKToBatchCommand*/)); return UR_RESULT_SUCCESS; } @@ -400,7 +401,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - true /* AllowBatching */)); + true /* AllowBatching */, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent{}; @@ -463,7 +464,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( // Execute command list asynchronously, as the event will be used // to track down its completion. - UR_CALL(Queue->executeCommandList(CommandList, false, true)); + UR_CALL(Queue->executeCommandList(CommandList, false /*IsBlocking*/, + true /*OKToBatchCommand*/)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 2fd66948e2..24653bbc53 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -78,7 +78,7 @@ ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - OkToBatch)); + OkToBatch, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; @@ -131,7 +131,7 @@ ur_result_t enqueueMemCopyRectHelper( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - OkToBatch)); + OkToBatch, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; @@ -240,7 +240,7 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, 
bool OkToBatch = true; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - OkToBatch)); + OkToBatch, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; @@ -270,7 +270,8 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, // Execute command list asynchronously, as the event will be used // to track down its completion. - UR_CALL(Queue->executeCommandList(CommandList, false, OkToBatch)); + UR_CALL(Queue->executeCommandList(CommandList, false /*IsBlocking*/, + OkToBatch)); } else { // Copy pattern into every entry in memory array pointed by Ptr. uint32_t NumOfCopySteps = Size / PatternSize; @@ -290,7 +291,8 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, printZeEventList(WaitList); // Execute command list synchronously. - UR_CALL(Queue->executeCommandList(CommandList, true, OkToBatch)); + UR_CALL( + Queue->executeCommandList(CommandList, true /*IsBlocking*/, OkToBatch)); } return UR_RESULT_SUCCESS; @@ -357,7 +359,7 @@ static ur_result_t enqueueMemImageCommandHelper( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - OkToBatch)); + OkToBatch, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; @@ -1015,7 +1017,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( // For discrete devices we need a command list ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList)); + Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, + false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); // Add the event to the command list. 
CommandList->second.append(reinterpret_cast(*Event)); @@ -1035,7 +1038,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( (ZeCommandList, *RetMap, ZeHandleSrc + Offset, Size, ZeEvent, WaitList.Length, WaitList.ZeEventList)); - UR_CALL(Queue->executeCommandList(CommandList, BlockingMap)); + UR_CALL(Queue->executeCommandList(CommandList, BlockingMap, + false /*OKToBatchCommand*/)); } auto Res = Buffer->Mappings.insert({*RetMap, {Offset, Size}}); @@ -1142,7 +1146,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( reinterpret_cast(Queue), CommandList, UseCopyEngine, - NumEventsInWaitList, EventWaitList)); + NumEventsInWaitList, EventWaitList, false /*AllowBatching*/, + nullptr /*ForcedCmdQueue*/)); CommandList->second.append(reinterpret_cast(*Event)); (*Event)->RefCount.increment(); @@ -1170,7 +1175,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( // Execute command list asynchronously, as the event will be used // to track down its completion. - UR_CALL(Queue->executeCommandList(CommandList)); + UR_CALL(Queue->executeCommandList(CommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/)); return UR_RESULT_SUCCESS; } @@ -1252,7 +1258,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( // TODO: Change UseCopyEngine argument to 'true' once L0 backend // support is added UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList)); + Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, + false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); // TODO: do we need to create a unique command type for this? ze_event_handle_t ZeEvent = nullptr; @@ -1277,7 +1284,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( // so manually add command to signal our event. 
ZE2UR_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent)); - UR_CALL(Queue->executeCommandList(CommandList, false)); + UR_CALL(Queue->executeCommandList(CommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/)); return UR_RESULT_SUCCESS; } @@ -1307,8 +1315,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( // UseCopyEngine is set to 'false' here. // TODO: Additional analysis is required to check if this operation will // run faster on copy engines. - UR_CALL(Queue->Context->getAvailableCommandList(Queue, CommandList, - UseCopyEngine, 0, nullptr)); + UR_CALL(Queue->Context->getAvailableCommandList( + Queue, CommandList, UseCopyEngine, 0, nullptr, false /*AllowBatching*/, + nullptr /*ForcedCmdQueue*/)); // TODO: do we need to create a unique command type for this? ze_event_handle_t ZeEvent = nullptr; @@ -1335,7 +1344,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMAdvise( // so manually add command to signal our event. ZE2UR_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent)); - Queue->executeCommandList(CommandList, false); + Queue->executeCommandList(CommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 47dddac89b..191f4113a1 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -104,7 +104,10 @@ ur_result_t ur_completion_batch::seal(ur_queue_handle_t queue, assert(st == ACCUMULATING); if (!barrierEvent) { - UR_CALL(EventCreate(queue->Context, queue, false, true, &barrierEvent)); + UR_CALL(EventCreate(queue->Context, queue, false /*IsMultiDevice*/, + true /*HostVisible*/, &barrierEvent, + false /*CounterBasedEventEnabled*/, + false /*ForceDisableProfiling*/)); } // Instead of collecting all the batched events, we simply issue a global @@ -307,7 +310,9 @@ ur_result_t resetCommandLists(ur_queue_handle_t Queue) { // Handle immediate command lists 
here, they don't need to be reset and we // only need to cleanup events. if (Queue->UsingImmCmdLists) { - UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*locked*/)); + UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*QueueLocked*/, + false /*QueueSynced*/, + nullptr /*CompletedEvent*/)); return UR_RESULT_SUCCESS; } @@ -680,7 +685,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( std::scoped_lock EventLock(Event->Mutex); Event->Completed = true; } - UR_CALL(CleanupCompletedEvent(Event)); + UR_CALL(CleanupCompletedEvent(Event, false /*QueueLocked*/, + false /*SetEventCompleted*/)); // This event was removed from the command list, so decrement ref count // (it was incremented when they were added to the command list). UR_CALL(urEventReleaseInternal(reinterpret_cast(Event))); @@ -1644,7 +1650,8 @@ ur_result_t CleanupEventListFromResetCmdList( for (auto &Event : EventListToCleanup) { // We don't need to synchronize the events since the fence associated with // the command list was synchronized. - UR_CALL(CleanupCompletedEvent(Event, QueueLocked, true)); + UR_CALL( + CleanupCompletedEvent(Event, QueueLocked, true /*SetEventCompleted*/)); // This event was removed from the command list, so decrement ref count // (it was incremented when they were added to the command list). UR_CALL(urEventReleaseInternal(Event)); @@ -1868,9 +1875,9 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue, : nullptr; if (*Event == nullptr) - UR_CALL(EventCreate(Queue->Context, Queue, IsMultiDevice, - HostVisible.value(), Event, - Queue->CounterBasedEventsEnabled)); + UR_CALL(EventCreate( + Queue->Context, Queue, IsMultiDevice, HostVisible.value(), Event, + Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/)); (*Event)->UrQueue = Queue; (*Event)->CommandType = CommandType; @@ -1967,7 +1974,9 @@ ur_result_t ur_queue_handle_t_::executeOpenCommandList(bool IsCopy) { // queue, then close and execute that command list now. 
if (hasOpenCommandList(IsCopy)) { adjustBatchSizeForPartialBatch(IsCopy); - auto Res = executeCommandList(CommandBatch.OpenCommandList, false, false); + auto Res = + executeCommandList(CommandBatch.OpenCommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/); CommandBatch.OpenCommandList = CommandListMap.end(); return Res; } @@ -2260,9 +2269,11 @@ ur_result_t ur_queue_handle_t_::createCommandList( std::tie(CommandList, std::ignore) = CommandListMap.insert( std::pair( - ZeCommandList, ur_command_list_info_t( - ZeFence, false, false, ZeCommandQueue, ZeQueueDesc, - useCompletionBatching(), true, IsInOrderList))); + ZeCommandList, + ur_command_list_info_t( + ZeFence, false /*ZeFenceInUse*/, false /*IsClosed*/, + ZeCommandQueue, ZeQueueDesc, useCompletionBatching(), + true /*CanReuse*/, IsInOrderList, false /*IsImmediate*/))); UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList)); UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine)); diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 592a2808aa..8d4d046e2b 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -150,10 +150,9 @@ struct ur_completion_batches { }; ur_result_t resetCommandLists(ur_queue_handle_t Queue); -ur_result_t -CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, bool QueueLocked = false, - bool QueueSynced = false, - ur_event_handle_t CompletedEvent = nullptr); +ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, + bool QueueLocked, bool QueueSynced, + ur_event_handle_t CompletedEvent); // Structure describing the specific use of a command-list in a queue. 
// This is because command-lists are re-used across multiple queues @@ -162,8 +161,8 @@ struct ur_command_list_info_t { ur_command_list_info_t(ze_fence_handle_t ZeFence, bool ZeFenceInUse, bool IsClosed, ze_command_queue_handle_t ZeQueue, ZeStruct ZeQueueDesc, - bool UseCompletionBatching, bool CanReuse = true, - bool IsInOrderList = false, bool IsImmediate = false) + bool UseCompletionBatching, bool CanReuse, + bool IsInOrderList, bool IsImmediate) : ZeFence(ZeFence), ZeFenceInUse(ZeFenceInUse), IsClosed(IsClosed), ZeQueue(ZeQueue), ZeQueueDesc(ZeQueueDesc), IsInOrderList(IsInOrderList), CanReuse(CanReuse), @@ -528,8 +527,7 @@ struct ur_queue_handle_t_ : _ur_object { // // For immediate commandlists, no close and execute is necessary. ur_result_t executeCommandList(ur_command_list_ptr_t CommandList, - bool IsBlocking = false, - bool OKToBatchCommand = false); + bool IsBlocking, bool OKToBatchCommand); // Helper method telling whether we need to reuse discarded event in this // queue.