diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 188202c2f61e..cea788ad3ef9 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -1428,7 +1428,7 @@ class __SYCL_EXPORT handler { processProperties(Props); StoreLambda( std::move(KernelFunc)); - setType(detail::CGType::Kernel); + setType(detail::CGType::Kernel); setNDRangeUsed(true); #endif } diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 48d80c06394b..3128556a3641 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -73,8 +73,7 @@ class NDRDescT { } } - template - static sycl::range<3> padRange(sycl::range Range) { + template static sycl::range<3> padRange(sycl::range Range) { if constexpr (Dims == 3) { return Range; } else { @@ -108,21 +107,23 @@ class NDRDescT { setNDRangeLeftover(); } - NDRDescT(sycl::range<3> NumWorkItems, sycl::id<3> Offset, int DimsArg) + NDRDescT(sycl::range<3> NumWorkItems, sycl::id<3> Offset, int DimsArg) : GlobalSize{NumWorkItems}, GlobalOffset{Offset}, Dims{size_t(DimsArg)} {} NDRDescT(sycl::range<3> NumWorkItems, sycl::range<3> LocalSize, sycl::id<3> Offset, int DimsArg) : GlobalSize{NumWorkItems}, LocalSize{LocalSize}, GlobalOffset{Offset}, Dims{size_t(DimsArg)} { - setNDRangeLeftover();} + setNDRangeLeftover(); + } template NDRDescT(sycl::nd_range ExecutionRange, int DimsArg) : NDRDescT(padRange(ExecutionRange.get_global_range()), padRange(ExecutionRange.get_local_range()), padId(ExecutionRange.get_offset()), size_t(DimsArg)) { - setNDRangeLeftover();} + setNDRangeLeftover(); + } template NDRDescT(sycl::nd_range ExecutionRange) @@ -360,8 +361,8 @@ class CGCopyUSM : public CG { public: CGCopyUSM(void *Src, void *Dst, size_t Length, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CGType::CopyUSM, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), - MLength(Length) {} + : CG(CGType::CopyUSM, std::move(CGData), std::move(loc)), MSrc(Src), + MDst(Dst), MLength(Length) {} void *getSrc() { return MSrc; } void *getDst() { return MDst; } @@ -392,8 +393,8 @@ class CGPrefetchUSM : public CG { public: CGPrefetchUSM(void *DstPtr, size_t Length, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CGType::PrefetchUSM, std::move(CGData), std::move(loc)), MDst(DstPtr), - MLength(Length) {} + : CG(CGType::PrefetchUSM, std::move(CGData), std::move(loc)), + MDst(DstPtr), MLength(Length) {} void *getDst() { return MDst; } size_t getLength() { return MLength; } }; @@ -445,8 +446,8 @@ class CGCopy2DUSM : public CG { CGCopy2DUSM(void *Src, void *Dst, size_t SrcPitch, size_t DstPitch, size_t Width, size_t Height, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CGType::Copy2DUSM, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), - MSrcPitch(SrcPitch), MDstPitch(DstPitch), MWidth(Width), + : CG(CGType::Copy2DUSM, std::move(CGData), std::move(loc)), MSrc(Src), + MDst(Dst), MSrcPitch(SrcPitch), MDstPitch(DstPitch), MWidth(Width), MHeight(Height) {} void *getSrc() const { return MSrc; } @@ -491,8 +492,9 @@ class CGMemset2DUSM : public CG { CGMemset2DUSM(char Value, void *DstPtr, size_t Pitch, size_t Width, size_t Height, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CGType::Memset2DUSM, std::move(CGData), std::move(loc)), MValue(Value), - MDst(DstPtr), MPitch(Pitch), MWidth(Width), MHeight(Height) {} + : CG(CGType::Memset2DUSM, std::move(CGData), std::move(loc)), + MValue(Value), MDst(DstPtr), MPitch(Pitch), MWidth(Width), + MHeight(Height) {} void *getDst() const { return MDst; } size_t getPitch() const { return MPitch; } size_t getWidth() const { return MWidth; } @@ -536,8 +538,8 @@ class CGCopyToDeviceGlobal : public CG { bool IsDeviceImageScoped, size_t NumBytes, size_t Offset, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CGType::CopyToDeviceGlobal, std::move(CGData), std::move(loc)), MSrc(Src), - MDeviceGlobalPtr(DeviceGlobalPtr), + : CG(CGType::CopyToDeviceGlobal, std::move(CGData), std::move(loc)), + MSrc(Src), MDeviceGlobalPtr(DeviceGlobalPtr), MIsDeviceImageScoped(IsDeviceImageScoped), MNumBytes(NumBytes), MOffset(Offset) {} @@ -593,8 +595,8 @@ class CGCopyImage : public CG { sycl::detail::pi::PiImageRegion HostExtent, sycl::detail::pi::PiImageRegion CopyExtent, CG::StorageInitHelper CGData, detail::code_location loc = {}) - : CG(CGType::CopyImage, std::move(CGData), std::move(loc)), MSrc(Src), MDst(Dst), - MImageDesc(ImageDesc), MImageFormat(ImageFormat), + : CG(CGType::CopyImage, std::move(CGData), std::move(loc)), MSrc(Src), + MDst(Dst), MImageDesc(ImageDesc), MImageFormat(ImageFormat), MImageCopyFlags(ImageCopyFlags), MSrcOffset(SrcOffset), MDstOffset(DstOffset), MHostExtent(HostExtent), MCopyExtent(CopyExtent) {} diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index b3cf5f1bebed..54fa26b7751f 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -9,8 +9,8 @@ #pragma once #include "sycl/handler.hpp" -#include #include +#include #include #include @@ -155,7 +155,7 @@ class handler_impl { // Track whether an NDRange was used when submitting a kernel (as opposed to a // range), needed for graph update bool MNDRangeUsed = false; - + /// The storage for the arguments passed. /// We need to store a copy of values that are passed explicitly through /// set_arg, require and so on, because we need them to be alive after @@ -188,7 +188,7 @@ class handler_impl { std::shared_ptr MSubgraphNode; /// Storage for the CG created when handling graph nodes added explicitly. std::unique_ptr MGraphNodeCG; - + /// Storage for lambda/function when using HostTask std::shared_ptr MHostTask; /// The list of valid SYCL events that need to complete diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 7064e697d831..5fa0cc0e970b 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -12,13 +12,13 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include #include #include @@ -90,8 +90,8 @@ handler::handler(std::shared_ptr Queue, std::shared_ptr SecondaryQueue, bool CallerNeedsEvent) : impl(std::make_shared(std::move(PrimaryQueue), - std::move(SecondaryQueue), - CallerNeedsEvent)), + std::move(SecondaryQueue), + CallerNeedsEvent)), MQueue(std::move(Queue)) {} handler::handler( @@ -123,7 +123,8 @@ bool handler::isStateExplicitKernelBundle() const { std::shared_ptr handler::getOrInsertHandlerKernelBundle(bool Insert) const { if (!impl->MKernelBundle && Insert) { - auto Ctx = impl->MGraph ? impl->MGraph->getContext() : MQueue->get_context(); + auto Ctx = + impl->MGraph ? impl->MGraph->getContext() : MQueue->get_context(); auto Dev = impl->MGraph ? impl->MGraph->getDevice() : MQueue->get_device(); impl->MKernelBundle = detail::getSyclObjImpl( get_kernel_bundle(Ctx, {Dev}, {})); @@ -199,7 +200,8 @@ event handler::finalize() { // Make sure implicit non-interop kernel bundles have the kernel if (!KernelBundleImpPtr->isInterop() && !impl->isStateExplicitKernelBundle()) { - auto Dev = impl->MGraph ? impl->MGraph->getDevice() : MQueue->get_device(); + auto Dev = + impl->MGraph ? impl->MGraph->getDevice() : MQueue->get_device(); kernel_id KernelID = detail::ProgramManager::getInstance().getSYCLKernelID( MKernelName.c_str()); @@ -356,21 +358,22 @@ event handler::finalize() { new detail::CGUpdateHost(MDstPtr, std::move(impl->CGData), MCodeLoc)); break; case detail::CGType::CopyUSM: - CommandGroup.reset(new detail::CGCopyUSM(MSrcPtr, MDstPtr, MLength, - std::move(impl->CGData), MCodeLoc)); + CommandGroup.reset(new detail::CGCopyUSM( + MSrcPtr, MDstPtr, MLength, std::move(impl->CGData), MCodeLoc)); break; case detail::CGType::FillUSM: - CommandGroup.reset(new detail::CGFillUSM( - std::move(MPattern), MDstPtr, MLength, std::move(impl->CGData), MCodeLoc)); + CommandGroup.reset(new detail::CGFillUSM(std::move(MPattern), MDstPtr, + MLength, std::move(impl->CGData), + MCodeLoc)); break; case detail::CGType::PrefetchUSM: - CommandGroup.reset(new detail::CGPrefetchUSM(MDstPtr, MLength, - std::move(impl->CGData), MCodeLoc)); + CommandGroup.reset(new detail::CGPrefetchUSM( + MDstPtr, MLength, std::move(impl->CGData), MCodeLoc)); break; case detail::CGType::AdviseUSM: CommandGroup.reset(new detail::CGAdviseUSM(MDstPtr, MLength, impl->MAdvice, - std::move(impl->CGData), getType(), - MCodeLoc)); + std::move(impl->CGData), + getType(), MCodeLoc)); break; case detail::CGType::Copy2DUSM: CommandGroup.reset(new detail::CGCopy2DUSM( @@ -388,8 +391,9 @@ event handler::finalize() { std::move(impl->CGData), MCodeLoc)); break; case detail::CGType::CodeplayHostTask: { - auto context = impl->MGraph ? detail::getSyclObjImpl(impl->MGraph->getContext()) - : MQueue->getContextImplPtr(); + auto context = impl->MGraph + ? detail::getSyclObjImpl(impl->MGraph->getContext()) + : MQueue->getContextImplPtr(); CommandGroup.reset(new detail::CGHostTask( std::move(impl->MHostTask), MQueue, context, std::move(impl->MArgs), std::move(impl->CGData), getType(), MCodeLoc)); @@ -399,13 +403,13 @@ event handler::finalize() { case detail::CGType::BarrierWaitlist: { if (auto GraphImpl = getCommandGraph(); GraphImpl != nullptr) { impl->CGData.MEvents.insert(std::end(impl->CGData.MEvents), - std::begin(impl->MEventsWaitWithBarrier), - std::end(impl->MEventsWaitWithBarrier)); + std::begin(impl->MEventsWaitWithBarrier), + std::end(impl->MEventsWaitWithBarrier)); // Barrier node is implemented as an empty node in Graph // but keep the barrier type to help managing dependencies setType(detail::CGType::Barrier); - CommandGroup.reset( - new detail::CG(detail::CGType::Barrier, std::move(impl->CGData), MCodeLoc)); + CommandGroup.reset(new detail::CG(detail::CGType::Barrier, + std::move(impl->CGData), MCodeLoc)); } else { CommandGroup.reset( new detail::CGBarrier(std::move(impl->MEventsWaitWithBarrier), @@ -414,7 +418,8 @@ event handler::finalize() { break; } case detail::CGType::ProfilingTag: { - CommandGroup.reset(new detail::CGProfilingTag(std::move(impl->CGData), MCodeLoc)); + CommandGroup.reset( + new detail::CGProfilingTag(std::move(impl->CGData), MCodeLoc)); break; } case detail::CGType::CopyToDeviceGlobal: { @@ -466,17 +471,18 @@ event handler::finalize() { CommandGroup.reset(new detail::CGCopyImage( MSrcPtr, MDstPtr, impl->MImageDesc, impl->MImageFormat, impl->MImageCopyFlags, impl->MSrcOffset, impl->MDestOffset, - impl->MHostExtent, impl->MCopyExtent, std::move(impl->CGData), MCodeLoc)); + impl->MHostExtent, impl->MCopyExtent, std::move(impl->CGData), + MCodeLoc)); break; case detail::CGType::SemaphoreWait: CommandGroup.reset(new detail::CGSemaphoreWait( - impl->MInteropSemaphoreHandle, impl->MWaitValue, std::move(impl->CGData), - MCodeLoc)); + impl->MInteropSemaphoreHandle, impl->MWaitValue, + std::move(impl->CGData), MCodeLoc)); break; case detail::CGType::SemaphoreSignal: CommandGroup.reset(new detail::CGSemaphoreSignal( - impl->MInteropSemaphoreHandle, impl->MSignalValue, std::move(impl->CGData), - MCodeLoc)); + impl->MInteropSemaphoreHandle, impl->MSignalValue, + std::move(impl->CGData), MCodeLoc)); break; case detail::CGType::None: if (detail::pi::trace(detail::pi::TraceLevel::PI_TRACE_ALL)) { @@ -487,8 +493,8 @@ event handler::finalize() { // For Standard mode (non-graph), // empty nodes are not sent to the scheduler to save time if (impl->MGraph || (MQueue && MQueue->getCommandGraph())) { - CommandGroup.reset( - new detail::CG(detail::CGType::None, std::move(impl->CGData), MCodeLoc)); + CommandGroup.reset(new detail::CG(detail::CGType::None, + std::move(impl->CGData), MCodeLoc)); } else { detail::EventImplPtr Event = std::make_shared(); MLastEvent = detail::createSyclObjFromImpl(Event); @@ -524,8 +530,7 @@ event handler::finalize() { GraphImpl->MMutex); ext::oneapi::experimental::node_type NodeType = - impl->MUserFacingNodeType != - ext::oneapi::experimental::node_type::empty + impl->MUserFacingNodeType != ext::oneapi::experimental::node_type::empty ? impl->MUserFacingNodeType : ext::oneapi::experimental::detail::getNodeTypeFromCG(getType()); @@ -675,17 +680,17 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, static_cast(&S->GlobalBuf); detail::AccessorImplPtr GBufImpl = detail::getSyclObjImpl(*GBufBase); detail::Requirement *GBufReq = GBufImpl.get(); - addArgsForGlobalAccessor(GBufReq, Index, IndexShift, Size, - IsKernelCreatedFromSource, - impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); + addArgsForGlobalAccessor( + GBufReq, Index, IndexShift, Size, IsKernelCreatedFromSource, + impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); ++IndexShift; detail::AccessorBaseHost *GOffsetBase = static_cast(&S->GlobalOffset); detail::AccessorImplPtr GOfssetImpl = detail::getSyclObjImpl(*GOffsetBase); detail::Requirement *GOffsetReq = GOfssetImpl.get(); - addArgsForGlobalAccessor(GOffsetReq, Index, IndexShift, Size, - IsKernelCreatedFromSource, - impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); + addArgsForGlobalAccessor( + GOffsetReq, Index, IndexShift, Size, IsKernelCreatedFromSource, + impl->MNDRDesc.GlobalSize.size(), impl->MArgs, IsESIMD); ++IndexShift; detail::AccessorBaseHost *GFlushBase = static_cast(&S->GlobalFlushBuf); @@ -738,7 +743,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, // make it a minimum allocation of 1 byte. SizeInBytes = std::max(SizeInBytes, 1); impl->MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, - SizeInBytes, Index + IndexShift); + SizeInBytes, Index + IndexShift); // TODO ESIMD currently does not suport MSize field passing yet // accessor::init for ESIMD-mode accessor has a single field, translated // to a single kernel argument set above. @@ -880,8 +885,8 @@ void handler::verifyUsedKernelBundleInternal(detail::string_view KernelName) { return; kernel_id KernelID = detail::get_kernel_id_impl(KernelName); - device Dev = - impl->MGraph ? impl->MGraph->getDevice() : detail::getDeviceFromHandler(*this); + device Dev = impl->MGraph ? impl->MGraph->getDevice() + : detail::getDeviceFromHandler(*this); if (!UsedKernelBundleImplPtr->has_kernel(KernelID, Dev)) throw sycl::exception( make_error_code(errc::kernel_not_supported), @@ -1461,8 +1466,10 @@ void handler::use_kernel_bundle( const kernel_bundle &ExecBundle) { std::shared_ptr PrimaryQueue = impl->MSubmissionPrimaryQueue; - if ((!impl->MGraph && (PrimaryQueue->get_context() != ExecBundle.get_context())) || - (impl->MGraph && (impl->MGraph->getContext() != ExecBundle.get_context()))) + if ((!impl->MGraph && + (PrimaryQueue->get_context() != ExecBundle.get_context())) || + (impl->MGraph && + (impl->MGraph->getContext() != ExecBundle.get_context()))) throw sycl::exception( make_error_code(errc::invalid), "Context associated with the primary queue is different from the " @@ -1836,16 +1843,14 @@ void handler::addArg(detail::kernel_param_kind_t ArgKind, void *Req, impl->MArgs.emplace_back(ArgKind, Req, AccessTarget, ArgIndex); } -void handler::clearArgs() { - impl->MArgs.clear(); -} +void handler::clearArgs() { impl->MArgs.clear(); } void handler::setArgsToAssociatedAccessors() { impl->MArgs = impl->MAssociatedAccesors; } bool handler::HasAssociatedAccessor(detail::AccessorImplHost *Req, - access::target AccessTarget) const { + access::target AccessTarget) const { return std::find_if( impl->MAssociatedAccesors.cbegin(), impl->MAssociatedAccesors.cend(), [&](const detail::ArgDesc &AD) { @@ -1867,8 +1872,8 @@ void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, impl->MNDRDesc = NDRDescT{NumWorkItems, Offset, Dims}; } void handler::setNDRangeDescriptorPadded(sycl::range<3> NumWorkItems, - sycl::range<3> LocalSize, sycl::id<3> Offset, - int Dims) { + sycl::range<3> LocalSize, + sycl::id<3> Offset, int Dims) { impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset, Dims}; } diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index ae5bca9c4a8b..fbbf3b725643 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -137,7 +137,6 @@ class MockHandler : public sycl::handler { return CommandGroup; } - }; const sycl::detail::KernelArgMask *getKernelArgMaskFromBundle( diff --git a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp index 66c40572440d..255786a0ad4f 100644 --- a/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp +++ b/sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp @@ -8,8 +8,8 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" -#include #include +#include #include #include #include @@ -82,15 +82,13 @@ TEST_F(SchedulerTest, InOrderQueueSyncCheck) { // previous task, this is needed to properly sync blocking & blocked tasks. sycl::event Event; { - LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, - Queue}; + LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(0); Queue->finalizeHandler(MockCGH, Event); } { - LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, - Queue}; + LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, Queue}; EXPECT_CALL(MockCGH, depends_on(An())) .Times(1); Queue->finalizeHandler(MockCGH, Event); diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index cc4bfe123e21..97426dd06097 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -230,8 +230,8 @@ class MockHandler : public sycl::handler { : sycl::handler(Queue, CallerNeedsEvent) {} // Methods using sycl::handler::addReduction; - using sycl::handler::impl; using sycl::handler::getType; + using sycl::handler::impl; using sycl::handler::setNDRangeDescriptor; sycl::detail::NDRDescT &getNDRDesc() { return impl->MNDRDesc; } @@ -265,9 +265,7 @@ class MockHandler : public sycl::handler { } std::shared_ptr &getQueue() { return MQueue; } - void setType(sycl::detail::CGType Type) { - impl->MCGType = Type; - } + void setType(sycl::detail::CGType Type) { impl->MCGType = Type; } template