From 01682a1f69702924d610d074512e597993720836 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Sun, 29 Aug 2021 09:57:56 +0200 Subject: [PATCH 1/4] use lambda captures for GKO_REGISTER_OPERATION Replace the hand-written closure by using a lambda directly --- core/matrix/dense.cpp | 84 +++++----- include/ginkgo/core/base/executor.hpp | 217 ++++++++++++++++---------- 2 files changed, 173 insertions(+), 128 deletions(-) diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 853d7898bb4..413ef1d0739 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -468,10 +468,9 @@ void Dense::move_to(Dense> *result) template void Dense::convert_to(Coo *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_coo *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_coo(in, out); + }); } @@ -485,10 +484,9 @@ void Dense::move_to(Coo *result) template void Dense::convert_to(Coo *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_coo *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_coo(in, out); + }); } @@ -502,10 +500,9 @@ void Dense::move_to(Coo *result) template void Dense::convert_to(Csr *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_csr *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_csr(in, out); + }); result->make_srow(); } @@ -520,10 +517,9 @@ void Dense::move_to(Csr *result) template void Dense::convert_to(Csr *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_csr *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_csr(in, out); + }); result->make_srow(); } @@ -538,10 +534,9 @@ void Dense::move_to(Csr *result) template void Dense::convert_to(Ell *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_ell *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_ell(in, out); + }); } @@ -555,10 +550,9 @@ void Dense::move_to(Ell *result) template void Dense::convert_to(Ell *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_ell *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_ell(in, out); + }); } @@ -572,10 +566,9 @@ void Dense::move_to(Ell *result) template void Dense::convert_to(Hybrid *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_hybrid *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_hybrid(in, out); + }); } @@ -589,10 +582,9 @@ void Dense::move_to(Hybrid *result) template void Dense::convert_to(Hybrid *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_hybrid *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_hybrid(in, out); + }); } @@ -606,10 +598,9 @@ void Dense::move_to(Hybrid *result) template void Dense::convert_to(Sellp *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_sellp *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_sellp(in, out); + }); } @@ -623,10 +614,9 @@ void Dense::move_to(Sellp *result) template void Dense::convert_to(Sellp *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_sellp *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_sellp(in, out); + }); } @@ -640,10 +630,9 @@ void Dense::move_to(Sellp *result) template void Dense::convert_to(SparsityCsr *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_sparsity_csr *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_sparsity_csr(in, out); + }); } @@ -657,10 +646,9 @@ void Dense::move_to(SparsityCsr *result) template void Dense::convert_to(SparsityCsr *result) const { - conversion_helper( - result, this, - dense::template make_convert_to_sparsity_csr *&, - decltype(result)>); + conversion_helper(result, this, [](const auto &in, const auto &out) { + return dense::make_convert_to_sparsity_csr(in, out); + }); } diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 59876d620cc..abb56c42c95 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -276,43 +276,95 @@ class Operation { virtual const char *get_name() const noexcept; }; -#define GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(_type, _namespace, _kernel) \ -public: \ - void run(std::shared_ptr exec) const override \ - { \ - this->call(counts{}, exec); \ - } \ - \ -private: \ - template \ - void call(::gko::syn::value_list, \ - std::shared_ptr &exec) const \ - { \ - ::gko::kernels::_namespace::_kernel( \ - exec, std::forward(std::get(data))...); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") -#define GKO_DETAIL_DEFINE_RUN_OVERLOAD(_type, _namespace, _kernel, ...) \ -public: \ - void run(std::shared_ptr exec) const override \ - { \ - this->call(counts{}, exec); \ - } \ - \ -private: \ - template \ - void call(::gko::syn::value_list, \ - std::shared_ptr &exec) const \ - { \ - ::gko::kernels::_namespace::_kernel( \ - exec, std::forward(std::get(data))...); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ - "semi-colon warnings") +namespace detail { + + +constexpr static int ref_exec_tag = 0; +constexpr static int omp_exec_tag = 1; +constexpr static int cuda_exec_tag = 2; +constexpr static int hip_exec_tag = 3; +constexpr static int dpcpp_exec_tag = 4; + + +/** + * The RegisteredOperation class wraps a functor that will be called with a tag + * parameter based on the dynamic type of the executor that runs it. + * + * ReferenceExecutor uses ref_exec_tag, OmpExecutor uses omp_exec_tag, + * CudaExecutor uses cuda_exec_tag, HipExecutor uses hip_exec_tag and + * DpcppExecutor uses dpcpp_exec_tag. + * + * It is used to implement the @ref GKO_REGISTER_OPERATION macro. + * + * @tparam Closure the type of the functor of taking parameters + * (std::shared_ptr, int) + */ +template +class RegisteredOperation : public Operation { +public: + /** + * Creates a RegisteredOperation object from a functor and a name. + * + * @param name the name to be used for this operation + * @param op a functor object which will be called with the executor and + corresponding tag + */ + RegisteredOperation(const char *name, int num_params, Closure op) + : name_(name), num_params_(num_params), op_(std::move(op)) + {} + + const char *get_name() const noexcept override + { + static auto name = [this] { + std::ostringstream oss; + oss << name_ << '#' << num_params_; + return oss.str(); + }(); + return name.c_str(); + } + + void run(std::shared_ptr exec) const override + { + op_(exec, ref_exec_tag); + } + + void run(std::shared_ptr exec) const override + { + op_(exec, omp_exec_tag); + } + + void run(std::shared_ptr exec) const override + { + op_(exec, cuda_exec_tag); + } + + void run(std::shared_ptr exec) const override + { + op_(exec, hip_exec_tag); + } + + void run(std::shared_ptr exec) const override + { + op_(exec, dpcpp_exec_tag); + } + +private: + const char *name_; + int num_params_; + Closure op_; +}; + + +template +RegisteredOperation make_register_operation(const char *name, + int num_params, Closure op) +{ + return RegisteredOperation{name, num_params, std::move(op)}; +} + + +} // namespace detail /** @@ -386,45 +438,51 @@ private: \ * * @ingroup Executor */ -#define GKO_REGISTER_OPERATION(_name, _kernel) \ - template \ - class _name##_operation : public Operation { \ - using counts = \ - ::gko::syn::as_list<::gko::syn::range<0, sizeof...(Args)>>; \ - \ - public: \ - explicit _name##_operation(Args &&... args) \ - : data(std::forward(args)...) \ - {} \ - \ - const char *get_name() const noexcept override \ - { \ - static auto name = [this] { \ - std::ostringstream oss; \ - oss << #_kernel << '#' << sizeof...(Args); \ - return oss.str(); \ - }(); \ - return name.c_str(); \ - } \ - \ - GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(OmpExecutor, omp, _kernel); \ - GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(CudaExecutor, cuda, _kernel); \ - GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(HipExecutor, hip, _kernel); \ - GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(DpcppExecutor, dpcpp, _kernel); \ - GKO_KERNEL_DETAIL_DEFINE_RUN_OVERLOAD(ReferenceExecutor, reference, \ - _kernel); \ - \ - private: \ - mutable std::tuple data; \ - }; \ - \ - template \ - static _name##_operation make_##_name(Args &&... args) \ - { \ - return _name##_operation(std::forward(args)...); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ +#define GKO_REGISTER_OPERATION(_name, _kernel) \ + template \ + auto make_##_name(Args &&... args) \ + { \ + return ::gko::detail::make_register_operation( \ + #_name, sizeof...(Args), \ + [&args...](std::shared_ptr exec, int tag) { \ + switch (tag) { \ + case ::gko::detail::ref_exec_tag: \ + ::gko::kernels::reference::_kernel( \ + std::static_pointer_cast< \ + const ::gko::ReferenceExecutor>(exec), \ + std::forward(args)...); \ + return; \ + case ::gko::detail::omp_exec_tag: \ + ::gko::kernels::omp::_kernel( \ + std::static_pointer_cast( \ + exec), \ + std::forward(args)...); \ + return; \ + case ::gko::detail::cuda_exec_tag: \ + ::gko::kernels::cuda::_kernel( \ + std::static_pointer_cast( \ + exec), \ + std::forward(args)...); \ + return; \ + case ::gko::detail::hip_exec_tag: \ + ::gko::kernels::hip::_kernel( \ + std::static_pointer_cast( \ + exec), \ + std::forward(args)...); \ + return; \ + case ::gko::detail::dpcpp_exec_tag: \ + ::gko::kernels::dpcpp::_kernel( \ + std::static_pointer_cast( \ + exec), \ + std::forward(args)...); \ + return; \ + default: \ + GKO_NOT_IMPLEMENTED; \ + } \ + }); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ "semi-colon warnings") @@ -923,13 +981,12 @@ class Executor : public log::EnableLogging { private: /** - * The LambdaOperation class wraps three functor objects into an + * The LambdaOperation class wraps four functor objects into an * Operation. * * The first object is called by the OmpExecutor, the second one by the * CudaExecutor and the last one by the HipExecutor. When run on the - * ReferenceExecutor, the implementation will launch the CPU reference - * version. + * ReferenceExecutor, the implementation will launch the OpenMP version. * * @tparam ClosureOmp the type of the first functor * @tparam ClosureCuda the type of the second functor @@ -941,14 +998,14 @@ class Executor : public log::EnableLogging { class LambdaOperation : public Operation { public: /** - * Creates an LambdaOperation object from two functors. + * Creates an LambdaOperation object from four functors. * * @param op_omp a functor object which will be called by OmpExecutor * and ReferenceExecutor * @param op_cuda a functor object which will be called by CudaExecutor * @param op_hip a functor object which will be called by HipExecutor * @param op_dpcpp a functor object which will be called by - * DpcppExecutor + * DpcppExecutor */ LambdaOperation(const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) From 63b6971965b2798e49d3374611e1f1438407b6b7 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 31 Aug 2021 11:12:51 -0400 Subject: [PATCH 2/4] simplify GKO_REGISTER_OPERATION dispatch Use the parameter type instead of an int parameter. --- include/ginkgo/core/base/executor.hpp | 65 ++++++++++++++------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index abb56c42c95..4db6a8cc17f 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -280,13 +280,6 @@ class Operation { namespace detail { -constexpr static int ref_exec_tag = 0; -constexpr static int omp_exec_tag = 1; -constexpr static int cuda_exec_tag = 2; -constexpr static int hip_exec_tag = 3; -constexpr static int dpcpp_exec_tag = 4; - - /** * The RegisteredOperation class wraps a functor that will be called with a tag * parameter based on the dynamic type of the executor that runs it. @@ -326,27 +319,27 @@ class RegisteredOperation : public Operation { void run(std::shared_ptr exec) const override { - op_(exec, ref_exec_tag); + op_(exec); } void run(std::shared_ptr exec) const override { - op_(exec, omp_exec_tag); + op_(exec); } void run(std::shared_ptr exec) const override { - op_(exec, cuda_exec_tag); + op_(exec); } void run(std::shared_ptr exec) const override { - op_(exec, hip_exec_tag); + op_(exec); } void run(std::shared_ptr exec) const override { - op_(exec, dpcpp_exec_tag); + op_(exec); } private: @@ -440,43 +433,51 @@ RegisteredOperation make_register_operation(const char *name, */ #define GKO_REGISTER_OPERATION(_name, _kernel) \ template \ - auto make_##_name(Args &&... args) \ + auto make_##_name(Args &&... args) \ { \ return ::gko::detail::make_register_operation( \ - #_name, sizeof...(Args), \ - [&args...](std::shared_ptr exec, int tag) { \ - switch (tag) { \ - case ::gko::detail::ref_exec_tag: \ + #_name, sizeof...(Args), [&args...](auto exec) { \ + if (std::is_same< \ + decltype(exec), \ + std::shared_ptr>:: \ + value) { \ ::gko::kernels::reference::_kernel( \ - std::static_pointer_cast< \ + std::dynamic_pointer_cast< \ const ::gko::ReferenceExecutor>(exec), \ std::forward(args)...); \ - return; \ - case ::gko::detail::omp_exec_tag: \ + } else if (std::is_same< \ + decltype(exec), \ + std::shared_ptr>:: \ + value) { \ ::gko::kernels::omp::_kernel( \ - std::static_pointer_cast( \ + std::dynamic_pointer_cast( \ exec), \ std::forward(args)...); \ - return; \ - case ::gko::detail::cuda_exec_tag: \ + } else if (std::is_same< \ + decltype(exec), \ + std::shared_ptr>:: \ + value) { \ ::gko::kernels::cuda::_kernel( \ - std::static_pointer_cast( \ + std::dynamic_pointer_cast( \ exec), \ std::forward(args)...); \ - return; \ - case ::gko::detail::hip_exec_tag: \ + } else if (std::is_same< \ + decltype(exec), \ + std::shared_ptr>:: \ + value) { \ ::gko::kernels::hip::_kernel( \ - std::static_pointer_cast( \ + std::dynamic_pointer_cast( \ exec), \ std::forward(args)...); \ - return; \ - case ::gko::detail::dpcpp_exec_tag: \ + } else if (std::is_same< \ + decltype(exec), \ + std::shared_ptr>:: \ + value) { \ ::gko::kernels::dpcpp::_kernel( \ - std::static_pointer_cast( \ + std::dynamic_pointer_cast( \ exec), \ std::forward(args)...); \ - return; \ - default: \ + } else { \ GKO_NOT_IMPLEMENTED; \ } \ }); \ From 2f9cefb63cbfe615d47494b00781e186608e484e Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 31 Aug 2021 12:40:51 -0400 Subject: [PATCH 3/4] review updates * Fix comments * Simplify macro Co-authored-by: Terry Cojean --- include/ginkgo/core/base/executor.hpp | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 4db6a8cc17f..b6770f92310 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -281,12 +281,8 @@ namespace detail { /** - * The RegisteredOperation class wraps a functor that will be called with a tag - * parameter based on the dynamic type of the executor that runs it. - * - * ReferenceExecutor uses ref_exec_tag, OmpExecutor uses omp_exec_tag, - * CudaExecutor uses cuda_exec_tag, HipExecutor uses hip_exec_tag and - * DpcppExecutor uses dpcpp_exec_tag. + * The RegisteredOperation class wraps a functor that will be called with the + * executor statically cast to its dynamic type. * * It is used to implement the @ref GKO_REGISTER_OPERATION macro. * @@ -300,8 +296,7 @@ class RegisteredOperation : public Operation { * Creates a RegisteredOperation object from a functor and a name. * * @param name the name to be used for this operation - * @param op a functor object which will be called with the executor and - corresponding tag + * @param op a functor object which will be called with the executor. */ RegisteredOperation(const char *name, int num_params, Closure op) : name_(name), num_params_(num_params), op_(std::move(op)) @@ -433,12 +428,13 @@ RegisteredOperation make_register_operation(const char *name, */ #define GKO_REGISTER_OPERATION(_name, _kernel) \ template \ - auto make_##_name(Args &&... args) \ + auto make_##_name(Args &&... args) \ { \ return ::gko::detail::make_register_operation( \ #_name, sizeof...(Args), [&args...](auto exec) { \ + using exec_type = decltype(exec); \ if (std::is_same< \ - decltype(exec), \ + exec_type, \ std::shared_ptr>:: \ value) { \ ::gko::kernels::reference::_kernel( \ @@ -446,7 +442,7 @@ RegisteredOperation make_register_operation(const char *name, const ::gko::ReferenceExecutor>(exec), \ std::forward(args)...); \ } else if (std::is_same< \ - decltype(exec), \ + exec_type, \ std::shared_ptr>:: \ value) { \ ::gko::kernels::omp::_kernel( \ @@ -454,7 +450,7 @@ RegisteredOperation make_register_operation(const char *name, exec), \ std::forward(args)...); \ } else if (std::is_same< \ - decltype(exec), \ + exec_type, \ std::shared_ptr>:: \ value) { \ ::gko::kernels::cuda::_kernel( \ @@ -462,7 +458,7 @@ RegisteredOperation make_register_operation(const char *name, exec), \ std::forward(args)...); \ } else if (std::is_same< \ - decltype(exec), \ + exec_type, \ std::shared_ptr>:: \ value) { \ ::gko::kernels::hip::_kernel( \ @@ -470,7 +466,7 @@ RegisteredOperation make_register_operation(const char *name, exec), \ std::forward(args)...); \ } else if (std::is_same< \ - decltype(exec), \ + exec_type, \ std::shared_ptr>:: \ value) { \ ::gko::kernels::dpcpp::_kernel( \ From 109c48d1eb7d4dcc7e61527d607b0e9cb80f61e8 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 31 Aug 2021 18:55:50 +0200 Subject: [PATCH 4/4] review update --- core/matrix/dense.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 413ef1d0739..37b82970f6d 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -468,6 +468,7 @@ void Dense::move_to(Dense> *result) template void Dense::convert_to(Coo *result) const { + // const ref parameters, as make_* functions take parameters by ref conversion_helper(result, this, [](const auto &in, const auto &out) { return dense::make_convert_to_coo(in, out); });