From be2478bb672f3e7a901060c2280d301d6341f61a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 7 Feb 2022 22:48:43 +0100 Subject: [PATCH 1/7] Add strategy enum and set default to gko --- include/ginkgo/core/matrix/dense.hpp | 51 ++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index e7fac1987f2..1e64c1682b3 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -155,6 +155,8 @@ class Dense using row_major_range = gko::range>; + enum class strategy_type { vendor, gko }; + /** * Creates a Dense matrix with the same size and stride as another Dense * matrix. @@ -183,10 +185,11 @@ class Dense */ static std::unique_ptr create_with_type_of( const Dense* other, std::shared_ptr exec, - const dim<2>& size = dim<2>{}) + const dim<2>& size = dim<2>{}, + const strategy_type strategy = strategy_type::gko) { // See create_with_config_of() - return (*other).create_with_type_of_impl(exec, size, size[1]); + return (*other).create_with_type_of_impl(exec, size, size[1], strategy); } /** @@ -199,10 +202,11 @@ class Dense */ static std::unique_ptr create_with_type_of( const Dense* other, std::shared_ptr exec, - const dim<2>& size, size_type stride) + const dim<2>& size, size_type stride, + const strategy_type strategy = strategy_type::gko) { // See create_with_config_of() - return (*other).create_with_type_of_impl(exec, size, stride); + return (*other).create_with_type_of_impl(exec, size, stride, strategy); } friend class Dense>; @@ -634,6 +638,19 @@ class Dense return values_.get_num_elems(); } + /** Returns the strategy to be used for the operations. See @strategy_type + * + * @return the strategy + */ + strategy_type get_strategy() const noexcept { return strategy_; } + + /** + * Set the strategy + * + * @param strategy the dense strategy + */ + void set_strategy(strategy_type strategy) { strategy_ = strategy; } + /** * Returns a single element of the matrix. * @@ -913,8 +930,9 @@ class Dense * @param exec Executor associated to the matrix * @param size size of the matrix */ - Dense(std::shared_ptr exec, const dim<2>& size = dim<2>{}) - : Dense(std::move(exec), size, size[1]) + Dense(std::shared_ptr exec, const dim<2>& size = dim<2>{}, + const strategy_type strategy = strategy_type::gko) + : Dense(std::move(exec), size, size[1], strategy) {} /** @@ -927,10 +945,11 @@ class Dense * number of matrix elements) */ Dense(std::shared_ptr exec, const dim<2>& size, - size_type stride) + size_type stride, const strategy_type strategy = strategy_type::gko) : EnableLinOp(exec, size), values_(exec, size[0] * stride), - stride_(stride) + stride_(stride), + strategy_(strategy) {} /** @@ -951,10 +970,12 @@ class Dense */ template Dense(std::shared_ptr exec, const dim<2>& size, - ValuesArray&& values, size_type stride) + ValuesArray&& values, size_type stride, + const strategy_type strategy = strategy_type::gko) : EnableLinOp(exec, size), values_{exec, std::forward(values)}, - stride_{stride} + stride_{stride}, + strategy_(strategy) { if (size[0] > 0 && size[1] > 0) { GKO_ENSURE_IN_BOUNDS((size[0] - 1) * stride + size[1] - 1, @@ -971,7 +992,7 @@ class Dense virtual std::unique_ptr create_with_same_config() const { return Dense::create(this->get_executor(), this->get_size(), - this->get_stride()); + this->get_stride(), this->get_strategy()); } /** @@ -983,9 +1004,10 @@ class Dense */ virtual std::unique_ptr create_with_type_of_impl( std::shared_ptr exec, const dim<2>& size, - size_type stride) const + size_type stride, + const strategy_type strategy = strategy_type::gko) const { - return Dense::create(exec, size, stride); + return Dense::create(exec, size, stride, strategy); } template @@ -1106,7 +1128,7 @@ class Dense this->get_executor(), range_result.length(0) * range_this.length(1) - columns.begin, range_result->data), - stride); + stride, this->get_strategy()); } void apply_impl(const LinOp* b, LinOp* x) const override; @@ -1161,6 +1183,7 @@ class Dense private: Array values_; size_type stride_; + strategy_type strategy_{strategy_type::gko}; }; From e10863d0ff2d9e31267d851fc4347405d276475f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 7 Feb 2022 22:49:13 +0100 Subject: [PATCH 2/7] Add core with switches and kernel impls --- core/device_hooks/common_kernels.inc.cpp | 1 + core/matrix/dense.cpp | 33 ++++++++++++++--- core/matrix/dense_kernels.hpp | 8 +++++ cuda/matrix/dense_kernels.cu | 45 ++++++++++++++++++------ cuda/test/matrix/dense_kernels.cpp | 22 ++++++++++++ dpcpp/matrix/dense_kernels.dp.cpp | 30 ++++++++++------ hip/matrix/dense_kernels.hip.cpp | 45 ++++++++++++++++++------ hip/test/matrix/dense_kernels.hip.cpp | 22 ++++++++++++ omp/matrix/dense_kernels.cpp | 30 ++++++++++------ reference/matrix/dense_kernels.cpp | 20 +++++++++++ 10 files changed, 211 insertions(+), 45 deletions(-) diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index c288aac4ea2..0c0606f6976 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -255,6 +255,7 @@ GKO_STUB_VALUE_AND_SCALAR_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL); diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 1d61ac82c40..bbb4309af4e 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -77,6 +77,7 @@ GKO_REGISTER_OPERATION(sub_scaled, dense::sub_scaled); GKO_REGISTER_OPERATION(add_scaled_diag, dense::add_scaled_diag); GKO_REGISTER_OPERATION(sub_scaled_diag, dense::sub_scaled_diag); GKO_REGISTER_OPERATION(compute_dot, dense::compute_dot); +GKO_REGISTER_OPERATION(compute_dot_vendor, dense::compute_dot_vendor); GKO_REGISTER_OPERATION(compute_conj_dot, dense::compute_conj_dot); GKO_REGISTER_OPERATION(compute_norm2, dense::compute_norm2); GKO_REGISTER_OPERATION(compute_norm1, dense::compute_norm1); @@ -272,7 +273,14 @@ void Dense::compute_dot_impl(const LinOp* b, LinOp* result) const auto exec = this->get_executor(); auto dense_b = make_temporary_conversion(b); auto dense_res = make_temporary_conversion(result); - exec->run(dense::make_compute_dot(this, dense_b.get(), dense_res.get())); + auto strat = this->get_strategy(); + if (strat == strategy_type::gko) { + exec->run( + dense::make_compute_dot(this, dense_b.get(), dense_res.get())); + } else if (strat == strategy_type::vendor) { + exec->run(dense::make_compute_dot_vendor(this, dense_b.get(), + dense_res.get())); + } } @@ -285,8 +293,13 @@ void Dense::compute_conj_dot_impl(const LinOp* b, auto exec = this->get_executor(); auto dense_b = make_temporary_conversion(b); auto dense_res = make_temporary_conversion(result); - exec->run( - dense::make_compute_conj_dot(this, dense_b.get(), dense_res.get())); + auto strat = this->get_strategy(); + if (strat == strategy_type::gko) { + exec->run( + dense::make_compute_conj_dot(this, dense_b.get(), dense_res.get())); + } else if (strat == strategy_type::vendor) { + GKO_NOT_IMPLEMENTED; + } } @@ -297,7 +310,12 @@ void Dense::compute_norm2_impl(LinOp* result) const auto exec = this->get_executor(); auto dense_res = make_temporary_conversion>(result); - exec->run(dense::make_compute_norm2(this, dense_res.get())); + auto strat = this->get_strategy(); + if (strat == strategy_type::gko) { + exec->run(dense::make_compute_norm2(this, dense_res.get())); + } else if (strat == strategy_type::vendor) { + GKO_NOT_IMPLEMENTED; + } } template @@ -307,7 +325,12 @@ void Dense::compute_norm1_impl(LinOp* result) const auto exec = this->get_executor(); auto dense_res = make_temporary_conversion>(result); - exec->run(dense::make_compute_norm1(this, dense_res.get())); + auto strat = this->get_strategy(); + if (strat == strategy_type::gko) { + exec->run(dense::make_compute_norm1(this, dense_res.get())); + } else if (strat == strategy_type::vendor) { + GKO_NOT_IMPLEMENTED; + } } template diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 1cb428c8b0d..3c9f5d61133 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -110,6 +110,12 @@ namespace kernels { const matrix::Dense<_type>* y, \ matrix::Dense<_type>* result) +#define GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL(_type) \ + void compute_dot_vendor(std::shared_ptr exec, \ + const matrix::Dense<_type>* x, \ + const matrix::Dense<_type>* y, \ + matrix::Dense<_type>* result) + #define GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(_type) \ void compute_conj_dot(std::shared_ptr exec, \ const matrix::Dense<_type>* x, \ @@ -302,6 +308,8 @@ namespace kernels { template \ GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL(ValueType); \ template \ + GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL(ValueType); \ + template \ GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL(ValueType); \ diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu index 7d2fc8defc7..1df5e9d3356 100644 --- a/cuda/matrix/dense_kernels.cu +++ b/cuda/matrix/dense_kernels.cu @@ -74,7 +74,32 @@ constexpr int default_block_size = 512; template -void simple_apply(std::shared_ptr exec, +void compute_dot_vendor(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) +{ + if (cublas::is_supported::value) { + auto handle = exec->get_cublas_handle(); + if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { + cublas::pointer_mode_guard pm_guard(handle); + cublas::dot(handle, x->get_size()[0], x->get_const_values(), + x->get_size()[1], y->get_const_values(), + y->get_size()[1], result->get_values()); + } else { + GKO_NOT_IMPLEMENTED; + } + } else { + GKO_NOT_IMPLEMENTED; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); + + +template +void simple_apply(std::shared_ptr exec, const matrix::Dense* a, const matrix::Dense* b, matrix::Dense* c) @@ -104,7 +129,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template -void apply(std::shared_ptr exec, +void apply(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Dense* a, const matrix::Dense* b, const matrix::Dense* beta, matrix::Dense* c) @@ -131,7 +156,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template -void convert_to_coo(std::shared_ptr exec, +void convert_to_coo(std::shared_ptr exec, const matrix::Dense* source, const int64* row_ptrs, matrix::Coo* result) @@ -160,7 +185,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_csr(std::shared_ptr exec, +void convert_to_csr(std::shared_ptr exec, const matrix::Dense* source, matrix::Csr* result) { @@ -188,7 +213,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_ell(std::shared_ptr exec, +void convert_to_ell(std::shared_ptr exec, const matrix::Dense* source, matrix::Ell* result) { @@ -236,7 +261,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_hybrid(std::shared_ptr exec, +void convert_to_hybrid(std::shared_ptr exec, const matrix::Dense* source, const int64* coo_row_ptrs, matrix::Hybrid* result) @@ -268,7 +293,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_sellp(std::shared_ptr exec, +void convert_to_sellp(std::shared_ptr exec, const matrix::Dense* source, matrix::Sellp* result) { @@ -297,7 +322,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_sparsity_csr(std::shared_ptr exec, +void convert_to_sparsity_csr(std::shared_ptr exec, const matrix::Dense* source, matrix::SparsityCsr* result) { @@ -323,7 +348,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void transpose(std::shared_ptr exec, +void transpose(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* trans) { @@ -348,7 +373,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template -void conj_transpose(std::shared_ptr exec, +void conj_transpose(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* trans) { diff --git a/cuda/test/matrix/dense_kernels.cpp b/cuda/test/matrix/dense_kernels.cpp index a45274cdb9b..a4d9622a16f 100644 --- a/cuda/test/matrix/dense_kernels.cpp +++ b/cuda/test/matrix/dense_kernels.cpp @@ -193,6 +193,28 @@ TEST_F(Dense, SingleVectorCudaComputeDotIsEquivalentToRef) } +TEST_F(Dense, SingleVectorCudaComputeDotVendorIsEquivalentToRef) +{ + set_up_vector_data(1); + dx->set_strategy(Mtx::strategy_type::vendor); + + x->compute_dot(y.get(), expected.get()); + dx->compute_dot(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Dense, MultipleVectorCudaComputeDotVendorFails) +{ + set_up_vector_data(20); + dx->set_strategy(Mtx::strategy_type::vendor); + + x->compute_dot(y.get(), expected.get()); + ASSERT_THROW(dx->compute_dot(dy.get(), dresult.get()), gko::NotImplemented); +} + + TEST_F(Dense, MultipleVectorCudaComputeDotIsEquivalentToRef) { set_up_vector_data(20); diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 618fab1ee29..17d5e74be28 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -190,7 +190,17 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(conj_transpose_call, conj_transpose, template -void simple_apply(std::shared_ptr exec, +void compute_dot_vendor(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); + + +template +void simple_apply(std::shared_ptr exec, const matrix::Dense* a, const matrix::Dense* b, matrix::Dense* c) @@ -207,7 +217,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template -void apply(std::shared_ptr exec, +void apply(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Dense* a, const matrix::Dense* b, const matrix::Dense* beta, matrix::Dense* c) @@ -226,7 +236,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template -void convert_to_coo(std::shared_ptr exec, +void convert_to_coo(std::shared_ptr exec, const matrix::Dense* source, const int64* row_ptrs, matrix::Coo* result) @@ -262,7 +272,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_csr(std::shared_ptr exec, +void convert_to_csr(std::shared_ptr exec, const matrix::Dense* source, matrix::Csr* result) { @@ -296,7 +306,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_ell(std::shared_ptr exec, +void convert_to_ell(std::shared_ptr exec, const matrix::Dense* source, matrix::Ell* result) { @@ -355,7 +365,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_hybrid(std::shared_ptr exec, +void convert_to_hybrid(std::shared_ptr exec, const matrix::Dense* source, const int64* coo_row_ptrs, matrix::Hybrid* result) @@ -411,7 +421,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_sellp(std::shared_ptr exec, +void convert_to_sellp(std::shared_ptr exec, const matrix::Dense* source, matrix::Sellp* result) { @@ -454,7 +464,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_sparsity_csr(std::shared_ptr exec, +void convert_to_sparsity_csr(std::shared_ptr exec, const matrix::Dense* source, matrix::SparsityCsr* result) { @@ -486,7 +496,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void transpose(std::shared_ptr exec, +void transpose(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* trans) { @@ -508,7 +518,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template -void conj_transpose(std::shared_ptr exec, +void conj_transpose(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* trans) { diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp index 63e42fbea5b..77b7f36806b 100644 --- a/hip/matrix/dense_kernels.hip.cpp +++ b/hip/matrix/dense_kernels.hip.cpp @@ -77,7 +77,32 @@ constexpr int default_block_size = 512; template -void simple_apply(std::shared_ptr exec, +void compute_dot_vendor(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) +{ + if (hipblas::is_supported::value) { + auto handle = exec->get_hipblas_handle(); + if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { + hipblas::pointer_mode_guard pm_guard(handle); + hipblas::dot(handle, x->get_size()[0], x->get_const_values(), + x->get_size()[1], y->get_const_values(), + y->get_size()[1], result->get_values()); + } else { + GKO_NOT_IMPLEMENTED; + } + } else { + GKO_NOT_IMPLEMENTED; + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); + + +template +void simple_apply(std::shared_ptr exec, const matrix::Dense* a, const matrix::Dense* b, matrix::Dense* c) @@ -108,7 +133,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template -void apply(std::shared_ptr exec, +void apply(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Dense* a, const matrix::Dense* b, const matrix::Dense* beta, matrix::Dense* c) @@ -135,7 +160,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template -void convert_to_coo(std::shared_ptr exec, +void convert_to_coo(std::shared_ptr exec, const matrix::Dense* source, const int64* row_ptrs, matrix::Coo* result) @@ -164,7 +189,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_csr(std::shared_ptr exec, +void convert_to_csr(std::shared_ptr exec, const matrix::Dense* source, matrix::Csr* result) { @@ -192,7 +217,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_ell(std::shared_ptr exec, +void convert_to_ell(std::shared_ptr exec, const matrix::Dense* source, matrix::Ell* result) { @@ -241,7 +266,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_hybrid(std::shared_ptr exec, +void convert_to_hybrid(std::shared_ptr exec, const matrix::Dense* source, const int64* coo_row_ptrs, matrix::Hybrid* result) @@ -274,7 +299,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_sellp(std::shared_ptr exec, +void convert_to_sellp(std::shared_ptr exec, const matrix::Dense* source, matrix::Sellp* result) { @@ -305,7 +330,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_sparsity_csr(std::shared_ptr exec, +void convert_to_sparsity_csr(std::shared_ptr exec, const matrix::Dense* source, matrix::SparsityCsr* result) { @@ -332,7 +357,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void transpose(std::shared_ptr exec, +void transpose(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* trans) { @@ -357,7 +382,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template -void conj_transpose(std::shared_ptr exec, +void conj_transpose(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* trans) { diff --git a/hip/test/matrix/dense_kernels.hip.cpp b/hip/test/matrix/dense_kernels.hip.cpp index 894b5fcc408..e111a10b761 100644 --- a/hip/test/matrix/dense_kernels.hip.cpp +++ b/hip/test/matrix/dense_kernels.hip.cpp @@ -189,6 +189,28 @@ TEST_F(Dense, SingleVectorHipComputeDotIsEquivalentToRef) } +TEST_F(Dense, SingleVectorHipComputeDotVendorIsEquivalentToRef) +{ + set_up_vector_data(1); + dx->set_strategy(Mtx::strategy_type::vendor); + + x->compute_dot(y.get(), expected.get()); + dx->compute_dot(dy.get(), dresult.get()); + + GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); +} + + +TEST_F(Dense, MultipleVectorHipComputeDotVendorFails) +{ + set_up_vector_data(20); + dx->set_strategy(Mtx::strategy_type::vendor); + + x->compute_dot(y.get(), expected.get()); + ASSERT_THROW(dx->compute_dot(dy.get(), dresult.get()), gko::NotImplemented); +} + + TEST_F(Dense, MultipleVectorHipComputeDotIsEquivalentToRef) { set_up_vector_data(20); diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index dd47a2b3ed7..df78b50a29a 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -69,7 +69,17 @@ namespace dense { template -void simple_apply(std::shared_ptr exec, +void compute_dot_vendor(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); + + +template +void simple_apply(std::shared_ptr exec, const matrix::Dense* a, const matrix::Dense* b, matrix::Dense* c) @@ -95,7 +105,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template -void apply(std::shared_ptr exec, +void apply(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Dense* a, const matrix::Dense* b, const matrix::Dense* beta, matrix::Dense* c) @@ -131,7 +141,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template -void convert_to_coo(std::shared_ptr exec, +void convert_to_coo(std::shared_ptr exec, const matrix::Dense* source, const int64* row_ptrs, matrix::Coo* result) @@ -164,7 +174,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_csr(std::shared_ptr exec, +void convert_to_csr(std::shared_ptr exec, const matrix::Dense* source, matrix::Csr* result) { @@ -195,7 +205,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_ell(std::shared_ptr exec, +void convert_to_ell(std::shared_ptr exec, const matrix::Dense* source, matrix::Ell* result) { @@ -276,7 +286,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_hybrid(std::shared_ptr exec, +void convert_to_hybrid(std::shared_ptr exec, const matrix::Dense* source, const int64* coo_row_ptrs, matrix::Hybrid* result) @@ -322,7 +332,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_sellp(std::shared_ptr exec, +void convert_to_sellp(std::shared_ptr exec, const matrix::Dense* source, matrix::Sellp* result) { @@ -364,7 +374,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_sparsity_csr(std::shared_ptr exec, +void convert_to_sparsity_csr(std::shared_ptr exec, const matrix::Dense* source, matrix::SparsityCsr* result) { @@ -394,7 +404,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void transpose(std::shared_ptr exec, +void transpose(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* trans) { @@ -410,7 +420,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template -void conj_transpose(std::shared_ptr exec, +void conj_transpose(std::shared_ptr exec, const matrix::Dense* orig, matrix::Dense* trans) { diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index c45f3316414..f8321c2857c 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -297,6 +297,26 @@ void compute_dot(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); +template +void compute_dot_vendor(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) +{ + for (size_type j = 0; j < x->get_size()[1]; ++j) { + result->at(0, j) = zero(); + } + for (size_type i = 0; i < x->get_size()[0]; ++i) { + for (size_type j = 0; j < x->get_size()[1]; ++j) { + result->at(0, j) += x->at(i, j) * y->at(i, j); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); + + template void compute_conj_dot(std::shared_ptr exec, const matrix::Dense* x, From fc5efa791a838d561ab663d03db322af31a0d9b9 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 8 Feb 2022 15:10:12 +0100 Subject: [PATCH 3/7] Move the kernel_impl switch inside the kernel. --- core/device_hooks/common_kernels.inc.cpp | 2 +- core/matrix/dense.cpp | 35 ++++------------ core/matrix/dense_kernels.hpp | 14 +++---- cuda/matrix/dense_kernels.cu | 18 ++++----- cuda/test/matrix/dense_kernels.cpp | 22 ---------- dpcpp/matrix/dense_kernels.dp.cpp | 14 ++++--- hip/matrix/dense_kernels.hip.cpp | 18 ++++----- hip/test/matrix/dense_kernels.hip.cpp | 22 ---------- include/ginkgo/core/matrix/dense.hpp | 51 +++++++----------------- omp/matrix/dense_kernels.cpp | 14 ++++--- reference/matrix/dense_kernels.cpp | 19 +++------ 11 files changed, 71 insertions(+), 158 deletions(-) diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 0c0606f6976..015c83cfdc3 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -255,7 +255,7 @@ GKO_STUB_VALUE_AND_SCALAR_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); -GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL); diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index bbb4309af4e..94a565ffa0c 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -77,7 +77,7 @@ GKO_REGISTER_OPERATION(sub_scaled, dense::sub_scaled); GKO_REGISTER_OPERATION(add_scaled_diag, dense::add_scaled_diag); GKO_REGISTER_OPERATION(sub_scaled_diag, dense::sub_scaled_diag); GKO_REGISTER_OPERATION(compute_dot, dense::compute_dot); -GKO_REGISTER_OPERATION(compute_dot_vendor, dense::compute_dot_vendor); +GKO_REGISTER_OPERATION(compute_dot_dispatch, dense::compute_dot_dispatch); GKO_REGISTER_OPERATION(compute_conj_dot, dense::compute_conj_dot); GKO_REGISTER_OPERATION(compute_norm2, dense::compute_norm2); GKO_REGISTER_OPERATION(compute_norm1, dense::compute_norm1); @@ -273,14 +273,8 @@ void Dense::compute_dot_impl(const LinOp* b, LinOp* result) const auto exec = this->get_executor(); auto dense_b = make_temporary_conversion(b); auto dense_res = make_temporary_conversion(result); - auto strat = this->get_strategy(); - if (strat == strategy_type::gko) { - exec->run( - dense::make_compute_dot(this, dense_b.get(), dense_res.get())); - } else if (strat == strategy_type::vendor) { - exec->run(dense::make_compute_dot_vendor(this, dense_b.get(), - dense_res.get())); - } + exec->run( + dense::make_compute_dot_dispatch(this, dense_b.get(), dense_res.get())); } @@ -293,13 +287,8 @@ void Dense::compute_conj_dot_impl(const LinOp* b, auto exec = this->get_executor(); auto dense_b = make_temporary_conversion(b); auto dense_res = make_temporary_conversion(result); - auto strat = this->get_strategy(); - if (strat == strategy_type::gko) { - exec->run( - dense::make_compute_conj_dot(this, dense_b.get(), dense_res.get())); - } else if (strat == strategy_type::vendor) { - GKO_NOT_IMPLEMENTED; - } + exec->run( + dense::make_compute_conj_dot(this, dense_b.get(), dense_res.get())); } @@ -310,12 +299,7 @@ void Dense::compute_norm2_impl(LinOp* result) const auto exec = this->get_executor(); auto dense_res = make_temporary_conversion>(result); - auto strat = this->get_strategy(); - if (strat == strategy_type::gko) { - exec->run(dense::make_compute_norm2(this, dense_res.get())); - } else if (strat == strategy_type::vendor) { - GKO_NOT_IMPLEMENTED; - } + exec->run(dense::make_compute_norm2(this, dense_res.get())); } template @@ -325,12 +309,7 @@ void Dense::compute_norm1_impl(LinOp* result) const auto exec = this->get_executor(); auto dense_res = make_temporary_conversion>(result); - auto strat = this->get_strategy(); - if (strat == strategy_type::gko) { - exec->run(dense::make_compute_norm1(this, dense_res.get())); - } else if (strat == strategy_type::vendor) { - GKO_NOT_IMPLEMENTED; - } + exec->run(dense::make_compute_norm1(this, dense_res.get())); } template diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 3c9f5d61133..45adb45ae71 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -104,18 +104,18 @@ namespace kernels { const matrix::Diagonal<_type>* x, \ matrix::Dense<_type>* y) +#define GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL(_type) \ + void compute_dot_dispatch(std::shared_ptr exec, \ + const matrix::Dense<_type>* x, \ + const matrix::Dense<_type>* y, \ + matrix::Dense<_type>* result) + #define GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL(_type) \ void compute_dot(std::shared_ptr exec, \ const matrix::Dense<_type>* x, \ const matrix::Dense<_type>* y, \ matrix::Dense<_type>* result) -#define GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL(_type) \ - void compute_dot_vendor(std::shared_ptr exec, \ - const matrix::Dense<_type>* x, \ - const matrix::Dense<_type>* y, \ - matrix::Dense<_type>* result) - #define GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(_type) \ void compute_conj_dot(std::shared_ptr exec, \ const matrix::Dense<_type>* x, \ @@ -308,7 +308,7 @@ namespace kernels { template \ GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL(ValueType); \ template \ - GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL(ValueType); \ + GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(ValueType); \ template \ diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu index 1df5e9d3356..0a40e840e84 100644 --- a/cuda/matrix/dense_kernels.cu +++ b/cuda/matrix/dense_kernels.cu @@ -74,14 +74,14 @@ constexpr int default_block_size = 512; template -void compute_dot_vendor(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result) +void compute_dot_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { - if (cublas::is_supported::value) { - auto handle = exec->get_cublas_handle(); - if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { + if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { + if (cublas::is_supported::value) { + auto handle = exec->get_cublas_handle(); cublas::pointer_mode_guard pm_guard(handle); cublas::dot(handle, x->get_size()[0], x->get_const_values(), x->get_size()[1], y->get_const_values(), @@ -90,12 +90,12 @@ void compute_dot_vendor(std::shared_ptr exec, GKO_NOT_IMPLEMENTED; } } else { - GKO_NOT_IMPLEMENTED; + compute_dot(exec, x, y, result); } } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); + GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL); template diff --git a/cuda/test/matrix/dense_kernels.cpp b/cuda/test/matrix/dense_kernels.cpp index a4d9622a16f..a45274cdb9b 100644 --- a/cuda/test/matrix/dense_kernels.cpp +++ b/cuda/test/matrix/dense_kernels.cpp @@ -193,28 +193,6 @@ TEST_F(Dense, SingleVectorCudaComputeDotIsEquivalentToRef) } -TEST_F(Dense, SingleVectorCudaComputeDotVendorIsEquivalentToRef) -{ - set_up_vector_data(1); - dx->set_strategy(Mtx::strategy_type::vendor); - - x->compute_dot(y.get(), expected.get()); - dx->compute_dot(dy.get(), dresult.get()); - - GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -} - - -TEST_F(Dense, MultipleVectorCudaComputeDotVendorFails) -{ - set_up_vector_data(20); - dx->set_strategy(Mtx::strategy_type::vendor); - - x->compute_dot(y.get(), expected.get()); - ASSERT_THROW(dx->compute_dot(dy.get(), dresult.get()), gko::NotImplemented); -} - - TEST_F(Dense, MultipleVectorCudaComputeDotIsEquivalentToRef) { set_up_vector_data(20); diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 17d5e74be28..705155b0ea8 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -190,13 +190,17 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(conj_transpose_call, conj_transpose, template -void compute_dot_vendor(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result) GKO_NOT_IMPLEMENTED; +void compute_dot_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) +{ + // TODO Add onemkl for single column + compute_dot(exec, x, y, result); +} GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); + GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL); template diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp index 77b7f36806b..b7a22d1c87e 100644 --- a/hip/matrix/dense_kernels.hip.cpp +++ b/hip/matrix/dense_kernels.hip.cpp @@ -77,14 +77,14 @@ constexpr int default_block_size = 512; template -void compute_dot_vendor(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result) +void compute_dot_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { - if (hipblas::is_supported::value) { - auto handle = exec->get_hipblas_handle(); - if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { + if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { + if (hipblas::is_supported::value) { + auto handle = exec->get_hipblas_handle(); hipblas::pointer_mode_guard pm_guard(handle); hipblas::dot(handle, x->get_size()[0], x->get_const_values(), x->get_size()[1], y->get_const_values(), @@ -93,12 +93,12 @@ void compute_dot_vendor(std::shared_ptr exec, GKO_NOT_IMPLEMENTED; } } else { - GKO_NOT_IMPLEMENTED; + compute_dot(exec, x, y, result); } } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); + GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL); template diff --git a/hip/test/matrix/dense_kernels.hip.cpp b/hip/test/matrix/dense_kernels.hip.cpp index e111a10b761..894b5fcc408 100644 --- a/hip/test/matrix/dense_kernels.hip.cpp +++ b/hip/test/matrix/dense_kernels.hip.cpp @@ -189,28 +189,6 @@ TEST_F(Dense, SingleVectorHipComputeDotIsEquivalentToRef) } -TEST_F(Dense, SingleVectorHipComputeDotVendorIsEquivalentToRef) -{ - set_up_vector_data(1); - dx->set_strategy(Mtx::strategy_type::vendor); - - x->compute_dot(y.get(), expected.get()); - dx->compute_dot(dy.get(), dresult.get()); - - GKO_ASSERT_MTX_NEAR(dresult, expected, 1e-14); -} - - -TEST_F(Dense, MultipleVectorHipComputeDotVendorFails) -{ - set_up_vector_data(20); - dx->set_strategy(Mtx::strategy_type::vendor); - - x->compute_dot(y.get(), expected.get()); - ASSERT_THROW(dx->compute_dot(dy.get(), dresult.get()), gko::NotImplemented); -} - - TEST_F(Dense, MultipleVectorHipComputeDotIsEquivalentToRef) { set_up_vector_data(20); diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 1e64c1682b3..e7fac1987f2 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -155,8 +155,6 @@ class Dense using row_major_range = gko::range>; - enum class strategy_type { vendor, gko }; - /** * Creates a Dense matrix with the same size and stride as another Dense * matrix. @@ -185,11 +183,10 @@ class Dense */ static std::unique_ptr create_with_type_of( const Dense* other, std::shared_ptr exec, - const dim<2>& size = dim<2>{}, - const strategy_type strategy = strategy_type::gko) + const dim<2>& size = dim<2>{}) { // See create_with_config_of() - return (*other).create_with_type_of_impl(exec, size, size[1], strategy); + return (*other).create_with_type_of_impl(exec, size, size[1]); } /** @@ -202,11 +199,10 @@ class Dense */ static std::unique_ptr create_with_type_of( const Dense* other, std::shared_ptr exec, - const dim<2>& size, size_type stride, - const strategy_type strategy = strategy_type::gko) + const dim<2>& size, size_type stride) { // See create_with_config_of() - return (*other).create_with_type_of_impl(exec, size, stride, strategy); + return (*other).create_with_type_of_impl(exec, size, stride); } friend class Dense>; @@ -638,19 +634,6 @@ class Dense return values_.get_num_elems(); } - /** Returns the strategy to be used for the operations. See @strategy_type - * - * @return the strategy - */ - strategy_type get_strategy() const noexcept { return strategy_; } - - /** - * Set the strategy - * - * @param strategy the dense strategy - */ - void set_strategy(strategy_type strategy) { strategy_ = strategy; } - /** * Returns a single element of the matrix. * @@ -930,9 +913,8 @@ class Dense * @param exec Executor associated to the matrix * @param size size of the matrix */ - Dense(std::shared_ptr exec, const dim<2>& size = dim<2>{}, - const strategy_type strategy = strategy_type::gko) - : Dense(std::move(exec), size, size[1], strategy) + Dense(std::shared_ptr exec, const dim<2>& size = dim<2>{}) + : Dense(std::move(exec), size, size[1]) {} /** @@ -945,11 +927,10 @@ class Dense * number of matrix elements) */ Dense(std::shared_ptr exec, const dim<2>& size, - size_type stride, const strategy_type strategy = strategy_type::gko) + size_type stride) : EnableLinOp(exec, size), values_(exec, size[0] * stride), - stride_(stride), - strategy_(strategy) + stride_(stride) {} /** @@ -970,12 +951,10 @@ class Dense */ template Dense(std::shared_ptr exec, const dim<2>& size, - ValuesArray&& values, size_type stride, - const strategy_type strategy = strategy_type::gko) + ValuesArray&& values, size_type stride) : EnableLinOp(exec, size), values_{exec, std::forward(values)}, - stride_{stride}, - strategy_(strategy) + stride_{stride} { if (size[0] > 0 && size[1] > 0) { GKO_ENSURE_IN_BOUNDS((size[0] - 1) * stride + size[1] - 1, @@ -992,7 +971,7 @@ class Dense virtual std::unique_ptr create_with_same_config() const { return Dense::create(this->get_executor(), this->get_size(), - this->get_stride(), this->get_strategy()); + this->get_stride()); } /** @@ -1004,10 +983,9 @@ class Dense */ virtual std::unique_ptr create_with_type_of_impl( std::shared_ptr exec, const dim<2>& size, - size_type stride, - const strategy_type strategy = strategy_type::gko) const + size_type stride) const { - return Dense::create(exec, size, stride, strategy); + return Dense::create(exec, size, stride); } template @@ -1128,7 +1106,7 @@ class Dense this->get_executor(), range_result.length(0) * range_this.length(1) - columns.begin, range_result->data), - stride, this->get_strategy()); + stride); } void apply_impl(const LinOp* b, LinOp* x) const override; @@ -1183,7 +1161,6 @@ class Dense private: Array values_; size_type stride_; - strategy_type strategy_{strategy_type::gko}; }; diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index df78b50a29a..924832456f7 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -69,13 +69,17 @@ namespace dense { template -void compute_dot_vendor(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result) GKO_NOT_IMPLEMENTED; +void compute_dot_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) +{ + // OpenMP uses the unified kernel. + compute_dot(exec, x, y, result); +} GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); + GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL); template diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index f8321c2857c..4cfc4246976 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -298,23 +298,16 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); template -void compute_dot_vendor(std::shared_ptr exec, - const matrix::Dense* x, - const matrix::Dense* y, - matrix::Dense* result) +void compute_dot_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) = zero(); - } - for (size_type i = 0; i < x->get_size()[0]; ++i) { - for (size_type j = 0; j < x->get_size()[1]; ++j) { - result->at(0, j) += x->at(i, j) * y->at(i, j); - } - } + compute_dot(exec, x, y, result); } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( - GKO_DECLARE_DENSE_COMPUTE_DOT_VENDOR_KERNEL); + GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL); template From fd49a7fb5f9d4aa585fc0525b8b2745743c197ae Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 10 Feb 2022 22:25:42 +0100 Subject: [PATCH 4/7] Move single vector conj_dot and nrm2 to vendor --- core/device_hooks/common_kernels.inc.cpp | 2 + core/matrix/dense.cpp | 9 ++-- core/matrix/dense_kernels.hpp | 15 +++++++ cuda/matrix/dense_kernels.cu | 52 +++++++++++++++++++++++- hip/matrix/dense_kernels.hip.cpp | 52 +++++++++++++++++++++++- omp/matrix/dense_kernels.cpp | 25 ++++++++++++ reference/matrix/dense_kernels.cpp | 25 ++++++++++++ 7 files changed, 173 insertions(+), 7 deletions(-) diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 015c83cfdc3..2f7e7e9fe2a 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -257,7 +257,9 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL); diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 94a565ffa0c..aebcf96b9f6 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -79,7 +79,10 @@ GKO_REGISTER_OPERATION(sub_scaled_diag, dense::sub_scaled_diag); GKO_REGISTER_OPERATION(compute_dot, dense::compute_dot); GKO_REGISTER_OPERATION(compute_dot_dispatch, dense::compute_dot_dispatch); GKO_REGISTER_OPERATION(compute_conj_dot, dense::compute_conj_dot); +GKO_REGISTER_OPERATION(compute_conj_dot_dispatch, + dense::compute_conj_dot_dispatch); GKO_REGISTER_OPERATION(compute_norm2, dense::compute_norm2); +GKO_REGISTER_OPERATION(compute_norm2_dispatch, dense::compute_norm2_dispatch); GKO_REGISTER_OPERATION(compute_norm1, dense::compute_norm1); GKO_REGISTER_OPERATION(compute_max_nnz_per_row, dense::compute_max_nnz_per_row); GKO_REGISTER_OPERATION(compute_hybrid_coo_row_ptrs, @@ -287,8 +290,8 @@ void Dense::compute_conj_dot_impl(const LinOp* b, auto exec = this->get_executor(); auto dense_b = make_temporary_conversion(b); auto dense_res = make_temporary_conversion(result); - exec->run( - dense::make_compute_conj_dot(this, dense_b.get(), dense_res.get())); + exec->run(dense::make_compute_conj_dot_dispatch(this, dense_b.get(), + dense_res.get())); } @@ -299,7 +302,7 @@ void Dense::compute_norm2_impl(LinOp* result) const auto exec = this->get_executor(); auto dense_res = make_temporary_conversion>(result); - exec->run(dense::make_compute_norm2(this, dense_res.get())); + exec->run(dense::make_compute_norm2_dispatch(this, dense_res.get())); } template diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 45adb45ae71..26ac73462e4 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -116,6 +116,12 @@ namespace kernels { const matrix::Dense<_type>* y, \ matrix::Dense<_type>* result) +#define GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL(_type) \ + void compute_conj_dot_dispatch( \ + std::shared_ptr exec, \ + const matrix::Dense<_type>* x, const matrix::Dense<_type>* y, \ + matrix::Dense<_type>* result) + #define GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(_type) \ void compute_conj_dot(std::shared_ptr exec, \ const matrix::Dense<_type>* x, \ @@ -127,6 +133,11 @@ namespace kernels { const matrix::Dense<_type>* x, \ matrix::Dense>* result) +#define GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL(_type) \ + void compute_norm2_dispatch(std::shared_ptr exec, \ + const matrix::Dense<_type>* x, \ + matrix::Dense>* result) + #define GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL(_type) \ void compute_norm1(std::shared_ptr exec, \ const matrix::Dense<_type>* x, \ @@ -312,8 +323,12 @@ namespace kernels { template \ GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(ValueType); \ template \ + GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL(ValueType); \ + template \ GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL(ValueType); \ template \ + GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL(ValueType); \ + template \ GKO_DECLARE_DENSE_COMPUTE_NORM1_KERNEL(ValueType); \ template \ GKO_DECLARE_DENSE_FILL_IN_MATRIX_DATA_KERNEL(ValueType, IndexType); \ diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu index 0a40e840e84..08520cb063d 100644 --- a/cuda/matrix/dense_kernels.cu +++ b/cuda/matrix/dense_kernels.cu @@ -84,8 +84,8 @@ void compute_dot_dispatch(std::shared_ptr exec, auto handle = exec->get_cublas_handle(); cublas::pointer_mode_guard pm_guard(handle); cublas::dot(handle, x->get_size()[0], x->get_const_values(), - x->get_size()[1], y->get_const_values(), - y->get_size()[1], result->get_values()); + x->get_stride(), y->get_const_values(), y->get_stride(), + result->get_values()); } else { GKO_NOT_IMPLEMENTED; } @@ -98,6 +98,54 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL); +template +void compute_conj_dot_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) +{ + if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { + if (cublas::is_supported::value) { + auto handle = exec->get_cublas_handle(); + cublas::pointer_mode_guard pm_guard(handle); + cublas::conj_dot(handle, x->get_size()[0], x->get_const_values(), + x->get_stride(), y->get_const_values(), + y->get_stride(), result->get_values()); + } else { + GKO_NOT_IMPLEMENTED; + } + } else { + compute_conj_dot(exec, x, y, result); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL); + + +template +void compute_norm2_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense>* result) +{ + if (x->get_size()[1] == 1) { + if (cublas::is_supported::value) { + auto handle = exec->get_cublas_handle(); + cublas::pointer_mode_guard pm_guard(handle); + cublas::norm2(handle, x->get_size()[0], x->get_const_values(), + x->get_stride(), result->get_values()); + } else { + GKO_NOT_IMPLEMENTED; + } + } else { + compute_norm2(exec, x, result); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL); + + template void simple_apply(std::shared_ptr exec, const matrix::Dense* a, diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp index b7a22d1c87e..1c94bcab76a 100644 --- a/hip/matrix/dense_kernels.hip.cpp +++ b/hip/matrix/dense_kernels.hip.cpp @@ -87,8 +87,8 @@ void compute_dot_dispatch(std::shared_ptr exec, auto handle = exec->get_hipblas_handle(); hipblas::pointer_mode_guard pm_guard(handle); hipblas::dot(handle, x->get_size()[0], x->get_const_values(), - x->get_size()[1], y->get_const_values(), - y->get_size()[1], result->get_values()); + x->get_stride(), y->get_const_values(), + y->get_stride(), result->get_values()); } else { GKO_NOT_IMPLEMENTED; } @@ -101,6 +101,54 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL); +template +void compute_conj_dot_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) +{ + if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { + if (hipblas::is_supported::value) { + auto handle = exec->get_hipblas_handle(); + hipblas::pointer_mode_guard pm_guard(handle); + hipblas::conj_dot(handle, x->get_size()[0], x->get_const_values(), + x->get_stride(), y->get_const_values(), + y->get_stride(), result->get_values()); + } else { + GKO_NOT_IMPLEMENTED; + } + } else { + compute_conj_dot(exec, x, y, result); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL); + + +template +void compute_norm2_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense>* result) +{ + if (x->get_size()[1] == 1) { + if (hipblas::is_supported::value) { + auto handle = exec->get_hipblas_handle(); + hipblas::pointer_mode_guard pm_guard(handle); + hipblas::norm2(handle, x->get_size()[0], x->get_const_values(), + x->get_stride(), result->get_values()); + } else { + GKO_NOT_IMPLEMENTED; + } + } else { + compute_norm2(exec, x, result); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL); + + template void simple_apply(std::shared_ptr exec, const matrix::Dense* a, diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index 924832456f7..6a7e3fa7b01 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -82,6 +82,31 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL); +template +void compute_conj_dot_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) +{ + compute_conj_dot(exec, x, y, result); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL); + + +template +void compute_norm2_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense>* result) +{ + compute_norm2(exec, x, result); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL); + + template void simple_apply(std::shared_ptr exec, const matrix::Dense* a, diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 4cfc4246976..f0ffc237d9c 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -329,6 +329,19 @@ void compute_conj_dot(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); +template +void compute_conj_dot_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) +{ + compute_conj_dot(exec, x, y, result); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL); + + template void compute_norm2(std::shared_ptr exec, const matrix::Dense* x, @@ -350,6 +363,18 @@ void compute_norm2(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); +template +void compute_norm2_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense>* result) +{ + compute_norm2(exec, x, result); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL); + + template void compute_norm1(std::shared_ptr exec, const matrix::Dense* x, From 02ee7c60d71f8552d5ceea2060041e0fe9f1b4f2 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 11 Feb 2022 08:39:10 +0100 Subject: [PATCH 5/7] Fix dpcpp kernels and use unified everywhere else --- cuda/matrix/dense_kernels.cu | 6 +++--- dpcpp/matrix/dense_kernels.dp.cpp | 27 ++++++++++++++++++++++++++- hip/matrix/dense_kernels.hip.cpp | 6 +++--- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu index 08520cb063d..060e4faa6fd 100644 --- a/cuda/matrix/dense_kernels.cu +++ b/cuda/matrix/dense_kernels.cu @@ -87,7 +87,7 @@ void compute_dot_dispatch(std::shared_ptr exec, x->get_stride(), y->get_const_values(), y->get_stride(), result->get_values()); } else { - GKO_NOT_IMPLEMENTED; + compute_dot(exec, x, y, result); } } else { compute_dot(exec, x, y, result); @@ -112,7 +112,7 @@ void compute_conj_dot_dispatch(std::shared_ptr exec, x->get_stride(), y->get_const_values(), y->get_stride(), result->get_values()); } else { - GKO_NOT_IMPLEMENTED; + compute_conj_dot(exec, x, y, result); } } else { compute_conj_dot(exec, x, y, result); @@ -135,7 +135,7 @@ void compute_norm2_dispatch(std::shared_ptr exec, cublas::norm2(handle, x->get_size()[0], x->get_const_values(), x->get_stride(), result->get_values()); } else { - GKO_NOT_IMPLEMENTED; + compute_norm2(exec, x, result); } } else { compute_norm2(exec, x, result); diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 705155b0ea8..05d0ecb1b51 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -195,7 +195,7 @@ void compute_dot_dispatch(std::shared_ptr exec, const matrix::Dense* y, matrix::Dense* result) { - // TODO Add onemkl for single column + // TODO Add onemkl for single column ? compute_dot(exec, x, y, result); } @@ -203,6 +203,31 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( GKO_DECLARE_DENSE_COMPUTE_DOT_DISPATCH_KERNEL); +template +void compute_conj_dot_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) +{ + compute_conj_dot(exec, x, y, result); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_DISPATCH_KERNEL); + + +template +void compute_norm2_dispatch(std::shared_ptr exec, + const matrix::Dense* x, + matrix::Dense>* result) +{ + compute_norm2(exec, x, result); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_DENSE_COMPUTE_NORM2_DISPATCH_KERNEL); + + template void simple_apply(std::shared_ptr exec, const matrix::Dense* a, diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp index 1c94bcab76a..53fd9e4d284 100644 --- a/hip/matrix/dense_kernels.hip.cpp +++ b/hip/matrix/dense_kernels.hip.cpp @@ -90,7 +90,7 @@ void compute_dot_dispatch(std::shared_ptr exec, x->get_stride(), y->get_const_values(), y->get_stride(), result->get_values()); } else { - GKO_NOT_IMPLEMENTED; + compute_dot(exec, x, y, result); } } else { compute_dot(exec, x, y, result); @@ -115,7 +115,7 @@ void compute_conj_dot_dispatch(std::shared_ptr exec, x->get_stride(), y->get_const_values(), y->get_stride(), result->get_values()); } else { - GKO_NOT_IMPLEMENTED; + compute_conj_dot(exec, x, y, result); } } else { compute_conj_dot(exec, x, y, result); @@ -138,7 +138,7 @@ void compute_norm2_dispatch(std::shared_ptr exec, hipblas::norm2(handle, x->get_size()[0], x->get_const_values(), x->get_stride(), result->get_values()); } else { - GKO_NOT_IMPLEMENTED; + compute_norm2(exec, x, result); } } else { compute_norm2(exec, x, result); From 3287119b05934c0d075acdd73909280df1a82c15 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 11 Feb 2022 14:15:47 +0100 Subject: [PATCH 6/7] Fix issues with PM guards --- cuda/matrix/dense_kernels.cu | 3 --- hip/matrix/dense_kernels.hip.cpp | 3 --- 2 files changed, 6 deletions(-) diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu index 060e4faa6fd..0d39cbb9fbc 100644 --- a/cuda/matrix/dense_kernels.cu +++ b/cuda/matrix/dense_kernels.cu @@ -82,7 +82,6 @@ void compute_dot_dispatch(std::shared_ptr exec, if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { if (cublas::is_supported::value) { auto handle = exec->get_cublas_handle(); - cublas::pointer_mode_guard pm_guard(handle); cublas::dot(handle, x->get_size()[0], x->get_const_values(), x->get_stride(), y->get_const_values(), y->get_stride(), result->get_values()); @@ -107,7 +106,6 @@ void compute_conj_dot_dispatch(std::shared_ptr exec, if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { if (cublas::is_supported::value) { auto handle = exec->get_cublas_handle(); - cublas::pointer_mode_guard pm_guard(handle); cublas::conj_dot(handle, x->get_size()[0], x->get_const_values(), x->get_stride(), y->get_const_values(), y->get_stride(), result->get_values()); @@ -131,7 +129,6 @@ void compute_norm2_dispatch(std::shared_ptr exec, if (x->get_size()[1] == 1) { if (cublas::is_supported::value) { auto handle = exec->get_cublas_handle(); - cublas::pointer_mode_guard pm_guard(handle); cublas::norm2(handle, x->get_size()[0], x->get_const_values(), x->get_stride(), result->get_values()); } else { diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp index 53fd9e4d284..d883f415770 100644 --- a/hip/matrix/dense_kernels.hip.cpp +++ b/hip/matrix/dense_kernels.hip.cpp @@ -85,7 +85,6 @@ void compute_dot_dispatch(std::shared_ptr exec, if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { if (hipblas::is_supported::value) { auto handle = exec->get_hipblas_handle(); - hipblas::pointer_mode_guard pm_guard(handle); hipblas::dot(handle, x->get_size()[0], x->get_const_values(), x->get_stride(), y->get_const_values(), y->get_stride(), result->get_values()); @@ -110,7 +109,6 @@ void compute_conj_dot_dispatch(std::shared_ptr exec, if (x->get_size()[1] == 1 && y->get_size()[1] == 1) { if (hipblas::is_supported::value) { auto handle = exec->get_hipblas_handle(); - hipblas::pointer_mode_guard pm_guard(handle); hipblas::conj_dot(handle, x->get_size()[0], x->get_const_values(), x->get_stride(), y->get_const_values(), y->get_stride(), result->get_values()); @@ -134,7 +132,6 @@ void compute_norm2_dispatch(std::shared_ptr exec, if (x->get_size()[1] == 1) { if (hipblas::is_supported::value) { auto handle = exec->get_hipblas_handle(); - hipblas::pointer_mode_guard pm_guard(handle); hipblas::norm2(handle, x->get_size()[0], x->get_const_values(), x->get_stride(), result->get_values()); } else { From 3ea6e0cb31f4fdb4c578740006b0896be7717374 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 11 Feb 2022 17:27:16 +0100 Subject: [PATCH 7/7] Add single vector nrm2 tests --- cuda/test/matrix/dense_kernels.cpp | 16 +++++++++++++++- dpcpp/matrix/dense_kernels.dp.cpp | 2 ++ dpcpp/test/matrix/dense_kernels.cpp | 16 +++++++++++++++- hip/test/matrix/dense_kernels.hip.cpp | 16 +++++++++++++++- omp/test/matrix/dense_kernels.cpp | 16 +++++++++++++++- 5 files changed, 62 insertions(+), 4 deletions(-) diff --git a/cuda/test/matrix/dense_kernels.cpp b/cuda/test/matrix/dense_kernels.cpp index a45274cdb9b..7d292a8b5fa 100644 --- a/cuda/test/matrix/dense_kernels.cpp +++ b/cuda/test/matrix/dense_kernels.cpp @@ -226,7 +226,21 @@ TEST_F(Dense, MultipleVectorCudaComputeConjDotIsEquivalentToRef) } -TEST_F(Dense, CudaComputeNorm2IsEquivalentToRef) +TEST_F(Dense, SingleVectorCudaComputeNorm2IsEquivalentToRef) +{ + set_up_vector_data(1); + auto norm_size = gko::dim<2>{1, x->get_size()[1]}; + auto norm_expected = NormVector::create(this->ref, norm_size); + auto dnorm = NormVector::create(this->cuda, norm_size); + + x->compute_norm2(norm_expected.get()); + dx->compute_norm2(dnorm.get()); + + GKO_ASSERT_MTX_NEAR(norm_expected, dnorm, 1e-14); +} + + +TEST_F(Dense, MultipleVectorCudaComputeNorm2IsEquivalentToRef) { set_up_vector_data(20); auto norm_size = gko::dim<2>{1, x->get_size()[1]}; diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 05d0ecb1b51..2a7120ffa0b 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -209,6 +209,7 @@ void compute_conj_dot_dispatch(std::shared_ptr exec, const matrix::Dense* y, matrix::Dense* result) { + // TODO Add onemkl for single column ? compute_conj_dot(exec, x, y, result); } @@ -221,6 +222,7 @@ void compute_norm2_dispatch(std::shared_ptr exec, const matrix::Dense* x, matrix::Dense>* result) { + // TODO Add onemkl for single column ? compute_norm2(exec, x, result); } diff --git a/dpcpp/test/matrix/dense_kernels.cpp b/dpcpp/test/matrix/dense_kernels.cpp index a426c88116f..56ef8b5532e 100644 --- a/dpcpp/test/matrix/dense_kernels.cpp +++ b/dpcpp/test/matrix/dense_kernels.cpp @@ -230,7 +230,21 @@ TEST_F(Dense, MultipleVectorDpcppComputeConjDotIsEquivalentToRef) } -TEST_F(Dense, DpcppComputeNorm2IsEquivalentToRef) +TEST_F(Dense, SingleVectorDpcppComputeNorm2IsEquivalentToRef) +{ + set_up_vector_data(1); + auto norm_size = gko::dim<2>{1, x->get_size()[1]}; + auto norm_expected = NormVector::create(this->ref, norm_size); + auto dnorm = NormVector::create(this->dpcpp, norm_size); + + x->compute_norm2(norm_expected.get()); + dx->compute_norm2(dnorm.get()); + + GKO_ASSERT_MTX_NEAR(norm_expected, dnorm, r::value); +} + + +TEST_F(Dense, MultipleVectorDpcppComputeNorm2IsEquivalentToRef) { set_up_vector_data(20); auto norm_size = gko::dim<2>{1, x->get_size()[1]}; diff --git a/hip/test/matrix/dense_kernels.hip.cpp b/hip/test/matrix/dense_kernels.hip.cpp index 894b5fcc408..a6ae2df195b 100644 --- a/hip/test/matrix/dense_kernels.hip.cpp +++ b/hip/test/matrix/dense_kernels.hip.cpp @@ -222,7 +222,21 @@ TEST_F(Dense, MultipleVectorHipComputeConjDotIsEquivalentToRef) } -TEST_F(Dense, HipComputeNorm2IsEquivalentToRef) +TEST_F(Dense, SingleHipComputeNorm2IsEquivalentToRef) +{ + set_up_vector_data(1); + auto norm_size = gko::dim<2>{1, x->get_size()[1]}; + auto norm_expected = NormVector::create(this->ref, norm_size); + auto dnorm = NormVector::create(this->hip, norm_size); + + x->compute_norm2(norm_expected.get()); + dx->compute_norm2(dnorm.get()); + + GKO_ASSERT_MTX_NEAR(norm_expected, dnorm, 1e-14); +} + + +TEST_F(Dense, MultipleHipComputeNorm2IsEquivalentToRef) { set_up_vector_data(20); auto norm_size = gko::dim<2>{1, x->get_size()[1]}; diff --git a/omp/test/matrix/dense_kernels.cpp b/omp/test/matrix/dense_kernels.cpp index 9d8db3975b9..7b8c4ac365b 100644 --- a/omp/test/matrix/dense_kernels.cpp +++ b/omp/test/matrix/dense_kernels.cpp @@ -239,7 +239,21 @@ TEST_F(Dense, MultipleVectorOmpComputeConjDotIsEquivalentToRef) } -TEST_F(Dense, ComputesNorm2IsEquivalentToRef) +TEST_F(Dense, SingleVectorComputesNorm2IsEquivalentToRef) +{ + set_up_vector_data(1); + auto norm_size = gko::dim<2>{1, x->get_size()[1]}; + auto norm_expected = NormVector::create(this->ref, norm_size); + auto dnorm = NormVector::create(this->omp, norm_size); + + x->compute_norm2(norm_expected.get()); + dx->compute_norm2(dnorm.get()); + + GKO_ASSERT_MTX_NEAR(norm_expected, dnorm, 1e-14); +} + + +TEST_F(Dense, MultipleVectorComputesNorm2IsEquivalentToRef) { set_up_vector_data(20); auto norm_size = gko::dim<2>{1, x->get_size()[1]};