diff --git a/tests/kokkos-based/triangular_matrix_left_product_kokkos.cpp b/tests/kokkos-based/triangular_matrix_left_product_kokkos.cpp new file mode 100644 index 00000000..5e264b70 --- /dev/null +++ b/tests/kokkos-based/triangular_matrix_left_product_kokkos.cpp @@ -0,0 +1,113 @@ + /* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include "gtest_fixtures.hpp" +#include "helpers.hpp" + +namespace{ + +using namespace kokkostesting; + +template +void updating_triangular_matrix_left_product_gold_solution(A_t A, + Triangle /* t */, Diagonal /* d */, B_t B, E_t E, C_t C) +{ + using size_type = typename extents<>::size_type; + using c_element_type = typename C_t::value_type; + constexpr bool explicit_diag = std::is_same_v; + constexpr bool lower = std::is_same_v; + + for (size_type i = 0; i < C.extent(0); ++i) { + for (size_type j = 0; j < C.extent(1); ++j) { + C(i, j) = E(i, j); + const size_type k0 = lower ? (explicit_diag ? i : i + 1) : 0; + const size_type k1 = lower ? C.extent(0) :(explicit_diag ? i + 1 : i); + for (size_type k = k0; k < k1; ++k) { + C(i, j) += A(i, k) * B(k, j); + } + if constexpr (!explicit_diag) { + C(i, j) += /* 1 times */ B(i, j); + } + } + } +} + +template +void test_overwriting_triangular_matrix_left_product_impl(A_t A, B_t B, C_t C, Triangle t, Diagonal d) +{ + const auto get_gold = [&](auto C_gold) { + set(C_gold, 0); + updating_triangular_matrix_left_product_gold_solution(A, t, d, B, C_gold, C_gold); + }; + const auto compute = [&]() { + std::experimental::linalg::triangular_matrix_left_product( + KokkosKernelsSTD::kokkos_exec<>(), A, t, d, B, C); + }; + const auto tol = tolerance(1e-20, 1e-10f); + test_op_CAB(A, B, C, tol, get_gold, compute); +} + +} // anonymous namespace + +#define DEFINE_TESTS(blas_val_type) \ +TEST_F(blas3_signed_##blas_val_type##_fixture, \ + kokkos_triangular_matrix_left_product) { \ + using val_t = typename blas3_signed_##blas_val_type##_fixture::value_type; \ + run_checked_tests("kokkos_", "triangular_matrix_left_product", "", \ + #blas_val_type, [&]() { \ + \ + test_overwriting_triangular_matrix_left_product_impl(A_sym_e0, B_e0e2, C_e0e2, \ + std::experimental::linalg::lower_triangle, \ + std::experimental::linalg::implicit_unit_diagonal); \ + test_overwriting_triangular_matrix_left_product_impl(A_sym_e0, B_e0e2, C_e0e2, \ + std::experimental::linalg::upper_triangle, \ + std::experimental::linalg::explicit_diagonal); \ + \ + }); \ +} + +DEFINE_TESTS(double) +DEFINE_TESTS(float) +DEFINE_TESTS(complex_double) \ No newline at end of file diff --git a/tests/kokkos-based/triangular_matrix_right_product_kokkos.cpp b/tests/kokkos-based/triangular_matrix_right_product_kokkos.cpp new file mode 100644 index 00000000..6a324363 --- /dev/null +++ b/tests/kokkos-based/triangular_matrix_right_product_kokkos.cpp @@ -0,0 +1,117 @@ + /* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include "gtest_fixtures.hpp" +#include "helpers.hpp" + +namespace{ + +using namespace kokkostesting; + +template +void updating_triangular_matrix_right_product_gold_solution(A_t A, + Triangle /* t */, Diagonal /* d */, B_t B, E_t E, C_t C) +{ + using size_type = typename extents<>::size_type; + using c_element_type = typename C_t::value_type; + constexpr bool explicit_diag = std::is_same_v; + constexpr bool lower = std::is_same_v; + + for (size_type i = 0; i < C.extent(0); ++i) { + for (size_type j = 0; j < C.extent(1); ++j) { + C(i, j) = E(i, j); + // Note: lower triangle of A(k, j) means k <= j + const auto k0 = lower ? 0 : (explicit_diag ? j : j + 1); + const auto k1 = lower ? (explicit_diag ? j + 1 : j) : C.extent(1); + for (size_type k = k0; k < k1; ++k) { + C(i, j) += B(i, k) * A(k, j); + } + if constexpr (!explicit_diag) { + C(i, j) += B(i, j) /* times 1 */; + } + } + } +} + +template +void test_overwriting_triangular_matrix_right_product_impl(A_t A, B_t B, C_t C, Triangle t, Diagonal d) +{ + const auto get_gold = [&](auto C_gold) { + set(C_gold, 0); + updating_triangular_matrix_right_product_gold_solution(A, t, d, B, C_gold, C_gold); + }; + const auto compute = [&]() { + std::experimental::linalg::triangular_matrix_right_product( + KokkosKernelsSTD::kokkos_exec<>(), A, t, d, B, C); + }; + const auto tol = tolerance(1e-20, 1e-10f); + test_op_CAB(A, B, C, tol, get_gold, compute); +} + +} // anonymous namespace + +#define DEFINE_TESTS(blas_val_type) \ +TEST_F(blas3_signed_##blas_val_type##_fixture, \ + kokkos_triangular_matrix_right_product) { \ + using val_t = typename blas3_signed_##blas_val_type##_fixture::value_type; \ + run_checked_tests("kokkos_", "triangular_matrix_right_product", "", \ + #blas_val_type, [&]() { \ + /* copy to prevent fixture modification (TODO: add to fixture ?) */ \ + auto B_data = create_stdvector_and_copy_rowwise(C_e2e0); \ + auto B_e2e0 = make_mdspan(B_data.data(), C_e2e0.extent(0), C_e2e0.extent(1)); \ + \ + test_overwriting_triangular_matrix_right_product_impl(A_sym_e0, B_e2e0, C_e2e0, \ + std::experimental::linalg::lower_triangle, \ + std::experimental::linalg::implicit_unit_diagonal); \ + test_overwriting_triangular_matrix_right_product_impl(A_sym_e0, B_e2e0, C_e2e0, \ + std::experimental::linalg::upper_triangle, \ + std::experimental::linalg::explicit_diagonal); \ + \ + }); \ +} + +DEFINE_TESTS(double) +DEFINE_TESTS(float) +DEFINE_TESTS(complex_double) \ No newline at end of file diff --git a/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas3_matrix_product_kk.hpp b/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas3_matrix_product_kk.hpp index 8f38141a..a7586405 100644 --- a/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas3_matrix_product_kk.hpp +++ b/tpl-implementations/include/experimental/__p1673_bits/kokkos-kernels/blas3_matrix_product_kk.hpp @@ -43,6 +43,8 @@ #ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_PRODUCT_HPP_ #define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_KOKKOSKERNELS_MATRIX_PRODUCT_HPP_ + #include + #include "signal_kokkos_impl_called.hpp" #include "static_extent_match.hpp" #include "triangle.hpp" @@ -169,19 +171,17 @@ template void product(ExecSpace &&exec, AType A, BType B, CType C, InitFunc init, UpdateFunc update) { + using size_type = typename std::experimental::extents<>::size_type; const auto A_view = Impl::mdspan_to_view(A); const auto B_view = Impl::mdspan_to_view(B); auto C_view = Impl::mdspan_to_view(C); - - using c_element_type = typename decltype(C_view)::non_const_value_type; - using size_type = typename std::experimental::extents<>::size_type; + const size_type ext1 = A_view.extent(1); KokkosKernelsSTD::Impl::ParallelMatrixVisitor( std::move(exec), C_view).for_each_matrix_element( KOKKOS_LAMBDA(const auto i, const auto j) { decltype(auto) cij = C_view(i, j); init(cij, i, j); - const size_type ext1 = A_view.extent(1); for (size_type k = 0; k < ext1; ++k) { update(i, j, k, cij, A_view, B_view); } @@ -210,11 +210,12 @@ template ::size_type numRows_C, std::experimental::extents<>::size_type numCols_C, class Layout_C, - class UpdateFunc> -void product(ExecSpace &&exec, AType A, BType B, EType E, - std::experimental::mdspan, Layout_C, - std::experimental::default_accessor> C, // disambiguate with init() - UpdateFunc update) + class UpdateFunc, + // make CType specific to disambiguate with InitFunc + class CType = std::experimental::mdspan, Layout_C, + std::experimental::default_accessor>> +void product(ExecSpace &&exec, AType A, BType B, EType E, CType C, UpdateFunc update) { const auto E_view = Impl::mdspan_to_view(E); product(std::move(exec), A, B, C, @@ -223,6 +224,95 @@ void product(ExecSpace &&exec, AType A, BType B, EType E, }, update); } +template +void trmm_kk(Side, Triangle t, DiagonalStorage d, + AViewType A_view, CViewType C_view) +{ + const auto side = std::is_same_v ? "L" : "R"; + // KK and stdBLAS use REVERSED triangle definitions + const auto triangle = std::is_same_v ? "U" : "L"; + + const auto notranspose = "N"; + const auto diagonal = "N"; // implicit unit diagonal doesn't work in KK + using c_element_type = typename CViewType::non_const_value_type; + const auto alpha = static_cast(1.0); + + KokkosBlas::trmm(side, triangle, notranspose, diagonal, alpha, A_view, C_view); +} + +template +void trmm_left(KokkosExecSpace &&exec, Triangle t, DiagonalStorage d, + AViewType A_view, BViewType B_view, CViewType C_view) +{ + using size_type = typename std::experimental::extents<>::size_type; + using c_element_type = typename CViewType::non_const_value_type; + constexpr bool lower = std::is_same_v; + constexpr bool explicit_diag = std::is_same_v; + const auto C_ext0 = C_view.extent(0); + + KokkosKernelsSTD::Impl::ParallelMatrixVisitor v(std::move(exec), C_view); + v.for_each_matrix_element( + KOKKOS_LAMBDA(const auto i, const auto j) { + decltype(auto) cij = C_view(i, j); + cij = c_element_type{}; + const size_type k0 = lower ? (explicit_diag ? i : i + 1) : 0; + const size_type k1 = lower ? C_ext0 :(explicit_diag ? i + 1 : i); + for (size_type k = k0; k < k1; ++k) { + cij += A_view(i, k) * B_view(k, j); + } + if constexpr (!explicit_diag) { + C_view(i, j) += /* 1 times */ B_view(i, j); + } + }); +} + +template +void trmm_right(KokkosExecSpace &&exec, Triangle t, DiagonalStorage d, + AViewType A_view, BViewType B_view, CViewType C_view) +{ + using size_type = typename std::experimental::extents<>::size_type; + using c_element_type = typename CViewType::non_const_value_type; + constexpr bool lower = std::is_same_v; + constexpr bool explicit_diag = std::is_same_v; + const auto C_ext1 = C_view.extent(1); + + KokkosKernelsSTD::Impl::ParallelMatrixVisitor v(std::move(exec), C_view); + v.for_each_matrix_element( + KOKKOS_LAMBDA(const auto i, const auto j) { + decltype(auto) cij = C_view(i, j); + cij = c_element_type{}; + // Note: lower triangle of A(k, j) means k <= j + const auto k0 = lower ? 0 : (explicit_diag ? j : j + 1); + const auto k1 = lower ? (explicit_diag ? j + 1 : j) : C_ext1; + for (size_type k = k0; k < k1; ++k) { + cij += B_view(i, k) * A_view(k, j); + } + if constexpr (!explicit_diag) { + C_view(i, j) += B_view(i, j) /* times 1 */; + } + }); +} + } // namespace matproduct_impl // Overwriting symmetric matrix-matrix left product @@ -257,7 +347,7 @@ void symmetric_matrix_left_product( std::experimental::default_accessor> C) { matproduct_impl::check_left_product(A, t, B, C, "symmetric_matrix_left_product"); - + Impl::signal_kokkos_impl_called("overwriting_symmetric_matrix_left_product"); constexpr bool lower = std::is_same_v; @@ -356,7 +446,7 @@ void symmetric_matrix_right_product( std::experimental::default_accessor> C) { matproduct_impl::check_right_product(A, t, B, C, "symmetric_matrix_right_product"); - + Impl::signal_kokkos_impl_called("overwriting_symmetric_matrix_right_product"); constexpr bool lower = std::is_same_v; @@ -625,5 +715,111 @@ void hermitian_matrix_right_product( }); } +// Overwriting triangular matrix-matrix left product +// performs BLAS xTRMM: C = A x B + +MDSPAN_TEMPLATE_REQUIRES(class ExecSpace, + class ElementType_A, + std::experimental::extents<>::size_type numRows_A, + std::experimental::extents<>::size_type numCols_A, + class Layout_A, + class Triangle, + class DiagonalStorage, + class ElementType_B, + std::experimental::extents<>::size_type numRows_B, + std::experimental::extents<>::size_type numCols_B, + class Layout_B, + class ElementType_C, + std::experimental::extents<>::size_type numRows_C, + std::experimental::extents<>::size_type numCols_C, + class Layout_C, + /* requires */ (Impl::is_unique_layout_v + and Impl::is_unique_layout_v + and (Impl::is_unique_layout_v + or Impl::is_layout_blas_packed_v))) +void triangular_matrix_left_product( + kokkos_exec&& exec, + std::experimental::mdspan, Layout_A, + std::experimental::default_accessor> A, + Triangle t, + DiagonalStorage d, + std::experimental::mdspan, Layout_B, + std::experimental::default_accessor> B, + std::experimental::mdspan, Layout_C, + std::experimental::default_accessor> C) +{ + matproduct_impl::check_left_product(A, t, B, C, "triangular_matrix_left_product"); + + Impl::signal_kokkos_impl_called("updating_triangular_matrix_left_product_kokkos"); + + // convert mdspans to views + const auto A_view = Impl::mdspan_to_view(A); + const auto B_view = Impl::mdspan_to_view(B); + auto C_view = Impl::mdspan_to_view(C); + + // implicit diagonal is not supported by KK implementation of TRSM + // and the execution space is ignored + if constexpr (std::is_same_v) { + matproduct_impl::trmm_left(ExecSpace(), t, d, A_view, B_view, C_view); + } else { + Kokkos::deep_copy(C_view, B_view); + matproduct_impl::trmm_kk(std::experimental::linalg::left_side, t, d, A_view, C_view); + } +} + +// Overwriting triangular matrix-matrix right product +// performs BLAS xTRMM: C = B x A + +MDSPAN_TEMPLATE_REQUIRES(class ExecSpace, + class ElementType_A, + std::experimental::extents<>::size_type numRows_A, + std::experimental::extents<>::size_type numCols_A, + class Layout_A, + class Triangle, + class DiagonalStorage, + class ElementType_B, + std::experimental::extents<>::size_type numRows_B, + std::experimental::extents<>::size_type numCols_B, + class Layout_B, + class ElementType_C, + std::experimental::extents<>::size_type numRows_C, + std::experimental::extents<>::size_type numCols_C, + class Layout_C, + /* requires */ (Impl::is_unique_layout_v + and Impl::is_unique_layout_v + and (Impl::is_unique_layout_v + or Impl::is_layout_blas_packed_v))) +void triangular_matrix_right_product( + kokkos_exec&& exec, + std::experimental::mdspan, Layout_A, + std::experimental::default_accessor> A, + Triangle t, + DiagonalStorage d, + std::experimental::mdspan, Layout_B, + std::experimental::default_accessor> B, + std::experimental::mdspan, Layout_C, + std::experimental::default_accessor> C) +{ + matproduct_impl::check_right_product(A, t, B, C, "triangular_matrix_right_product"); + + Impl::signal_kokkos_impl_called("updating_triangular_matrix_right_product_kokkos"); + + // convert mdspans to views + const auto A_view = Impl::mdspan_to_view(A); + const auto B_view = Impl::mdspan_to_view(B); + auto C_view = Impl::mdspan_to_view(C); + + // implicit diagonal is not supported by KK implementation of TRSM + // and the execution space is ignored + if constexpr (std::is_same_v) { + matproduct_impl::trmm_right(ExecSpace(), t, d, A_view, B_view, C_view); + } else { + Kokkos::deep_copy(C_view, B_view); + matproduct_impl::trmm_kk(std::experimental::linalg::right_side, t, d, A_view, C_view); + } +} + } // namespace KokkosKernelsSTD #endif