From 97099de9aeea69fffc317b2ee9e0d97eb30734db Mon Sep 17 00:00:00 2001 From: Baptiste Legouix Date: Thu, 4 Jul 2024 09:03:39 +0200 Subject: [PATCH] SplineBuilder minor optimization (#517) - spline_tr was allocating too many rows (they were unused). - Rely on ddc::parallel_fill and Kokkos::deep_copy to fill splines with vals --- .../ddc/kernels/splines/spline_builder.hpp | 42 +++++++++++-------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/include/ddc/kernels/splines/spline_builder.hpp b/include/ddc/kernels/splines/spline_builder.hpp index 32344b783..bb9062672 100644 --- a/include/ddc/kernels/splines/spline_builder.hpp +++ b/include/ddc/kernels/splines/spline_builder.hpp @@ -330,7 +330,11 @@ class SplineBuilder */ batched_spline_tr_domain_type batched_spline_tr_domain() const noexcept { - return batched_spline_tr_domain_type(spline_domain(), batch_domain()); + return batched_spline_tr_domain_type( + batched_spline_domain().restrict(ddc::DiscreteDomain( + ddc::DiscreteElement(0), + ddc::DiscreteVector( + ddc::discrete_space().nbasis())))); } public: @@ -805,28 +809,30 @@ operator()( }); } - // TODO : Consider optimizing // Fill spline with vals (to work in spline afterward and preserve vals) - auto const& offset_proxy = m_offset; - auto const& interp_size_proxy = interpolation_domain().extents(); - auto const& nbasis_proxy = ddc::discrete_space().nbasis(); - ddc::parallel_for_each( - "ddc_splines_fill_rhs", + ddc::parallel_fill( exec_space(), - batch_domain(), - KOKKOS_LAMBDA(typename batch_domain_type::discrete_element_type j) { - for (int i = s_nbc_xmin; i < s_nbc_xmin + offset_proxy; ++i) { - spline(ddc::DiscreteElement(i), j) = 0.0; - } - for (int i = 0; i < interp_size_proxy; ++i) { - spline(ddc::DiscreteElement(s_nbc_xmin + i + offset_proxy), j) - = vals(ddc::DiscreteElement(i), j); - } - }); + spline[ddc::DiscreteDomain( + ddc::DiscreteElement(s_nbc_xmin), + ddc::DiscreteVector(m_offset))], + 0.); + // NOTE: We rely on Kokkos::deep_copy 
because ddc::parallel_deepcopy does not support + // different domain-typed Chunks. + Kokkos::deep_copy( + exec_space(), + spline[ddc::DiscreteDomain( + ddc::DiscreteElement(s_nbc_xmin + m_offset), + ddc::DiscreteVector(static_cast( + vals.domain().template extent())))] + .allocation_kokkos_view(), + vals.allocation_kokkos_view()); + + // Hermite boundary conditions at xmax, if any // NOTE: For consistency with the linear system, the i-th derivative // provided by the user must be multiplied by dx^i + auto const& nbasis_proxy = ddc::discrete_space().nbasis(); if constexpr (BcXmax == BoundCond::HERMITE) { assert(derivs_xmax->template extent() == s_nbc_xmax); auto derivs_xmax_values = *derivs_xmax; @@ -845,8 +851,8 @@ operator()( }); } - // TODO : Consider optimizing // Allocate and fill a transposed version of spline in order to get dimension of interest as last dimension (optimal for GPU, necessary for Ginkgo). Also select only relevant rows in case of periodic boundaries + auto const& offset_proxy = m_offset; ddc::Chunk spline_tr_alloc( batched_spline_tr_domain(), ddc::KokkosAllocator());