diff --git a/include/micm/solver/lu_decomposition_mozart.hpp b/include/micm/solver/lu_decomposition_mozart.hpp index 6e3127b8f..7e85ec012 100644 --- a/include/micm/solver/lu_decomposition_mozart.hpp +++ b/include/micm/solver/lu_decomposition_mozart.hpp @@ -25,21 +25,16 @@ namespace micm /// /// The pseudo-code in C++ for the corresponding dense matrix algorithm for matrix A /// and separate lower (upper) triangular matrix L(U) would be: - /// - /// L[0][0] = 1.0 // Set the diagonal of the L matrix to 1 - /// U[0][0] = A[0][0] // Set the U values for the first column - /// for i = 1...n-1 - /// L[i][0] = A[i][0] / U[0][0] // Set the L values for the first column - /// U[0][i] = A[0][i] // Set the U values for the first row - /// for k = 1...n-1 - /// for j = 1...k - /// U[j][k] = A[j][k] - L[j][0] * U[0][k] - /// for j = k+1...n-1 - /// L[j][k] = A[j][k] - L[j][0] * U[0][k] - /// for i = 1...n-1 - /// L[i][i] = 1.0 // Set the diagonal of the L matrix to 1 - /// for j = i+1...n-1 - /// L[j][i] = L[j][i] / U[i][i] // Multiply column below diagonal + /// + /// for i = 0...n-1 // Initialize U and L matrices to the A values + /// for j = i...n-1 // Initialize U matrix including diagonal + /// U[i][j] = A[i][j] + /// L[i][i] = 1 // Lower triangular matrix is 1 along the diagonal + /// for j = 0...i-1 // Initialize L matrix excluding diagonal + /// L[i][j] = A[i][j] + /// for i = 0...n-1 + /// for j = i+1...n-1 // Multiply column below diagonal + /// L[j][i] = L[j][i] / U[i][i] /// for k = i+1...n-1 // Modify sub-matrix /// for j = i+1...k /// U[j][k] = U[j][k] - L[j][i] * U[i][k] @@ -59,35 +54,24 @@ namespace micm class LuDecompositionMozart { protected: - /// Index in L.data_, U.data_, and A.data_ for L[0][0], U[0][0], and A[0][0] - std::tuple l00_u00_a00_; - /// Index in L.data_ and A.data_ for L[i][0] and A[i][0] (when non-zero) for each iteration - /// of the (i) loop for the lower triangular matrix. Also, includes a flag to indicate - /// if the value is zero in A - std::vector> li0_ai0_doFill_; - /// Index in U.data_ and A.data_ for U[0][i] and A[0][i] (when non-zero) for each iteration - /// of the (i) loop for the upper triangular matrix. Also, includes a flag to indicate if the - /// value is zero in A - std::vector> u0i_a0i_doFill_; - /// Number of elements in the inner (j) loop for each iteration of the outer (i) loop for the - /// upper and lower triangular matrices, and the index in U.data_ for U[0][k]. Also, includes - /// a flag to indicate if the value is zero in U[0][k] - std::vector> nujk_nljk_u0k_doFill_; - /// Index in U.data_ and A.data_ for U[j][k], A[j][k] (when non-zero), and L[j][0] - /// (when non-zero) for each iteration of the inner (j) loop for the upper triangular matrix. - /// Also, includes flags to indicate if the value is zero in A, and if the value is zero for - /// L[j][0] * U[0][k] - std::vector> ujk_ajk_lj0_doFill_skipLU_; - /// Index in L.data_ and A.data_ for L[j][k], A[j][k] (when non-zero), L[j][0] - /// (when non-zero) for each iteration of the inner (j) loop for the lower triangular matrix. - /// Also, includes flags to indicate if the value is zero in A, and if the value is zero for - /// L[j][0] * U[0][k] - std::vector> ljk_ajk_lj0_doFill_skipLU_; - /// Index in L.data_ and U.data_ for L[i][i] and U[i][i] and the number of elements in the - /// middle (j) and (k) loops for each iteration of the outer (i) loop - std::vector> lii_uii_nj_nk_; - /// Index in L.data_ for L[j][i] for each iteration of the inner (j) loop for the lower - /// triangular matrix + /// Index in L.data_ for all diagonal elements, and number of iterations of the middle (j) loops + /// used to set the initial value for the L and U matrices + std::vector> lii_nuij_nlij_; + /// Index in U.data_ and A.data_ for U[i][j] and A[i][j] for each iteration of the inner (j) loop + /// used to set the initial value for the U matrix + std::vector> uij_aij_; + /// Index in L.data_ and A.data_ for L[i][j] and A[i][j] for each iteration of the inner (j) loop + /// used to set the initial value for the L matrix + std::vector> lij_aij_; + /// Index in U.data_ for each non-zero element in U that is zero in A + std::vector fill_uij_; + /// Index in L.data_ for each non-zero element in L that is zero in A + std::vector fill_lij_; + /// Index in U.data_ for U[i][i] and the number of elements in the middle (j) and (k) loops + /// for each iteration of the outer (i) loop + std::vector> uii_nj_nk_; + /// Index in L.data_ for L[j][i] for each iteration of the middle (j) loop + /// for the lower triangular matrix std::vector lji_; /// Number of elements in the inner (j) loops for each iteration of the middle (k) loop for the /// upper and lower triangular matrices, and the index in U.data_ for U[j][k] for each iteration diff --git a/include/micm/solver/lu_decomposition_mozart.inl b/include/micm/solver/lu_decomposition_mozart.inl index ca98ef597..60ebbb9c6 100644 --- a/include/micm/solver/lu_decomposition_mozart.inl +++ b/include/micm/solver/lu_decomposition_mozart.inl @@ -36,101 +36,50 @@ namespace micm const auto& L_row_ids = LU.first.RowIdsVector(); const auto& U_row_start = LU.second.RowStartVector(); const auto& U_row_ids = LU.second.RowIdsVector(); - l00_u00_a00_ = std::tuple(LU.first.VectorIndex(0, 0, 0), - LU.second.VectorIndex(0, 0, 0), - matrix.VectorIndex(0, 0, 0)); - for (std::size_t i = 1; i < n; ++i) - { - if (LU.first.IsZero(i, 0)) - continue; - std::tuple li0_ai0_doFill(0, 0, true); - std::get<0>(li0_ai0_doFill) = LU.first.VectorIndex(0, i, 0); - if (!matrix.IsZero(i, 0)) - { - std::get<1>(li0_ai0_doFill) = matrix.VectorIndex(0, i, 0); - std::get<2>(li0_ai0_doFill) = false; - } - li0_ai0_doFill_.push_back(li0_ai0_doFill); - } - for (std::size_t i = 1; i < n; ++i) - { - if (LU.second.IsZero(0, i)) - continue; - std::tuple u0i_a0i_doFill(0, 0, true); - std::get<0>(u0i_a0i_doFill) = LU.second.VectorIndex(0, 0, i); - if (!matrix.IsZero(0, i)) - { - std::get<1>(u0i_a0i_doFill) = matrix.VectorIndex(0, 0, i); - std::get<2>(u0i_a0i_doFill) = false; - } - u0i_a0i_doFill_.push_back(u0i_a0i_doFill); - } - for (std::size_t k = 1; k < n; ++k) + for (std::size_t i = 0; i < n; ++i) { - std::tuple nujk_nljk_u0k_doFill(0, 0, 0, true); - bool has_u0k = !LU.second.IsZero(0, k); - if (has_u0k) - { - std::get<2>(nujk_nljk_u0k_doFill) = LU.second.VectorIndex(0, 0, k); - std::get<3>(nujk_nljk_u0k_doFill) = false; - } - for (std::size_t j = 1; j <= k; ++j) + std::tuple lii_nuij_nlij(0, 0, 0); + std::get<0>(lii_nuij_nlij) = LU.first.VectorIndex(0, i, i); + // set initial values for U matrix + for (std::size_t j = i; j < n; ++j) { - if (LU.second.IsZero(j, k)) - continue; - std::tuple ujk_ajk_lj0_doFill_skipLU(0, 0, 0, true, true); - std::get<0>(ujk_ajk_lj0_doFill_skipLU) = LU.second.VectorIndex(0, j, k); - bool has_lj0_u0k = (!LU.first.IsZero(j, 0)) && has_u0k; - if (!matrix.IsZero(j, k)) - { - std::get<1>(ujk_ajk_lj0_doFill_skipLU) = matrix.VectorIndex(0, j, k); - std::get<3>(ujk_ajk_lj0_doFill_skipLU) = false; - } - if (has_lj0_u0k) + if (matrix.IsZero(i, j)) { - std::get<2>(ujk_ajk_lj0_doFill_skipLU) = LU.first.VectorIndex(0, j, 0); - std::get<4>(ujk_ajk_lj0_doFill_skipLU) = false; + if (!LU.second.IsZero(i, j)) + fill_uij_.push_back(LU.second.VectorIndex(0, i, j)); + continue; } - ujk_ajk_lj0_doFill_skipLU_.push_back(ujk_ajk_lj0_doFill_skipLU); - ++(std::get<0>(nujk_nljk_u0k_doFill)); + uij_aij_.push_back(std::make_pair(LU.second.VectorIndex(0, i, j), matrix.VectorIndex(0, i, j))); + ++(std::get<1>(lii_nuij_nlij)); } - for (std::size_t j = k + 1; j < n; ++j) + // set initial values for L matrix + for (std::size_t j = 0; j < i; ++j) { - if (LU.first.IsZero(j, k)) - continue; - std::tuple ljk_ajk_lj0_doFill_skipLU(0, 0, 0, true, true); - std::get<0>(ljk_ajk_lj0_doFill_skipLU) = LU.first.VectorIndex(0, j, k); - bool has_lj0_u0k = (!LU.first.IsZero(j, 0)) && has_u0k; - if (!matrix.IsZero(j, k)) + if (matrix.IsZero(i, j)) { - std::get<1>(ljk_ajk_lj0_doFill_skipLU) = matrix.VectorIndex(0, j, k); - std::get<3>(ljk_ajk_lj0_doFill_skipLU) = false; - } - if (has_lj0_u0k) - { - std::get<2>(ljk_ajk_lj0_doFill_skipLU) = LU.first.VectorIndex(0, j, 0); - std::get<4>(ljk_ajk_lj0_doFill_skipLU) = false; + if (!LU.first.IsZero(i, j)) + fill_lij_.push_back(LU.first.VectorIndex(0, i, j)); + continue; } - ljk_ajk_lj0_doFill_skipLU_.push_back(ljk_ajk_lj0_doFill_skipLU); - ++(std::get<1>(nujk_nljk_u0k_doFill)); + lij_aij_.push_back(std::make_pair(LU.first.VectorIndex(0, i, j), matrix.VectorIndex(0, i, j))); + ++(std::get<2>(lii_nuij_nlij)); } - nujk_nljk_u0k_doFill_.push_back(nujk_nljk_u0k_doFill); + lii_nuij_nlij_.push_back(lii_nuij_nlij); } - for (std::size_t i = 1; i < n; ++i) + for (std::size_t i = 0; i < matrix.NumRows(); ++i) { - std::tuple lii_uii_nj_nk(0, 0, 0, 0); - std::get<0>(lii_uii_nj_nk) = LU.first.VectorIndex(0, i, i); - std::get<1>(lii_uii_nj_nk) = LU.second.VectorIndex(0, i, i); + std::tuple uii_nj_nk(0, 0, 0); + std::get<0>(uii_nj_nk) = LU.second.VectorIndex(0, i, i); // middle j loop to set L[j][i] for (std::size_t j = i + 1; j < n; ++j) { if (LU.first.IsZero(j, i)) continue; lji_.push_back(LU.first.VectorIndex(0, j, i)); - ++(std::get<2>(lii_uii_nj_nk)); + ++(std::get<1>(uii_nj_nk)); } // middle k loop to set U[j][k] and L[j][k] - for (std::size_t k = i + 1; k < n; ++k) + for (std::size_t k = i+1; k < n; ++k) { if (LU.second.IsZero(i, k)) continue; @@ -159,9 +108,9 @@ namespace micm ++(std::get<1>(nujk_nljk_uik)); } nujk_nljk_uik_.push_back(nujk_nljk_uik); - ++(std::get<3>(lii_uii_nj_nk)); + ++(std::get<2>(uii_nj_nk)); } - lii_uii_nj_nk_.push_back(lii_uii_nj_nk); + uii_nj_nk_.push_back(uii_nj_nk); } } @@ -234,107 +183,56 @@ namespace micm auto A_vector = std::next(A.AsVector().begin(), i_block * A.FlatBlockSize()); auto L_vector = std::next(L.AsVector().begin(), i_block * L.FlatBlockSize()); auto U_vector = std::next(U.AsVector().begin(), i_block * U.FlatBlockSize()); - auto ujk_ajk_lj0_doFill_skipLU = ujk_ajk_lj0_doFill_skipLU_.begin(); - auto ljk_ajk_lj0_doFill_skipLU = ljk_ajk_lj0_doFill_skipLU_.begin(); - auto lii_uii_nj_nk = lii_uii_nj_nk_.begin(); + auto uij_aij = uij_aij_.begin(); + auto lij_aij = lij_aij_.begin(); + auto uii_nj_nk = uii_nj_nk_.begin(); auto lji = lji_.begin(); auto nujk_nljk_uik = nujk_nljk_uik_.begin(); auto ujk_lji = ujk_lji_.begin(); auto ljk_lji = ljk_lji_.begin(); - double Uii_inverse; - double Uik; - - L_vector[std::get<0>(l00_u00_a00_)] = 1.0; - U_vector[std::get<1>(l00_u00_a00_)] = A_vector[std::get<2>(l00_u00_a00_)]; - Uii_inverse = 1.0 / U_vector[std::get<1>(l00_u00_a00_)]; - for (auto& li0_ai0_doFill : li0_ai0_doFill_) - { - if (std::get<2>(li0_ai0_doFill)) - { - L_vector[std::get<0>(li0_ai0_doFill)] = 0; - } else { - L_vector[std::get<0>(li0_ai0_doFill)] = A_vector[std::get<1>(li0_ai0_doFill)] * Uii_inverse; - } - } - for (auto& u0i_a0i_doFill : u0i_a0i_doFill_) - { - if (std::get<2>(u0i_a0i_doFill)) - { - U_vector[std::get<0>(u0i_a0i_doFill)] = 0; - } else { - U_vector[std::get<0>(u0i_a0i_doFill)] = A_vector[std::get<1>(u0i_a0i_doFill)]; - } - } - for (auto& nujk_nljk_u0k_doFill : nujk_nljk_u0k_doFill_) + + for (auto& lii_nuij_nlij : lii_nuij_nlij_) { - if (std::get<3>(nujk_nljk_u0k_doFill)) - { - Uik = 0; - } - else - { - Uik = U_vector[std::get<2>(nujk_nljk_u0k_doFill)]; - } - for (std::size_t j = 0; j < std::get<0>(nujk_nljk_u0k_doFill); ++j) + for (std::size_t i = 0; i < std::get<1>(lii_nuij_nlij); ++i) { - const std::size_t ujk = std::get<0>(*ujk_ajk_lj0_doFill_skipLU); - if (std::get<3>(*ujk_ajk_lj0_doFill_skipLU)) - { - U_vector[ujk] = 0; - } - else - { - U_vector[ujk] = A_vector[std::get<1>(*ujk_ajk_lj0_doFill_skipLU)]; - } - if (!std::get<4>(*ujk_ajk_lj0_doFill_skipLU)) - { - U_vector[ujk] -= L_vector[std::get<2>(*ujk_ajk_lj0_doFill_skipLU)] * Uik; - } - ++ujk_ajk_lj0_doFill_skipLU; + U_vector[uij_aij->first] = A_vector[uij_aij->second]; + ++uij_aij; } - for (std::size_t j = 0; j < std::get<1>(nujk_nljk_u0k_doFill); ++j) + L_vector[std::get<0>(lii_nuij_nlij)] = 1.0; + for (std::size_t i = 0; i < std::get<2>(lii_nuij_nlij); ++i) { - const std::size_t ljk = std::get<0>(*ljk_ajk_lj0_doFill_skipLU); - if (std::get<3>(*ljk_ajk_lj0_doFill_skipLU)) - { - L_vector[ljk] = 0; - } - else - { - L_vector[ljk] = A_vector[std::get<1>(*ljk_ajk_lj0_doFill_skipLU)]; - } - if (!std::get<4>(*ljk_ajk_lj0_doFill_skipLU)) - { - L_vector[ljk] -= L_vector[std::get<2>(*ljk_ajk_lj0_doFill_skipLU)] * Uik; - } - ++ljk_ajk_lj0_doFill_skipLU; + L_vector[lij_aij->first] = A_vector[lij_aij->second]; + ++lij_aij; } } - for (std::size_t i = 1; i < n; ++i) + for (auto& fill_uij : fill_uij_) + U_vector[fill_uij] = 0; + for (auto& fill_lij : fill_lij_) + L_vector[fill_lij] = 0; + for (std::size_t i = 0; i < n; ++i) { - L_vector[std::get<0>(*lii_uii_nj_nk)] = 1.0; - Uii_inverse = 1.0 / U_vector[std::get<1>(*lii_uii_nj_nk)]; - for (std::size_t j = 0; j < std::get<2>(*lii_uii_nj_nk); ++j) + auto Uii_inverse = 1.0 / U_vector[std::get<0>(*uii_nj_nk)]; + for (std::size_t ij = 0; ij < std::get<1>(*uii_nj_nk); ++ij) { L_vector[*lji] = L_vector[*lji] * Uii_inverse; ++lji; } - for (std::size_t k = 0; k < std::get<3>(*lii_uii_nj_nk); ++k) + for (std::size_t ik = 0; ik < std::get<2>(*uii_nj_nk); ++ik) { - const double Uik = U_vector[std::get<2>(*nujk_nljk_uik)]; - for (std::size_t j = 0; j < std::get<0>(*nujk_nljk_uik); ++j) + const std::size_t uik = std::get<2>(*nujk_nljk_uik); + for (std::size_t ij = 0; ij < std::get<0>(*nujk_nljk_uik); ++ij) { - U_vector[ujk_lji->first] -= L_vector[ujk_lji->second] * Uik; + U_vector[ujk_lji->first] -= L_vector[ujk_lji->second] * U_vector[uik]; ++ujk_lji; } - for (std::size_t j = 0 ; j < std::get<1>(*nujk_nljk_uik); ++j) + for (std::size_t ij = 0 ; ij < std::get<1>(*nujk_nljk_uik); ++ij) { - L_vector[ljk_lji->first] -= L_vector[ljk_lji->second] * Uik; + L_vector[ljk_lji->first] -= L_vector[ljk_lji->second] * U_vector[uik]; ++ljk_lji; } ++nujk_nljk_uik; } - ++lii_uii_nj_nk; + ++uii_nj_nk; } } } @@ -354,7 +252,6 @@ namespace micm const std::size_t L_GroupSizeOfFlatBlockSize = L.GroupSize(L.FlatBlockSize()); const std::size_t U_GroupSizeOfFlatBlockSize = U.GroupSize(U.FlatBlockSize()); double Uii_inverse[A_GroupVectorSize]; - double Uik[A_GroupVectorSize]; // Loop over groups of blocks for (std::size_t i_group = 0; i_group < A.NumberOfGroups(A_BlockSize); ++i_group) @@ -362,133 +259,65 @@ namespace micm auto A_vector = std::next(A.AsVector().begin(), i_group * A_GroupSizeOfFlatBlockSize); auto L_vector = std::next(L.AsVector().begin(), i_group * L_GroupSizeOfFlatBlockSize); auto U_vector = std::next(U.AsVector().begin(), i_group * U_GroupSizeOfFlatBlockSize); - auto ujk_ajk_lj0_doFill_skipLU = ujk_ajk_lj0_doFill_skipLU_.begin(); - auto ljk_ajk_lj0_doFill_skipLU = ljk_ajk_lj0_doFill_skipLU_.begin(); - auto lii_uii_nj_nk = lii_uii_nj_nk_.begin(); + auto uij_aij = uij_aij_.begin(); + auto lij_aij = lij_aij_.begin(); + auto uii_nj_nk = uii_nj_nk_.begin(); auto lji = lji_.begin(); auto nujk_nljk_uik = nujk_nljk_uik_.begin(); auto ujk_lji = ujk_lji_.begin(); auto ljk_lji = ljk_lji_.begin(); const std::size_t n_cells = std::min(A_GroupVectorSize, A_BlockSize - i_group * A_GroupVectorSize); - - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - L_vector[std::get<0>(l00_u00_a00_)+i_cell] = 1.0; - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - { - U_vector[std::get<1>(l00_u00_a00_)+i_cell] = A_vector[std::get<2>(l00_u00_a00_)+i_cell]; - Uii_inverse[i_cell] = 1.0 / U_vector[std::get<1>(l00_u00_a00_)+i_cell]; - } - for (auto& li0_ai0_doFill : li0_ai0_doFill_) - { - if (std::get<2>(li0_ai0_doFill)) - { - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - L_vector[std::get<0>(li0_ai0_doFill)+i_cell] = 0; - } - else - { - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - L_vector[std::get<0>(li0_ai0_doFill)+i_cell] = A_vector[std::get<1>(li0_ai0_doFill)+i_cell] * Uii_inverse[i_cell]; - } - } - for (auto& u0i_a0i_doFill : u0i_a0i_doFill_) + for (auto& lii_nuij_nlij : lii_nuij_nlij_) { - if (std::get<2>(u0i_a0i_doFill)) + for (std::size_t i = 0; i < std::get<1>(lii_nuij_nlij); ++i) { for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - U_vector[std::get<0>(u0i_a0i_doFill)+i_cell] = 0; + U_vector[uij_aij->first+i_cell] = A_vector[uij_aij->second+i_cell]; + ++uij_aij; } - else + for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) + L_vector[std::get<0>(lii_nuij_nlij)+i_cell] = 1.0; + for (std::size_t i = 0; i < std::get<2>(lii_nuij_nlij); ++i) { for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - U_vector[std::get<0>(u0i_a0i_doFill)+i_cell] = A_vector[std::get<1>(u0i_a0i_doFill)+i_cell]; + L_vector[lij_aij->first+i_cell] = A_vector[lij_aij->second+i_cell]; + ++lij_aij; } } - for (auto& nujk_nljk_u0k_doFill : nujk_nljk_u0k_doFill_) - { - if (std::get<3>(nujk_nljk_u0k_doFill)) - { - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - Uik[i_cell] = 0; - } - else - { - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - Uik[i_cell] = U_vector[std::get<2>(nujk_nljk_u0k_doFill)+i_cell]; - } - for (std::size_t j = 0; j < std::get<0>(nujk_nljk_u0k_doFill); ++j) - { - const std::size_t ujk = std::get<0>(*ujk_ajk_lj0_doFill_skipLU); - if (std::get<3>(*ujk_ajk_lj0_doFill_skipLU)) - { - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - U_vector[ujk+i_cell] = 0; - } - else - { - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - U_vector[ujk+i_cell] = A_vector[std::get<1>(*ujk_ajk_lj0_doFill_skipLU)+i_cell]; - } - if (!std::get<4>(*ujk_ajk_lj0_doFill_skipLU)) - { - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - U_vector[ujk+i_cell] -= L_vector[std::get<2>(*ujk_ajk_lj0_doFill_skipLU)+i_cell] * Uik[i_cell]; - } - ++ujk_ajk_lj0_doFill_skipLU; - } - for (std::size_t j = 0; j < std::get<1>(nujk_nljk_u0k_doFill); ++j) - { - const std::size_t ljk = std::get<0>(*ljk_ajk_lj0_doFill_skipLU); - if (std::get<3>(*ljk_ajk_lj0_doFill_skipLU)) - { - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - L_vector[ljk+i_cell] = 0; - } - else - { - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - L_vector[ljk+i_cell] = A_vector[std::get<1>(*ljk_ajk_lj0_doFill_skipLU)+i_cell]; - } - if (!std::get<4>(*ljk_ajk_lj0_doFill_skipLU)) - { - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - L_vector[ljk+i_cell] -= L_vector[std::get<2>(*ljk_ajk_lj0_doFill_skipLU)+i_cell] * Uik[i_cell]; - } - ++ljk_ajk_lj0_doFill_skipLU; - } - } - for (std::size_t i = 1; i < n; ++i) - { + for (auto& fill_uij : fill_uij_) for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - L_vector[std::get<0>(*lii_uii_nj_nk)+i_cell] = 1.0; + U_vector[fill_uij+i_cell] = 0; + for (auto& fill_lij : fill_lij_) for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - Uii_inverse[i_cell] = 1.0 / U_vector[std::get<1>(*lii_uii_nj_nk)+i_cell]; - for (std::size_t j = 0; j < std::get<2>(*lii_uii_nj_nk); ++j) + L_vector[fill_lij+i_cell] = 0; + for (std::size_t i = 0; i < n; ++i) + { + for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) + Uii_inverse[i_cell] = 1.0 / U_vector[std::get<0>(*uii_nj_nk)+i_cell]; + for (std::size_t ij = 0; ij < std::get<1>(*uii_nj_nk); ++ij) { for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) L_vector[*lji+i_cell] = L_vector[*lji+i_cell] * Uii_inverse[i_cell]; ++lji; } - for (std::size_t k = 0; k < std::get<3>(*lii_uii_nj_nk); ++k) + for (std::size_t ik = 0; ik < std::get<2>(*uii_nj_nk); ++ik) { const std::size_t uik = std::get<2>(*nujk_nljk_uik); - for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - Uik[i_cell] = U_vector[uik+i_cell]; - for (std::size_t j = 0; j < std::get<0>(*nujk_nljk_uik); ++j) + for (std::size_t ij = 0; ij < std::get<0>(*nujk_nljk_uik); ++ij) { for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - U_vector[ujk_lji->first+i_cell] -= L_vector[ujk_lji->second+i_cell] * Uik[i_cell]; + U_vector[ujk_lji->first+i_cell] -= L_vector[ujk_lji->second+i_cell] * U_vector[uik+i_cell]; ++ujk_lji; } - for (std::size_t j = 0; j < std::get<1>(*nujk_nljk_uik); ++j) + for (std::size_t ij = 0; ij < std::get<1>(*nujk_nljk_uik); ++ij) { for (std::size_t i_cell = 0; i_cell < n_cells; ++i_cell) - L_vector[ljk_lji->first+i_cell] -= L_vector[ljk_lji->second+i_cell] * Uik[i_cell]; + L_vector[ljk_lji->first+i_cell] -= L_vector[ljk_lji->second+i_cell] * U_vector[uik+i_cell]; ++ljk_lji; } ++nujk_nljk_uik; } - ++lii_uii_nj_nk; + ++uii_nj_nk; } } } diff --git a/test/unit/solver/test_lu_decomposition_policy.hpp b/test/unit/solver/test_lu_decomposition_policy.hpp index 550340fe3..d97ddaf15 100644 --- a/test/unit/solver/test_lu_decomposition_policy.hpp +++ b/test/unit/solver/test_lu_decomposition_policy.hpp @@ -5,7 +5,6 @@ #include #include -#include template void CopyToDevice(MatrixPolicy& matrix) @@ -122,13 +121,6 @@ void testDenseMatrix() LuDecompositionPolicy lud = LuDecompositionPolicy(A); auto LU = LuDecompositionPolicy::template GetLUMatrices(A, 0); lud.template Decompose(A, LU.first, LU.second); - std::cout << "A:" << std::endl; - print_matrix(A, 5); - std::cout << "L:" << std::endl; - print_matrix(LU.first, 5); - std::cout << "U:" << std::endl; - print_matrix(LU.second, 5); - check_results( A, LU.first, LU.second, [&](const double a, const double b) -> void { EXPECT_NEAR(a, b, 1.0e-10); }); }