From 5902a4bdce832c01af004eda2e5a1449f3897203 Mon Sep 17 00:00:00 2001 From: dcdemen Date: Tue, 23 Jul 2024 08:10:00 -0600 Subject: [PATCH 1/2] Removed use of a dense matrix for constraint assembly in favor of a more memory efficient structure. --- .../solver/assemble_constraints.hpp | 62 ++++++++++--------- ...calculate_constraint_residual_gradient.hpp | 12 ++-- src/restruct_poc/solver/constraints.hpp | 6 +- .../copy_constraints_to_sparse_matrix.hpp | 38 ++++++++++++ .../copy_sparse_values_to_transpose.hpp | 28 +++++++++ src/restruct_poc/solver/solver.hpp | 2 +- .../restruct_poc/test_rotating_beam.cpp | 8 --- tests/unit_tests/restruct_poc/test_solver.cpp | 28 --------- 8 files changed, 109 insertions(+), 75 deletions(-) create mode 100644 src/restruct_poc/solver/copy_constraints_to_sparse_matrix.hpp create mode 100644 src/restruct_poc/solver/copy_sparse_values_to_transpose.hpp diff --git a/src/restruct_poc/solver/assemble_constraints.hpp b/src/restruct_poc/solver/assemble_constraints.hpp index 7e5b73d3..34d5d8c9 100644 --- a/src/restruct_poc/solver/assemble_constraints.hpp +++ b/src/restruct_poc/solver/assemble_constraints.hpp @@ -6,8 +6,11 @@ #include "calculate_constraint_residual_gradient.hpp" #include "compute_number_of_non_zeros.hpp" +#include "contribute_elements_to_sparse_matrix.hpp" +#include "copy_constraints_to_sparse_matrix.hpp" #include "copy_into_sparse_matrix.hpp" #include "copy_into_sparse_matrix_transpose.hpp" +#include "copy_sparse_values_to_transpose.hpp" #include "populate_sparse_indices.hpp" #include "populate_sparse_row_ptrs.hpp" #include "solver.hpp" @@ -27,49 +30,48 @@ void AssembleConstraints(Solver& solver, Subview_N R_system, Subview_N R_lambda) // Transfer prescribed displacements to host solver.constraints.UpdateViews(); - Kokkos::deep_copy(solver.constraints.Phi, 0.); - Kokkos::deep_copy(solver.constraints.B, 0.); Kokkos::parallel_for( "CalculateConstraintResidualGradient", solver.constraints.num, CalculateConstraintResidualGradient{ - solver.constraints.data, - solver.constraints.control, - solver.constraints.u, - solver.state.q, - solver.constraints.Phi, - solver.constraints.B, - } + solver.constraints.data, solver.constraints.control, solver.constraints.u, + solver.state.q, solver.constraints.Phi, solver.constraints.gradient_terms} ); - auto B_num_rows = solver.constraints.B.extent(0); - auto B_num_columns = solver.constraints.B.extent(1); - - auto B_row_data_size = Kokkos::View::shmem_size(B_num_columns); - auto B_col_idx_size = Kokkos::View::shmem_size(B_num_columns); - auto constraint_policy = Kokkos::TeamPolicy<>(B_num_rows, Kokkos::AUTO()); - constraint_policy.set_scratch_size(1, Kokkos::PerTeam(B_row_data_size + B_col_idx_size)); + auto constraint_policy = Kokkos::TeamPolicy<>(solver.constraints.num, Kokkos::AUTO()); Kokkos::parallel_for( - "CopyIntoSparseMatrix", constraint_policy, - CopyIntoSparseMatrix{solver.B, solver.constraints.B} + "CopyConstraintsToSparseMatrix", constraint_policy, + CopyConstraintsToSparseMatrix{ + solver.constraints.data, solver.B, solver.constraints.gradient_terms} ); - auto B_t_row_data_size = Kokkos::View::shmem_size(B_num_rows); - auto B_t_col_idx_size = Kokkos::View::shmem_size(B_num_rows); - auto constraint_transpose_policy = Kokkos::TeamPolicy<>(B_num_columns, Kokkos::AUTO()); - constraint_transpose_policy.set_scratch_size( - 1, Kokkos::PerTeam(B_t_row_data_size + B_t_col_idx_size) - ); - Kokkos::parallel_for( - "CopyIntoSparseMatrix_Transpose", constraint_transpose_policy, - CopyIntoSparseMatrix_Transpose{solver.B_t, solver.constraints.B} - ); + Kokkos::fence(); + + { + auto trans_region = Kokkos::Profiling::ScopedRegion("Transpose Constraints Matrix"); + auto B_num_rows = solver.B.numRows(); + auto constraint_transpose_policy = Kokkos::TeamPolicy<>(B_num_rows, Kokkos::AUTO()); + auto tmp_row_map = Solver::RowPtrType( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "tmp_row_map"), + solver.B_t.graph.row_map.extent(0) + ); + Kokkos::deep_copy(tmp_row_map, solver.B_t.graph.row_map); + Kokkos::parallel_for( + "CopySparseValuesToTranspose", constraint_transpose_policy, + CopySparseValuesToTranspose{solver.B, tmp_row_map, solver.B_t} + ); + KokkosSparse::sort_crs_matrix(solver.B_t); + } { auto resid_region = Kokkos::Profiling::ScopedRegion("Assemble Residual"); - auto R = Solver::ValuesType("R_local", R_system.extent(0)); + auto R = Solver::ValuesType( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "R_local"), R_system.extent(0) + ); Kokkos::deep_copy(R, R_system); - auto lambda = Solver::ValuesType("lambda", solver.state.lambda.extent(0)); + auto lambda = Solver::ValuesType( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "lambda"), solver.state.lambda.extent(0) + ); Kokkos::deep_copy(lambda, solver.state.lambda); auto spmv_handle = Solver::SpmvHandle(); KokkosSparse::spmv(&spmv_handle, "T", 1., solver.B, lambda, 1., R); diff --git a/src/restruct_poc/solver/calculate_constraint_residual_gradient.hpp b/src/restruct_poc/solver/calculate_constraint_residual_gradient.hpp index 395db477..59406f92 100644 --- a/src/restruct_poc/solver/calculate_constraint_residual_gradient.hpp +++ b/src/restruct_poc/solver/calculate_constraint_residual_gradient.hpp @@ -16,7 +16,7 @@ struct CalculateConstraintResidualGradient { View_Nx7::const_type constraint_u; View_Nx7::const_type node_u; View_N Phi_; - View_NxN B_; + Kokkos::View gradient_terms; KOKKOS_FUNCTION void operator()(const int i_constraint) const { @@ -179,9 +179,10 @@ struct CalculateConstraintResidualGradient { //--------------------------------- // Extract gradient block for target node of this constraint - auto i_col = i_node2 * kLieAlgebraComponents; + auto i_col = (i_node2 < i_node1 || i_node1 < 0) ? 0 : kLieAlgebraComponents; auto B = Kokkos::subview( - B_, cd.row_range, Kokkos::make_pair(i_col, i_col + kLieAlgebraComponents) + gradient_terms, i_constraint, Kokkos::ALL, + Kokkos::make_pair(i_col, i_col + kLieAlgebraComponents) ); // B(0:3,0:3) = I @@ -222,9 +223,10 @@ struct CalculateConstraintResidualGradient { } // Extract gradient block for base node of this constraint - i_col = i_node1 * kLieAlgebraComponents; + i_col = (i_node1 < i_node2) ? 0 : kLieAlgebraComponents; B = Kokkos::subview( - B_, cd.row_range, Kokkos::make_pair(i_col, i_col + kLieAlgebraComponents) + gradient_terms, i_constraint, Kokkos::ALL, + Kokkos::make_pair(i_col, i_col + kLieAlgebraComponents) ); // B(0:3,0:3) = -I diff --git a/src/restruct_poc/solver/constraints.hpp b/src/restruct_poc/solver/constraints.hpp index 296a7896..a770b1a1 100644 --- a/src/restruct_poc/solver/constraints.hpp +++ b/src/restruct_poc/solver/constraints.hpp @@ -36,10 +36,10 @@ struct Constraints { View_N control; View_Nx7 u; View_N Phi; - View_NxN B; + Kokkos::View gradient_terms; Constraints() {} - Constraints(const std::vector& constraints, int num_system_dofs) + Constraints(const std::vector& constraints) : num(constraints.size()), num_dofs(std::transform_reduce( constraints.cbegin(), constraints.cend(), 0, std::plus{}, @@ -54,7 +54,7 @@ struct Constraints { control("control", num), u("u", num), Phi("residual_vector", num_dofs), - B("gradient_matrix", num_dofs, num_system_dofs) { + gradient_terms("gradient_terms", num) { // Create host mirror for constraint data auto host_data = Kokkos::create_mirror(this->data); diff --git a/src/restruct_poc/solver/copy_constraints_to_sparse_matrix.hpp b/src/restruct_poc/solver/copy_constraints_to_sparse_matrix.hpp new file mode 100644 index 00000000..e99016a1 --- /dev/null +++ b/src/restruct_poc/solver/copy_constraints_to_sparse_matrix.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + +namespace openturbine { +template +struct CopyConstraintsToSparseMatrix { + using RowDataType = typename CrsMatrixType::values_type::non_const_type; + using ColIdxType = typename CrsMatrixType::staticcrsgraph_type::entries_type::non_const_type; + Kokkos::View::const_type data; + CrsMatrixType sparse; + Kokkos::View dense; + + KOKKOS_FUNCTION + void operator()(Kokkos::TeamPolicy<>::member_type member) const { + auto i_constraint = member.league_rank(); + auto& cd = data(i_constraint); + auto start_row = cd.row_range.first; + auto end_row = cd.row_range.second; + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, start_row, end_row), [&](int i) { + auto row_number = i - start_row; + auto row = sparse.row(i); + auto row_map = sparse.graph.row_map; + auto cols = sparse.graph.entries; + auto row_data_data = Kokkos::Array{}; + auto col_idx_data = Kokkos::Array{}; + auto row_data = RowDataType(row_data_data.data(), row.length); + auto col_idx = ColIdxType(col_idx_data.data(), row.length); + for (int entry = 0; entry < row.length; ++entry) { + col_idx(entry) = cols(row_map(i) + entry); + row_data(entry) = dense(i_constraint, row_number, entry); + } + sparse.replaceValues(i, col_idx.data(), row.length, row_data.data()); + }); + } +}; +} // namespace openturbine \ No newline at end of file diff --git a/src/restruct_poc/solver/copy_sparse_values_to_transpose.hpp b/src/restruct_poc/solver/copy_sparse_values_to_transpose.hpp new file mode 100644 index 00000000..ec670ed5 --- /dev/null +++ b/src/restruct_poc/solver/copy_sparse_values_to_transpose.hpp @@ -0,0 +1,28 @@ +#pragma once + +#include +#include + +namespace openturbine { +template +struct CopySparseValuesToTranspose { + using RowMapType = typename CrsMatrixType::staticcrsgraph_type::row_map_type::non_const_type; + CrsMatrixType input; + RowMapType tmp; + CrsMatrixType transpose; + + KOKKOS_FUNCTION + void operator()(Kokkos::TeamPolicy<>::member_type member) const { + int row_index = member.league_rank(); + int col_begin = input.graph.row_map(row_index); + int col_end = input.graph.row_map(row_index + 1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, col_begin, col_end), [&](int in_index) { + int col_index = input.graph.entries(in_index); + using atomic_incr_type = typename std::remove_reference::type; + int pos = Kokkos::atomic_fetch_add(&(tmp(col_index)), atomic_incr_type{1}); + transpose.graph.entries(pos) = row_index; + transpose.values(pos) = input.values(in_index); + }); + } +}; +} // namespace openturbine \ No newline at end of file diff --git a/src/restruct_poc/solver/solver.hpp b/src/restruct_poc/solver/solver.hpp index ec150600..fe09bede 100644 --- a/src/restruct_poc/solver/solver.hpp +++ b/src/restruct_poc/solver/solver.hpp @@ -106,7 +106,7 @@ struct Solver { conditioner(beta * h * h), num_system_nodes(system_nodes.size()), num_system_dofs(num_system_nodes * kLieAlgebraComponents), - constraints(constraints_, num_system_dofs), + constraints(constraints_), num_dofs(num_system_dofs + constraints.num_dofs), state(num_system_nodes, constraints.num_dofs, system_nodes), T_dense("T dense", num_system_nodes), diff --git a/tests/unit_tests/restruct_poc/test_rotating_beam.cpp b/tests/unit_tests/restruct_poc/test_rotating_beam.cpp index 5d1b9736..2b56e460 100644 --- a/tests/unit_tests/restruct_poc/test_rotating_beam.cpp +++ b/tests/unit_tests/restruct_poc/test_rotating_beam.cpp @@ -252,14 +252,6 @@ TEST(RotatingBeamTest, TwoBeam) { for (int i = 0; i < m; ++i) { EXPECT_NEAR(Phi[i], Phi[i + m], 1.e-10); } - - // Check that B matrix is the same for both beams - auto B = kokkos_view_2D_to_vector(solver.constraints.B); - for (int i = 0; i < m; ++i) { - for (int j = 0; j < n; ++j) { - EXPECT_NEAR(B[i][j], B[i + m][j + n], 1.e-10); - } - } } TEST(RotatingBeamTest, ThreeBladeRotor) { diff --git a/tests/unit_tests/restruct_poc/test_solver.cpp b/tests/unit_tests/restruct_poc/test_solver.cpp index d1ae578b..a2ce0fec 100644 --- a/tests/unit_tests/restruct_poc/test_solver.cpp +++ b/tests/unit_tests/restruct_poc/test_solver.cpp @@ -259,20 +259,6 @@ TEST_F(NewSolverTest, ConstraintResidualVector) { ); } -TEST_F(NewSolverTest, ConstraintGradientMatrix) { - expect_kokkos_view_2D_equal( - Kokkos::subview(solver_->constraints.B, Kokkos::make_pair(0, 6), Kokkos::make_pair(0, 6)), - { - {1., 0., 0., 0.0000000000000000, 0.0000000000000000, 0.0000000000000000}, - {0., 1., 0., 0.0000000000000000, 0.0000000000000000, 0.0000000000000000}, - {0., 0., 1., 0.0000000000000000, 0.0000000000000000, 0.0000000000000000}, - {0., 0., 0., 1.0000000000000004, 0.0000000000000000, 0.0000000000000000}, - {0., 0., 0., 0.0000000000000000, 1.0000000000000004, 0.0000000000000000}, - {0., 0., 0., 0.0000000000000000, 0.0000000000000000, 1.0000000000000004}, - } - ); -} - TEST_F(NewSolverTest, AssembleResidualVector) { expect_kokkos_view_1D_equal( solver_->R, @@ -711,20 +697,6 @@ TEST_F(SolverStep2Test, ConstraintResidualVector) { ); } -TEST_F(SolverStep2Test, ConstraintGradientMatrix) { - expect_kokkos_view_2D_equal( - Kokkos::subview(solver_->constraints.B, Kokkos::make_pair(0, 6), Kokkos::make_pair(0, 6)), - { - {1.0, 0.0, 0.0, 0.0, 0.0, 0.0}, - {0.0, 1.0, 0.0, 0.0, 0.0, 0.0}, - {0.0, 0.0, 1.0, 0.0, 0.0, 0.0}, - {0.0, 0.0, 0.0, 1.0000000000000004, 0.0, -6.595417551796206e-27}, - {0.0, 0.0, 0.0, 0.0, 1.0000000000000004, -6.59541975021795e-30}, - {0.0, 0.0, 0.0, 6.595417551796206e-27, 6.595419750217949e-30, 1.0000000000000002}, - } - ); -} - TEST_F(SolverStep2Test, ResidualVector) { expect_kokkos_view_1D_equal( solver_->R, From 883b52d9bb527f2996021dc0b4258689c02a1fba Mon Sep 17 00:00:00 2001 From: dcdemen Date: Tue, 23 Jul 2024 21:52:37 -0600 Subject: [PATCH 2/2] Simplified data types in calculate_constraint_residual_gradient.hpp --- ...calculate_constraint_residual_gradient.hpp | 60 +++++++++---------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/src/restruct_poc/solver/calculate_constraint_residual_gradient.hpp b/src/restruct_poc/solver/calculate_constraint_residual_gradient.hpp index 59406f92..c9e155bd 100644 --- a/src/restruct_poc/solver/calculate_constraint_residual_gradient.hpp +++ b/src/restruct_poc/solver/calculate_constraint_residual_gradient.hpp @@ -26,7 +26,7 @@ struct CalculateConstraintResidualGradient { // Initial difference between nodes auto X0_data = Kokkos::Array{cd.X0[0], cd.X0[1], cd.X0[2]}; - auto X0 = Kokkos::View>{X0_data.data()}; + auto X0 = View_3{X0_data.data()}; // Base node displacement auto u1_data = Kokkos::Array{}; @@ -51,81 +51,75 @@ struct CalculateConstraintResidualGradient { node_u(i_node1, 3), node_u(i_node1, 4), node_u(i_node1, 5), node_u(i_node1, 6)}; } } - auto u1 = Kokkos::View>{u1_data.data()}; - auto R1 = Kokkos::View>{R1_data.data()}; + auto u1 = View_3{u1_data.data()}; + auto R1 = Kokkos::View{R1_data.data()}; // Target node displacement auto R2_data = Kokkos::Array{ node_u(i_node2, 3), node_u(i_node2, 4), node_u(i_node2, 5), node_u(i_node2, 6)}; - auto R2 = Kokkos::View>{R2_data.data()}; + auto R2 = Kokkos::View{R2_data.data()}; auto u2_data = Kokkos::Array{node_u(i_node2, 0), node_u(i_node2, 1), node_u(i_node2, 2)}; - auto u2 = Kokkos::View>{u2_data.data()}; + auto u2 = View_3{u2_data.data()}; // Rotation control auto RC_data = Kokkos::Array{1., 0., 0., 0}; - auto RC = Kokkos::View>{RC_data.data()}; + auto RC = Kokkos::View{RC_data.data()}; auto RCt_data = Kokkos::Array{1., 0., 0., 0.}; - auto RCt = Kokkos::View>{RCt_data.data()}; + auto RCt = Kokkos::View{RCt_data.data()}; auto RV_data = Kokkos::Array{}; - auto RV = Kokkos::View>{RV_data.data()}; + auto RV = Kokkos::View{RV_data.data()}; auto R1t_data = Kokkos::Array{}; - auto R1t = Kokkos::View>{R1t_data.data()}; + auto R1t = Kokkos::View{R1t_data.data()}; auto R2t_data = Kokkos::Array{}; - auto R2t = Kokkos::View>{R2t_data.data()}; + auto R2t = Kokkos::View{R2t_data.data()}; auto R1_X0_data = Kokkos::Array{}; - auto R1_X0 = - Kokkos::View>{R1_X0_data.data()}; + auto R1_X0 = Kokkos::View{R1_X0_data.data()}; auto R2_R1t_data = Kokkos::Array{}; - auto R2_R1t = - Kokkos::View>{R2_R1t_data.data()}; + auto R2_R1t = Kokkos::View{R2_R1t_data.data()}; auto R2_RCt_data = Kokkos::Array{}; - auto R2_RCt = - Kokkos::View>{R2_RCt_data.data()}; + auto R2_RCt = Kokkos::View{R2_RCt_data.data()}; auto R1_R2t_data = Kokkos::Array{}; - auto R1_R2t = - Kokkos::View>{R1_R2t_data.data()}; + auto R1_R2t = Kokkos::View{R1_R2t_data.data()}; auto R2_RCt_R1t_data = Kokkos::Array{}; - auto R2_RCt_R1t = - Kokkos::View>{R2_RCt_R1t_data.data()}; + auto R2_RCt_R1t = Kokkos::View{R2_RCt_R1t_data.data()}; auto A_data = Kokkos::Array{}; - auto A = Kokkos::View>{A_data.data()}; + auto A = View_3x3{A_data.data()}; auto C_data = Kokkos::Array{}; - auto C = Kokkos::View>{C_data.data()}; + auto C = View_3x3{C_data.data()}; auto Ct_data = Kokkos::Array{}; - auto Ct = - Kokkos::View>{Ct_data.data()}; + auto Ct = View_3x3{Ct_data.data()}; auto V3_data = Kokkos::Array{}; - auto V3 = Kokkos::View>{V3_data.data()}; + auto V3 = View_3{V3_data.data()}; // Cylindrical constraint data auto x0_data = Kokkos::Array{cd.axis_x[0], cd.axis_x[1], cd.axis_x[2]}; auto y0_data = Kokkos::Array{cd.axis_y[0], cd.axis_y[1], cd.axis_y[2]}; auto z0_data = Kokkos::Array{cd.axis_z[0], cd.axis_z[1], cd.axis_z[2]}; - auto x0 = Kokkos::View>{x0_data.data()}; - auto y0 = Kokkos::View>{y0_data.data()}; - auto z0 = Kokkos::View>{z0_data.data()}; + auto x0 = View_3{x0_data.data()}; + auto y0 = View_3{y0_data.data()}; + auto z0 = View_3{z0_data.data()}; auto x_data = Kokkos::Array{}; auto y_data = Kokkos::Array{}; auto z_data = Kokkos::Array{}; - auto x = Kokkos::View>{x_data.data()}; - auto y = Kokkos::View>{y_data.data()}; - auto z = Kokkos::View>{z_data.data()}; + auto x = View_3{x_data.data()}; + auto y = View_3{y_data.data()}; + auto z = View_3{z_data.data()}; auto xcy_data = Kokkos::Array{}; auto xcz_data = Kokkos::Array{}; - auto xcy = Kokkos::View>{xcy_data.data()}; - auto xcz = Kokkos::View>{xcz_data.data()}; + auto xcy = View_3{xcy_data.data()}; + auto xcz = View_3{xcz_data.data()}; //---------------------------------------------------------------------- // Residual Vector