Skip to content

Commit

Permalink
Merge pull request #222 from Exawind/remove_dense_constraints
Browse files Browse the repository at this point in the history
Removed use of a dense matrix for constraint assembly in favor of a more memory efficient structure
  • Loading branch information
ddement authored Jul 30, 2024
2 parents 47744e8 + 883b52d commit e13f4dd
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 108 deletions.
62 changes: 32 additions & 30 deletions src/restruct_poc/solver/assemble_constraints.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@

#include "calculate_constraint_residual_gradient.hpp"
#include "compute_number_of_non_zeros.hpp"
#include "contribute_elements_to_sparse_matrix.hpp"
#include "copy_constraints_to_sparse_matrix.hpp"
#include "copy_into_sparse_matrix.hpp"
#include "copy_into_sparse_matrix_transpose.hpp"
#include "copy_sparse_values_to_transpose.hpp"
#include "populate_sparse_indices.hpp"
#include "populate_sparse_row_ptrs.hpp"
#include "solver.hpp"
Expand All @@ -27,49 +30,48 @@ void AssembleConstraints(Solver& solver, Subview_N R_system, Subview_N R_lambda)
// Transfer prescribed displacements to host
solver.constraints.UpdateViews();

Kokkos::deep_copy(solver.constraints.Phi, 0.);
Kokkos::deep_copy(solver.constraints.B, 0.);
Kokkos::parallel_for(
"CalculateConstraintResidualGradient", solver.constraints.num,
CalculateConstraintResidualGradient{
solver.constraints.data,
solver.constraints.control,
solver.constraints.u,
solver.state.q,
solver.constraints.Phi,
solver.constraints.B,
}
solver.constraints.data, solver.constraints.control, solver.constraints.u,
solver.state.q, solver.constraints.Phi, solver.constraints.gradient_terms}
);

auto B_num_rows = solver.constraints.B.extent(0);
auto B_num_columns = solver.constraints.B.extent(1);

auto B_row_data_size = Kokkos::View<double*>::shmem_size(B_num_columns);
auto B_col_idx_size = Kokkos::View<int*>::shmem_size(B_num_columns);
auto constraint_policy = Kokkos::TeamPolicy<>(B_num_rows, Kokkos::AUTO());
constraint_policy.set_scratch_size(1, Kokkos::PerTeam(B_row_data_size + B_col_idx_size));
auto constraint_policy = Kokkos::TeamPolicy<>(solver.constraints.num, Kokkos::AUTO());

Kokkos::parallel_for(
"CopyIntoSparseMatrix", constraint_policy,
CopyIntoSparseMatrix<Solver::CrsMatrixType>{solver.B, solver.constraints.B}
"CopyConstraintsToSparseMatrix", constraint_policy,
CopyConstraintsToSparseMatrix<Solver::CrsMatrixType>{
solver.constraints.data, solver.B, solver.constraints.gradient_terms}
);

auto B_t_row_data_size = Kokkos::View<double*>::shmem_size(B_num_rows);
auto B_t_col_idx_size = Kokkos::View<int*>::shmem_size(B_num_rows);
auto constraint_transpose_policy = Kokkos::TeamPolicy<>(B_num_columns, Kokkos::AUTO());
constraint_transpose_policy.set_scratch_size(
1, Kokkos::PerTeam(B_t_row_data_size + B_t_col_idx_size)
);
Kokkos::parallel_for(
"CopyIntoSparseMatrix_Transpose", constraint_transpose_policy,
CopyIntoSparseMatrix_Transpose<Solver::CrsMatrixType>{solver.B_t, solver.constraints.B}
);
Kokkos::fence();

{
auto trans_region = Kokkos::Profiling::ScopedRegion("Transpose Constraints Matrix");
auto B_num_rows = solver.B.numRows();
auto constraint_transpose_policy = Kokkos::TeamPolicy<>(B_num_rows, Kokkos::AUTO());
auto tmp_row_map = Solver::RowPtrType(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "tmp_row_map"),
solver.B_t.graph.row_map.extent(0)
);
Kokkos::deep_copy(tmp_row_map, solver.B_t.graph.row_map);
Kokkos::parallel_for(
"CopySparseValuesToTranspose", constraint_transpose_policy,
CopySparseValuesToTranspose<Solver::CrsMatrixType>{solver.B, tmp_row_map, solver.B_t}
);
KokkosSparse::sort_crs_matrix(solver.B_t);
}

{
auto resid_region = Kokkos::Profiling::ScopedRegion("Assemble Residual");
auto R = Solver::ValuesType("R_local", R_system.extent(0));
auto R = Solver::ValuesType(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "R_local"), R_system.extent(0)
);
Kokkos::deep_copy(R, R_system);
auto lambda = Solver::ValuesType("lambda", solver.state.lambda.extent(0));
auto lambda = Solver::ValuesType(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "lambda"), solver.state.lambda.extent(0)
);
Kokkos::deep_copy(lambda, solver.state.lambda);
auto spmv_handle = Solver::SpmvHandle();
KokkosSparse::spmv(&spmv_handle, "T", 1., solver.B, lambda, 1., R);
Expand Down
72 changes: 34 additions & 38 deletions src/restruct_poc/solver/calculate_constraint_residual_gradient.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct CalculateConstraintResidualGradient {
View_Nx7::const_type constraint_u;
View_Nx7::const_type node_u;
View_N Phi_;
View_NxN B_;
Kokkos::View<double* [6][12]> gradient_terms;

KOKKOS_FUNCTION
void operator()(const int i_constraint) const {
Expand All @@ -26,7 +26,7 @@ struct CalculateConstraintResidualGradient {

// Initial difference between nodes
auto X0_data = Kokkos::Array<double, 3>{cd.X0[0], cd.X0[1], cd.X0[2]};
auto X0 = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{X0_data.data()};
auto X0 = View_3{X0_data.data()};

// Base node displacement
auto u1_data = Kokkos::Array<double, 3>{};
Expand All @@ -51,81 +51,75 @@ struct CalculateConstraintResidualGradient {
node_u(i_node1, 3), node_u(i_node1, 4), node_u(i_node1, 5), node_u(i_node1, 6)};
}
}
auto u1 = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{u1_data.data()};
auto R1 = Kokkos::View<double[4], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{R1_data.data()};
auto u1 = View_3{u1_data.data()};
auto R1 = Kokkos::View<double[4]>{R1_data.data()};

// Target node displacement
auto R2_data = Kokkos::Array<double, 4>{
node_u(i_node2, 3), node_u(i_node2, 4), node_u(i_node2, 5), node_u(i_node2, 6)};
auto R2 = Kokkos::View<double[4], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{R2_data.data()};
auto R2 = Kokkos::View<double[4]>{R2_data.data()};
auto u2_data =
Kokkos::Array<double, 3>{node_u(i_node2, 0), node_u(i_node2, 1), node_u(i_node2, 2)};
auto u2 = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{u2_data.data()};
auto u2 = View_3{u2_data.data()};

// Rotation control
auto RC_data = Kokkos::Array<double, 4>{1., 0., 0., 0};
auto RC = Kokkos::View<double[4], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{RC_data.data()};
auto RC = Kokkos::View<double[4]>{RC_data.data()};
auto RCt_data = Kokkos::Array<double, 4>{1., 0., 0., 0.};
auto RCt = Kokkos::View<double[4], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{RCt_data.data()};
auto RCt = Kokkos::View<double[4]>{RCt_data.data()};
auto RV_data = Kokkos::Array<double, 3>{};
auto RV = Kokkos::View<double[4], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{RV_data.data()};
auto RV = Kokkos::View<double[4]>{RV_data.data()};

auto R1t_data = Kokkos::Array<double, 4>{};
auto R1t = Kokkos::View<double[4], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{R1t_data.data()};
auto R1t = Kokkos::View<double[4]>{R1t_data.data()};

auto R2t_data = Kokkos::Array<double, 4>{};
auto R2t = Kokkos::View<double[4], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{R2t_data.data()};
auto R2t = Kokkos::View<double[4]>{R2t_data.data()};

auto R1_X0_data = Kokkos::Array<double, 3>{};
auto R1_X0 =
Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{R1_X0_data.data()};
auto R1_X0 = Kokkos::View<double[4]>{R1_X0_data.data()};

auto R2_R1t_data = Kokkos::Array<double, 4>{};
auto R2_R1t =
Kokkos::View<double[4], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{R2_R1t_data.data()};
auto R2_R1t = Kokkos::View<double[4]>{R2_R1t_data.data()};

auto R2_RCt_data = Kokkos::Array<double, 4>{};
auto R2_RCt =
Kokkos::View<double[4], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{R2_RCt_data.data()};
auto R2_RCt = Kokkos::View<double[4]>{R2_RCt_data.data()};

auto R1_R2t_data = Kokkos::Array<double, 4>{};
auto R1_R2t =
Kokkos::View<double[4], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{R1_R2t_data.data()};
auto R1_R2t = Kokkos::View<double[4]>{R1_R2t_data.data()};

auto R2_RCt_R1t_data = Kokkos::Array<double, 4>{};
auto R2_RCt_R1t =
Kokkos::View<double[4], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{R2_RCt_R1t_data.data()};
auto R2_RCt_R1t = Kokkos::View<double[4]>{R2_RCt_R1t_data.data()};

auto A_data = Kokkos::Array<double, 9>{};
auto A = Kokkos::View<double[3][3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{A_data.data()};
auto A = View_3x3{A_data.data()};

auto C_data = Kokkos::Array<double, 9>{};
auto C = Kokkos::View<double[3][3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{C_data.data()};
auto C = View_3x3{C_data.data()};

auto Ct_data = Kokkos::Array<double, 9>{};
auto Ct =
Kokkos::View<double[3][3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{Ct_data.data()};
auto Ct = View_3x3{Ct_data.data()};

auto V3_data = Kokkos::Array<double, 3>{};
auto V3 = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{V3_data.data()};
auto V3 = View_3{V3_data.data()};

// Cylindrical constraint data
auto x0_data = Kokkos::Array<double, 3>{cd.axis_x[0], cd.axis_x[1], cd.axis_x[2]};
auto y0_data = Kokkos::Array<double, 3>{cd.axis_y[0], cd.axis_y[1], cd.axis_y[2]};
auto z0_data = Kokkos::Array<double, 3>{cd.axis_z[0], cd.axis_z[1], cd.axis_z[2]};
auto x0 = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{x0_data.data()};
auto y0 = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{y0_data.data()};
auto z0 = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{z0_data.data()};
auto x0 = View_3{x0_data.data()};
auto y0 = View_3{y0_data.data()};
auto z0 = View_3{z0_data.data()};
auto x_data = Kokkos::Array<double, 3>{};
auto y_data = Kokkos::Array<double, 3>{};
auto z_data = Kokkos::Array<double, 3>{};
auto x = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{x_data.data()};
auto y = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{y_data.data()};
auto z = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{z_data.data()};
auto x = View_3{x_data.data()};
auto y = View_3{y_data.data()};
auto z = View_3{z_data.data()};
auto xcy_data = Kokkos::Array<double, 3>{};
auto xcz_data = Kokkos::Array<double, 3>{};
auto xcy = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{xcy_data.data()};
auto xcz = Kokkos::View<double[3], Kokkos::MemoryTraits<Kokkos::Unmanaged>>{xcz_data.data()};
auto xcy = View_3{xcy_data.data()};
auto xcz = View_3{xcz_data.data()};

//----------------------------------------------------------------------
// Residual Vector
Expand Down Expand Up @@ -179,9 +173,10 @@ struct CalculateConstraintResidualGradient {
//---------------------------------

// Extract gradient block for target node of this constraint
auto i_col = i_node2 * kLieAlgebraComponents;
auto i_col = (i_node2 < i_node1 || i_node1 < 0) ? 0 : kLieAlgebraComponents;
auto B = Kokkos::subview(
B_, cd.row_range, Kokkos::make_pair(i_col, i_col + kLieAlgebraComponents)
gradient_terms, i_constraint, Kokkos::ALL,
Kokkos::make_pair(i_col, i_col + kLieAlgebraComponents)
);

// B(0:3,0:3) = I
Expand Down Expand Up @@ -222,9 +217,10 @@ struct CalculateConstraintResidualGradient {
}

// Extract gradient block for base node of this constraint
i_col = i_node1 * kLieAlgebraComponents;
i_col = (i_node1 < i_node2) ? 0 : kLieAlgebraComponents;
B = Kokkos::subview(
B_, cd.row_range, Kokkos::make_pair(i_col, i_col + kLieAlgebraComponents)
gradient_terms, i_constraint, Kokkos::ALL,
Kokkos::make_pair(i_col, i_col + kLieAlgebraComponents)
);

// B(0:3,0:3) = -I
Expand Down
6 changes: 3 additions & 3 deletions src/restruct_poc/solver/constraints.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ struct Constraints {
View_N control;
View_Nx7 u;
View_N Phi;
View_NxN B;
Kokkos::View<double* [6][12]> gradient_terms;

Constraints() {}
Constraints(const std::vector<Constraint>& constraints, int num_system_dofs)
Constraints(const std::vector<Constraint>& constraints)
: num(constraints.size()),
num_dofs(std::transform_reduce(
constraints.cbegin(), constraints.cend(), 0, std::plus{},
Expand All @@ -54,7 +54,7 @@ struct Constraints {
control("control", num),
u("u", num),
Phi("residual_vector", num_dofs),
B("gradient_matrix", num_dofs, num_system_dofs) {
gradient_terms("gradient_terms", num) {
// Create host mirror for constraint data
auto host_data = Kokkos::create_mirror(this->data);

Expand Down
38 changes: 38 additions & 0 deletions src/restruct_poc/solver/copy_constraints_to_sparse_matrix.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#pragma once

#include <KokkosSparse.hpp>
#include <Kokkos_Core.hpp>

namespace openturbine {
template <typename CrsMatrixType>
struct CopyConstraintsToSparseMatrix {
using RowDataType = typename CrsMatrixType::values_type::non_const_type;
using ColIdxType = typename CrsMatrixType::staticcrsgraph_type::entries_type::non_const_type;
Kokkos::View<Constraints::DeviceData*>::const_type data;
CrsMatrixType sparse;
Kokkos::View<const double* [6][12]> dense;

KOKKOS_FUNCTION
void operator()(Kokkos::TeamPolicy<>::member_type member) const {
auto i_constraint = member.league_rank();
auto& cd = data(i_constraint);
auto start_row = cd.row_range.first;
auto end_row = cd.row_range.second;
Kokkos::parallel_for(Kokkos::TeamThreadRange(member, start_row, end_row), [&](int i) {
auto row_number = i - start_row;
auto row = sparse.row(i);
auto row_map = sparse.graph.row_map;
auto cols = sparse.graph.entries;
auto row_data_data = Kokkos::Array<typename RowDataType::value_type, 12>{};
auto col_idx_data = Kokkos::Array<typename ColIdxType::value_type, 12>{};
auto row_data = RowDataType(row_data_data.data(), row.length);
auto col_idx = ColIdxType(col_idx_data.data(), row.length);
for (int entry = 0; entry < row.length; ++entry) {
col_idx(entry) = cols(row_map(i) + entry);
row_data(entry) = dense(i_constraint, row_number, entry);
}
sparse.replaceValues(i, col_idx.data(), row.length, row_data.data());
});
}
};
} // namespace openturbine
28 changes: 28 additions & 0 deletions src/restruct_poc/solver/copy_sparse_values_to_transpose.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#pragma once

#include <KokkosSparse.hpp>
#include <Kokkos_Core.hpp>

namespace openturbine {
template <typename CrsMatrixType>
struct CopySparseValuesToTranspose {
using RowMapType = typename CrsMatrixType::staticcrsgraph_type::row_map_type::non_const_type;
CrsMatrixType input;
RowMapType tmp;
CrsMatrixType transpose;

KOKKOS_FUNCTION
void operator()(Kokkos::TeamPolicy<>::member_type member) const {
int row_index = member.league_rank();
int col_begin = input.graph.row_map(row_index);
int col_end = input.graph.row_map(row_index + 1);
Kokkos::parallel_for(Kokkos::TeamThreadRange(member, col_begin, col_end), [&](int in_index) {
int col_index = input.graph.entries(in_index);
using atomic_incr_type = typename std::remove_reference<decltype(tmp(0))>::type;
int pos = Kokkos::atomic_fetch_add(&(tmp(col_index)), atomic_incr_type{1});
transpose.graph.entries(pos) = row_index;
transpose.values(pos) = input.values(in_index);
});
}
};
} // namespace openturbine
2 changes: 1 addition & 1 deletion src/restruct_poc/solver/solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ struct Solver {
conditioner(beta * h * h),
num_system_nodes(system_nodes.size()),
num_system_dofs(num_system_nodes * kLieAlgebraComponents),
constraints(constraints_, num_system_dofs),
constraints(constraints_),
num_dofs(num_system_dofs + constraints.num_dofs),
state(num_system_nodes, constraints.num_dofs, system_nodes),
T_dense("T dense", num_system_nodes),
Expand Down
8 changes: 0 additions & 8 deletions tests/unit_tests/restruct_poc/test_rotating_beam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,6 @@ TEST(RotatingBeamTest, TwoBeam) {
for (int i = 0; i < m; ++i) {
EXPECT_NEAR(Phi[i], Phi[i + m], 1.e-10);
}

// Check that B matrix is the same for both beams
auto B = kokkos_view_2D_to_vector(solver.constraints.B);
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
EXPECT_NEAR(B[i][j], B[i + m][j + n], 1.e-10);
}
}
}

TEST(RotatingBeamTest, ThreeBladeRotor) {
Expand Down
Loading

0 comments on commit e13f4dd

Please sign in to comment.