Skip to content

Commit

Permalink
Add algorithm choice for triangular solvers
Browse files Browse the repository at this point in the history
  • Loading branch information
fritzgoebel committed Jul 27, 2022
1 parent fb1672d commit 0727a35
Show file tree
Hide file tree
Showing 19 changed files with 90 additions and 42 deletions.
3 changes: 2 additions & 1 deletion core/solver/lower_trs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ void LowerTrs<ValueType, IndexType>::generate()
if (this->get_system_matrix()) {
this->get_executor()->run(lower_trs::make_generate(
this->get_system_matrix().get(), this->solve_struct_,
this->get_parameters().unit_diagonal, parameters_.num_rhs));
this->get_parameters().unit_diagonal, parameters_.algorithm,
parameters_.num_rhs));
}
}

Expand Down
11 changes: 6 additions & 5 deletions core/solver/lower_trs_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,12 @@ namespace lower_trs {
bool& do_transpose)


#define GKO_DECLARE_LOWER_TRS_GENERATE_KERNEL(_vtype, _itype) \
void generate(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<_vtype, _itype>* matrix, \
std::shared_ptr<solver::SolveStruct>& solve_struct, \
bool unit_diag, const gko::size_type num_rhs)
#define GKO_DECLARE_LOWER_TRS_GENERATE_KERNEL(_vtype, _itype) \
void generate(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<_vtype, _itype>* matrix, \
std::shared_ptr<solver::SolveStruct>& solve_struct, \
bool unit_diag, const solver::trisolve_algorithm algorithm, \
const gko::size_type num_rhs)


#define GKO_DECLARE_LOWER_TRS_SOLVE_KERNEL(_vtype, _itype) \
Expand Down
3 changes: 2 additions & 1 deletion core/solver/upper_trs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ void UpperTrs<ValueType, IndexType>::generate()
if (this->get_system_matrix()) {
this->get_executor()->run(upper_trs::make_generate(
this->get_system_matrix().get(), this->solve_struct_,
this->get_parameters().unit_diagonal, parameters_.num_rhs));
this->get_parameters().unit_diagonal, parameters_.algorithm,
parameters_.num_rhs));
}
}

Expand Down
11 changes: 6 additions & 5 deletions core/solver/upper_trs_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,12 @@ namespace upper_trs {
bool& do_transpose)


#define GKO_DECLARE_UPPER_TRS_GENERATE_KERNEL(_vtype, _itype) \
void generate(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<_vtype, _itype>* matrix, \
std::shared_ptr<gko::solver::SolveStruct>& solve_struct, \
bool unit_diag, const gko::size_type num_rhs)
#define GKO_DECLARE_UPPER_TRS_GENERATE_KERNEL(_vtype, _itype) \
void generate(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<_vtype, _itype>* matrix, \
std::shared_ptr<gko::solver::SolveStruct>& solve_struct, \
bool unit_diag, const solver::trisolve_algorithm algorithm, \
const gko::size_type num_rhs)


#define GKO_DECLARE_UPPER_TRS_SOLVE_KERNEL(_vtype, _itype) \
Expand Down
5 changes: 3 additions & 2 deletions cuda/solver/lower_trs_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,10 @@ template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType>* matrix,
std::shared_ptr<solver::SolveStruct>& solve_struct,
bool unit_diag, const gko::size_type num_rhs)
bool unit_diag, const solver::trisolve_algorithm algorithm,
const gko::size_type num_rhs)
{
if (matrix->get_strategy()->get_name() == "sparselib") {
if (algorithm == solver::trisolve_algorithm::sparselib) {
generate_kernel<ValueType, IndexType>(exec, matrix, solve_struct,
num_rhs, false, unit_diag);
}
Expand Down
5 changes: 3 additions & 2 deletions cuda/solver/upper_trs_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,10 @@ template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const CudaExecutor> exec,
const matrix::Csr<ValueType, IndexType>* matrix,
std::shared_ptr<solver::SolveStruct>& solve_struct,
bool unit_diag, const gko::size_type num_rhs)
bool unit_diag, const solver::trisolve_algorithm algorithm,
const gko::size_type num_rhs)
{
if (matrix->get_strategy()->get_name() == "sparselib") {
if (algorithm == solver::trisolve_algorithm::sparselib) {
generate_kernel<ValueType, IndexType>(exec, matrix, solve_struct,
num_rhs, true, unit_diag);
}
Expand Down
28 changes: 16 additions & 12 deletions cuda/test/solver/lower_trs_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,14 @@ TEST_F(LowerTrs, CudaLowerTrsFlagCheckIsCorrect)
}


TEST_F(LowerTrs, CudaSingleRhsApplyClassicalIsEquivalentToRef)
TEST_F(LowerTrs, CudaSingleRhsApplySparselibIsEquivalentToRef)
{
initialize_data(50, 1);
auto lower_trs_factory = gko::solver::LowerTrs<>::build().on(ref);
auto d_lower_trs_factory = gko::solver::LowerTrs<>::build().on(cuda);
d_csr_mtx->set_strategy(std::make_shared<CsrMtx::classical>());
auto d_lower_trs_factory =
gko::solver::LowerTrs<>::build()
.with_algorithm(gko::solver::trisolve_algorithm::sparselib)
.on(cuda);
auto solver = lower_trs_factory->generate(csr_mtx);
auto d_solver = d_lower_trs_factory->generate(d_csr_mtx);

Expand All @@ -164,19 +166,27 @@ TEST_F(LowerTrs, CudaSingleRhsApplyIsEquivalentToRef)
}


TEST_F(LowerTrs, CudaMultipleRhsApplyClassicalIsEquivalentToRef)
TEST_F(LowerTrs, CudaMultipleRhsApplySparselibIsEquivalentToRef)
{
initialize_data(50, 3);
auto lower_trs_factory =
gko::solver::LowerTrs<>::build().with_num_rhs(3u).on(ref);
auto d_lower_trs_factory =
gko::solver::LowerTrs<>::build().with_num_rhs(3u).on(cuda);
d_csr_mtx->set_strategy(std::make_shared<CsrMtx::classical>());
gko::solver::LowerTrs<>::build()
.with_algorithm(gko::solver::trisolve_algorithm::sparselib)
.with_num_rhs(3u)
.on(cuda);
auto solver = lower_trs_factory->generate(csr_mtx);
auto d_solver = d_lower_trs_factory->generate(d_csr_mtx);
auto db2_strided = Mtx::create(cuda, b->get_size(), 4);
d_b2->convert_to(db2_strided.get());
// The cuSPARSE Generic SpSM implementation uses the wrong stride here,
// so the input and output strides need to match
#if CUDA_VERSION >= 11031
auto dx_strided = Mtx::create(cuda, x->get_size(), 4);
#else
auto dx_strided = Mtx::create(cuda, x->get_size(), 5);
#endif

solver->apply(b2.get(), x.get());
d_solver->apply(db2_strided.get(), dx_strided.get());
Expand All @@ -196,13 +206,7 @@ TEST_F(LowerTrs, CudaMultipleRhsApplyIsEquivalentToRef)
auto d_solver = d_lower_trs_factory->generate(d_csr_mtx);
auto db2_strided = Mtx::create(cuda, b->get_size(), 4);
d_b2->convert_to(db2_strided.get());
// The cuSPARSE Generic SpSM implementation uses the wrong stride here
// so the input and output stride need to match
#if CUDA_VERSION >= 11031
auto dx_strided = Mtx::create(cuda, x->get_size(), 4);
#else
auto dx_strided = Mtx::create(cuda, x->get_size(), 5);
#endif

solver->apply(b2.get(), x.get());
d_solver->apply(db2_strided.get(), dx_strided.get());
Expand Down
16 changes: 10 additions & 6 deletions cuda/test/solver/upper_trs_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,14 @@ TEST_F(UpperTrs, CudaUpperTrsFlagCheckIsCorrect)
}


TEST_F(UpperTrs, CudaSingleRhsApplyClassicalIsEquivalentToRef)
TEST_F(UpperTrs, CudaSingleRhsApplySparselibIsEquivalentToRef)
{
initialize_data(50, 1);
auto upper_trs_factory = gko::solver::UpperTrs<>::build().on(ref);
auto d_upper_trs_factory = gko::solver::UpperTrs<>::build().on(cuda);
d_csr_mtx->set_strategy(std::make_shared<CsrMtx::classical>());
auto d_upper_trs_factory =
gko::solver::UpperTrs<>::build()
.with_algorithm(gko::solver::trisolve_algorithm::sparselib)
.on(cuda);
auto solver = upper_trs_factory->generate(csr_mtx);
auto d_solver = d_upper_trs_factory->generate(d_csr_mtx);

Expand All @@ -164,14 +166,16 @@ TEST_F(UpperTrs, CudaSingleRhsApplyIsEquivalentToRef)
}


TEST_F(UpperTrs, CudaMultipleRhsApplyClassicalIsEquivalentToRef)
TEST_F(UpperTrs, CudaMultipleRhsApplySparselibIsEquivalentToRef)
{
initialize_data(50, 3);
auto upper_trs_factory =
gko::solver::UpperTrs<>::build().with_num_rhs(3u).on(ref);
auto d_upper_trs_factory =
gko::solver::UpperTrs<>::build().with_num_rhs(3u).on(cuda);
d_csr_mtx->set_strategy(std::make_shared<CsrMtx::classical>());
gko::solver::UpperTrs<>::build()
.with_algorithm(gko::solver::trisolve_algorithm::sparselib)
.with_num_rhs(3u)
.on(cuda);
auto solver = upper_trs_factory->generate(csr_mtx);
auto d_solver = d_upper_trs_factory->generate(d_csr_mtx);
auto db2_strided = Mtx::create(cuda, b->get_size(), 4);
Expand Down
3 changes: 2 additions & 1 deletion dpcpp/solver/lower_trs_kernels.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const DpcppExecutor> exec,
const matrix::Csr<ValueType, IndexType>* matrix,
std::shared_ptr<solver::SolveStruct>& solve_struct,
bool unit_diag, const gko::size_type num_rhs) GKO_NOT_IMPLEMENTED;
bool unit_diag, const solver::trisolve_algorithm algorithm,
const gko::size_type num_rhs) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_LOWER_TRS_GENERATE_KERNEL);
Expand Down
3 changes: 2 additions & 1 deletion dpcpp/solver/upper_trs_kernels.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const DpcppExecutor> exec,
const matrix::Csr<ValueType, IndexType>* matrix,
std::shared_ptr<solver::SolveStruct>& solve_struct,
bool unit_diag, const gko::size_type num_rhs) GKO_NOT_IMPLEMENTED;
bool unit_diag, const solver::trisolve_algorithm algorithm,
const gko::size_type num_rhs) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_UPPER_TRS_GENERATE_KERNEL);
Expand Down
3 changes: 2 additions & 1 deletion hip/solver/lower_trs_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const HipExecutor> exec,
const matrix::Csr<ValueType, IndexType>* matrix,
std::shared_ptr<solver::SolveStruct>& solve_struct,
bool unit_diag, const gko::size_type num_rhs)
bool unit_diag, const solver::trisolve_algorithm algorithm,
const gko::size_type num_rhs)
{
generate_kernel<ValueType, IndexType>(exec, matrix, solve_struct, num_rhs,
false, unit_diag);
Expand Down
3 changes: 2 additions & 1 deletion hip/solver/upper_trs_kernels.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const HipExecutor> exec,
const matrix::Csr<ValueType, IndexType>* matrix,
std::shared_ptr<solver::SolveStruct>& solve_struct,
bool unit_diag, const gko::size_type num_rhs)
bool unit_diag, const solver::trisolve_algorithm algorithm,
const gko::size_type num_rhs)
{
generate_kernel<ValueType, IndexType>(exec, matrix, solve_struct, num_rhs,
true, unit_diag);
Expand Down
9 changes: 9 additions & 0 deletions include/ginkgo/core/solver/lower_trs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ class LowerTrs : public EnableLinOp<LowerTrs<ValueType, IndexType>>,
* (false) or should it assume they are 1.0 (true)?
*/
bool GKO_FACTORY_PARAMETER_SCALAR(unit_diagonal, false);

/**
* Select the implementation to be used for the triangular
* solver. This only matters for the Cuda executor, where the
* choice is between the Ginkgo and the cuSPARSE
* implementations. The default is Ginkgo.
*/
trisolve_algorithm GKO_FACTORY_PARAMETER_SCALAR(
algorithm, trisolve_algorithm::ginkgo);
};
GKO_ENABLE_LIN_OP_FACTORY(LowerTrs, parameters, Factory);
GKO_ENABLE_BUILD_METHOD(Factory);
Expand Down
8 changes: 8 additions & 0 deletions include/ginkgo/core/solver/solver_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,14 @@ class EnablePreconditionedIterativeSolver
};


/**
* A helper for algorithm selection in the triangular solvers.
* It currently only matters for the Cuda executor, where there is
* a choice between the Ginkgo and cuSPARSE implementations.
*/
enum class trisolve_algorithm { sparselib, ginkgo };


} // namespace solver
} // namespace gko

Expand Down
9 changes: 9 additions & 0 deletions include/ginkgo/core/solver/upper_trs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ class UpperTrs : public EnableLinOp<UpperTrs<ValueType, IndexType>>,
* (false) or should it assume they are 1.0 (true)?
*/
bool GKO_FACTORY_PARAMETER_SCALAR(unit_diagonal, false);

/**
* Select the implementation to be used for the triangular
* solver. This only matters for the Cuda executor, where the
* choice is between the Ginkgo and the cuSPARSE
* implementations. The default is Ginkgo.
*/
trisolve_algorithm GKO_FACTORY_PARAMETER_SCALAR(
algorithm, trisolve_algorithm::ginkgo);
};
GKO_ENABLE_LIN_OP_FACTORY(UpperTrs, parameters, Factory);
GKO_ENABLE_BUILD_METHOD(Factory);
Expand Down
3 changes: 2 additions & 1 deletion omp/solver/lower_trs_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const OmpExecutor> exec,
const matrix::Csr<ValueType, IndexType>* matrix,
std::shared_ptr<solver::SolveStruct>& solve_struct,
bool unit_diag, const gko::size_type num_rhs)
bool unit_diag, const solver::trisolve_algorithm algorithm,
const gko::size_type num_rhs)
{
// This generate kernel is here to allow for a more sophisticated
// implementation as for other executors. This kernel would perform the
Expand Down
3 changes: 2 additions & 1 deletion omp/solver/upper_trs_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const OmpExecutor> exec,
const matrix::Csr<ValueType, IndexType>* matrix,
std::shared_ptr<solver::SolveStruct>& solve_struct,
bool unit_diag, const gko::size_type num_rhs)
bool unit_diag, const solver::trisolve_algorithm algorithm,
const gko::size_type num_rhs)
{
// This generate kernel is here to allow for a more sophisticated
// implementation as for other executors. This kernel would perform the
Expand Down
3 changes: 2 additions & 1 deletion reference/solver/lower_trs_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const ReferenceExecutor> exec,
const matrix::Csr<ValueType, IndexType>* matrix,
std::shared_ptr<solver::SolveStruct>& solve_struct,
bool unit_diag, const gko::size_type num_rhs)
bool unit_diag, const solver::trisolve_algorithm algorithm,
const gko::size_type num_rhs)
{
// This generate kernel is here to allow for a more sophisticated
// implementation as for other executors. This kernel would perform the
Expand Down
3 changes: 2 additions & 1 deletion reference/solver/upper_trs_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const ReferenceExecutor> exec,
const matrix::Csr<ValueType, IndexType>* matrix,
std::shared_ptr<solver::SolveStruct>& solve_struct,
bool unit_diag, const gko::size_type num_rhs)
bool unit_diag, const solver::trisolve_algorithm algorithm,
const gko::size_type num_rhs)
{
// This generate kernel is here to allow for a more sophisticated
// implementation as for other executors. This kernel would perform the
Expand Down

0 comments on commit 0727a35

Please sign in to comment.