diff --git a/.github/workflows/ornl_ascent_mirror.yaml b/.github/workflows/ornl_ascent_mirror.yaml index 9e2cd805..e4cf5f2e 100644 --- a/.github/workflows/ornl_ascent_mirror.yaml +++ b/.github/workflows/ornl_ascent_mirror.yaml @@ -2,7 +2,7 @@ name: ORNL Ascent Mirror # triggers a github action everytime there is a push or mr on: - push: + #push: jobs: # To test on HPC resources we must first mirror the repo and then trigger a pipeline diff --git a/.github/workflows/ornl_crusher_mirror.yaml b/.github/workflows/ornl_crusher_mirror.yaml index 7886f25e..e5586e8c 100644 --- a/.github/workflows/ornl_crusher_mirror.yaml +++ b/.github/workflows/ornl_crusher_mirror.yaml @@ -2,11 +2,11 @@ name: ORNL Crusher Mirror # triggers a github action everytime there is a push or mr on: - pull_request: + #pull_request: push: branches: - - develop - - main + - never #develop + - ever #main jobs: # To test on HPC resources we must first mirror the repo and then trigger a pipeline diff --git a/CMakeLists.txt b/CMakeLists.txt index b0c96c90..1c6d6bb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,7 @@ option(RESOLVE_TEST_WITH_BSUB "Use `jsrun` instead of `mpirun` commands when run option(RESOLVE_USE_KLU "Use KLU, AMD and COLAMD libraries from SuiteSparse" ON) option(RESOLVE_USE_CUDA "Use CUDA language and SDK" OFF) option(RESOLVE_USE_HIP "Use HIP language and ROCm library" OFF) +option(RESOLVE_USE_PROFILING "Set profiling tracers in the code" OFF) option(RESOLVE_USE_GPU "Use GPU device for computations" OFF) mark_as_advanced(FORCE RESOLVE_USE_GPU) diff --git a/cmake/ReSolveFindHipLibraries.cmake b/cmake/ReSolveFindHipLibraries.cmake index b23d8021..e142fcfc 100644 --- a/cmake/ReSolveFindHipLibraries.cmake +++ b/cmake/ReSolveFindHipLibraries.cmake @@ -16,7 +16,10 @@ target_link_libraries(resolve_hip INTERFACE roc::rocsolver ) +# HIP/ROCm targets still don't have include directories set correctly +# We need this little hack for now :/ get_target_property(hip_includes 
hip::device INTERFACE_INCLUDE_DIRECTORIES) +message(STATUS "HIP include directories found at: ${hip_includes}") target_include_directories(resolve_hip INTERFACE $) diff --git a/docs/sphinx/developer_guide/profiling.rst b/docs/sphinx/developer_guide/profiling.rst index fcfd4686..0e1ca0fc 100644 --- a/docs/sphinx/developer_guide/profiling.rst +++ b/docs/sphinx/developer_guide/profiling.rst @@ -110,13 +110,10 @@ requires code to be instrumented using `ROC Tracer #include #include +#include using namespace ReSolve::constants; @@ -28,8 +29,8 @@ int main(int argc, char *argv[]) std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<getNumRows() << " x "<< A->getNumColumns() + << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() + << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); + RESOLVE_RANGE_POP("Matrix Read"); //Now convert to CSR. + RESOLVE_RANGE_PUSH("Convert to CSR"); if (i < 2) { A->updateFromCoo(A_coo, ReSolve::memory::HOST); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); @@ -117,10 +127,12 @@ int main(int argc, char *argv[]) A->updateFromCoo(A_coo, ReSolve::memory::DEVICE); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } + RESOLVE_RANGE_POP("Convert to CSR"); std::cout<<"COO to CSR completed. 
Expanded NNZ: "<< A->getNnzExpanded()<setup(A); matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = KLU->analyze(); @@ -153,7 +165,9 @@ int main(int argc, char *argv[]) GS->setup(A->getNumRows(), FGMRES->getRestart()); FGMRES->setup(A); } + RESOLVE_RANGE_POP("KLU"); } else { + RESOLVE_RANGE_PUSH("RocSolver"); //status = KLU->refactorize(); std::cout<<"Using ROCSOLVER RF"<refactorize(); @@ -177,10 +191,11 @@ int main(int argc, char *argv[]) matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::DEVICE); norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::DEVICE); norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::DEVICE); - std::cout << "\t Matrix inf norm: " << std::scientific << std::setprecision(16) << norm_A<<"\n" - << "\t Residual inf norm: " << norm_r <<"\n" - << "\t Solution inf norm: " << norm_x <<"\n" - << "\t Norm of scaled residuals: "<< norm_r / (norm_A * norm_x) << "\n"; + std::cout << std::scientific << std::setprecision(16) + << "\t Matrix inf norm: " << norm_A << "\n" + << "\t Residual inf norm: " << norm_r << "\n" + << "\t Solution inf norm: " << norm_x << "\n" + << "\t Norm of scaled residuals: " << norm_r / (norm_A * norm_x) << "\n"; vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); if(!std::isnan(rnrm) && !std::isinf(rnrm)) { @@ -193,9 +208,11 @@ int main(int argc, char *argv[]) << FGMRES->getFinalResidualNorm()/norm_b << " iter: " << FGMRES->getNumIter() << "\n"; } + RESOLVE_RANGE_POP("RocSolver"); } } // for (int i = 0; i < numSystems; ++i) + RESOLVE_RANGE_POP(__FUNCTION__); delete A; delete A_coo; diff --git a/resolve/CMakeLists.txt b/resolve/CMakeLists.txt index 948db788..f5d23a3b 100644 --- a/resolve/CMakeLists.txt +++ b/resolve/CMakeLists.txt @@ -135,10 +135,26 @@ target_include_directories(ReSolve INTERFACE $ ) -# TODO: Make this PRIVATE dependency (requires refactoring ReSolve code) target_link_libraries(ReSolve PUBLIC ${ReSolve_Targets_List}) target_link_libraries(ReSolve PRIVATE 
resolve_version) +if(RESOLVE_USE_PROFILING) + if(RESOLVE_USE_HIP) + # Roctracer does not provide CMake target, so we use this hack here. + # The assumption is roctracer lib and headers are installed at the same + # place as the rest of ROCm. + target_link_libraries(ReSolve PUBLIC "-lroctracer64 -lroctx64") + elseif(RESOLVE_USE_CUDA) + # Nothing to do for CUDA profiling for now. + message(NOTICE "Profiling support enabled, but Re::Solve does not create tracer annotations for CUDA.") + message(NOTICE "This profiling support option will have no effect.") + else() + # Noting to do for profiling on the host for now. + message(NOTICE "Profiling support enabled, but Re::Solve does not create tracer annotations for host code.") + message(NOTICE "This profiling support option will have no effect.") + endif() +endif(RESOLVE_USE_PROFILING) + # Install targets install(TARGETS ReSolve EXPORT ReSolveTargets diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index 2b28c29c..1fbc8775 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -2,6 +2,7 @@ #include #include "LinSolverDirectRocSolverRf.hpp" #include +#include namespace ReSolve { @@ -31,6 +32,7 @@ namespace ReSolve index_type* Q, vector_type* rhs) { + RESOLVE_RANGE_PUSH(__FUNCTION__); //remember - P and Q are generally CPU variables int error_sum = 0; this->A_ = (matrix::Csr*) A; @@ -127,7 +129,7 @@ namespace ReSolve n, L_csr_->getNnz(), descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + L_csr_->getValues(ReSolve::memory::DEVICE), L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, @@ -140,7 +142,7 @@ namespace ReSolve n, U_csr_->getNnz(), descr_U_, - U_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + U_csr_->getValues(ReSolve::memory::DEVICE), U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, @@ -153,7 +155,7 @@ namespace 
ReSolve n, L_csr_->getNnz(), descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + L_csr_->getValues(ReSolve::memory::DEVICE), L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, @@ -185,11 +187,13 @@ namespace ReSolve } } + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } int LinSolverDirectRocSolverRf::refactorize() { + RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; mem_.deviceSynchronize(); status_rocblas_ = rocsolver_dcsrrf_refactlu(workspace_->getRocblasHandle(), @@ -228,13 +232,14 @@ namespace ReSolve error_sum += status_rocblas_; } - + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } // solution is returned in RHS int LinSolverDirectRocSolverRf::solve(vector_type* rhs) { + RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; if (solve_mode_ == 0) { mem_.deviceSynchronize(); @@ -290,11 +295,13 @@ namespace ReSolve permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,rhs->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } int LinSolverDirectRocSolverRf::solve(vector_type* rhs, vector_type* x) { + RESOLVE_RANGE_PUSH(__FUNCTION__); x->update(rhs->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::DEVICE); x->setDataUpdated(ReSolve::memory::DEVICE); int error_sum = 0; @@ -355,6 +362,7 @@ namespace ReSolve permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,x->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } @@ -427,6 +435,5 @@ namespace ReSolve Mshifts[static_cast(row)]++; } } - //Mshifts.~vector(); - } -}// namespace resolve + } // LinSolverDirectRocSolverRf::addFactors +} // namespace resolve diff --git a/resolve/Profiling.hpp b/resolve/Profiling.hpp new file mode 100644 index 00000000..bfe0d357 --- /dev/null +++ b/resolve/Profiling.hpp @@ -0,0 +1,18 @@ +#pragma once + +#ifdef RESOLVE_USE_PROFILING + +#ifdef RESOLVE_USE_HIP +#include +#define 
RESOLVE_RANGE_PUSH(x) roctxRangePush(x) +#define RESOLVE_RANGE_POP(x) do { roctxRangePop(); roctxMarkA(x); } while (0) +#else // Profiling requested, but no tracer backend exists for this build: annotations are no-ops +#define RESOLVE_RANGE_PUSH(x) +#define RESOLVE_RANGE_POP(x) +#endif // RESOLVE_USE_HIP + +#else // Not using profiling: annotations are no-ops +#define RESOLVE_RANGE_PUSH(x) +#define RESOLVE_RANGE_POP(x) + +#endif // RESOLVE_USE_PROFILING diff --git a/resolve/resolve_defs.hpp.in b/resolve/resolve_defs.hpp.in index 49a39203..e2a6bb46 100644 --- a/resolve/resolve_defs.hpp.in +++ b/resolve/resolve_defs.hpp.in @@ -10,6 +10,7 @@ #cmakedefine RESOLVE_USE_RAJA #cmakedefine RESOLVE_USE_EIGEN #cmakedefine RESOLVE_USE_KLU +#cmakedefine RESOLVE_USE_PROFILING #define RESOLVE_VERSION "@PROJECT_VERSION@" #define RESOLVE_VERSION_MAJOR "@PROJECT_VERSION_MAJOR@"