From f511c16a26a005c69382310b46c8ea2a359eeb4c Mon Sep 17 00:00:00 2001 From: Paul Mullowney Date: Fri, 3 Nov 2023 14:16:45 -0400 Subject: [PATCH 1/6] Adding roctx profiling to rocSolver refactorization. --- examples/r_KLU_rocSolverRf_FGMRES.cpp | 35 ++++++++++++++++++++------ resolve/CMakeLists.txt | 2 ++ resolve/LinSolverDirectRocSolverRf.cpp | 25 ++++++++++++------ 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/examples/r_KLU_rocSolverRf_FGMRES.cpp b/examples/r_KLU_rocSolverRf_FGMRES.cpp index 49f8926c..f531a393 100644 --- a/examples/r_KLU_rocSolverRf_FGMRES.cpp +++ b/examples/r_KLU_rocSolverRf_FGMRES.cpp @@ -13,6 +13,7 @@ #include #include #include +#include using namespace ReSolve::constants; @@ -28,8 +29,8 @@ int main(int argc, char *argv[]) std::string rhsFileName = argv[2]; index_type numSystems = atoi(argv[3]); - std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<updateFromCoo(A_coo, ReSolve::memory::HOST); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); @@ -117,10 +125,13 @@ int main(int argc, char *argv[]) A->updateFromCoo(A_coo, ReSolve::memory::DEVICE); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } + roctxRangePop(); + roctxMarkA("Convert to CSR"); std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setup(A); matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = KLU->analyze(); @@ -153,7 +164,10 @@ int main(int argc, char *argv[]) GS->setup(A->getNumRows(), FGMRES->getRestart()); FGMRES->setup(A); } + roctxRangePop(); + roctxMarkA("KLU"); } else { + roctxRangePush("RocSolver"); //status = KLU->refactorize(); std::cout<<"Using ROCSOLVER RF"<refactorize(); @@ -177,10 +191,11 @@ int main(int argc, char *argv[]) matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::DEVICE); norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::DEVICE); norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::DEVICE); - std::cout << "\t Matrix inf norm: " << std::scientific << std::setprecision(16) << norm_A<<"\n" - << "\t Residual inf norm: " << norm_r <<"\n" - << "\t Solution inf norm: " << norm_x <<"\n" - << "\t Norm of scaled residuals: "<< norm_r / (norm_A * norm_x) << "\n"; + std::cout << std::scientific << std::setprecision(16) + << "\t Matrix inf norm: " << norm_A << "\n" + << "\t Residual inf norm: " << norm_r << "\n" + << "\t Solution inf norm: " << norm_x << "\n" + << "\t Norm of scaled residuals: " << norm_r / (norm_A * norm_x) << "\n"; vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); if(!std::isnan(rnrm) && !std::isinf(rnrm)) { @@ -193,9 +208,13 @@ int main(int argc, char *argv[]) << FGMRES->getFinalResidualNorm()/norm_b << " iter: " << FGMRES->getNumIter() << "\n"; } + roctxRangePop(); + roctxMarkA("RocSolver"); } } // for (int i = 0; i < numSystems; ++i) + roctxRangePop(); + roctxMarkA(__FUNCTION__); delete A; delete A_coo; diff --git a/resolve/CMakeLists.txt b/resolve/CMakeLists.txt index 948db788..2bd43794 100644 --- a/resolve/CMakeLists.txt +++ b/resolve/CMakeLists.txt @@ -136,6 +136,8 @@ target_include_directories(ReSolve INTERFACE ) # TODO: Make this PRIVATE dependency (requires refactoring ReSolve code) +target_include_directories(ReSolve SYSTEM PUBLIC ${HIP_PATH}/roctracer/include ${HIP_PATH}/include ) +target_link_libraries(ReSolve PUBLIC "-L${HIP_PATH}/roctracer/lib -lroctracer64" "-L${HIP_PATH}/roctracer/lib -lroctx64" ) target_link_libraries(ReSolve PUBLIC ${ReSolve_Targets_List}) target_link_libraries(ReSolve PRIVATE resolve_version) diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index 2b28c29c..b5633ba4 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -2,6 +2,7 @@ #include #include "LinSolverDirectRocSolverRf.hpp" #include +#include namespace ReSolve { @@ -31,6 +32,7 @@ namespace ReSolve index_type* Q, vector_type* rhs) { + roctxRangePush(__FUNCTION__); //remember - P and Q are generally CPU variables int error_sum = 0; this->A_ = (matrix::Csr*) A; @@ -127,7 +129,7 @@ namespace ReSolve n, L_csr_->getNnz(), descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + L_csr_->getValues(ReSolve::memory::DEVICE), L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, @@ -140,7 +142,7 @@ namespace ReSolve n, U_csr_->getNnz(), descr_U_, - U_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + U_csr_->getValues(ReSolve::memory::DEVICE), U_csr_->getRowData(ReSolve::memory::DEVICE), U_csr_->getColData(ReSolve::memory::DEVICE), info_U_, @@ -153,7 +155,7 @@ namespace ReSolve n, L_csr_->getNnz(), descr_L_, - L_csr_->getValues(ReSolve::memory::DEVICE), //vals_, + L_csr_->getValues(ReSolve::memory::DEVICE), L_csr_->getRowData(ReSolve::memory::DEVICE), L_csr_->getColData(ReSolve::memory::DEVICE), info_L_, @@ -185,11 +187,14 @@ namespace ReSolve } } + roctxRangePop(); + roctxMarkA(__FUNCTION__); return error_sum; } int LinSolverDirectRocSolverRf::refactorize() { + roctxRangePush(__FUNCTION__); int error_sum = 0; mem_.deviceSynchronize(); status_rocblas_ = rocsolver_dcsrrf_refactlu(workspace_->getRocblasHandle(), @@ -228,13 +233,15 @@ namespace ReSolve error_sum += status_rocblas_; } - + roctxRangePop(); + roctxMarkA(__FUNCTION__); return error_sum; } // solution is returned in RHS int LinSolverDirectRocSolverRf::solve(vector_type* rhs) { + roctxRangePush(__FUNCTION__); int error_sum = 0; if (solve_mode_ == 0) { mem_.deviceSynchronize(); @@ -290,11 +297,14 @@ namespace ReSolve permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,rhs->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } + roctxRangePop(); + roctxMarkA(__FUNCTION__); return error_sum; } int LinSolverDirectRocSolverRf::solve(vector_type* rhs, vector_type* x) { + roctxRangePush(__FUNCTION__); x->update(rhs->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::DEVICE); x->setDataUpdated(ReSolve::memory::DEVICE); int error_sum = 0; @@ -355,6 +365,8 @@ namespace ReSolve permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,x->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } + roctxRangePop(); + roctxMarkA(__FUNCTION__); return error_sum; } @@ -427,6 +439,5 @@ namespace ReSolve Mshifts[static_cast(row)]++; } } - //Mshifts.~vector(); - } -}// namespace resolve + } // LinSolverDirectRocSolverRf::addFactors +} // namespace resolve From b25c1a0f32ed7062ce5e44acb668a62bcf003b50 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Fri, 19 Jan 2024 16:41:10 -0500 Subject: [PATCH 2/6] Abstract profiling tracers. --- CMakeLists.txt | 1 + examples/r_KLU_rocSolverRf_FGMRES.cpp | 32 ++++++++++++-------------- resolve/LinSolverDirectRocSolverRf.cpp | 22 ++++++++---------- resolve/Profiling.hpp | 18 +++++++++++++++ resolve/resolve_defs.hpp.in | 1 + 5 files changed, 44 insertions(+), 30 deletions(-) create mode 100644 resolve/Profiling.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b0c96c90..1c6d6bb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,7 @@ option(RESOLVE_TEST_WITH_BSUB "Use `jsrun` instead of `mpirun` commands when run option(RESOLVE_USE_KLU "Use KLU, AMD and COLAMD libraries from SuiteSparse" ON) option(RESOLVE_USE_CUDA "Use CUDA language and SDK" OFF) option(RESOLVE_USE_HIP "Use HIP language and ROCm library" OFF) +option(RESOLVE_USE_PROFILING "Set profiling tracers in the code" OFF) option(RESOLVE_USE_GPU "Use GPU device for computations" OFF) mark_as_advanced(FORCE RESOLVE_USE_GPU) diff --git a/examples/r_KLU_rocSolverRf_FGMRES.cpp b/examples/r_KLU_rocSolverRf_FGMRES.cpp index f531a393..c70d54dc 100644 --- a/examples/r_KLU_rocSolverRf_FGMRES.cpp +++ b/examples/r_KLU_rocSolverRf_FGMRES.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; @@ -58,10 +58,10 @@ int main(int argc, char *argv[]) ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP); ReSolve::LinSolverIterativeFGMRES* FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS); - roctxRangePush(__FUNCTION__); + RESOLVE_RANGE_PUSH(__FUNCTION__); for (int i = 0; i < numSystems; ++i) { - roctxRangePush("Matrix Read"); + RESOLVE_RANGE_PUSH("Matrix Read"); index_type j = 4 + i * 2; fileId = argv[j]; rhsId = argv[j + 1]; @@ -109,14 +109,16 @@ int main(int argc, char *argv[]) ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } - std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<getNumRows() << " x "<< A->getNumColumns() + << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() + << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); - roctxRangePop(); - roctxMarkA("Matrix Read"); + RESOLVE_RANGE_POP("Matrix Read"); //Now convert to CSR. - roctxRangePush("Convert to CSR"); + RESOLVE_RANGE_PUSH("Convert to CSR"); if (i < 2) { A->updateFromCoo(A_coo, ReSolve::memory::HOST); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); @@ -125,13 +127,12 @@ int main(int argc, char *argv[]) A->updateFromCoo(A_coo, ReSolve::memory::DEVICE); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } - roctxRangePop(); - roctxMarkA("Convert to CSR"); + RESOLVE_RANGE_POP("Convert to CSR"); std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setup(A); matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = KLU->analyze(); @@ -164,10 +165,9 @@ int main(int argc, char *argv[]) GS->setup(A->getNumRows(), FGMRES->getRestart()); FGMRES->setup(A); } - roctxRangePop(); - roctxMarkA("KLU"); + RESOLVE_RANGE_POP("KLU"); } else { - roctxRangePush("RocSolver"); + RESOLVE_RANGE_PUSH("RocSolver"); //status = KLU->refactorize(); std::cout<<"Using ROCSOLVER RF"<refactorize(); @@ -208,13 +208,11 @@ int main(int argc, char *argv[]) << FGMRES->getFinalResidualNorm()/norm_b << " iter: " << FGMRES->getNumIter() << "\n"; } - roctxRangePop(); - roctxMarkA("RocSolver"); + RESOLVE_RANGE_POP("RocSolver"); } } // for (int i = 0; i < numSystems; ++i) - roctxRangePop(); - roctxMarkA(__FUNCTION__); + RESOLVE_RANGE_POP(__FUNCTION__); delete A; delete A_coo; diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index b5633ba4..54fb94b6 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -2,7 +2,7 @@ #include #include "LinSolverDirectRocSolverRf.hpp" #include -#include +#include namespace ReSolve { @@ -32,7 +32,7 @@ namespace ReSolve index_type* Q, vector_type* rhs) { - roctxRangePush(__FUNCTION__); + RESOLVE_RANGE_PUSH(__FUNCTION__); //remember - P and Q are generally CPU variables int error_sum = 0; this->A_ = (matrix::Csr*) A; @@ -187,14 +187,13 @@ namespace ReSolve } } - roctxRangePop(); - roctxMarkA(__FUNCTION__); + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } int LinSolverDirectRocSolverRf::refactorize() { - roctxRangePush(__FUNCTION__); + RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; mem_.deviceSynchronize(); status_rocblas_ = rocsolver_dcsrrf_refactlu(workspace_->getRocblasHandle(), @@ -233,15 +232,14 @@ namespace ReSolve error_sum += status_rocblas_; } - roctxRangePop(); - roctxMarkA(__FUNCTION__); + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } // solution is returned in RHS int LinSolverDirectRocSolverRf::solve(vector_type* rhs) { - roctxRangePush(__FUNCTION__); + RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; if (solve_mode_ == 0) { mem_.deviceSynchronize(); @@ -297,14 +295,13 @@ namespace ReSolve permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,rhs->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } - roctxRangePop(); - roctxMarkA(__FUNCTION__); + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } int LinSolverDirectRocSolverRf::solve(vector_type* rhs, vector_type* x) { - roctxRangePush(__FUNCTION__); + RESOLVE_RANGE_PUSH(__FUNCTION__); x->update(rhs->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::DEVICE); x->setDataUpdated(ReSolve::memory::DEVICE); int error_sum = 0; @@ -365,8 +362,7 @@ namespace ReSolve permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,x->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } - roctxRangePop(); - roctxMarkA(__FUNCTION__); + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } diff --git a/resolve/Profiling.hpp b/resolve/Profiling.hpp new file mode 100644 index 00000000..bfe0d357 --- /dev/null +++ b/resolve/Profiling.hpp @@ -0,0 +1,18 @@ +#pragma once + +#ifdef RESOLVE_USE_PROFILING + +#ifdef RESOLVE_USE_HIP +#include +#define RESOLVE_RANGE_PUSH(x) roctxRangePush(x) +#define RESOLVE_RANGE_POP(x) roctxRangePop(); \ + roctxMarkA(x) +#endif + +#else + +// Not using profiling +#define RESOLVE_RANGE_PUSH(x) +#define RESOLVE_RANGE_POP(x) + +#endif // RESOLVE_USE_PROFILING diff --git a/resolve/resolve_defs.hpp.in b/resolve/resolve_defs.hpp.in index 49a39203..e2a6bb46 100644 --- a/resolve/resolve_defs.hpp.in +++ b/resolve/resolve_defs.hpp.in @@ -10,6 +10,7 @@ #cmakedefine RESOLVE_USE_RAJA #cmakedefine RESOLVE_USE_EIGEN #cmakedefine RESOLVE_USE_KLU +#cmakedefine RESOLVE_USE_PROFILING #define RESOLVE_VERSION "@PROJECT_VERSION@" #define RESOLVE_VERSION_MAJOR "@PROJECT_VERSION_MAJOR@" From 8537b5e457af3c06ddfa8b8f95578bc411ef2565 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Mon, 22 Jan 2024 16:22:33 -0500 Subject: [PATCH 3/6] Update CMake to build code with profiling annotations. --- cmake/ReSolveFindHipLibraries.cmake | 3 +++ docs/sphinx/developer_guide/profiling.rst | 11 ++++------- resolve/CMakeLists.txt | 16 +++++++++++++--- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/cmake/ReSolveFindHipLibraries.cmake b/cmake/ReSolveFindHipLibraries.cmake index b23d8021..e142fcfc 100644 --- a/cmake/ReSolveFindHipLibraries.cmake +++ b/cmake/ReSolveFindHipLibraries.cmake @@ -16,7 +16,10 @@ target_link_libraries(resolve_hip INTERFACE roc::rocsolver ) +# HIP/ROCm targets still don't have include directories set correctly +# We need this little hack for now :/ get_target_property(hip_includes hip::device INTERFACE_INCLUDE_DIRECTORIES) +message(STATUS "HIP include directories found at: ${hip_includes}") target_include_directories(resolve_hip INTERFACE $) diff --git a/docs/sphinx/developer_guide/profiling.rst b/docs/sphinx/developer_guide/profiling.rst index fcfd4686..0e1ca0fc 100644 --- a/docs/sphinx/developer_guide/profiling.rst +++ b/docs/sphinx/developer_guide/profiling.rst @@ -110,13 +110,10 @@ requires code to be instrumented using `ROC Tracer ) -# TODO: Make this PRIVATE dependency (requires refactoring ReSolve code) -target_include_directories(ReSolve SYSTEM PUBLIC ${HIP_PATH}/roctracer/include ${HIP_PATH}/include ) -target_link_libraries(ReSolve PUBLIC "-L${HIP_PATH}/roctracer/lib -lroctracer64" "-L${HIP_PATH}/roctracer/lib -lroctx64" ) target_link_libraries(ReSolve PUBLIC ${ReSolve_Targets_List}) target_link_libraries(ReSolve PRIVATE resolve_version) +if(RESOLVE_USE_PROFILING) + if(RESOLVE_USE_HIP) + # Roctracer does not provide CMake target, so we use this hack here. + # The assumption is roctracer lib and headers are installed at the same + # place as the rest of ROCm. + target_link_libraries(ReSolve PUBLIC "-lroctracer64 -lroctx64") + elseif(RESOLVE_USE_CUDA) + # Nothing to do for CUDA profiling for now. + else() + # Noting to do for profiling on the host for now. + endif() +endif(RESOLVE_USE_PROFILING) + # Install targets install(TARGETS ReSolve EXPORT ReSolveTargets From d9d35da3c9f677ed2471e3f6983230c337330ec2 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Mon, 22 Jan 2024 20:45:53 -0500 Subject: [PATCH 4/6] Turn off nonfunctioning CI pipelines. --- .github/workflows/ornl_ascent_mirror.yaml | 2 +- .github/workflows/ornl_crusher_mirror.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ornl_ascent_mirror.yaml b/.github/workflows/ornl_ascent_mirror.yaml index 9e2cd805..e4cf5f2e 100644 --- a/.github/workflows/ornl_ascent_mirror.yaml +++ b/.github/workflows/ornl_ascent_mirror.yaml @@ -2,7 +2,7 @@ name: ORNL Ascent Mirror # triggers a github action everytime there is a push or mr on: - push: + #push: jobs: # To test on HPC resources we must first mirror the repo and then trigger a pipeline diff --git a/.github/workflows/ornl_crusher_mirror.yaml b/.github/workflows/ornl_crusher_mirror.yaml index 7886f25e..e5586e8c 100644 --- a/.github/workflows/ornl_crusher_mirror.yaml +++ b/.github/workflows/ornl_crusher_mirror.yaml @@ -2,11 +2,11 @@ name: ORNL Crusher Mirror # triggers a github action everytime there is a push or mr on: - pull_request: + #pull_request: push: branches: - - develop - - main + - never #develop + - ever #main jobs: # To test on HPC resources we must first mirror the repo and then trigger a pipeline From 9140c5a22baeea61bc75e898fc582e7d94ecdd27 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Mon, 22 Jan 2024 22:47:27 -0500 Subject: [PATCH 5/6] [skip ci] Fix indentation. --- examples/r_KLU_rocSolverRf_FGMRES.cpp | 12 ++++++------ resolve/LinSolverDirectRocSolverRf.cpp | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/examples/r_KLU_rocSolverRf_FGMRES.cpp b/examples/r_KLU_rocSolverRf_FGMRES.cpp index c70d54dc..192b178f 100644 --- a/examples/r_KLU_rocSolverRf_FGMRES.cpp +++ b/examples/r_KLU_rocSolverRf_FGMRES.cpp @@ -115,7 +115,7 @@ int main(int argc, char *argv[]) << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); - RESOLVE_RANGE_POP("Matrix Read"); + RESOLVE_RANGE_POP("Matrix Read"); //Now convert to CSR. RESOLVE_RANGE_PUSH("Convert to CSR"); @@ -127,12 +127,12 @@ int main(int argc, char *argv[]) A->updateFromCoo(A_coo, ReSolve::memory::DEVICE); vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } - RESOLVE_RANGE_POP("Convert to CSR"); + RESOLVE_RANGE_POP("Convert to CSR"); std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<setup(A); matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); status = KLU->analyze(); @@ -165,9 +165,9 @@ int main(int argc, char *argv[]) GS->setup(A->getNumRows(), FGMRES->getRestart()); FGMRES->setup(A); } - RESOLVE_RANGE_POP("KLU"); + RESOLVE_RANGE_POP("KLU"); } else { - RESOLVE_RANGE_PUSH("RocSolver"); + RESOLVE_RANGE_PUSH("RocSolver"); //status = KLU->refactorize(); std::cout<<"Using ROCSOLVER RF"<refactorize(); @@ -208,7 +208,7 @@ int main(int argc, char *argv[]) << FGMRES->getFinalResidualNorm()/norm_b << " iter: " << FGMRES->getNumIter() << "\n"; } - RESOLVE_RANGE_POP("RocSolver"); + RESOLVE_RANGE_POP("RocSolver"); } } // for (int i = 0; i < numSystems; ++i) diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index 54fb94b6..1fbc8775 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -32,7 +32,7 @@ namespace ReSolve index_type* Q, vector_type* rhs) { - RESOLVE_RANGE_PUSH(__FUNCTION__); + RESOLVE_RANGE_PUSH(__FUNCTION__); //remember - P and Q are generally CPU variables int error_sum = 0; this->A_ = (matrix::Csr*) A; @@ -187,13 +187,13 @@ namespace ReSolve } } - RESOLVE_RANGE_POP(__FUNCTION__); + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } int LinSolverDirectRocSolverRf::refactorize() { - RESOLVE_RANGE_PUSH(__FUNCTION__); + RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; mem_.deviceSynchronize(); status_rocblas_ = rocsolver_dcsrrf_refactlu(workspace_->getRocblasHandle(), @@ -232,14 +232,14 @@ namespace ReSolve error_sum += status_rocblas_; } - RESOLVE_RANGE_POP(__FUNCTION__); + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } // solution is returned in RHS int LinSolverDirectRocSolverRf::solve(vector_type* rhs) { - RESOLVE_RANGE_PUSH(__FUNCTION__); + RESOLVE_RANGE_PUSH(__FUNCTION__); int error_sum = 0; if (solve_mode_ == 0) { mem_.deviceSynchronize(); @@ -295,13 +295,13 @@ namespace ReSolve permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,rhs->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } - RESOLVE_RANGE_POP(__FUNCTION__); + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } int LinSolverDirectRocSolverRf::solve(vector_type* rhs, vector_type* x) { - RESOLVE_RANGE_PUSH(__FUNCTION__); + RESOLVE_RANGE_PUSH(__FUNCTION__); x->update(rhs->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::DEVICE); x->setDataUpdated(ReSolve::memory::DEVICE); int error_sum = 0; @@ -362,7 +362,7 @@ namespace ReSolve permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,x->getData(ReSolve::memory::DEVICE)); mem_.deviceSynchronize(); } - RESOLVE_RANGE_POP(__FUNCTION__); + RESOLVE_RANGE_POP(__FUNCTION__); return error_sum; } From 918a794cb541d5c1927403bdfe6b8a01413553a7 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Tue, 23 Jan 2024 13:29:23 -0500 Subject: [PATCH 6/6] Print warning when enabling profiling with CUDA. --- resolve/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/resolve/CMakeLists.txt b/resolve/CMakeLists.txt index cfcc4732..f5d23a3b 100644 --- a/resolve/CMakeLists.txt +++ b/resolve/CMakeLists.txt @@ -146,8 +146,12 @@ if(RESOLVE_USE_PROFILING) target_link_libraries(ReSolve PUBLIC "-lroctracer64 -lroctx64") elseif(RESOLVE_USE_CUDA) # Nothing to do for CUDA profiling for now. + message(NOTICE "Profiling support enabled, but Re::Solve does not create tracer annotations for CUDA.") + message(NOTICE "This profiling support option will have no effect.") else() # Noting to do for profiling on the host for now. + message(NOTICE "Profiling support enabled, but Re::Solve does not create tracer annotations for host code.") + message(NOTICE "This profiling support option will have no effect.") endif() endif(RESOLVE_USE_PROFILING)