Skip to content

Commit

Permalink
Add tracer annotations to HIP code in ReSolve (#134)
Browse files Browse the repository at this point in the history
* Adding roctx profiling to rocSolver refactorization.

* Abstract profiling tracers.

* Update CMake to build code with profiling annotations.


---------

Co-authored-by: Paul Mullowney <Paul.Mullowney@amd.com>
  • Loading branch information
pelesh and PaulMullowney authored Jan 23, 2024
1 parent ec792e3 commit 90f5ff8
Show file tree
Hide file tree
Showing 10 changed files with 88 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ornl_ascent_mirror.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: ORNL Ascent Mirror

# triggers a github action everytime there is a push or mr
on:
push:
#push:

jobs:
# To test on HPC resources we must first mirror the repo and then trigger a pipeline
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/ornl_crusher_mirror.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ name: ORNL Crusher Mirror

# triggers a github action everytime there is a push or mr
on:
pull_request:
#pull_request:
push:
branches:
- develop
- main
- never #develop
- ever #main

jobs:
# To test on HPC resources we must first mirror the repo and then trigger a pipeline
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ option(RESOLVE_TEST_WITH_BSUB "Use `jsrun` instead of `mpirun` commands when run
option(RESOLVE_USE_KLU "Use KLU, AMD and COLAMD libraries from SuiteSparse" ON)
option(RESOLVE_USE_CUDA "Use CUDA language and SDK" OFF)
option(RESOLVE_USE_HIP "Use HIP language and ROCm library" OFF)
option(RESOLVE_USE_PROFILING "Set profiling tracers in the code" OFF)

option(RESOLVE_USE_GPU "Use GPU device for computations" OFF)
mark_as_advanced(FORCE RESOLVE_USE_GPU)
Expand Down
3 changes: 3 additions & 0 deletions cmake/ReSolveFindHipLibraries.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ target_link_libraries(resolve_hip INTERFACE
roc::rocsolver
)

# HIP/ROCm targets still don't have include directories set correctly
# We need this little hack for now :/
get_target_property(hip_includes hip::device INTERFACE_INCLUDE_DIRECTORIES)
message(STATUS "HIP include directories found at: ${hip_includes}")

target_include_directories(resolve_hip INTERFACE
$<BUILD_INTERFACE:${hip_includes}>)
Expand Down
11 changes: 4 additions & 7 deletions docs/sphinx/developer_guide/profiling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,10 @@ requires code to be instrumented using `ROC Tracer <https://rocm.docs.amd.com/pr
library. Both, ROCProfiler and ROC Tracer are part of the ROCm library, so
no additional software needs to be installed once you obtain ROCm.

To build your instrumented code, you need to link your Re::Solve build to
ROC Tracer library:

.. code:: cmake
target_include_directories(ReSolve SYSTEM PUBLIC ${HIP_PATH}/roctracer/include ${HIP_PATH}/include )
target_link_libraries(ReSolve PUBLIC "-L${HIP_PATH}/roctracer/lib -lroctracer64" "-L${HIP_PATH}/roctracer/lib -lroctx64" )
First, you need to make sure your Re::Solve library was built with ROC Tracer
support, i.e. the build was configured with CMake boolean flag
``RESOLVE_USE_PROFILING`` set to ``On``. Note that ROC Tracer annotation will
be enabled only if Re::Solve is built with HIP support.

Next, you need to annotate events you want to trace in your code execution.
This could be done in a straightforward manner using ROC Tracer push and pop
Expand Down
35 changes: 26 additions & 9 deletions examples/r_KLU_rocSolverRf_FGMRES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <resolve/LinSolverDirectRocSolverRf.hpp>
#include <resolve/LinSolverIterativeFGMRES.hpp>
#include <resolve/workspace/LinAlgWorkspace.hpp>
#include <resolve/Profiling.hpp>

using namespace ReSolve::constants;

Expand All @@ -28,8 +29,8 @@ int main(int argc, char *argv[])
std::string rhsFileName = argv[2];

index_type numSystems = atoi(argv[3]);
std::cout<<"Family mtx file name: "<< matrixFileName << ", total number of matrices: "<<numSystems<<std::endl;
std::cout<<"Family rhs file name: "<< rhsFileName << ", total number of RHSes: " << numSystems<<std::endl;
std::cout << "Family mtx file name: " << matrixFileName << ", total number of matrices: " << numSystems << std::endl;
std::cout << "Family rhs file name: " << rhsFileName << ", total number of RHSes: " << numSystems << std::endl;

std::string fileId;
std::string rhsId;
Expand All @@ -48,15 +49,19 @@ int main(int argc, char *argv[])
vector_type* vec_rhs;
vector_type* vec_x;
vector_type* vec_r;
real_type norm_A, norm_x, norm_r;//used for INF norm
real_type norm_A;
real_type norm_x;
real_type norm_r;

ReSolve::GramSchmidt* GS = new ReSolve::GramSchmidt(vector_handler, ReSolve::GramSchmidt::cgs2);
ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU;
ReSolve::LinSolverDirectRocSolverRf* Rf = new ReSolve::LinSolverDirectRocSolverRf(workspace_HIP);
ReSolve::LinSolverIterativeFGMRES* FGMRES = new ReSolve::LinSolverIterativeFGMRES(matrix_handler, vector_handler, GS);

RESOLVE_RANGE_PUSH(__FUNCTION__);
for (int i = 0; i < numSystems; ++i)
{
RESOLVE_RANGE_PUSH("Matrix Read");
index_type j = 4 + i * 2;
fileId = argv[j];
rhsId = argv[j + 1];
Expand Down Expand Up @@ -104,11 +109,16 @@ int main(int argc, char *argv[])
ReSolve::io::readAndUpdateMatrix(mat_file, A_coo);
ReSolve::io::readAndUpdateRhs(rhs_file, &rhs);
}
std::cout<<"Finished reading the matrix and rhs, size: "<<A->getNumRows()<<" x "<<A->getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<<A->symmetric()<< ", Expanded? "<<A->expanded()<<std::endl;
std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x "<< A->getNumColumns()
<< ", nnz: " << A->getNnz()
<< ", symmetric? " << A->symmetric()
<< ", Expanded? " << A->expanded() << std::endl;
mat_file.close();
rhs_file.close();
RESOLVE_RANGE_POP("Matrix Read");

//Now convert to CSR.
RESOLVE_RANGE_PUSH("Convert to CSR");
if (i < 2) {
A->updateFromCoo(A_coo, ReSolve::memory::HOST);
vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST);
Expand All @@ -117,10 +127,12 @@ int main(int argc, char *argv[])
A->updateFromCoo(A_coo, ReSolve::memory::DEVICE);
vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);
}
RESOLVE_RANGE_POP("Convert to CSR");
std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<<std::endl;
int status;
real_type norm_b;
if (i < 2){
if (i < 2) {
RESOLVE_RANGE_PUSH("KLU");
KLU->setup(A);
matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE);
status = KLU->analyze();
Expand Down Expand Up @@ -153,7 +165,9 @@ int main(int argc, char *argv[])
GS->setup(A->getNumRows(), FGMRES->getRestart());
FGMRES->setup(A);
}
RESOLVE_RANGE_POP("KLU");
} else {
RESOLVE_RANGE_PUSH("RocSolver");
//status = KLU->refactorize();
std::cout<<"Using ROCSOLVER RF"<<std::endl;
status = Rf->refactorize();
Expand All @@ -177,10 +191,11 @@ int main(int argc, char *argv[])
matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::DEVICE);
norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::DEVICE);
norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::DEVICE);
std::cout << "\t Matrix inf norm: " << std::scientific << std::setprecision(16) << norm_A<<"\n"
<< "\t Residual inf norm: " << norm_r <<"\n"
<< "\t Solution inf norm: " << norm_x <<"\n"
<< "\t Norm of scaled residuals: "<< norm_r / (norm_A * norm_x) << "\n";
std::cout << std::scientific << std::setprecision(16)
<< "\t Matrix inf norm: " << norm_A << "\n"
<< "\t Residual inf norm: " << norm_r << "\n"
<< "\t Solution inf norm: " << norm_x << "\n"
<< "\t Norm of scaled residuals: " << norm_r / (norm_A * norm_x) << "\n";

vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE);
if(!std::isnan(rnrm) && !std::isinf(rnrm)) {
Expand All @@ -193,9 +208,11 @@ int main(int argc, char *argv[])
<< FGMRES->getFinalResidualNorm()/norm_b
<< " iter: " << FGMRES->getNumIter() << "\n";
}
RESOLVE_RANGE_POP("RocSolver");
}

} // for (int i = 0; i < numSystems; ++i)
RESOLVE_RANGE_POP(__FUNCTION__);

delete A;
delete A_coo;
Expand Down
18 changes: 17 additions & 1 deletion resolve/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,26 @@ target_include_directories(ReSolve INTERFACE
$<INSTALL_INTERFACE:include>
)

# TODO: Make this PRIVATE dependency (requires refactoring ReSolve code)
target_link_libraries(ReSolve PUBLIC ${ReSolve_Targets_List})
target_link_libraries(ReSolve PRIVATE resolve_version)

if(RESOLVE_USE_PROFILING)
if(RESOLVE_USE_HIP)
# Roctracer does not provide CMake target, so we use this hack here.
# The assumption is roctracer lib and headers are installed at the same
# place as the rest of ROCm.
target_link_libraries(ReSolve PUBLIC "-lroctracer64 -lroctx64")
elseif(RESOLVE_USE_CUDA)
# Nothing to do for CUDA profiling for now.
message(NOTICE "Profiling support enabled, but Re::Solve does not create tracer annotations for CUDA.")
message(NOTICE "This profiling support option will have no effect.")
else()
# Noting to do for profiling on the host for now.
message(NOTICE "Profiling support enabled, but Re::Solve does not create tracer annotations for host code.")
message(NOTICE "This profiling support option will have no effect.")
endif()
endif(RESOLVE_USE_PROFILING)

# Install targets
install(TARGETS ReSolve
EXPORT ReSolveTargets
Expand Down
21 changes: 14 additions & 7 deletions resolve/LinSolverDirectRocSolverRf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <resolve/matrix/Csr.hpp>
#include "LinSolverDirectRocSolverRf.hpp"
#include <resolve/hip/hipKernels.h>
#include <resolve/Profiling.hpp>

namespace ReSolve
{
Expand Down Expand Up @@ -31,6 +32,7 @@ namespace ReSolve
index_type* Q,
vector_type* rhs)
{
RESOLVE_RANGE_PUSH(__FUNCTION__);
//remember - P and Q are generally CPU variables
int error_sum = 0;
this->A_ = (matrix::Csr*) A;
Expand Down Expand Up @@ -127,7 +129,7 @@ namespace ReSolve
n,
L_csr_->getNnz(),
descr_L_,
L_csr_->getValues(ReSolve::memory::DEVICE), //vals_,
L_csr_->getValues(ReSolve::memory::DEVICE),
L_csr_->getRowData(ReSolve::memory::DEVICE),
L_csr_->getColData(ReSolve::memory::DEVICE),
info_L_,
Expand All @@ -140,7 +142,7 @@ namespace ReSolve
n,
U_csr_->getNnz(),
descr_U_,
U_csr_->getValues(ReSolve::memory::DEVICE), //vals_,
U_csr_->getValues(ReSolve::memory::DEVICE),
U_csr_->getRowData(ReSolve::memory::DEVICE),
U_csr_->getColData(ReSolve::memory::DEVICE),
info_U_,
Expand All @@ -153,7 +155,7 @@ namespace ReSolve
n,
L_csr_->getNnz(),
descr_L_,
L_csr_->getValues(ReSolve::memory::DEVICE), //vals_,
L_csr_->getValues(ReSolve::memory::DEVICE),
L_csr_->getRowData(ReSolve::memory::DEVICE),
L_csr_->getColData(ReSolve::memory::DEVICE),
info_L_,
Expand Down Expand Up @@ -185,11 +187,13 @@ namespace ReSolve
}

}
RESOLVE_RANGE_POP(__FUNCTION__);
return error_sum;
}

int LinSolverDirectRocSolverRf::refactorize()
{
RESOLVE_RANGE_PUSH(__FUNCTION__);
int error_sum = 0;
mem_.deviceSynchronize();
status_rocblas_ = rocsolver_dcsrrf_refactlu(workspace_->getRocblasHandle(),
Expand Down Expand Up @@ -228,13 +232,14 @@ namespace ReSolve
error_sum += status_rocblas_;

}

RESOLVE_RANGE_POP(__FUNCTION__);
return error_sum;
}

// solution is returned in RHS
int LinSolverDirectRocSolverRf::solve(vector_type* rhs)
{
RESOLVE_RANGE_PUSH(__FUNCTION__);
int error_sum = 0;
if (solve_mode_ == 0) {
mem_.deviceSynchronize();
Expand Down Expand Up @@ -290,11 +295,13 @@ namespace ReSolve
permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,rhs->getData(ReSolve::memory::DEVICE));
mem_.deviceSynchronize();
}
RESOLVE_RANGE_POP(__FUNCTION__);
return error_sum;
}

int LinSolverDirectRocSolverRf::solve(vector_type* rhs, vector_type* x)
{
RESOLVE_RANGE_PUSH(__FUNCTION__);
x->update(rhs->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::DEVICE);
x->setDataUpdated(ReSolve::memory::DEVICE);
int error_sum = 0;
Expand Down Expand Up @@ -355,6 +362,7 @@ namespace ReSolve
permuteVectorQ(A_->getNumRows(), d_Q_,d_aux1_,x->getData(ReSolve::memory::DEVICE));
mem_.deviceSynchronize();
}
RESOLVE_RANGE_POP(__FUNCTION__);
return error_sum;
}

Expand Down Expand Up @@ -427,6 +435,5 @@ namespace ReSolve
Mshifts[static_cast<size_t>(row)]++;
}
}
//Mshifts.~vector();
}
}// namespace resolve
} // LinSolverDirectRocSolverRf::addFactors
} // namespace resolve
18 changes: 18 additions & 0 deletions resolve/Profiling.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#pragma once

#ifdef RESOLVE_USE_PROFILING

#ifdef RESOLVE_USE_HIP
#include <roctracer/roctx.h>
#define RESOLVE_RANGE_PUSH(x) roctxRangePush(x)
#define RESOLVE_RANGE_POP(x) roctxRangePop(); \
roctxMarkA(x)
#endif

#else

// Not using profiling
#define RESOLVE_RANGE_PUSH(x)
#define RESOLVE_RANGE_POP(x)

#endif // RESOLVE_USE_PROFILING
1 change: 1 addition & 0 deletions resolve/resolve_defs.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#cmakedefine RESOLVE_USE_RAJA
#cmakedefine RESOLVE_USE_EIGEN
#cmakedefine RESOLVE_USE_KLU
#cmakedefine RESOLVE_USE_PROFILING
#define RESOLVE_VERSION "@PROJECT_VERSION@"

#define RESOLVE_VERSION_MAJOR "@PROJECT_VERSION_MAJOR@"
Expand Down

0 comments on commit 90f5ff8

Please sign in to comment.