Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates for CUDA versions >= 12 #87

Merged
merged 6 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ if(OpenMP_FOUND)
OPTION (USE_OpenMP "Use OpenMP" ON)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
if(ENABLE_CUDA)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler='${OpenMP_CXX_FLAGS}'")
endif()
endif()
# Slurm support
if(SLURM_FOUND)
Expand Down
8 changes: 6 additions & 2 deletions expui/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ set(common_INCLUDE $<INSTALL_INTERFACE:include>


if(ENABLE_CUDA)
list(APPEND common_INCLUDE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDAToolkit_INCLUDE_DIRS})
list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt)
list(APPEND common_LINKLIB CUDA::cudart)
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12)
list(APPEND common_LINKLIB CUDA::nvtx3)
else ()
list(APPEND common_LINKLIB CUDA::nvToolsExt)
endif ()
endif()

if(SLURM_FOUND)
Expand Down
8 changes: 6 additions & 2 deletions exputil/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,12 @@ set(common_LINKLIBS ${DEP_LIB} OpenMP::OpenMP_CXX MPI::MPI_CXX
${FFTW_DOUBLE_LIB})

if(ENABLE_CUDA)
list(APPEND common_INCLUDE_DIRS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDAToolkit_INCLUDE_DIRS})
list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt)
list(APPEND common_LINKLIB CUDA::cudart)
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12)
list(APPEND common_LINKLIB CUDA::nvtx3)
else ()
list(APPEND common_LINKLIB CUDA::nvToolsExt)
endif ()
endif()

if(ENABLE_XDR AND TIRPC_FOUND)
Expand Down
4 changes: 2 additions & 2 deletions include/BiorthCube.H
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <massmodel.H>
#include <yaml-cpp/yaml.h>

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
#include <cudaUtil.cuH>
#include <cudaMappingConstants.cuH>
#endif
Expand All @@ -28,7 +28,7 @@
#include <highfive/H5DataSpace.hpp>
#include <highfive/H5Attribute.hpp>

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
#include <cudaParticle.cuH>
#include <cudaMappingConstants.cuH>
#endif
Expand Down
6 changes: 3 additions & 3 deletions include/BiorthCyl.H
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <massmodel.H>
#include <yaml-cpp/yaml.h>

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
#include <cudaUtil.cuH>
#include <cudaMappingConstants.cuH>
#endif
Expand All @@ -28,7 +28,7 @@
#include <highfive/H5DataSpace.hpp>
#include <highfive/H5Attribute.hpp>

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
#include <cudaParticle.cuH>
#include <cudaMappingConstants.cuH>
#endif
Expand Down Expand Up @@ -172,7 +172,7 @@ public:
static std::map<std::string, std::string>
cacheInfo(const std::string& cachefile, bool verbose=true);

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
void initialize_cuda(std::vector<cudaArray_t>& cuArray,
thrust::host_vector<cudaTextureObject_t>& tex);

Expand Down
4 changes: 2 additions & 2 deletions include/EmpCylSL.H
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <SLGridMP2.H>
#include <coef.H>

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
#include <cudaParticle.cuH>
#include <cudaMappingConstants.cuH>
#endif
Expand Down Expand Up @@ -924,7 +924,7 @@ public:
//! Check orthogonality for basis (pyEXP style)
std::vector<Eigen::MatrixXd> orthoCheck();

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
cudaMappingConstants getCudaMappingConstants();

void initialize_cuda(std::vector<cudaArray_t>& cuArray,
Expand Down
6 changes: 3 additions & 3 deletions include/SLGridMP2.H
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include <libvars.H>
using namespace __EXP__;

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
#include <cudaUtil.cuH>
#include <cudaMappingConstants.cuH>
#endif
Expand Down Expand Up @@ -167,7 +167,7 @@ public:
//! produce matrices
std::vector<Eigen::MatrixXd> orthoCheck(int knots=40);

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
void initialize_cuda(std::vector<cudaArray_t>& cuArray,
thrust::host_vector<cudaTextureObject_t>& tex);

Expand Down Expand Up @@ -453,7 +453,7 @@ public:

//@}

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
void initialize_cuda(std::vector<cudaArray_t>& cuArray,
thrust::host_vector<cudaTextureObject_t>& tex);

Expand Down
7 changes: 6 additions & 1 deletion pyEXP/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@ set(common_INCLUDE $<INSTALL_INTERFACE:include>
${HDF5_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR})

if(ENABLE_CUDA)
list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt)
list(APPEND common_LINKLIB CUDA::cudart)
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12)
list(APPEND common_LINKLIB CUDA::nvtx3)
else ()
list(APPEND common_LINKLIB CUDA::nvToolsExt)
endif ()
endif()

if(SLURM_FOUND)
Expand Down
35 changes: 23 additions & 12 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,3 @@

set(CUDA_SRC)
if (ENABLE_CUDA)
list(APPEND CUDA_SRC cudaPolarBasis.cu cudaSphericalBasis.cu
cudaCylinder.cu cudaEmpCylSL.cu cudaComponent.cu NVTX.cc
cudaIncpos.cu cudaIncvel.cu cudaMultistep.cu cudaOrient.cu
cudaBiorthCyl.cu cudaCube.cu cudaSlabSL.cu)
endif()

set(exp_SOURCES Basis.cc Bessel.cc Component.cc
Cube.cc Cylinder.cc ExternalCollection.cc
ExternalForce.cc Orient.cc PotAccel.cc ScatterMFP.cc
Expand All @@ -19,9 +10,25 @@ set(exp_SOURCES Basis.cc Bessel.cc Component.cc
OutMulti.cc OutRelaxation.cc OrbTrace.cc OutDiag.cc OutLog.cc
OutVel.cc OutCoef.cc multistep.cc parse.cc SlabSL.cc step.cc
tidalField.cc ultra.cc ultrasphere.cc MPL.cc OutFrac.cc OutCalbr.cc
ParticleFerry.cc chkSlurm.c chkTimer.cc GravKernel.cc ${CUDA_SRC}
ParticleFerry.cc chkSlurm.c chkTimer.cc GravKernel.cc
CenterFile.cc PolarBasis.cc FlatDisk.cc signals.cc)

if (ENABLE_CUDA)
list(APPEND exp_SOURCES cudaPolarBasis.cu cudaSphericalBasis.cu
cudaCylinder.cu cudaEmpCylSL.cu cudaComponent.cu NVTX.cc
cudaIncpos.cu cudaIncvel.cu cudaMultistep.cu cudaOrient.cu
cudaBiorthCyl.cu cudaCube.cu cudaSlabSL.cu)

set_source_files_properties(Component.cc ComponentContainer.cc
Cube.cc Cylinder.cc ExternalForce.cc NVTX.cc OrbTrace.cc Orient.cc
OutAscii.cc OutCHKPT.cc OutCHKPTQ.cc OutCalbr.cc OutFrac.cc
OutLog.cc OutPS.cc OutPSN.cc OutPSP.cc OutPSQ.cc OutPSR.cc
OutputContainer.cc PolarBasis.cc PotAccel.cc SlabSL.cc Sphere.cc
SphericalBasis.cc begin.cc incpos.cc incvel.cc step.cc
PROPERTIES LANGUAGE CUDA)

endif()

set(common_INCLUDE_DIRS
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include/>
Expand All @@ -39,8 +46,12 @@ if(PNG_FOUND)
endif()

if(ENABLE_CUDA)
list(APPEND common_INCLUDE_DIRS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDAToolkit_INCLUDE_DIRS})
list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt)
list(APPEND common_LINKLIB CUDA::cudart)
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12)
list(APPEND common_LINKLIB CUDA::nvtx3)
else ()
list(APPEND common_LINKLIB CUDA::nvToolsExt)
endif ()
endif()

if(SLURM_FOUND)
Expand Down
8 changes: 4 additions & 4 deletions src/Component.H
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#include <config_exp.h>

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
#include <cudaParticle.cuH>
#endif

Expand Down Expand Up @@ -569,15 +569,15 @@ public:
//! Compute center of mass and center of velocity (CPU version)
void fix_positions_cpu(unsigned mlevel=0);

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
//! Compute center of mass and center of velocity (GPU version)
void fix_positions_cuda(unsigned mlevel=0);
#endif

//! Compute center of mass and center of velocity
void fix_positions(unsigned mlevel=0)
{
#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
if (use_cuda) fix_positions_cuda(mlevel);
else
#endif
Expand Down Expand Up @@ -926,7 +926,7 @@ public:
//! Compute level from minimum requested time step from last master step
inline bool DTreset() { return dtreset; }

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
//@{
//! CUDA utilities for handling host <===> device exchange

Expand Down
6 changes: 3 additions & 3 deletions src/Cube.H
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <Coefficients.H>
#include <PotAccel.H>

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
#include <thrust/complex.h>
#include <cudaParticle.cuH>
#include <cudaMappingConstants.cuH>
Expand Down Expand Up @@ -41,7 +41,7 @@ private:
//! Valid keys for YAML configurations
static const std::set<std::string> valid_keys;

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
virtual void determine_coefficients_cuda();
virtual void determine_acceleration_cuda();
virtual void multistep_update_cuda();
Expand Down Expand Up @@ -135,7 +135,7 @@ private:
std::cout << std::string(60, '=') << std::endl;
std::cout << "Time in CPU: "
<< duration0.count()-duration1.count() << std::endl;
#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
if (c->cC->cudaDevice>=0) {
std::cout << "Time in GPU: " << duration1.count() << std::endl;
}
Expand Down
2 changes: 1 addition & 1 deletion src/CylEXP.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <expand.H>
#include <EmpCylSL.H>

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
#include <cudaParticle.cuH>
#include <cudaMappingConstants.cuH>
#endif
Expand Down
15 changes: 3 additions & 12 deletions src/Cylinder.H
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,10 @@

#include <config_exp.h>

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
#include <cudaParticle.cuH>
#include <cudaMappingConstants.cuH>
#endif
#include <NVTX.H>

#include <CoefContainer.H>

Expand Down Expand Up @@ -155,15 +154,7 @@ private:
a complete set of coefficients for force evaluation at an
intermediate time step
*/
void compute_multistep_coefficients()
{
if (play_back and not play_cnew) return;

nvTracerPtr tPtr;
if (cuda_prof)
tPtr = nvTracerPtr(new nvTracer("Cylinder::compute_multistep_coefficients"));
ortho->compute_multistep_coefficients(mfirst[mstep]);
}
void compute_multistep_coefficients();

//! Reset used particle counter and mass registers
virtual void multistep_reset();
Expand All @@ -180,7 +171,7 @@ protected:
int sampT, defSampT;

//! CUDA method for coefficient accumulation
#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
virtual void determine_coefficients_cuda(bool compute_pca);
virtual void determine_acceleration_cuda();
virtual void multistep_update_cuda();
Expand Down
11 changes: 11 additions & 0 deletions src/Cylinder.cc
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a nice maintenance change, forced by the new CUDA compile strategy, but I think it also makes the codebase simpler.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. The goal here was to get the implementation out of the header where it required NVTX-specific structures.

Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <MixtureBasis.H>
#include <Timer.H>
#include <exputils.H>
#include <NVTX.H>

Timer timer_debug;

Expand Down Expand Up @@ -1659,6 +1660,16 @@ void Cylinder::multistep_update(int from, int to, Component* c, int i, int id)
#endif
}

// Ask the basis object (ortho) to compute the multistep coefficients,
// passing it mfirst[mstep].
//
// Nothing is done when play_back is set and play_cnew is not.
//
// When cuda_prof is set, an nvTracer labelled with this function's name
// is created and kept in a local until the function returns.
// NOTE(review): presumably this marks a profiling range for the call;
// confirm against NVTX.H.
void Cylinder::compute_multistep_coefficients()
{
  const bool skipForPlayback = play_back && !play_cnew;
  if (skipForPlayback) {
    return;
  }

  // Declared before the work below so it stays alive across the call
  nvTracerPtr tPtr;
  if (cuda_prof) {
    tPtr = nvTracerPtr(new nvTracer("Cylinder::compute_multistep_coefficients"));
  }

  ortho->compute_multistep_coefficients(mfirst[mstep]);
}


void Cylinder::multistep_reset()
{
Expand Down
1 change: 1 addition & 0 deletions src/ExternalForce.H
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume this is still for testing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like cruft to me.

Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public:
//! Finish and clean-up (caching data necessary for restart)
virtual void finish() {}

// #if HAVE_LIBCUDA==1 && defined (__NVCC__)
#if HAVE_LIBCUDA==1
//! Copy particles from device for non-cuda forces
void getParticlesCuda(Component *c);
Expand Down
4 changes: 2 additions & 2 deletions src/FlatDisk.H
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

typedef std::shared_ptr<BiorthCyl> CylPtr;

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
#include <cudaUtil.cuH>
#endif

Expand Down Expand Up @@ -61,7 +61,7 @@ private:

virtual double getRtable() { return ortho->getRtable(); }

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)
virtual void initialize_cuda()
{
sampT = floor(sqrt(component->CurTotal()));
Expand Down
6 changes: 5 additions & 1 deletion src/NVTX.H
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@

#include <config_exp.h>

#if HAVE_LIBCUDA==1
#if HAVE_LIBCUDA==1 && defined (__NVCC__)

#if __CUDACC_VER_MAJOR__ < 12
#include <nvToolsExt.h>
#else
#include <nvtx3/nvToolsExt.h>
#endif

class nvTracer {
private:
Expand Down
2 changes: 1 addition & 1 deletion src/NVTX.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include <NVTX.H>
#include "NVTX.H"

#if HAVE_LIBCUDA==1

Expand Down
2 changes: 0 additions & 2 deletions src/Orient.H
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we planning on stylistically removing this everywhere? Fine if so and I'll start when I touch any other code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really stylistic. In most compilers, #include "..." first searches the directory of the including file and, if it doesn't find a match there, falls back to the same include search path used for <...>. #include <...> skips that local lookup and searches only the configured include paths (the -I directories, then the system headers). NVTX.H should always be local. So I would say: use "" when we really mean the local directory.

Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// This may look like C code, but it is really -*- C++ -*-

#ifndef _Orient_H
#define _Orient_H

Expand Down
Loading
Loading