From d006e86643076dd773f0f18acd6bfafc02b9f5b4 Mon Sep 17 00:00:00 2001 From: "Martin D. Weinberg" Date: Thu, 10 Oct 2024 11:04:00 -0400 Subject: [PATCH 1/5] Implement the CUDA 12 change in NVTX for nvtx3; run CUDA-aware source code through nvcc for consistency with the thrust and cub ABI changes --- CMakeLists.txt | 3 +++ expui/CMakeLists.txt | 8 ++++++-- exputil/CMakeLists.txt | 8 ++++++-- include/BiorthCube.H | 4 ++-- include/BiorthCyl.H | 6 +++--- include/EmpCylSL.H | 4 ++-- include/SLGridMP2.H | 6 +++--- pyEXP/CMakeLists.txt | 7 ++++++- src/CMakeLists.txt | 35 ++++++++++++++++++++++----------- src/Component.H | 8 ++++---- src/Cube.H | 6 +++--- src/CylEXP.H | 2 +- src/Cylinder.H | 15 +++----------- src/Cylinder.cc | 11 +++++++++++ src/ExternalForce.H | 1 + src/FlatDisk.H | 4 ++-- src/NVTX.H | 6 +++++- src/NVTX.cc | 2 +- src/Orient.H | 2 -- src/PolarBasis.H | 4 ++-- src/PotAccel.H | 2 +- src/SlabSL.H | 4 ++-- src/Sphere.H | 4 ++-- src/SphericalBasis.H | 4 ++-- src/cudaCylinder.cu | 1 + src/cudaPolarBasis.cu | 1 + src/global.H | 4 +++- src/user/CMakeLists.txt | 11 +++++++++-- utils/Analysis/CMakeLists.txt | 7 ++++++- utils/ICs/CMakeLists.txt | 7 ++++++- utils/MSSA/CMakeLists.txt | 7 ++++++- utils/PhaseSpace/CMakeLists.txt | 7 ++++++- utils/SL/CMakeLists.txt | 7 ++++++- utils/Test/CMakeLists.txt | 7 ++++++- 34 files changed, 144 insertions(+), 71 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dbede3fe3..75719f9b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -122,6 +122,9 @@ if(OpenMP_FOUND) OPTION (USE_OpenMP "Use OpenMP" ON) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + if(ENABLE_CUDA) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler='${OpenMP_CXX_FLAGS}'") + endif() endif() # Slurm support if(SLURM_FOUND) diff --git a/expui/CMakeLists.txt b/expui/CMakeLists.txt index 8690817f7..331717f5a 100644 --- a/expui/CMakeLists.txt +++ b/expui/CMakeLists.txt @@ -16,8
+16,12 @@ set(common_INCLUDE $ if(ENABLE_CUDA) - list(APPEND common_INCLUDE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDAToolkit_INCLUDE_DIRS}) - list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt) + list(APPEND common_LINKLIB CUDA::cudart) + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) + list(APPEND common_LINKLIB CUDA::nvtx3) + else () + list(APPEND common_LINKLIB CUDA::nvToolsExt) + endif () endif() if(SLURM_FOUND) diff --git a/exputil/CMakeLists.txt b/exputil/CMakeLists.txt index 87a25087a..b00437a45 100644 --- a/exputil/CMakeLists.txt +++ b/exputil/CMakeLists.txt @@ -56,8 +56,12 @@ set(common_LINKLIBS ${DEP_LIB} OpenMP::OpenMP_CXX MPI::MPI_CXX ${FFTW_DOUBLE_LIB}) if(ENABLE_CUDA) - list(APPEND common_INCLUDE_DIRS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDAToolkit_INCLUDE_DIRS}) - list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt) + list(APPEND common_LINKLIB CUDA::cudart) + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) + list(APPEND common_LINKLIB CUDA::nvtx3) + else () + list(APPEND common_LINKLIB CUDA::nvToolsExt) + endif () endif() if(ENABLE_XDR AND TIRPC_FOUND) diff --git a/include/BiorthCube.H b/include/BiorthCube.H index 55dd19108..f7325fd44 100644 --- a/include/BiorthCube.H +++ b/include/BiorthCube.H @@ -16,7 +16,7 @@ #include #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #endif @@ -28,7 +28,7 @@ #include #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #endif diff --git a/include/BiorthCyl.H b/include/BiorthCyl.H index b052c7979..35f0a1e60 100644 --- a/include/BiorthCyl.H +++ b/include/BiorthCyl.H @@ -16,7 +16,7 @@ #include #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #endif @@ -28,7 +28,7 @@ #include #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #endif @@ -172,7 +172,7 @@ public: static std::map cacheInfo(const std::string& cachefile, bool 
verbose=true); -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) void initialize_cuda(std::vector& cuArray, thrust::host_vector& tex); diff --git a/include/EmpCylSL.H b/include/EmpCylSL.H index 233039ef7..c7e9bddd2 100644 --- a/include/EmpCylSL.H +++ b/include/EmpCylSL.H @@ -21,7 +21,7 @@ #include #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #endif @@ -924,7 +924,7 @@ public: //! Check orthogonality for basis (pyEXP style) std::vector orthoCheck(); -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) cudaMappingConstants getCudaMappingConstants(); void initialize_cuda(std::vector& cuArray, diff --git a/include/SLGridMP2.H b/include/SLGridMP2.H index f83d98fe9..1e50d604a 100644 --- a/include/SLGridMP2.H +++ b/include/SLGridMP2.H @@ -18,7 +18,7 @@ #include using namespace __EXP__; -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #endif @@ -167,7 +167,7 @@ public: //! produce matrices std::vector orthoCheck(int knots=40); -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) void initialize_cuda(std::vector& cuArray, thrust::host_vector& tex); @@ -453,7 +453,7 @@ public: //@} -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) void initialize_cuda(std::vector& cuArray, thrust::host_vector& tex); diff --git a/pyEXP/CMakeLists.txt b/pyEXP/CMakeLists.txt index d47080e6e..e93b59cf6 100644 --- a/pyEXP/CMakeLists.txt +++ b/pyEXP/CMakeLists.txt @@ -17,7 +17,12 @@ set(common_INCLUDE $ ${HDF5_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR}) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt) + list(APPEND common_LINKLIB CUDA::cudart) + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) + list(APPEND common_LINKLIB CUDA::nvtx3) + else () + list(APPEND common_LINKLIB CUDA::nvToolsExt) + endif () endif() if(SLURM_FOUND) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 04bd285ae..900041760 100644 --- a/src/CMakeLists.txt +++ 
b/src/CMakeLists.txt @@ -1,12 +1,3 @@ - -set(CUDA_SRC) -if (ENABLE_CUDA) - list(APPEND CUDA_SRC cudaPolarBasis.cu cudaSphericalBasis.cu - cudaCylinder.cu cudaEmpCylSL.cu cudaComponent.cu NVTX.cc - cudaIncpos.cu cudaIncvel.cu cudaMultistep.cu cudaOrient.cu - cudaBiorthCyl.cu cudaCube.cu cudaSlabSL.cu) -endif() - set(exp_SOURCES Basis.cc Bessel.cc Component.cc Cube.cc Cylinder.cc ExternalCollection.cc ExternalForce.cc Orient.cc PotAccel.cc ScatterMFP.cc @@ -19,9 +10,25 @@ set(exp_SOURCES Basis.cc Bessel.cc Component.cc OutMulti.cc OutRelaxation.cc OrbTrace.cc OutDiag.cc OutLog.cc OutVel.cc OutCoef.cc multistep.cc parse.cc SlabSL.cc step.cc tidalField.cc ultra.cc ultrasphere.cc MPL.cc OutFrac.cc OutCalbr.cc - ParticleFerry.cc chkSlurm.c chkTimer.cc GravKernel.cc ${CUDA_SRC} + ParticleFerry.cc chkSlurm.c chkTimer.cc GravKernel.cc CenterFile.cc PolarBasis.cc FlatDisk.cc signals.cc) +if (ENABLE_CUDA) + list(APPEND exp_SOURCES cudaPolarBasis.cu cudaSphericalBasis.cu + cudaCylinder.cu cudaEmpCylSL.cu cudaComponent.cu NVTX.cc + cudaIncpos.cu cudaIncvel.cu cudaMultistep.cu cudaOrient.cu + cudaBiorthCyl.cu cudaCube.cu cudaSlabSL.cu) + + set_source_files_properties(Component.cc ComponentContainer.cc + Cube.cc Cylinder.cc ExternalForce.cc NVTX.cc OrbTrace.cc Orient.cc + OutAscii.cc OutCHKPT.cc OutCHKPTQ.cc OutCalbr.cc OutFrac.cc + OutLog.cc OutPS.cc OutPSN.cc OutPSP.cc OutPSQ.cc OutPSR.cc + OutputContainer.cc PolarBasis.cc PotAccel.cc SlabSL.cc + SphericalBasis.cc begin.cc incpos.cc incvel.cc step.cc + PROPERTIES LANGUAGE CUDA) + +endif() + set(common_INCLUDE_DIRS $ $ @@ -39,8 +46,12 @@ if(PNG_FOUND) endif() if(ENABLE_CUDA) - list(APPEND common_INCLUDE_DIRS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDAToolkit_INCLUDE_DIRS}) - list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt) + list(APPEND common_LINKLIB CUDA::cudart) + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) + list(APPEND common_LINKLIB CUDA::nvtx3) + else () + list(APPEND common_LINKLIB CUDA::nvToolsExt) + 
endif () endif() if(SLURM_FOUND) diff --git a/src/Component.H b/src/Component.H index 5dfc1ceb5..3f00ed9e5 100644 --- a/src/Component.H +++ b/src/Component.H @@ -19,7 +19,7 @@ #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #endif @@ -569,7 +569,7 @@ public: //! Compute center of mass and center of velocity (CPU version) void fix_positions_cpu(unsigned mlevel=0); -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) //! Compute center of mass and center of velocity (GPU version) void fix_positions_cuda(unsigned mlevel=0); #endif @@ -577,7 +577,7 @@ public: //! Compute center of mass and center of velocity void fix_positions(unsigned mlevel=0) { -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) if (use_cuda) fix_positions_cuda(mlevel); else #endif @@ -926,7 +926,7 @@ public: //! Compute level from minimum requested time step from last master step inline bool DTreset() { return dtreset; } -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) //@{ //! CUDA utilities for handling host <===> device exchange diff --git a/src/Cube.H b/src/Cube.H index 8e00413db..c9c6c99af 100644 --- a/src/Cube.H +++ b/src/Cube.H @@ -12,7 +12,7 @@ #include #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #include @@ -41,7 +41,7 @@ private: //! 
Valid keys for YAML configurations static const std::set valid_keys; -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) virtual void determine_coefficients_cuda(); virtual void determine_acceleration_cuda(); virtual void multistep_update_cuda(); @@ -135,7 +135,7 @@ private: std::cout << std::string(60, '=') << std::endl; std::cout << "Time in CPU: " << duration0.count()-duration1.count() << std::endl; -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) if (c->cC->cudaDevice>=0) { std::cout << "Time in GPU: " << duration1.count() << std::endl; } diff --git a/src/CylEXP.H b/src/CylEXP.H index 2df55949f..713a7690c 100644 --- a/src/CylEXP.H +++ b/src/CylEXP.H @@ -5,7 +5,7 @@ #include #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #endif diff --git a/src/Cylinder.H b/src/Cylinder.H index 8dc55b14b..e2b9f24a0 100644 --- a/src/Cylinder.H +++ b/src/Cylinder.H @@ -10,11 +10,10 @@ #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #endif -#include #include @@ -155,15 +154,7 @@ private: a complete set of coefficients for force evaluation at an intermediate time step */ - void compute_multistep_coefficients() - { - if (play_back and not play_cnew) return; - - nvTracerPtr tPtr; - if (cuda_prof) - tPtr = nvTracerPtr(new nvTracer("Cylinder::compute_multistep_coefficients")); - ortho->compute_multistep_coefficients(mfirst[mstep]); - } + void compute_multistep_coefficients(); //! Reset used particle counter and mass registers virtual void multistep_reset(); @@ -180,7 +171,7 @@ protected: int sampT, defSampT; //! 
CUDA method for coefficient accumulation -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) virtual void determine_coefficients_cuda(bool compute_pca); virtual void determine_acceleration_cuda(); virtual void multistep_update_cuda(); diff --git a/src/Cylinder.cc b/src/Cylinder.cc index e17fd50e1..e355bcdf9 100644 --- a/src/Cylinder.cc +++ b/src/Cylinder.cc @@ -12,6 +12,7 @@ #include #include #include +#include Timer timer_debug; @@ -1659,6 +1660,16 @@ void Cylinder::multistep_update(int from, int to, Component* c, int i, int id) #endif } +void Cylinder::compute_multistep_coefficients() +{ + if (play_back and not play_cnew) return; + + nvTracerPtr tPtr; + if (cuda_prof) + tPtr = nvTracerPtr(new nvTracer("Cylinder::compute_multistep_coefficients")); + ortho->compute_multistep_coefficients(mfirst[mstep]); +} + void Cylinder::multistep_reset() { diff --git a/src/ExternalForce.H b/src/ExternalForce.H index 8d8d0f67c..781775a07 100644 --- a/src/ExternalForce.H +++ b/src/ExternalForce.H @@ -53,6 +53,7 @@ public: //! Finish and clean-up (caching data necessary for restart) virtual void finish() {} + // #if HAVE_LIBCUDA==1 && defined (__NVCC__) #if HAVE_LIBCUDA==1 //! 
Copy particles from device for non-cuda forces void getParticlesCuda(Component *c); diff --git a/src/FlatDisk.H b/src/FlatDisk.H index 64be763a4..7ec72b644 100644 --- a/src/FlatDisk.H +++ b/src/FlatDisk.H @@ -9,7 +9,7 @@ typedef std::shared_ptr CylPtr; -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #endif @@ -61,7 +61,7 @@ private: virtual double getRtable() { return ortho->getRtable(); } -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) virtual void initialize_cuda() { sampT = floor(sqrt(component->CurTotal())); diff --git a/src/NVTX.H b/src/NVTX.H index dab8d3bcf..b091792c2 100644 --- a/src/NVTX.H +++ b/src/NVTX.H @@ -5,9 +5,13 @@ #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if __CUDACC_VER_MAJOR__ < 12 #include +#else +#include +#endif class nvTracer { private: diff --git a/src/NVTX.cc b/src/NVTX.cc index 8c4293eb6..60aabeca5 100644 --- a/src/NVTX.cc +++ b/src/NVTX.cc @@ -1,4 +1,4 @@ -#include +#include "NVTX.H" #if HAVE_LIBCUDA==1 diff --git a/src/Orient.H b/src/Orient.H index 13e6ac55a..b2bb1fa91 100644 --- a/src/Orient.H +++ b/src/Orient.H @@ -1,5 +1,3 @@ -// This may look like C code, but it is really -*- C++ -*- - #ifndef _Orient_H #define _Orient_H diff --git a/src/PolarBasis.H b/src/PolarBasis.H index caab18f0d..6d68ff11c 100644 --- a/src/PolarBasis.H +++ b/src/PolarBasis.H @@ -12,7 +12,7 @@ #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #endif @@ -198,7 +198,7 @@ protected: virtual double getRtable() = 0; //! CUDA method for coefficient accumulation -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) virtual void determine_coefficients_cuda(bool compute_pca); virtual void determine_acceleration_cuda(); virtual void multistep_update_cuda(); diff --git a/src/PotAccel.H b/src/PotAccel.H index 2c4dbe49f..f6b8ac99d 100644 --- a/src/PotAccel.H +++ b/src/PotAccel.H @@ -195,7 +195,7 @@ public: //! 
Execute to finish level shifts for particles virtual void multistep_update_finish() {} -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) //! Cuda implementation of level shifts virtual void multistep_update_cuda() {} #endif diff --git a/src/SlabSL.H b/src/SlabSL.H index bad380c12..f9eb1cdd2 100644 --- a/src/SlabSL.H +++ b/src/SlabSL.H @@ -11,7 +11,7 @@ #include #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #include @@ -56,7 +56,7 @@ private: SlabSLCoefHeader coefheader; -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) virtual void determine_coefficients_cuda(); virtual void determine_acceleration_cuda(); virtual void multistep_update_cuda(); diff --git a/src/Sphere.H b/src/Sphere.H index 0ec8e9c7a..0d2141d14 100644 --- a/src/Sphere.H +++ b/src/Sphere.H @@ -9,7 +9,7 @@ typedef std::shared_ptr SLGridSphPtr; -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #endif @@ -68,7 +68,7 @@ private: void make_model_bin(); void make_model_plummer(); -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) virtual void initialize_cuda() { ortho->initialize_cuda(cuInterpArray, tex); diff --git a/src/SphericalBasis.H b/src/SphericalBasis.H index cd949a96e..01c38f780 100644 --- a/src/SphericalBasis.H +++ b/src/SphericalBasis.H @@ -12,7 +12,7 @@ #include -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) #include #include #endif @@ -117,7 +117,7 @@ protected: virtual void determine_coefficients_playback(void); //! 
CUDA method for coefficient accumulation -#if HAVE_LIBCUDA==1 +#if HAVE_LIBCUDA==1 && defined (__NVCC__) virtual void determine_coefficients_cuda(bool compute_pca); virtual void determine_acceleration_cuda(); virtual void multistep_update_cuda(); diff --git a/src/cudaCylinder.cu b/src/cudaCylinder.cu index 8c978ea6b..fff4d19d7 100644 --- a/src/cudaCylinder.cu +++ b/src/cudaCylinder.cu @@ -3,6 +3,7 @@ #include #include #include +#include #include "expand.H" // Define for debugging diff --git a/src/cudaPolarBasis.cu b/src/cudaPolarBasis.cu index 226252aaf..0e51e34ce 100644 --- a/src/cudaPolarBasis.cu +++ b/src/cudaPolarBasis.cu @@ -3,6 +3,7 @@ #include #include #include +#include #include "expand.H" // Define for debugging diff --git a/src/global.H b/src/global.H index 658127f37..0dea91378 100644 --- a/src/global.H +++ b/src/global.H @@ -345,9 +345,11 @@ extern bool cuda_prof; #include "Species.H" #if HAVE_LIBCUDA==1 +extern int cudaGlobalDevice; +#if defined (__NVCC__) #include extern thrust::device_vector cuDstepL, cuDstepN; -extern int cudaGlobalDevice; +#endif #endif #endif diff --git a/src/user/CMakeLists.txt b/src/user/CMakeLists.txt index 3f2a5ff8a..695bc301d 100644 --- a/src/user/CMakeLists.txt +++ b/src/user/CMakeLists.txt @@ -13,8 +13,15 @@ set (common_LINKLIBS ${DEP_LIB} OpenMP::OpenMP_CXX MPI::MPI_CXX exputil EXPlib yaml-cpp) if(ENABLE_CUDA) - list(APPEND common_INCLUDE_DIRS ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${CUDAToolkit_INCLUDE_DIRS}) - list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt) + list(APPEND common_LINKLIB CUDA::cudart) + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) + list(APPEND common_LINKLIB CUDA::nvtx3) + else () + list(APPEND common_LINKLIB CUDA::nvToolsExt) + endif () + # set_source_files_properties(UserBar.cc UserDisk.cc UserHalo.cc + # UserLogPot.cc UserMNdisk.cc UserMW.cc UserTest.cc UserTestCuda.cc + # PROPERTIES LANGUAGE CUDA) endif() set(user_SRC UserTest.cc) diff --git a/utils/Analysis/CMakeLists.txt 
b/utils/Analysis/CMakeLists.txt index d06dbe69c..124f435aa 100644 --- a/utils/Analysis/CMakeLists.txt +++ b/utils/Analysis/CMakeLists.txt @@ -13,7 +13,12 @@ if(PNG_FOUND) endif() if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt) + list(APPEND common_LINKLIB CUDA::cudart) + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) + list(APPEND common_LINKLIB CUDA::nvtx3) + else () + list(APPEND common_LINKLIB CUDA::nvToolsExt) + endif () endif() if(SLURM_FOUND) diff --git a/utils/ICs/CMakeLists.txt b/utils/ICs/CMakeLists.txt index 06cbb8b18..b53496e63 100644 --- a/utils/ICs/CMakeLists.txt +++ b/utils/ICs/CMakeLists.txt @@ -9,7 +9,12 @@ set(common_LINKLIB OpenMP::OpenMP_CXX MPI::MPI_CXX yaml-cpp exputil ${HDF5_CXX_LIBRARIES}) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt) + list(APPEND common_LINKLIB CUDA::cudart) + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) + list(APPEND common_LINKLIB CUDA::nvtx3) + else () + list(APPEND common_LINKLIB CUDA::nvToolsExt) + endif () endif() if(ENABLE_XDR AND TIRPC_FOUND) diff --git a/utils/MSSA/CMakeLists.txt b/utils/MSSA/CMakeLists.txt index c4db00080..3b21103b3 100644 --- a/utils/MSSA/CMakeLists.txt +++ b/utils/MSSA/CMakeLists.txt @@ -17,7 +17,12 @@ set(common_INCLUDE $ ${HDF5_INCLUDE_DIRS}) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt) + list(APPEND common_LINKLIB CUDA::cudart) + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) + list(APPEND common_LINKLIB CUDA::nvtx3) + else () + list(APPEND common_LINKLIB CUDA::nvToolsExt) + endif () endif() if(SLURM_FOUND) diff --git a/utils/PhaseSpace/CMakeLists.txt b/utils/PhaseSpace/CMakeLists.txt index 71d1c2141..4893a413e 100644 --- a/utils/PhaseSpace/CMakeLists.txt +++ b/utils/PhaseSpace/CMakeLists.txt @@ -21,7 +21,12 @@ if(PNG_FOUND) endif() if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt) + list(APPEND common_LINKLIB CUDA::cudart) + if (CUDAToolkit_VERSION 
VERSION_GREATER_EQUAL 12) + list(APPEND common_LINKLIB CUDA::nvtx3) + else () + list(APPEND common_LINKLIB CUDA::nvToolsExt) + endif () endif() if(ENABLE_XDR AND TIRPC_FOUND) diff --git a/utils/SL/CMakeLists.txt b/utils/SL/CMakeLists.txt index 55fe30615..44b4ef080 100644 --- a/utils/SL/CMakeLists.txt +++ b/utils/SL/CMakeLists.txt @@ -6,7 +6,12 @@ set(common_LINKLIB OpenMP::OpenMP_CXX MPI::MPI_CXX yaml-cpp exputil ${VTK_LIBRARIES}) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt) + list(APPEND common_LINKLIB CUDA::cudart) + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) + list(APPEND common_LINKLIB CUDA::nvtx3) + else () + list(APPEND common_LINKLIB CUDA::nvToolsExt) + endif () endif() if(ENABLE_XDR AND TIRPC_FOUND) diff --git a/utils/Test/CMakeLists.txt b/utils/Test/CMakeLists.txt index e680d1723..0d53e8b7a 100644 --- a/utils/Test/CMakeLists.txt +++ b/utils/Test/CMakeLists.txt @@ -17,7 +17,12 @@ set(common_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/..) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart CUDA::nvToolsExt ) + list(APPEND common_LINKLIB CUDA::cudart) + if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) + list(APPEND common_LINKLIB CUDA::nvtx3) + else () + list(APPEND common_LINKLIB CUDA::nvToolsExt) + endif () endif() if(ENABLE_XDR AND TIRPC_FOUND) From b8ac63ffb993d0d49e09a7c9d1d7e7a024ea2a3c Mon Sep 17 00:00:00 2001 From: "Martin D. 
Weinberg" Date: Thu, 10 Oct 2024 11:47:30 -0400 Subject: [PATCH 2/5] Remove a few unused parameters from gendisk source code [no ci] --- utils/ICs/initial.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/utils/ICs/initial.cc b/utils/ICs/initial.cc index d5d949c55..354733a87 100644 --- a/utils/ICs/initial.cc +++ b/utils/ICs/initial.cc @@ -360,7 +360,7 @@ main(int ac, char **av) double RMIN, RCYLMIN, RCYLMAX, SCSPH, RSPHSL, DMFAC, RFACTOR, SHFAC; double X0, Y0, Z0, U0, V0, W0; int RNUM, PNUM, TNUM, VFLAG, DFLAG; - bool expcond, LOGR, CHEBY, SELECT, DUMPCOEF; + bool expcond, LOGR, CHEBY, DUMPCOEF; int CMAPR, CMAPZ, NCHEB, TCHEB, CMTYPE, NDR, NDZ, NHR, NHT, NDP; int LMAX, NMAXH, NMAXD, MMAX, NUMX, NUMY, NOUT, NMAXLIM, NODD, DF; int DIVERGE, DIVERGE2, SEED, itmax; @@ -368,7 +368,7 @@ main(int ac, char **av) double PPower, R_DF, DR_DF; double Hratio, scale_height, scale_length, scale_lenfkN; double disk_mass, gas_mass, gscal_length, ToomreQ, Temp, Tmin; - bool const_height, images, multi, SVD, basis, zeropos, zerovel; + bool const_height, images, multi, basis, zeropos, zerovel; bool report, ignore, evolved, diskmodel; int nhalo, ndisk, ngas, ngparam; std::string hbods, dbods, gbods, suffix, centerfile, halofile1, halofile2; @@ -870,7 +870,6 @@ main(int ac, char **av) EmpCylSL::CMAPZ = CMAPZ; EmpCylSL::VFLAG = VFLAG; EmpCylSL::logarithmic = LOGR; - EmpCylSL::PCAVAR = SELECT; // Create expansion only if needed . . . std::shared_ptr expandd; From d24ae4f9b15807367278eb543d25d31e51411549 Mon Sep 17 00:00:00 2001 From: "Martin D. 
Weinberg" Date: Thu, 10 Oct 2024 14:05:24 -0400 Subject: [PATCH 3/5] Put Sphere.cc in nvcc compile list to prevent issues with parameter passing --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 900041760..0594c836f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -23,7 +23,7 @@ if (ENABLE_CUDA) Cube.cc Cylinder.cc ExternalForce.cc NVTX.cc OrbTrace.cc Orient.cc OutAscii.cc OutCHKPT.cc OutCHKPTQ.cc OutCalbr.cc OutFrac.cc OutLog.cc OutPS.cc OutPSN.cc OutPSP.cc OutPSQ.cc OutPSR.cc - OutputContainer.cc PolarBasis.cc PotAccel.cc SlabSL.cc + OutputContainer.cc PolarBasis.cc PotAccel.cc SlabSL.cc Sphere.cc SphericalBasis.cc begin.cc incpos.cc incvel.cc step.cc PROPERTIES LANGUAGE CUDA) From 876a923a82b584f3eca9536a4bdd902ebfba96c9 Mon Sep 17 00:00:00 2001 From: Georgia Stuart Date: Sun, 13 Oct 2024 23:03:01 -0400 Subject: [PATCH 4/5] Rework cuda file handling Signed-off-by: Georgia Stuart --- expui/CMakeLists.txt | 2 +- exputil/CMakeLists.txt | 7 ++++--- include/BiorthCube.H | 4 ++-- include/BiorthCyl.H | 6 +++--- include/EmpCylSL.H | 4 ++-- include/SLGridMP2.H | 6 +++--- pyEXP/CMakeLists.txt | 2 +- src/CMakeLists.txt | 16 ++++++---------- src/Component.H | 8 ++++---- src/Cube.H | 6 +++--- src/CylEXP.H | 2 +- src/Cylinder.H | 4 ++-- src/ExternalForce.H | 2 +- src/FlatDisk.H | 4 ++-- src/NVTX.H | 2 +- src/PolarBasis.H | 4 ++-- src/PotAccel.H | 2 +- src/SlabSL.H | 4 ++-- src/Sphere.H | 4 ++-- src/SphericalBasis.H | 4 ++-- src/user/CMakeLists.txt | 6 +++--- utils/Analysis/CMakeLists.txt | 2 +- utils/ICs/CMakeLists.txt | 2 +- utils/MSSA/CMakeLists.txt | 2 +- utils/PhaseSpace/CMakeLists.txt | 2 +- utils/SL/CMakeLists.txt | 2 +- utils/Test/CMakeLists.txt | 2 +- 27 files changed, 54 insertions(+), 57 deletions(-) diff --git a/expui/CMakeLists.txt b/expui/CMakeLists.txt index 331717f5a..7e55a3667 100644 --- a/expui/CMakeLists.txt +++ b/expui/CMakeLists.txt @@ -16,7 +16,7 @@ 
set(common_INCLUDE $ if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart) + list(APPEND common_LINKLIB CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) list(APPEND common_LINKLIB CUDA::nvtx3) else () diff --git a/exputil/CMakeLists.txt b/exputil/CMakeLists.txt index b00437a45..9ccb58eca 100644 --- a/exputil/CMakeLists.txt +++ b/exputil/CMakeLists.txt @@ -51,14 +51,15 @@ set(common_INCLUDE_DIRS $ ${DEP_INC} ${EIGEN3_INCLUDE_DIR} ${HDF5_INCLUDE_DIRS} ${FFTW_INCLUDE_DIRS}) -set(common_LINKLIBS ${DEP_LIB} OpenMP::OpenMP_CXX MPI::MPI_CXX +set(common_LINKLIB ${DEP_LIB} OpenMP::OpenMP_CXX MPI::MPI_CXX yaml-cpp ${VTK_LIBRARIES} ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES} ${FFTW_DOUBLE_LIB}) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart) + list(APPEND common_LINKLIB CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) list(APPEND common_LINKLIB CUDA::nvtx3) + set_source_files_properties(${BIORTH_SRC} ${PARTICLE_SRC} PROPERTIES LANGUAGE CUDA) else () list(APPEND common_LINKLIB CUDA::nvToolsExt) endif () @@ -74,7 +75,7 @@ endif() add_library(exputil ${exputil_SOURCES}) set_target_properties(exputil PROPERTIES OUTPUT_NAME exputil) target_include_directories(exputil PUBLIC ${common_INCLUDE_DIRS}) -target_link_libraries(exputil PUBLIC ${common_LINKLIBS}) +target_link_libraries(exputil PUBLIC ${common_LINKLIB}) install(TARGETS exputil DESTINATION lib) diff --git a/include/BiorthCube.H b/include/BiorthCube.H index f7325fd44..55dd19108 100644 --- a/include/BiorthCube.H +++ b/include/BiorthCube.H @@ -16,7 +16,7 @@ #include #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #endif @@ -28,7 +28,7 @@ #include #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #endif diff --git a/include/BiorthCyl.H b/include/BiorthCyl.H index 35f0a1e60..b052c7979 100644 --- a/include/BiorthCyl.H +++ b/include/BiorthCyl.H @@ -16,7 +16,7 @@ #include #include 
-#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #endif @@ -28,7 +28,7 @@ #include #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #endif @@ -172,7 +172,7 @@ public: static std::map cacheInfo(const std::string& cachefile, bool verbose=true); -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 void initialize_cuda(std::vector& cuArray, thrust::host_vector& tex); diff --git a/include/EmpCylSL.H b/include/EmpCylSL.H index c7e9bddd2..233039ef7 100644 --- a/include/EmpCylSL.H +++ b/include/EmpCylSL.H @@ -21,7 +21,7 @@ #include #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #endif @@ -924,7 +924,7 @@ public: //! Check orthogonality for basis (pyEXP style) std::vector orthoCheck(); -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 cudaMappingConstants getCudaMappingConstants(); void initialize_cuda(std::vector& cuArray, diff --git a/include/SLGridMP2.H b/include/SLGridMP2.H index 1e50d604a..f83d98fe9 100644 --- a/include/SLGridMP2.H +++ b/include/SLGridMP2.H @@ -18,7 +18,7 @@ #include using namespace __EXP__; -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #endif @@ -167,7 +167,7 @@ public: //! 
produce matrices std::vector orthoCheck(int knots=40); -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 void initialize_cuda(std::vector& cuArray, thrust::host_vector& tex); @@ -453,7 +453,7 @@ public: //@} -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 void initialize_cuda(std::vector& cuArray, thrust::host_vector& tex); diff --git a/pyEXP/CMakeLists.txt b/pyEXP/CMakeLists.txt index e93b59cf6..dc8cebf0e 100644 --- a/pyEXP/CMakeLists.txt +++ b/pyEXP/CMakeLists.txt @@ -17,7 +17,7 @@ set(common_INCLUDE $ ${HDF5_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR}) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart) + list(APPEND common_LINKLIB CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) list(APPEND common_LINKLIB CUDA::nvtx3) else () diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0594c836f..45827e5f9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,15 +18,6 @@ if (ENABLE_CUDA) cudaCylinder.cu cudaEmpCylSL.cu cudaComponent.cu NVTX.cc cudaIncpos.cu cudaIncvel.cu cudaMultistep.cu cudaOrient.cu cudaBiorthCyl.cu cudaCube.cu cudaSlabSL.cu) - - set_source_files_properties(Component.cc ComponentContainer.cc - Cube.cc Cylinder.cc ExternalForce.cc NVTX.cc OrbTrace.cc Orient.cc - OutAscii.cc OutCHKPT.cc OutCHKPTQ.cc OutCalbr.cc OutFrac.cc - OutLog.cc OutPS.cc OutPSN.cc OutPSP.cc OutPSQ.cc OutPSR.cc - OutputContainer.cc PolarBasis.cc PotAccel.cc SlabSL.cc Sphere.cc - SphericalBasis.cc begin.cc incpos.cc incvel.cc step.cc - PROPERTIES LANGUAGE CUDA) - endif() set(common_INCLUDE_DIRS @@ -46,9 +37,10 @@ if(PNG_FOUND) endif() if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart) + list(APPEND common_LINKLIB CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) list(APPEND common_LINKLIB CUDA::nvtx3) + set_source_files_properties(${exp_SOURCES} PROPERTIES LANGUAGE CUDA) else () list(APPEND common_LINKLIB CUDA::nvToolsExt) endif () @@ -75,5 +67,9 @@ add_executable(exp 
expand.cc) target_include_directories(exp PUBLIC ${common_INCLUDE_DIRS}) target_link_libraries(exp PUBLIC ${common_LINKLIB} EXPlib) +if (ENABLE_CUDA) + set_target_properties(exp PROPERTIES LINKER_LANGUAGE CUDA) +endif () + install(TARGETS EXPlib DESTINATION lib) install(TARGETS exp DESTINATION bin) diff --git a/src/Component.H b/src/Component.H index 3f00ed9e5..5dfc1ceb5 100644 --- a/src/Component.H +++ b/src/Component.H @@ -19,7 +19,7 @@ #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #endif @@ -569,7 +569,7 @@ public: //! Compute center of mass and center of velocity (CPU version) void fix_positions_cpu(unsigned mlevel=0); -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 //! Compute center of mass and center of velocity (GPU version) void fix_positions_cuda(unsigned mlevel=0); #endif @@ -577,7 +577,7 @@ public: //! Compute center of mass and center of velocity void fix_positions(unsigned mlevel=0) { -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 if (use_cuda) fix_positions_cuda(mlevel); else #endif @@ -926,7 +926,7 @@ public: //! Compute level from minimum requested time step from last master step inline bool DTreset() { return dtreset; } -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 //@{ //! CUDA utilities for handling host <===> device exchange diff --git a/src/Cube.H b/src/Cube.H index c9c6c99af..8e00413db 100644 --- a/src/Cube.H +++ b/src/Cube.H @@ -12,7 +12,7 @@ #include #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #include @@ -41,7 +41,7 @@ private: //! 
Valid keys for YAML configurations static const std::set valid_keys; -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 virtual void determine_coefficients_cuda(); virtual void determine_acceleration_cuda(); virtual void multistep_update_cuda(); @@ -135,7 +135,7 @@ private: std::cout << std::string(60, '=') << std::endl; std::cout << "Time in CPU: " << duration0.count()-duration1.count() << std::endl; -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 if (c->cC->cudaDevice>=0) { std::cout << "Time in GPU: " << duration1.count() << std::endl; } diff --git a/src/CylEXP.H b/src/CylEXP.H index 713a7690c..2df55949f 100644 --- a/src/CylEXP.H +++ b/src/CylEXP.H @@ -5,7 +5,7 @@ #include #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #endif diff --git a/src/Cylinder.H b/src/Cylinder.H index e2b9f24a0..08e012c58 100644 --- a/src/Cylinder.H +++ b/src/Cylinder.H @@ -10,7 +10,7 @@ #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #endif @@ -171,7 +171,7 @@ protected: int sampT, defSampT; //! CUDA method for coefficient accumulation -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 virtual void determine_coefficients_cuda(bool compute_pca); virtual void determine_acceleration_cuda(); virtual void multistep_update_cuda(); diff --git a/src/ExternalForce.H b/src/ExternalForce.H index 781775a07..f445852cb 100644 --- a/src/ExternalForce.H +++ b/src/ExternalForce.H @@ -53,7 +53,7 @@ public: //! Finish and clean-up (caching data necessary for restart) virtual void finish() {} - // #if HAVE_LIBCUDA==1 && defined (__NVCC__) + // #if HAVE_LIBCUDA==1 #if HAVE_LIBCUDA==1 //! 
Copy particles from device for non-cuda forces void getParticlesCuda(Component *c); diff --git a/src/FlatDisk.H b/src/FlatDisk.H index 7ec72b644..64be763a4 100644 --- a/src/FlatDisk.H +++ b/src/FlatDisk.H @@ -9,7 +9,7 @@ typedef std::shared_ptr CylPtr; -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #endif @@ -61,7 +61,7 @@ private: virtual double getRtable() { return ortho->getRtable(); } -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 virtual void initialize_cuda() { sampT = floor(sqrt(component->CurTotal())); diff --git a/src/NVTX.H b/src/NVTX.H index b091792c2..44947c197 100644 --- a/src/NVTX.H +++ b/src/NVTX.H @@ -5,7 +5,7 @@ #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #if __CUDACC_VER_MAJOR__ < 12 #include diff --git a/src/PolarBasis.H b/src/PolarBasis.H index 6d68ff11c..caab18f0d 100644 --- a/src/PolarBasis.H +++ b/src/PolarBasis.H @@ -12,7 +12,7 @@ #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #endif @@ -198,7 +198,7 @@ protected: virtual double getRtable() = 0; //! CUDA method for coefficient accumulation -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 virtual void determine_coefficients_cuda(bool compute_pca); virtual void determine_acceleration_cuda(); virtual void multistep_update_cuda(); diff --git a/src/PotAccel.H b/src/PotAccel.H index f6b8ac99d..2c4dbe49f 100644 --- a/src/PotAccel.H +++ b/src/PotAccel.H @@ -195,7 +195,7 @@ public: //! Execute to finish level shifts for particles virtual void multistep_update_finish() {} -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 //! 
Cuda implementation of level shifts virtual void multistep_update_cuda() {} #endif diff --git a/src/SlabSL.H b/src/SlabSL.H index f9eb1cdd2..bad380c12 100644 --- a/src/SlabSL.H +++ b/src/SlabSL.H @@ -11,7 +11,7 @@ #include #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #include @@ -56,7 +56,7 @@ private: SlabSLCoefHeader coefheader; -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 virtual void determine_coefficients_cuda(); virtual void determine_acceleration_cuda(); virtual void multistep_update_cuda(); diff --git a/src/Sphere.H b/src/Sphere.H index 0d2141d14..0ec8e9c7a 100644 --- a/src/Sphere.H +++ b/src/Sphere.H @@ -9,7 +9,7 @@ typedef std::shared_ptr SLGridSphPtr; -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #endif @@ -68,7 +68,7 @@ private: void make_model_bin(); void make_model_plummer(); -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 virtual void initialize_cuda() { ortho->initialize_cuda(cuInterpArray, tex); diff --git a/src/SphericalBasis.H b/src/SphericalBasis.H index 01c38f780..cd949a96e 100644 --- a/src/SphericalBasis.H +++ b/src/SphericalBasis.H @@ -12,7 +12,7 @@ #include -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 #include #include #endif @@ -117,7 +117,7 @@ protected: virtual void determine_coefficients_playback(void); //! CUDA method for coefficient accumulation -#if HAVE_LIBCUDA==1 && defined (__NVCC__) +#if HAVE_LIBCUDA==1 virtual void determine_coefficients_cuda(bool compute_pca); virtual void determine_acceleration_cuda(); virtual void multistep_update_cuda(); diff --git a/src/user/CMakeLists.txt b/src/user/CMakeLists.txt index 695bc301d..e340cce7e 100644 --- a/src/user/CMakeLists.txt +++ b/src/user/CMakeLists.txt @@ -9,11 +9,11 @@ set (common_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/..) 
-set (common_LINKLIBS ${DEP_LIB} OpenMP::OpenMP_CXX MPI::MPI_CXX +set (common_LINKLIB ${DEP_LIB} OpenMP::OpenMP_CXX MPI::MPI_CXX exputil EXPlib yaml-cpp) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart) + list(APPEND common_LINKLIB CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) list(APPEND common_LINKLIB CUDA::nvtx3) else () @@ -40,7 +40,7 @@ foreach(mlib ${USER_MODULES}) add_library(${mlib} ${${mlib}_SRC}) set_target_properties(${mlib} PROPERTIES OUTPUT_NAME ${mlib}) target_include_directories(${mlib} PUBLIC ${common_INCLUDE_DIRS}) - target_link_libraries(${mlib} PUBLIC ${common_LINKLIBS}) + target_link_libraries(${mlib} PUBLIC ${common_LINKLIB}) install(TARGETS ${mlib} DESTINATION lib/user) endforeach() diff --git a/utils/Analysis/CMakeLists.txt b/utils/Analysis/CMakeLists.txt index 124f435aa..8bc7e3a08 100644 --- a/utils/Analysis/CMakeLists.txt +++ b/utils/Analysis/CMakeLists.txt @@ -13,7 +13,7 @@ if(PNG_FOUND) endif() if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart) + list(APPEND common_LINKLIB CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) list(APPEND common_LINKLIB CUDA::nvtx3) else () diff --git a/utils/ICs/CMakeLists.txt b/utils/ICs/CMakeLists.txt index b53496e63..0c89b9942 100644 --- a/utils/ICs/CMakeLists.txt +++ b/utils/ICs/CMakeLists.txt @@ -9,7 +9,7 @@ set(common_LINKLIB OpenMP::OpenMP_CXX MPI::MPI_CXX yaml-cpp exputil ${HDF5_CXX_LIBRARIES}) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart) + list(APPEND common_LINKLIB CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) list(APPEND common_LINKLIB CUDA::nvtx3) else () diff --git a/utils/MSSA/CMakeLists.txt b/utils/MSSA/CMakeLists.txt index 3b21103b3..f2a2690cc 100644 --- a/utils/MSSA/CMakeLists.txt +++ b/utils/MSSA/CMakeLists.txt @@ -17,7 +17,7 @@ set(common_INCLUDE $ ${HDF5_INCLUDE_DIRS}) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart) + list(APPEND common_LINKLIB 
CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) list(APPEND common_LINKLIB CUDA::nvtx3) else () diff --git a/utils/PhaseSpace/CMakeLists.txt b/utils/PhaseSpace/CMakeLists.txt index 4893a413e..63ced2c1f 100644 --- a/utils/PhaseSpace/CMakeLists.txt +++ b/utils/PhaseSpace/CMakeLists.txt @@ -21,7 +21,7 @@ if(PNG_FOUND) endif() if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart) + list(APPEND common_LINKLIB CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) list(APPEND common_LINKLIB CUDA::nvtx3) else () diff --git a/utils/SL/CMakeLists.txt b/utils/SL/CMakeLists.txt index 44b4ef080..59afb08fa 100644 --- a/utils/SL/CMakeLists.txt +++ b/utils/SL/CMakeLists.txt @@ -6,7 +6,7 @@ set(common_LINKLIB OpenMP::OpenMP_CXX MPI::MPI_CXX yaml-cpp exputil ${VTK_LIBRARIES}) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart) + list(APPEND common_LINKLIB CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) list(APPEND common_LINKLIB CUDA::nvtx3) else () diff --git a/utils/Test/CMakeLists.txt b/utils/Test/CMakeLists.txt index 0d53e8b7a..91a38c6ac 100644 --- a/utils/Test/CMakeLists.txt +++ b/utils/Test/CMakeLists.txt @@ -17,7 +17,7 @@ set(common_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/..) if(ENABLE_CUDA) - list(APPEND common_LINKLIB CUDA::cudart) + list(APPEND common_LINKLIB CUDA::toolkit CUDA::cudart) if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL 12) list(APPEND common_LINKLIB CUDA::nvtx3) else () From 92c1f8bf04151f9928119cfc24f87c29a8848d54 Mon Sep 17 00:00:00 2001 From: "Martin D. 
Weinberg" Date: Mon, 14 Oct 2024 14:07:06 -0400 Subject: [PATCH 5/5] Need at least CMake 3.25 to get the CUDA::nvtx3 target --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 75719f9b2..72720b0d0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.21) # Needed for CUDA, MPI, and CTest features +cmake_minimum_required(VERSION 3.25) # Needed for CUDA, MPI, and CTest features project( EXP