From db8911d1e4ce99bfa1b1cd68c29f93f63ddf5960 Mon Sep 17 00:00:00 2001
From: Raimondas Galvelis
Date: Thu, 18 Aug 2022 09:22:29 +0200
Subject: [PATCH] Refactor the common code (#65)

* Factor out the CUDA accessors

* Add an include path

* Factor out atomicAdd
---
 CMakeLists.txt                                |  3 +-
 src/pytorch/common/accessor.cuh               | 14 ++++++++++
 src/pytorch/common/atomicAdd.cuh              | 24 ++++++++++++++++
 src/pytorch/neighbors/getNeighborPairsCUDA.cu | 28 ++-----------------
 4 files changed, 43 insertions(+), 26 deletions(-)
 create mode 100644 src/pytorch/common/accessor.cuh
 create mode 100644 src/pytorch/common/atomicAdd.cuh

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 05c44da..894d792 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,7 +31,8 @@ set(SRC_FILES src/ani/CpuANISymmetryFunctions.cpp
 set(LIBRARY ${NAME}PyTorch)
 add_library(${LIBRARY} SHARED ${SRC_FILES})
 set_property(TARGET ${LIBRARY} PROPERTY CXX_STANDARD 14)
-target_include_directories(${LIBRARY} PRIVATE ${PYTHON_INCLUDE_DIRS} src/ani src/schnet)
+target_include_directories(${LIBRARY} PRIVATE ${PYTHON_INCLUDE_DIRS}
+                                              src/ani src/pytorch src/schnet)
 target_link_libraries(${LIBRARY} ${TORCH_LIBRARIES} ${PYTHON_LIBRARIES})
 if(ENABLE_CUDA)
     set_property(TARGET ${LIBRARY} PROPERTY CUDA_STANDARD 14)
diff --git a/src/pytorch/common/accessor.cuh b/src/pytorch/common/accessor.cuh
new file mode 100644
index 0000000..81957b3
--- /dev/null
+++ b/src/pytorch/common/accessor.cuh
@@ -0,0 +1,14 @@
+#ifndef NNPOPS_ACCESSOR_H
+#define NNPOPS_ACCESSOR_H
+
+#include <torch/extension.h>
+
+template <typename scalar_t, int num_dims>
+    using Accessor = torch::PackedTensorAccessor32<scalar_t, num_dims, torch::RestrictPtrTraits>;
+
+template <typename scalar_t, int num_dims>
+inline Accessor<scalar_t, num_dims> get_accessor(const torch::Tensor& tensor) {
+    return tensor.packed_accessor32<scalar_t, num_dims, torch::RestrictPtrTraits>();
+};
+
+#endif
\ No newline at end of file
diff --git a/src/pytorch/common/atomicAdd.cuh b/src/pytorch/common/atomicAdd.cuh
new file mode 100644
index 0000000..80b79ce
--- /dev/null
+++ b/src/pytorch/common/atomicAdd.cuh
@@ -0,0 +1,24 @@
+#ifndef NNPOPS_ATOMICADD_H
+#define NNPOPS_ATOMICADD_H
+
+/*
+Implement atomicAdd with double precision numbers for pre-Pascal GPUs.
+Taken from https://stackoverflow.com/questions/37566987/cuda-atomicadd-for-doubles-definition-error
+NOTE: remove when the support of CUDA 11 is dropped.
+*/ + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 +__device__ double atomicAdd(double* address, double val) +{ + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} +#endif + +#endif \ No newline at end of file diff --git a/src/pytorch/neighbors/getNeighborPairsCUDA.cu b/src/pytorch/neighbors/getNeighborPairsCUDA.cu index b87a221..33e0058 100644 --- a/src/pytorch/neighbors/getNeighborPairsCUDA.cu +++ b/src/pytorch/neighbors/getNeighborPairsCUDA.cu @@ -4,6 +4,9 @@ #include #include +#include "common/accessor.cuh" +#include "common/atomicAdd.cuh" + using c10::cuda::CUDAStreamGuard; using c10::cuda::getCurrentCUDAStream; using std::make_tuple; @@ -14,40 +17,15 @@ using torch::autograd::tensor_list; using torch::empty; using torch::full; using torch::kInt32; -using torch::PackedTensorAccessor32; -using torch::RestrictPtrTraits; using torch::Scalar; using torch::Tensor; using torch::TensorOptions; using torch::zeros; -template - using Accessor = PackedTensorAccessor32; - -template -inline Accessor get_accessor(const Tensor& tensor) { - return tensor.packed_accessor32(); -}; - template __device__ __forceinline__ scalar_t sqrt_(scalar_t x) {}; template<> __device__ __forceinline__ float sqrt_(float x) { return ::sqrtf(x); }; template<> __device__ __forceinline__ double sqrt_(double x) { return ::sqrt(x); }; -// Support pre-Pascal GPUs. Remove when the support of CUDA 11 is dropped. -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 -__device__ double atomicAdd(double* address, double val) -{ - unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(val + __longlong_as_double(assumed))); - } while (assumed != old); - return __longlong_as_double(old); -} -#endif - template __global__ void forward_kernel( const int32_t num_all_pairs, const Accessor positions,