Refactor the common code (#65)
* Factor out the CUDA accessors

* Add an include path

* Factor out atomicAdd
Raimondas Galvelis authored Aug 18, 2022
1 parent 332b2fa commit db8911d
Showing 4 changed files with 43 additions and 26 deletions.
CMakeLists.txt (2 additions, 1 deletion)
@@ -31,7 +31,8 @@ set(SRC_FILES src/ani/CpuANISymmetryFunctions.cpp
 set(LIBRARY ${NAME}PyTorch)
 add_library(${LIBRARY} SHARED ${SRC_FILES})
 set_property(TARGET ${LIBRARY} PROPERTY CXX_STANDARD 14)
-target_include_directories(${LIBRARY} PRIVATE ${PYTHON_INCLUDE_DIRS} src/ani src/schnet)
+target_include_directories(${LIBRARY} PRIVATE ${PYTHON_INCLUDE_DIRS}
+                                              src/ani src/pytorch src/schnet)
 target_link_libraries(${LIBRARY} ${TORCH_LIBRARIES} ${PYTHON_LIBRARIES})
 if(ENABLE_CUDA)
     set_property(TARGET ${LIBRARY} PROPERTY CUDA_STANDARD 14)
src/pytorch/common/accessor.cuh (14 additions, 0 deletions)
@@ -0,0 +1,14 @@
+#ifndef NNPOPS_ACCESSOR_H
+#define NNPOPS_ACCESSOR_H
+
+#include <torch/extension.h>
+
+template <typename scalar_t, int num_dims>
+using Accessor = torch::PackedTensorAccessor32<scalar_t, num_dims, torch::RestrictPtrTraits>;
+
+template <typename scalar_t, int num_dims>
+inline Accessor<scalar_t, num_dims> get_accessor(const torch::Tensor& tensor) {
+    return tensor.packed_accessor32<scalar_t, num_dims, torch::RestrictPtrTraits>();
+};
+
+#endif
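
A minimal usage sketch, not part of this commit: kernels take the Accessor alias by value, giving a 32-bit-indexed, __restrict__-qualified view of a tensor, and the host side builds that view with get_accessor. The kernel and wrapper below are hypothetical names for illustration.

#include "accessor.cuh"

// Hypothetical kernel: scales a 1-D tensor in place through the accessor.
__global__ void scale_kernel(Accessor<float, 1> data, float factor) {
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < data.size(0))
        data[i] *= factor;
}

// Host wrapper: get_accessor converts the tensor into the device-side view.
void scale(torch::Tensor& data, float factor) {
    const int threads = 128;
    const int blocks = (data.size(0) + threads - 1) / threads;
    scale_kernel<<<blocks, threads>>>(get_accessor<float, 1>(data), factor);
}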
src/pytorch/common/atomicAdd.cuh (24 additions, 0 deletions)
@@ -0,0 +1,24 @@
+#ifndef NNPOPS_ATOMICADD_H
+#define NNPOPS_ATOMICADD_H
+
+/*
+Implement atomicAdd with double precision numbers for pre-Pascal GPUs.
+Taken from https://stackoverflow.com/questions/37566987/cuda-atomicadd-for-doubles-definition-error
+NOTE: remove when the support of CUDA 11 is dropped.
+*/
+
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
+__device__ double atomicAdd(double* address, double val)
+{
+    unsigned long long int* address_as_ull = (unsigned long long int*)address;
+    unsigned long long int old = *address_as_ull, assumed;
+    do {
+        assumed = old;
+        old = atomicCAS(address_as_ull, assumed,
+                        __double_as_longlong(val + __longlong_as_double(assumed)));
+    } while (assumed != old);
+    return __longlong_as_double(old);
+}
+#endif
+
+#endif
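
The shim emulates a double-precision atomicAdd with a compare-and-swap loop: each iteration reinterprets the current 64-bit word as a double, adds val, and attempts to swap the result in with atomicCAS; if another thread modified the word in the meantime, the swap fails and the loop retries with the fresh value. A self-contained sketch of the pattern this enables (file layout and numbers are illustrative, not from the commit):

#include <cstdio>
#include <cuda_runtime.h>
#include "atomicAdd.cuh"

// Sum all elements into a single double; on pre-Pascal devices
// (__CUDA_ARCH__ < 600) the atomicAdd call resolves to the shim above.
__global__ void sum_kernel(const double* values, double* total, int n) {
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        atomicAdd(total, values[i]);
}

int main() {
    const int n = 1000;
    double *values, *total;
    cudaMallocManaged(&values, n * sizeof(double));
    cudaMallocManaged(&total, sizeof(double));
    for (int i = 0; i < n; i++) values[i] = 1.0;
    *total = 0.0;
    sum_kernel<<<(n + 127) / 128, 128>>>(values, total, n);
    cudaDeviceSynchronize();
    printf("sum = %f\n", *total); // expected: 1000.000000
    cudaFree(values);
    cudaFree(total);
    return 0;
}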
src/pytorch/neighbors/getNeighborPairsCUDA.cu (3 additions, 25 deletions)
@@ -4,6 +4,9 @@
 #include <algorithm>
 #include <tuple>
 
+#include "common/accessor.cuh"
+#include "common/atomicAdd.cuh"
+
 using c10::cuda::CUDAStreamGuard;
 using c10::cuda::getCurrentCUDAStream;
 using std::make_tuple;
@@ -14,40 +17,15 @@ using torch::autograd::tensor_list;
 using torch::empty;
 using torch::full;
 using torch::kInt32;
-using torch::PackedTensorAccessor32;
-using torch::RestrictPtrTraits;
 using torch::Scalar;
 using torch::Tensor;
 using torch::TensorOptions;
 using torch::zeros;
 
-template <typename scalar_t, int num_dims>
-using Accessor = PackedTensorAccessor32<scalar_t, num_dims, RestrictPtrTraits>;
-
-template <typename scalar_t, int num_dims>
-inline Accessor<scalar_t, num_dims> get_accessor(const Tensor& tensor) {
-    return tensor.packed_accessor32<scalar_t, num_dims, RestrictPtrTraits>();
-};
-
 template <typename scalar_t> __device__ __forceinline__ scalar_t sqrt_(scalar_t x) {};
 template<> __device__ __forceinline__ float sqrt_(float x) { return ::sqrtf(x); };
 template<> __device__ __forceinline__ double sqrt_(double x) { return ::sqrt(x); };
 
-// Support pre-Pascal GPUs. Remove when the support of CUDA 11 is dropped.
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
-__device__ double atomicAdd(double* address, double val)
-{
-    unsigned long long int* address_as_ull = (unsigned long long int*)address;
-    unsigned long long int old = *address_as_ull, assumed;
-    do {
-        assumed = old;
-        old = atomicCAS(address_as_ull, assumed,
-                        __double_as_longlong(val + __longlong_as_double(assumed)));
-    } while (assumed != old);
-    return __longlong_as_double(old);
-}
-#endif
-
 template <typename scalar_t> __global__ void forward_kernel(
     const int32_t num_all_pairs,
     const Accessor<scalar_t, 2> positions,
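Net effect of the refactor: getNeighborPairsCUDA.cu drops its local copies of the accessor helpers and the double atomicAdd shim and pulls both from the shared headers. A hedged sketch of the resulting pattern, with a hypothetical centroid kernel standing in for the real neighbor-pair kernels:

#include <torch/extension.h>

#include "common/accessor.cuh"
#include "common/atomicAdd.cuh"

// Hypothetical kernel: accumulates the mean position into a double buffer,
// combining the shared Accessor view with (possibly shimmed) double atomicAdd.
template <typename scalar_t>
__global__ void centroid_kernel(const Accessor<scalar_t, 2> positions,
                                Accessor<double, 1> centroid) {
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < positions.size(0))
        for (int j = 0; j < 3; j++)
            atomicAdd(&centroid[j],
                      static_cast<double>(positions[i][j]) / positions.size(0));
}

torch::Tensor centroid(const torch::Tensor& positions) {
    torch::Tensor result = torch::zeros({3}, positions.options().dtype(torch::kFloat64));
    const int threads = 128;
    const int blocks = (positions.size(0) + threads - 1) / threads;
    AT_DISPATCH_FLOATING_TYPES(positions.scalar_type(), "centroid", [&] {
        centroid_kernel<scalar_t><<<blocks, threads>>>(
            get_accessor<scalar_t, 2>(positions), get_accessor<double, 1>(result));
    });
    return result;
}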
