-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Factor out the CUDA accessors * Add an include path * Factor out atomicAdd
- Loading branch information
Raimondas Galvelis
authored
Aug 18, 2022
1 parent
332b2fa
commit db8911d
Showing
4 changed files
with
43 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#ifndef NNPOPS_ACCESSOR_H
#define NNPOPS_ACCESSOR_H

#include <torch/extension.h>

// Shorthand for a 32-bit-indexed packed tensor accessor with restrict
// pointer traits, suitable for passing tensor data to CUDA kernels by value.
template <typename scalar_t, int num_dims>
using Accessor = torch::PackedTensorAccessor32<scalar_t, num_dims, torch::RestrictPtrTraits>;

// Build an Accessor over `tensor`'s data.
// NOTE(review): packed_accessor32 validates the scalar type and rank against
// the tensor at runtime — callers must pass matching template arguments.
template <typename scalar_t, int num_dims>
inline Accessor<scalar_t, num_dims> get_accessor(const torch::Tensor& tensor) {
    return tensor.packed_accessor32<scalar_t, num_dims, torch::RestrictPtrTraits>();
}

#endif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#ifndef NNPOPS_ATOMICADD_H
#define NNPOPS_ATOMICADD_H

/*
   Implement atomicAdd with double precision numbers for pre-Pascal GPUs.
   Taken from https://stackoverflow.com/questions/37566987/cuda-atomicadd-for-doubles-definition-error
   NOTE: remove when the support of CUDA 11 is dropped.
*/

// Native double-precision atomicAdd exists only on compute capability >= 6.0
// (Pascal); provide a software fallback for older device code paths.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
// Emulate atomicAdd(double*) with a compare-and-swap retry loop:
// reinterpret the 64-bit double as unsigned long long, compute the new sum,
// and retry until atomicCAS observes that no other thread intervened.
// Returns the value previously stored at `address`, matching the built-in.
__device__ double atomicAdd(double* address, double val)
{
    unsigned long long int* address_as_ull = (unsigned long long int*)address;
    unsigned long long int old = *address_as_ull, assumed;
    do {
        assumed = old;
        old = atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(val + __longlong_as_double(assumed)));
    } while (assumed != old);  // another thread updated *address; retry
    return __longlong_as_double(old);
}
#endif

#endif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters