Merge pull request #50 from NVlabs/sm_37
Add support for sm_37 architecture
Tom94 authored Jan 18, 2022
2 parents 3cf5fba + efb45de commit 6185718
Showing 5 changed files with 37 additions and 20 deletions.
11 changes: 9 additions & 2 deletions .github/workflows/main.yml
@@ -26,7 +26,10 @@ jobs:
       arch: 70
     - os: ubuntu-18.04
       cuda: "10.2"
-      arch: 60
+      arch: 61
+    - os: ubuntu-18.04
+      cuda: "10.2"
+      arch: 37
   env:
     build_dir: "build"
     config: "Release"
@@ -70,7 +73,11 @@ jobs:
     - os: windows-2019
       visual_studio: "Visual Studio 16 2019"
       cuda: "11.5.1"
-      arch: 60
+      arch: 61
+    - os: windows-2019
+      visual_studio: "Visual Studio 16 2019"
+      cuda: "11.5.1"
+      arch: 37
   env:
     build_dir: "build"
     config: "Release"
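The arch values in the CI matrix are CUDA compute-capability targets: 37 is Kepler (e.g. the Tesla K80 this commit adds support for), 61 is Pascal (GTX 10-series), 70 is Volta. A small probe, illustrative only and not part of this commit, showing how the number chosen at build time surfaces as __CUDA_ARCH__ inside device code:

// arch_probe.cu -- illustrative sketch, not part of this commit.
// Build for one of the matrix targets, e.g.:
//   nvcc -gencode arch=compute_37,code=sm_37 arch_probe.cu
#include <cuda_runtime.h>
#include <cstdio>

__global__ void report_arch() {
#ifdef __CUDA_ARCH__
	// __CUDA_ARCH__ is only defined while compiling device code:
	// 370 for sm_37 (Kepler), 610 for sm_61 (Pascal), 700 for sm_70 (Volta).
	if (threadIdx.x == 0 && blockIdx.x == 0) {
		printf("Compiled for __CUDA_ARCH__ = %d\n", __CUDA_ARCH__);
	}
#endif
}

int main() {
	report_arch<<<1, 1>>>();
	cudaDeviceSynchronize();
	return 0;
}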
15 changes: 9 additions & 6 deletions include/neural-graphics-primitives/common_device.cuh
@@ -122,16 +122,19 @@ __device__ void deposit_image_gradient(const Eigen::Matrix<float, N_DIMS, 1>& va
 	pos.x() = std::max(std::min(pos.x(), resolution.x()-1), 0);
 	pos.y() = std::max(std::min(pos.y(), resolution.y()-1), 0);
 
-	if (std::is_same<T, float>::value) {
-		for (uint32_t c = 0; c < N_DIMS; ++c) {
-			atomicAdd(&gradient[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], (T)value[c] * weight);
-			atomicAdd(&gradient_weight[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], weight);
-		}
-	} else if (std::is_same<T, __half>::value) {
+#if TCNN_MIN_GPU_ARCH >= 60 // atomicAdd(__half2) is only supported with compute capability 60 and above
+	if (std::is_same<T, __half>::value) {
 		for (uint32_t c = 0; c < N_DIMS; c += 2) {
 			atomicAdd((__half2*)&gradient[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], {(T)value[c] * weight, (T)value[c+1] * weight});
 			atomicAdd((__half2*)&gradient_weight[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], {weight, weight});
 		}
+	} else
+#endif
+	{
+		for (uint32_t c = 0; c < N_DIMS; ++c) {
+			atomicAdd(&gradient[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], (T)value[c] * weight);
+			atomicAdd(&gradient_weight[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], weight);
+		}
 	}
 };
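The change above, and the analogous ones in the two files below, wraps the vectorized __half2 accumulation in a compile-time guard, because atomicAdd on __half2 only exists on compute capability 6.0 and newer; builds that must also run on sm_37 take the per-component path instead. A self-contained sketch of the same guard pattern follows; the kernel and buffer names are hypothetical, and MIN_GPU_ARCH stands in for tiny-cuda-nn's TCNN_MIN_GPU_ARCH (the lowest architecture the build targets):

// half2_deposit.cuh -- minimal sketch of the guard pattern, hypothetical names.
#include <cuda_fp16.h>
#include <cstdint>
#include <type_traits>

#ifndef MIN_GPU_ARCH
#define MIN_GPU_ARCH 37 // e.g. a build that must also run on a Tesla K80
#endif

// Atomically deposits value * weight into an N_DIMS-channel gradient buffer.
// Builds targeting architectures below 6.0 are expected to instantiate this
// with T = float, since the half-precision atomics are unavailable there.
template <typename T, uint32_t N_DIMS>
__device__ void deposit(T* __restrict__ gradient, const float* __restrict__ value, float weight, uint32_t pixel) {
#if MIN_GPU_ARCH >= 60 // atomicAdd(__half2) needs compute capability 6.0+
	if constexpr (std::is_same<T, __half>::value && N_DIMS % 2 == 0) {
		// Fast path: one atomic per pair of half-precision channels.
		// Assumes gradient is 4-byte aligned so the __half2 cast is valid.
		for (uint32_t c = 0; c < N_DIMS; c += 2) {
			atomicAdd((__half2*)&gradient[pixel * N_DIMS + c],
			          {(T)(value[c] * weight), (T)(value[c + 1] * weight)});
		}
	} else
#endif
	{
		// Fallback: one atomic per channel.
		for (uint32_t c = 0; c < N_DIMS; ++c) {
			atomicAdd(&gradient[pixel * N_DIMS + c], (T)(value[c] * weight));
		}
	}
}

The #if / } else / #endif shape keeps a single fallback block that serves both the non-half case and pre-Pascal builds, which is the structure all three diffs in this commit adopt.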
14 changes: 9 additions & 5 deletions include/neural-graphics-primitives/envmap.cuh
@@ -82,14 +82,18 @@ __device__ void deposit_envmap_gradient(const tcnn::vector_t<T, 4>& value, T* __
 	pos.y() = std::max(std::min(pos.y(), envmap_resolution.y()-1), 0);
 
 	Eigen::Array4f result;
-	if (std::is_same<T, float>::value) {
-		for (uint32_t c = 0; c < 4; ++c) {
-			atomicAdd(&envmap_gradient[(pos.x() + pos.y() * envmap_resolution.x()) * 4 + c], value[c] * weight);
-		}
-	} else if (std::is_same<T, __half>::value) {
+
+#if TCNN_MIN_GPU_ARCH >= 60 // atomicAdd(__half2) is only supported with compute capability 60 and above
+	if (std::is_same<T, __half>::value) {
 		for (uint32_t c = 0; c < 4; c += 2) {
 			atomicAdd((__half2*)&envmap_gradient[(pos.x() + pos.y() * envmap_resolution.x()) * 4 + c], {value[c] * weight, value[c+1] * weight});
 		}
+	} else
+#endif
+	{
+		for (uint32_t c = 0; c < 4; ++c) {
+			atomicAdd(&envmap_gradient[(pos.x() + pos.y() * envmap_resolution.x()) * 4 + c], value[c] * weight);
+		}
 	}
 };
15 changes: 9 additions & 6 deletions include/neural-graphics-primitives/takikawa_encoding.cuh
@@ -240,17 +240,20 @@ __global__ void kernel_takikawa_backward(
 
 			int param_idx = node.vertices[idx] * N_FEATURES_PER_LEVEL;
 
-			if (N_FEATURES_PER_LEVEL == 1 || !std::is_same<T, __half>::value) {
-				#pragma unroll
-				for (uint32_t f = 0; f < N_FEATURES_PER_LEVEL; ++f) {
-					atomicAdd(&grid_gradient[param_idx], (T)((float)grad[f] * weight));
-				}
-			} else {
+#if TCNN_MIN_GPU_ARCH >= 60 // atomicAdd(__half2) is only supported with compute capability 60 and above
+			if (N_FEATURES_PER_LEVEL > 1 && std::is_same<T, __half>::value) {
 				#pragma unroll
 				for (uint32_t feature = 0; feature < N_FEATURES_PER_LEVEL; feature += 2) {
 					__half2 v = {(__half)((float)grad[feature] * weight), (__half)((float)grad[feature+1] * weight)};
 					atomicAdd((__half2*)&grid_gradient[param_idx + feature], v);
 				}
+			} else
+#endif
+			{
+				#pragma unroll
+				for (uint32_t f = 0; f < N_FEATURES_PER_LEVEL; ++f) {
+					atomicAdd(&grid_gradient[param_idx], (T)((float)grad[f] * weight));
+				}
 			}
 		}
 	}
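To check at runtime which of the two paths a given GPU can actually take, the device's compute capability can be queried from the CUDA runtime; a small standalone sketch (illustrative, not from this repository):

// cc_check.cu -- illustrative sketch, not part of this commit.
// Reports whether the active device reaches the compute capability 6.0
// required for the vectorized __half2 atomicAdd path.
#include <cuda_runtime.h>
#include <cstdio>

int main() {
	int device = 0;
	cudaDeviceProp prop{};
	if (cudaGetDeviceProperties(&prop, device) != cudaSuccess) {
		std::fprintf(stderr, "failed to query device %d\n", device);
		return 1;
	}

	const int cc = prop.major * 10 + prop.minor; // 37 on a Tesla K80, 61 on a GTX 1080, ...
	std::printf("%s: compute capability %d.%d\n", prop.name, prop.major, prop.minor);
	std::printf("__half2 atomicAdd available: %s\n", cc >= 60 ? "yes" : "no");
	return 0;
}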
