From efb45deab1e4c5cf0043832115de0216a9a8898e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20M=C3=BCller?=
Date: Tue, 18 Jan 2022 10:40:27 +0100
Subject: [PATCH] Add support for sm_37 architecture

---
 .github/workflows/main.yml                        | 11 +++++++++--
 dependencies/tiny-cuda-nn                         |  2 +-
 .../neural-graphics-primitives/common_device.cuh  | 15 +++++++++------
 include/neural-graphics-primitives/envmap.cuh     | 14 +++++++++-----
 .../takikawa_encoding.cuh                         | 15 +++++++++------
 5 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 7cd6e20a1..35b134c46 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -26,7 +26,10 @@ jobs:
             arch: 70
           - os: ubuntu-18.04
             cuda: "10.2"
-            arch: 60
+            arch: 61
+          - os: ubuntu-18.04
+            cuda: "10.2"
+            arch: 37
     env:
       build_dir: "build"
       config: "Release"
@@ -70,7 +73,11 @@ jobs:
           - os: windows-2019
             visual_studio: "Visual Studio 16 2019"
             cuda: "11.5.1"
-            arch: 60
+            arch: 61
+          - os: windows-2019
+            visual_studio: "Visual Studio 16 2019"
+            cuda: "11.5.1"
+            arch: 37
     env:
       build_dir: "build"
       config: "Release"

diff --git a/dependencies/tiny-cuda-nn b/dependencies/tiny-cuda-nn
index 6041eadf3..3eb4cd571 160000
--- a/dependencies/tiny-cuda-nn
+++ b/dependencies/tiny-cuda-nn
@@ -1 +1 @@
-Subproject commit 6041eadf3fe5f2d7bb8e53b8328c7325a047ac42
+Subproject commit 3eb4cd571f9a2e1276c556e9d8b8bc34d44b84d3

diff --git a/include/neural-graphics-primitives/common_device.cuh b/include/neural-graphics-primitives/common_device.cuh
index 022b2b48a..b210075c9 100644
--- a/include/neural-graphics-primitives/common_device.cuh
+++ b/include/neural-graphics-primitives/common_device.cuh
@@ -122,16 +122,19 @@ __device__ void deposit_image_gradient(const Eigen::Matrix<float, N_DIMS, 1>& va
 	pos.x() = std::max(std::min(pos.x(), resolution.x()-1), 0);
 	pos.y() = std::max(std::min(pos.y(), resolution.y()-1), 0);
 
-	if (std::is_same<T, float>::value) {
-		for (uint32_t c = 0; c < N_DIMS; ++c) {
-			atomicAdd(&gradient[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], (T)value[c] * weight);
-			atomicAdd(&gradient_weight[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], weight);
-		}
-	} else if (std::is_same<T, __half>::value) {
+#if TCNN_MIN_GPU_ARCH >= 60 // atomicAdd(__half2) is only supported with compute capability 60 and above
+	if (std::is_same<T, __half>::value) {
 		for (uint32_t c = 0; c < N_DIMS; c += 2) {
 			atomicAdd((__half2*)&gradient[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], {(T)value[c] * weight, (T)value[c+1] * weight});
 			atomicAdd((__half2*)&gradient_weight[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], {weight, weight});
 		}
+	} else
+#endif
+	{
+		for (uint32_t c = 0; c < N_DIMS; ++c) {
+			atomicAdd(&gradient[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], (T)value[c] * weight);
+			atomicAdd(&gradient_weight[(pos.x() + pos.y() * resolution.x()) * N_DIMS + c], weight);
+		}
 	}
 };
 
diff --git a/include/neural-graphics-primitives/envmap.cuh b/include/neural-graphics-primitives/envmap.cuh
index 4185f1dd5..273f66100 100644
--- a/include/neural-graphics-primitives/envmap.cuh
+++ b/include/neural-graphics-primitives/envmap.cuh
@@ -82,14 +82,18 @@ __device__ void deposit_envmap_gradient(const tcnn::vector_t<T, 4>& value, T* __
 	pos.y() = std::max(std::min(pos.y(), envmap_resolution.y()-1), 0);
 
 	Eigen::Array4f result;
-	if (std::is_same<T, float>::value) {
-		for (uint32_t c = 0; c < 4; ++c) {
-			atomicAdd(&envmap_gradient[(pos.x() + pos.y() * envmap_resolution.x()) * 4 + c], value[c] * weight);
-		}
-	} else if (std::is_same<T, __half>::value) {
+
+#if TCNN_MIN_GPU_ARCH >= 60 // atomicAdd(__half2) is only supported with compute capability 60 and above
+	if (std::is_same<T, __half>::value) {
 		for (uint32_t c = 0; c < 4; c += 2) {
 			atomicAdd((__half2*)&envmap_gradient[(pos.x() + pos.y() * envmap_resolution.x()) * 4 + c], {value[c] * weight, value[c+1] * weight});
 		}
+	} else
+#endif
+	{
+		for (uint32_t c = 0; c < 4; ++c) {
+			atomicAdd(&envmap_gradient[(pos.x() + pos.y() * envmap_resolution.x()) * 4 + c], value[c] * weight);
+		}
 	}
 };
 
diff --git a/include/neural-graphics-primitives/takikawa_encoding.cuh b/include/neural-graphics-primitives/takikawa_encoding.cuh
index ad0cb9634..555dcc440 100644
--- a/include/neural-graphics-primitives/takikawa_encoding.cuh
+++ b/include/neural-graphics-primitives/takikawa_encoding.cuh
@@ -240,17 +240,20 @@ __global__ void kernel_takikawa_backward(
 
 			int param_idx = node.vertices[idx] * N_FEATURES_PER_LEVEL;
 
-			if (N_FEATURES_PER_LEVEL == 1 || !std::is_same<T, __half>::value) {
-				#pragma unroll
-				for (uint32_t f = 0; f < N_FEATURES_PER_LEVEL; ++f) {
-					atomicAdd(&grid_gradient[param_idx], (T)((float)grad[f] * weight));
-				}
-			} else {
+#if TCNN_MIN_GPU_ARCH >= 60 // atomicAdd(__half2) is only supported with compute capability 60 and above
+			if (N_FEATURES_PER_LEVEL > 1 && std::is_same<T, __half>::value) {
 				#pragma unroll
 				for (uint32_t feature = 0; feature < N_FEATURES_PER_LEVEL; feature += 2) {
 					__half2 v = {(__half)((float)grad[feature] * weight), (__half)((float)grad[feature+1] * weight)};
 					atomicAdd((__half2*)&grid_gradient[param_idx + feature], v);
 				}
+			} else
+#endif
+			{
+				#pragma unroll
+				for (uint32_t f = 0; f < N_FEATURES_PER_LEVEL; ++f) {
+					atomicAdd(&grid_gradient[param_idx], (T)((float)grad[f] * weight));
+				}
 			}
 		}
 	}
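
Note (not part of the patch): all three hunks apply the same guard pattern, so a condensed standalone sketch of it follows for reference. The names MIN_GPU_ARCH and deposit_two_gradients are illustrative stand-ins rather than identifiers from this repository; the real code uses tiny-cuda-nn's TCNN_MIN_GPU_ARCH and the deposit helpers above. The sketch assumes what the patch relies on: vectorized atomicAdd on __half2 exists only from compute capability 6.0 upward, while scalar float atomics work on every targeted architecture, so the __half overload is compiled only when the minimum architecture allows it.

#include <cuda_fp16.h>

// Stand-in for a per-build macro such as tiny-cuda-nn's TCNN_MIN_GPU_ARCH:
// the lowest compute capability the binary must run on.
#ifndef MIN_GPU_ARCH
#define MIN_GPU_ARCH 60
#endif

// Scalar fallback: float atomics exist on every targeted architecture,
// including sm_37, so this overload is always available.
__device__ inline void deposit_two_gradients(float* dst, float a, float b) {
	atomicAdd(dst + 0, a);
	atomicAdd(dst + 1, b);
}

#if MIN_GPU_ARCH >= 60 // atomicAdd(__half2) requires compute capability 6.0+
// Fast path: two packed half values deposited with one vectorized atomic.
// dst is assumed to sit at an even half index so the __half2 access is
// 4-byte aligned, matching the paired (c += 2) loops in the patch.
__device__ inline void deposit_two_gradients(__half* dst, float a, float b) {
	atomicAdd((__half2*)dst, __halves2half2(__float2half(a), __float2half(b)));
}
#endif

Built with, for example, -DMIN_GPU_ARCH=37 for a Kepler-only target, only the scalar overload is compiled, which mirrors how the new sm_37 CI entries avoid the __half2 code path.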