From 659e8771c2b906927a116ac3a2633782ba255256 Mon Sep 17 00:00:00 2001 From: Eyal Rozenberg Date: Sun, 10 Mar 2024 23:12:55 +0200 Subject: [PATCH] Fixes #606: Can now use contiguous containers as sources and destinations of memory copying functions --- .../vectorAdd/vectorAdd.cu | 6 ++-- .../vectorAddMMAP/vectorAddMMAP.cpp | 6 ++-- .../vectorAdd_nvrtc/vectorAdd_nvrtc.cpp | 6 ++-- .../vectorAdd_ptx/vectorAdd_ptx.cpp | 6 ++-- .../vectorAdd_unique_regions.cu | 6 ++-- src/cuda/api/detail/region.hpp | 28 ++++++++++++++++++- 6 files changed, 42 insertions(+), 16 deletions(-) diff --git a/examples/modified_cuda_samples/vectorAdd/vectorAdd.cu b/examples/modified_cuda_samples/vectorAdd/vectorAdd.cu index e68fccd9..31fdca0c 100644 --- a/examples/modified_cuda_samples/vectorAdd/vectorAdd.cu +++ b/examples/modified_cuda_samples/vectorAdd/vectorAdd.cu @@ -46,8 +46,8 @@ int main() auto d_B = cuda::memory::make_unique_span(device, numElements); auto d_C = cuda::memory::make_unique_span(device, numElements); - cuda::memory::copy(d_A, h_A.data()); - cuda::memory::copy(d_B, h_B.data()); + cuda::memory::copy(d_A, h_A); + cuda::memory::copy(d_B, h_B); auto launch_config = cuda::launch_config_builder() .overall_size(numElements) @@ -63,7 +63,7 @@ int main() d_A.data(), d_B.data(), d_C.data(), numElements ); - cuda::memory::copy(h_C.data(), d_C); + cuda::memory::copy(h_C, d_C); // Verify that the result vector is correct for (int i = 0; i < numElements; ++i) { diff --git a/examples/modified_cuda_samples/vectorAddMMAP/vectorAddMMAP.cpp b/examples/modified_cuda_samples/vectorAddMMAP/vectorAddMMAP.cpp index 115d2522..1fc5bf4b 100644 --- a/examples/modified_cuda_samples/vectorAddMMAP/vectorAddMMAP.cpp +++ b/examples/modified_cuda_samples/vectorAddMMAP/vectorAddMMAP.cpp @@ -278,8 +278,8 @@ int main() auto d_B_sp = d_B.as_requested().as_span(); auto d_C_sp = d_C.as_requested().as_span(); - cuda::memory::copy(d_A_sp, h_A.data()); - cuda::memory::copy(d_B_sp, h_B.data()); + cuda::memory::copy(d_A_sp, h_A); + cuda::memory::copy(d_B_sp, h_B); // Launch the Vector Add CUDA Kernel auto launch_config = cuda::launch_config_builder() @@ -295,7 +295,7 @@ int main() d_A_sp.data(), d_B_sp.data(), d_C_sp.data(), num_elements ); - cuda::memory::copy(h_C.data(), d_C_sp); + cuda::memory::copy(h_C, d_C_sp); // std::cout << "Checking results...\n\n"; diff --git a/examples/modified_cuda_samples/vectorAdd_nvrtc/vectorAdd_nvrtc.cpp b/examples/modified_cuda_samples/vectorAdd_nvrtc/vectorAdd_nvrtc.cpp index 7baba835..b80605ff 100644 --- a/examples/modified_cuda_samples/vectorAdd_nvrtc/vectorAdd_nvrtc.cpp +++ b/examples/modified_cuda_samples/vectorAdd_nvrtc/vectorAdd_nvrtc.cpp @@ -70,8 +70,8 @@ int main(void) auto d_B = cuda::memory::make_unique_span(device, numElements); auto d_C = cuda::memory::make_unique_span(device, numElements); - cuda::memory::copy(d_A, h_A.data()); - cuda::memory::copy(d_B, h_B.data()); + cuda::memory::copy(d_A, h_A); + cuda::memory::copy(d_B, h_B); auto launch_config = cuda::launch_config_builder() .overall_size(numElements) @@ -87,7 +87,7 @@ int main(void) d_A.get(), d_B.get(), d_C.get(), numElements ); - cuda::memory::copy(h_C.data(), d_C); + cuda::memory::copy(h_C, d_C); // Verify that the result vector is correct for (int i = 0; i < numElements; ++i) { diff --git a/examples/modified_cuda_samples/vectorAdd_ptx/vectorAdd_ptx.cpp b/examples/modified_cuda_samples/vectorAdd_ptx/vectorAdd_ptx.cpp index 350797e5..6960f8ed 100644 --- a/examples/modified_cuda_samples/vectorAdd_ptx/vectorAdd_ptx.cpp +++ b/examples/modified_cuda_samples/vectorAdd_ptx/vectorAdd_ptx.cpp @@ -110,8 +110,8 @@ int main(void) auto d_B = cuda::memory::make_unique_span(device, numElements); auto d_C = cuda::memory::make_unique_span(device, numElements); - cuda::memory::copy(d_A, h_A.data()); - cuda::memory::copy(d_B, h_B.data()); + cuda::memory::copy(d_A, h_A); + cuda::memory::copy(d_B, h_B); auto launch_config = cuda::launch_config_builder() .overall_size(numElements) @@ -127,7 +127,7 @@ int main(void) d_A.get(), d_B.get(), d_C.get(), numElements ); - cuda::memory::copy(h_C.data(), d_C); + cuda::memory::copy(h_C, d_C); // Verify that the result vector is correct for (int i = 0; i < numElements; ++i) { diff --git a/examples/modified_cuda_samples/vectorAdd_unique_regions/vectorAdd_unique_regions.cu b/examples/modified_cuda_samples/vectorAdd_unique_regions/vectorAdd_unique_regions.cu index 2a8ddd8d..6b2901b4 100644 --- a/examples/modified_cuda_samples/vectorAdd_unique_regions/vectorAdd_unique_regions.cu +++ b/examples/modified_cuda_samples/vectorAdd_unique_regions/vectorAdd_unique_regions.cu @@ -51,8 +51,8 @@ int main() auto sp_B = d_B.as_span(); auto sp_C = d_C.as_span(); - cuda::memory::copy(sp_A, h_A.data()); - cuda::memory::copy(sp_B, h_B.data()); + cuda::memory::copy(sp_A, h_A); + cuda::memory::copy(sp_B, h_B); auto launch_config = cuda::launch_config_builder() .overall_size(numElements) @@ -68,7 +68,7 @@ int main() sp_A.data(), sp_B.data(), sp_C.data(), numElements ); - cuda::memory::copy(h_C.data(), sp_C); + cuda::memory::copy(h_C, sp_C); // Verify that the result vector is correct for (int i = 0; i < numElements; ++i) { diff --git a/src/cuda/api/detail/region.hpp b/src/cuda/api/detail/region.hpp index 521f5398..9f8e0adb 100644 --- a/src/cuda/api/detail/region.hpp +++ b/src/cuda/api/detail/region.hpp @@ -12,7 +12,7 @@ #ifndef CUDA_API_WRAPPERS_REGION_HPP_ #define CUDA_API_WRAPPERS_REGION_HPP_ -#include +#include "type_traits.hpp" #include #ifndef CPP14_CONSTEXPR @@ -60,12 +60,38 @@ class base_region_t { constexpr base_region_t(pointer start, size_type size_in_bytes) noexcept : start_(start), size_in_bytes_(size_in_bytes) {} +/* template constexpr base_region_t(span span) noexcept : start_(span.data()), size_in_bytes_(span.size() * sizeof(U)) { static_assert(::std::is_const::value or not ::std::is_const::value, "Attempt to construct a non-const memory region from a const span"); } +*/ + /** + * A constructor from types such as `::std::span`'s or `::std::vector`'s, whose data is in + * a contiguous region of memory + */ + template ::value, void>> + constexpr base_region_t(ContiguousContainer&& contiguous_container) noexcept + : start_(contiguous_container.data()), size_in_bytes_(contiguous_container.size() * sizeof(*(contiguous_container.data()))) + { + static_assert(::std::is_const::value or not ::std::is_const::value, + "Attempt to construct a non-const memory region from a container of const data"); + } + +/* + template class ContiguousContainer, + typename = cuda::detail_::enable_if_t< + cuda::detail_::is_kinda_like_contiguous_container>::value, void>> + constexpr base_region_t(ContiguousContainer&& contiguous) noexcept + : start_(contiguous.data()), size_in_bytes_(contiguous.size() * sizeof(U)) + { + static_assert(::std::is_const::value or not ::std::is_const::value, + "Attempt to construct a non-const memory region from a const contiguous container"); + } +*/ template CPP14_CONSTEXPR span as_span() const NOEXCEPT_IF_NDEBUG