diff --git a/gpu/containers/include/pcl/gpu/containers/device_array.h b/gpu/containers/include/pcl/gpu/containers/device_array.h index 50de78cdedf..74dfcbe8a70 100644 --- a/gpu/containers/include/pcl/gpu/containers/device_array.h +++ b/gpu/containers/include/pcl/gpu/containers/device_array.h @@ -107,12 +107,34 @@ class PCL_EXPORTS DeviceArray : public DeviceMemory { void upload(const T* host_ptr, std::size_t size); + /** \brief Uploads data from CPU memory to internal buffer. + * \return true if upload successful, false if the requested range exceeds the buffer + * \note In contrast to the other upload function, this function + * never allocates memory. + * \param host_ptr pointer to the host buffer to upload from + * \param device_begin_offset index of the first device element to upload to + * \param num_elements number of elements to upload, starting at device_begin_offset + * */ + bool + upload(const T* host_ptr, std::size_t device_begin_offset, std::size_t num_elements); + /** \brief Downloads data from internal buffer to CPU memory * \param host_ptr pointer to buffer to download * */ void download(T* host_ptr) const; + /** \brief Downloads data from internal buffer to CPU memory. + * \return true if download successful, false if the requested range exceeds the buffer + * \param host_ptr pointer to the host buffer to download into + * \param device_begin_offset index of the first device element to download + * \param num_elements number of elements to download, starting at device_begin_offset + * */ + bool + download(T* host_ptr, + std::size_t device_begin_offset, + std::size_t num_elements) const; + /** \brief Uploads data to internal buffer in GPU memory. It calls create() inside to * ensure that intenal buffer size is enough. 
* \param data host vector to upload from diff --git a/gpu/containers/include/pcl/gpu/containers/device_memory.h b/gpu/containers/include/pcl/gpu/containers/device_memory.h index fb2d6e829de..c08339416d0 100644 --- a/gpu/containers/include/pcl/gpu/containers/device_memory.h +++ b/gpu/containers/include/pcl/gpu/containers/device_memory.h @@ -102,12 +102,36 @@ class PCL_EXPORTS DeviceMemory { void upload(const void* host_ptr_arg, std::size_t sizeBytes_arg); + /** \brief Uploads data from CPU memory to device array. + * \note This overload never allocates memory in contrast to the + * other upload function. + * \return true if upload successful, false if the requested byte range exceeds the buffer + * \param host_ptr pointer to the host buffer to upload from + * \param device_begin_byte_offset first byte position to upload to + * \param num_bytes number of bytes to upload + * */ + bool + upload(const void* host_ptr, + std::size_t device_begin_byte_offset, + std::size_t num_bytes); + /** \brief Downloads data from internal buffer to CPU memory * \param host_ptr_arg pointer to buffer to download * */ void download(void* host_ptr_arg) const; + /** \brief Downloads data from internal buffer to CPU memory. + * \return true if download successful, false if the requested byte range exceeds the buffer + * \param host_ptr pointer to the host buffer to download into + * \param device_begin_byte_offset first byte position to download + * \param num_bytes number of bytes to download + * */ + bool + download(void* host_ptr, + std::size_t device_begin_byte_offset, + std::size_t num_bytes) const; + /** \brief Performs swap of data pointed with another device memory. 
* \param other_arg device memory to swap with * */
diff --git a/gpu/containers/include/pcl/gpu/containers/impl/device_array.hpp b/gpu/containers/include/pcl/gpu/containers/impl/device_array.hpp
index 4d4afe89a2e..5ada5e07470 100644
--- a/gpu/containers/include/pcl/gpu/containers/impl/device_array.hpp
+++ b/gpu/containers/include/pcl/gpu/containers/impl/device_array.hpp
@@ -96,6 +96,26 @@ DeviceArray<T>::upload(const T* host_ptr, std::size_t size)
   DeviceMemory::upload(host_ptr, size * elem_size);
 }
 
+// Element-wise partial upload: converts the element offset/count to bytes and
+// forwards to DeviceMemory::upload, which does the range check and the copy.
+template <class T>
+inline bool
+DeviceArray<T>::upload(const T* host_ptr,
+                       std::size_t device_begin_offset,
+                       std::size_t num_elements)
+{
+  // Reject counts whose byte size does not fit in std::size_t; the products
+  // below would otherwise wrap and the byte-level range check would be
+  // applied to the wrong values.
+  const std::size_t max_elements = static_cast<std::size_t>(-1) / sizeof(T);
+  if (device_begin_offset > max_elements || num_elements > max_elements) {
+    return false;
+  }
+  const std::size_t begin_byte_offset = device_begin_offset * sizeof(T);
+  const std::size_t num_bytes = num_elements * sizeof(T);
+  return DeviceMemory::upload(host_ptr, begin_byte_offset, num_bytes);
+}
+
 template <class T>
 inline void
 DeviceArray<T>::download(T* host_ptr) const
@@ -103,6 +123,26 @@ DeviceArray<T>::download(T* host_ptr) const
   DeviceMemory::download(host_ptr);
 }
 
+// Element-wise partial download: converts the element offset/count to bytes and
+// forwards to DeviceMemory::download, which does the range check and the copy.
+template <class T>
+inline bool
+DeviceArray<T>::download(T* host_ptr,
+                         std::size_t device_begin_offset,
+                         std::size_t num_elements) const
+{
+  // Reject counts whose byte size does not fit in std::size_t; the products
+  // below would otherwise wrap and the byte-level range check would be
+  // applied to the wrong values.
+  const std::size_t max_elements = static_cast<std::size_t>(-1) / sizeof(T);
+  if (device_begin_offset > max_elements || num_elements > max_elements) {
+    return false;
+  }
+  const std::size_t begin_byte_offset = device_begin_offset * sizeof(T);
+  const std::size_t num_bytes = num_elements * sizeof(T);
+  return DeviceMemory::download(host_ptr, begin_byte_offset, num_bytes);
+}
+
 template <class T>
 void
 DeviceArray<T>::swap(DeviceArray& other_arg)
diff --git a/gpu/containers/src/device_memory.cpp b/gpu/containers/src/device_memory.cpp
index 5689ff61ffe..065dab7f971 100644
--- a/gpu/containers/src/device_memory.cpp
+++ b/gpu/containers/src/device_memory.cpp
@@ -282,6 +282,25 @@ pcl::gpu::DeviceMemory::upload(const void* host_ptr_arg, std::size_t sizeBytes_a
   cudaSafeCall(cudaDeviceSynchronize());
 }
 
+// Partial upload that never allocates; returns false without copying when the
+// requested byte range does not fit within sizeBytes_.
+bool
+pcl::gpu::DeviceMemory::upload(const void* host_ptr_arg,
+                               std::size_t device_begin_byte_offset,
+                               std::size_t num_bytes)
+{
+  // Two comparisons instead of `offset + num_bytes > sizeBytes_`: the sum can
+  // wrap around std::size_t and let an out-of-range request through.
+  if (device_begin_byte_offset > sizeBytes_ ||
+      num_bytes > sizeBytes_ - device_begin_byte_offset) {
+    return false;
+  }
+  void* begin = static_cast<char*>(data_) + device_begin_byte_offset;
+  cudaSafeCall(cudaMemcpy(begin, host_ptr_arg, num_bytes, cudaMemcpyHostToDevice));
+  cudaSafeCall(cudaDeviceSynchronize());
+  return true;
+}
+
 void
 pcl::gpu::DeviceMemory::download(void* host_ptr_arg) const
 {
@@ -289,6 +308,25 @@ pcl::gpu::DeviceMemory::download(void* host_ptr_arg) const
   cudaSafeCall(cudaDeviceSynchronize());
 }
 
+// Partial download; returns false without copying when the requested byte
+// range does not fit within sizeBytes_.
+bool
+pcl::gpu::DeviceMemory::download(void* host_ptr_arg,
+                                 std::size_t device_begin_byte_offset,
+                                 std::size_t num_bytes) const
+{
+  // Two comparisons instead of `offset + num_bytes > sizeBytes_`: the sum can
+  // wrap around std::size_t and let an out-of-range request through.
+  if (device_begin_byte_offset > sizeBytes_ ||
+      num_bytes > sizeBytes_ - device_begin_byte_offset) {
+    return false;
+  }
+  const void* begin = static_cast<const char*>(data_) + device_begin_byte_offset;
+  cudaSafeCall(cudaMemcpy(host_ptr_arg, begin, num_bytes, cudaMemcpyDeviceToHost));
+  cudaSafeCall(cudaDeviceSynchronize());
+  return true;
+}
+
 void
 pcl::gpu::DeviceMemory::swap(DeviceMemory& other_arg)
 {