
[gpu] Add ability to download contiguous chunk of memory to host using `Device{Array,Memory}` (PointCloudLibrary#4741)
FabianSchuetze authored and tin1254 committed Aug 10, 2021
1 parent 2701de9 commit 16d5cc6
Showing 4 changed files with 96 additions and 0 deletions.
22 changes: 22 additions & 0 deletions gpu/containers/include/pcl/gpu/containers/device_array.h
@@ -107,12 +107,34 @@ class PCL_EXPORTS DeviceArray : public DeviceMemory {
void
upload(const T* host_ptr, std::size_t size);

/** \brief Uploads data from CPU memory to internal buffer.
* \return true if upload successful
* \note In contrast to the other upload function, this function
* never allocates memory.
* \param host_ptr pointer to the host buffer to upload from
* \param device_begin_offset element offset in the device buffer at which the upload begins
* \param num_elements number of elements to upload, starting at device_begin_offset
* */
bool
upload(const T* host_ptr, std::size_t device_begin_offset, std::size_t num_elements);

/** \brief Downloads data from internal buffer to CPU memory
* \param host_ptr pointer to the host buffer to download to
* */
void
download(T* host_ptr) const;

/** \brief Downloads data from internal buffer to CPU memory.
* \return true if download successful
* \param host_ptr pointer to the host buffer to download to
* \param device_begin_offset element offset in the device buffer at which the download begins
* \param num_elements number of elements to download, starting at device_begin_offset
* */
bool
download(T* host_ptr,
std::size_t device_begin_offset,
std::size_t num_elements) const;

/** \brief Uploads data to internal buffer in GPU memory. It calls create() inside to
* ensure that the internal buffer size is sufficient.
* \param data host vector to upload from
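A minimal usage sketch of the new element-offset overloads (not part of the commit; the buffer size and offsets are illustrative, and a CUDA-capable device is assumed):

#include <pcl/gpu/containers/device_array.h>
#include <vector>

int main()
{
  using pcl::gpu::DeviceArray;

  // Allocate room for 1024 floats on the device.
  DeviceArray<float> device_buffer;
  device_buffer.create(1024);

  // Upload 256 host elements into the device buffer, starting at element offset 128.
  std::vector<float> host_src(256, 1.0f);
  const bool uploaded = device_buffer.upload(host_src.data(), 128, host_src.size());

  // Download the same 256-element chunk back to the host; no allocation happens.
  std::vector<float> host_dst(256);
  const bool downloaded = device_buffer.download(host_dst.data(), 128, host_dst.size());

  return (uploaded && downloaded) ? 0 : 1;
}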
24 changes: 24 additions & 0 deletions gpu/containers/include/pcl/gpu/containers/device_memory.h
@@ -102,12 +102,36 @@ class PCL_EXPORTS DeviceMemory {
void
upload(const void* host_ptr_arg, std::size_t sizeBytes_arg);

/** \brief Uploads data from CPU memory to the internal buffer.
* \note This overload never allocates memory, in contrast to the
* other upload function.
* \return true if upload successful
* \param host_ptr pointer to the host buffer to upload from
* \param device_begin_byte_offset first byte position in the device buffer to upload to
* \param num_bytes number of bytes to upload
* */
bool
upload(const void* host_ptr,
std::size_t device_begin_byte_offset,
std::size_t num_bytes);

/** \brief Downloads data from internal buffer to CPU memory
* \param host_ptr_arg pointer to the host buffer to download to
* */
void
download(void* host_ptr_arg) const;

/** \brief Downloads data from internal buffer to CPU memory.
* \return true if download successful
* \param host_ptr pointer to the host buffer to download to
* \param device_begin_byte_offset first byte position in the device buffer to download from
* \param num_bytes number of bytes to download
* */
bool
download(void* host_ptr,
std::size_t device_begin_byte_offset,
std::size_t num_bytes) const;

/** \brief Performs swap of data pointed with another device memory.
* \param other_arg device memory to swap with
* */
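A corresponding byte-level sketch for DeviceMemory (not part of the commit; sizes and offsets are illustrative, and a CUDA-capable device is assumed):

#include <pcl/gpu/containers/device_memory.h>
#include <vector>

int main()
{
  using pcl::gpu::DeviceMemory;

  // Reserve 4096 bytes of device memory.
  DeviceMemory memory;
  memory.create(4096);

  // Copy 512 host bytes into the device buffer, starting at byte offset 1024.
  std::vector<unsigned char> host_src(512, 0xAB);
  const bool uploaded = memory.upload(host_src.data(), 1024, host_src.size());

  // Read the same 512-byte range back; requests past the allocation would return false.
  std::vector<unsigned char> host_dst(512);
  const bool downloaded = memory.download(host_dst.data(), 1024, host_dst.size());

  return (uploaded && downloaded) ? 0 : 1;
}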
22 changes: 22 additions & 0 deletions gpu/containers/include/pcl/gpu/containers/impl/device_array.hpp
@@ -96,13 +96,35 @@ DeviceArray<T>::upload(const T* host_ptr, std::size_t size)
DeviceMemory::upload(host_ptr, size * elem_size);
}

template <class T>
inline bool
DeviceArray<T>::upload(const T* host_ptr,
std::size_t device_begin_offset,
std::size_t num_elements)
{
std::size_t begin_byte_offset = device_begin_offset * sizeof(T);
std::size_t num_bytes = num_elements * sizeof(T);
return DeviceMemory::upload(host_ptr, begin_byte_offset, num_bytes);
}

template <class T>
inline void
DeviceArray<T>::download(T* host_ptr) const
{
DeviceMemory::download(host_ptr);
}

template <class T>
inline bool
DeviceArray<T>::download(T* host_ptr,
std::size_t device_begin_offset,
std::size_t num_elements) const
{
std::size_t begin_byte_offset = device_begin_offset * sizeof(T);
std::size_t num_bytes = num_elements * sizeof(T);
return DeviceMemory::download(host_ptr, begin_byte_offset, num_bytes);
}

template <class T>
void
DeviceArray<T>::swap(DeviceArray& other_arg)
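To make the element-to-byte conversion above concrete, a hypothetical worked example for T = float (assuming sizeof(float) == 4, as on common platforms): downloading 256 elements starting at element offset 128 forwards to the byte-based DeviceMemory overload as a 1024-byte copy beginning 512 bytes into the device allocation.

// Compile-time check of the offset arithmetic used by the forwarding above
// (sizeof(float) == 4 is assumed; the values are illustrative).
static_assert(128 * sizeof(float) == 512, "element offset 128 -> byte offset 512");
static_assert(256 * sizeof(float) == 1024, "256 elements -> 1024 bytes");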
28 changes: 28 additions & 0 deletions gpu/containers/src/device_memory.cpp
@@ -282,13 +282,41 @@ pcl::gpu::DeviceMemory::upload(const void* host_ptr_arg, std::size_t sizeBytes_arg)
cudaSafeCall(cudaDeviceSynchronize());
}

bool
pcl::gpu::DeviceMemory::upload(const void* host_ptr_arg,
std::size_t device_begin_byte_offset,
std::size_t num_bytes)
{
if (device_begin_byte_offset + num_bytes > sizeBytes_) {
return false;
}
void* begin = static_cast<char*>(data_) + device_begin_byte_offset;
cudaSafeCall(cudaMemcpy(begin, host_ptr_arg, num_bytes, cudaMemcpyHostToDevice));
cudaSafeCall(cudaDeviceSynchronize());
return true;
}

void
pcl::gpu::DeviceMemory::download(void* host_ptr_arg) const
{
cudaSafeCall(cudaMemcpy(host_ptr_arg, data_, sizeBytes_, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaDeviceSynchronize());
}

bool
pcl::gpu::DeviceMemory::download(void* host_ptr_arg,
std::size_t device_begin_byte_offset,
std::size_t num_bytes) const
{
if (device_begin_byte_offset + num_bytes > sizeBytes_) {
return false;
}
const void* begin = static_cast<char*>(data_) + device_begin_byte_offset;
cudaSafeCall(cudaMemcpy(host_ptr_arg, begin, num_bytes, cudaMemcpyDeviceToHost));
cudaSafeCall(cudaDeviceSynchronize());
return true;
}

void
pcl::gpu::DeviceMemory::swap(DeviceMemory& other_arg)
{
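A small sketch of the bounds check added above (not part of the commit; identifiers and sizes are hypothetical): a request that ends exactly at the allocation boundary succeeds, while one that runs past it returns false without copying.

#include <pcl/gpu/containers/device_memory.h>
#include <iostream>
#include <vector>

int main()
{
  pcl::gpu::DeviceMemory memory;
  memory.create(1024); // 1024-byte device allocation

  std::vector<unsigned char> host(256);

  // In range: byte offset 768 + 256 bytes == 1024, exactly at the end of the buffer.
  const bool in_range = memory.download(host.data(), 768, host.size());

  // Out of range: byte offset 896 + 256 bytes > 1024, so the call returns false.
  const bool out_of_range = memory.download(host.data(), 896, host.size());

  std::cout << std::boolalpha << "in-range download: " << in_range
            << ", out-of-range download: " << out_of_range << '\n';
  return 0;
}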
