Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[gpu] Add ability to download contiguous chunk of memory to host using Device{Array,Memory} #4741

Merged
22 changes: 22 additions & 0 deletions gpu/containers/include/pcl/gpu/containers/device_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,34 @@ class PCL_EXPORTS DeviceArray : public DeviceMemory {
void
upload(const T* host_ptr, std::size_t size);

/** \brief Uploads data from CPU memory to internal buffer.
* \return true if upload successful
* \note In contrast to the other upload function, this function
* never allocates memory.
* \param host_ptr pointer to buffer to upload
* \param device_begin_offset begin upload
* \param num_elements number of elements from device_begin_offset
* */
bool
upload(const T* host_ptr, std::size_t device_begin_offset, std::size_t num_elements);

/** \brief Downloads data from internal buffer to CPU memory
* \param host_ptr pointer to buffer to download
* */
void
download(T* host_ptr) const;

/** \brief Downloads data from internal buffer to CPU memory.
* \return true if download successful
* \param host_ptr pointer to buffer to download
* \param device_begin_offset begin download location
* \param num_elements number of elements from device_begin_offset
* */
bool
download(T* host_ptr,
std::size_t device_begin_offset,
std::size_t num_elements) const;

/** \brief Uploads data to internal buffer in GPU memory. It calls create() inside to
* ensure that internal buffer size is enough.
* \param data host vector to upload from
Expand Down
24 changes: 24 additions & 0 deletions gpu/containers/include/pcl/gpu/containers/device_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,36 @@ class PCL_EXPORTS DeviceMemory {
void
upload(const void* host_ptr_arg, std::size_t sizeBytes_arg);

/** \brief Uploads data from CPU memory to device array.
* \note This overload never allocates memory in contrast to the
* other upload function.
* \return true if upload successful
* \param host_ptr_arg pointer to buffer to upload
* \param device_begin_byte_offset first byte position to upload to
* \param num_bytes number of bytes to upload
* */
bool
upload(const void* host_ptr,
std::size_t device_begin_byte_offset,
std::size_t num_bytes);

/** \brief Downloads data from internal buffer to CPU memory
* \param host_ptr_arg pointer to buffer to download
* */
void
download(void* host_ptr_arg) const;

/** \brief Downloads data from internal buffer to CPU memory.
* \return true if download successful
* \param host_ptr_arg pointer to buffer to download
* \param device_begin_byte_offset first byte position to download
* \param num_bytes number of bytes to download
* */
bool
download(void* host_ptr,
std::size_t device_begin_byte_offset,
std::size_t num_bytes) const;

/** \brief Performs swap of data pointed with another device memory.
* \param other_arg device memory to swap with
* */
Expand Down
22 changes: 22 additions & 0 deletions gpu/containers/include/pcl/gpu/containers/impl/device_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,35 @@ DeviceArray<T>::upload(const T* host_ptr, std::size_t size)
DeviceMemory::upload(host_ptr, size * elem_size);
}

template <class T>
inline bool
DeviceArray<T>::upload(const T* host_ptr,
                       std::size_t device_begin_offset,
                       std::size_t num_elements)
{
  // Convert element counts to byte counts using the class-wide elem_size
  // constant, consistent with the full-buffer upload overload above.
  const std::size_t begin_byte_offset = device_begin_offset * elem_size;
  const std::size_t num_bytes = num_elements * elem_size;
  // Returns false (without allocating) if the range does not fit in the
  // existing device buffer; see DeviceMemory::upload.
  return DeviceMemory::upload(host_ptr, begin_byte_offset, num_bytes);
}

template <class T>
inline void
DeviceArray<T>::download(T* host_ptr) const
{
  // Typed forwarding wrapper: copies the entire device allocation to host
  // memory. host_ptr must be large enough to hold the whole buffer
  // (see DeviceMemory::download, which copies sizeBytes_ bytes).
  DeviceMemory::download(host_ptr);
}

template <class T>
inline bool
DeviceArray<T>::download(T* host_ptr,
                         std::size_t device_begin_offset,
                         std::size_t num_elements) const
{
  // Convert element counts to byte counts using the class-wide elem_size
  // constant, consistent with the element-based upload overloads.
  const std::size_t begin_byte_offset = device_begin_offset * elem_size;
  const std::size_t num_bytes = num_elements * elem_size;
  // Returns false if the requested range exceeds the device buffer;
  // see DeviceMemory::download.
  return DeviceMemory::download(host_ptr, begin_byte_offset, num_bytes);
}

template <class T>
void
DeviceArray<T>::swap(DeviceArray& other_arg)
Expand Down
28 changes: 28 additions & 0 deletions gpu/containers/src/device_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,13 +282,41 @@ pcl::gpu::DeviceMemory::upload(const void* host_ptr_arg, std::size_t sizeBytes_a
cudaSafeCall(cudaDeviceSynchronize());
}

bool
pcl::gpu::DeviceMemory::upload(const void* host_ptr_arg,
                               std::size_t device_begin_byte_offset,
                               std::size_t num_bytes)
{
  // Overflow-safe bounds check: `device_begin_byte_offset + num_bytes` could
  // wrap around std::size_t and falsely pass a `> sizeBytes_` comparison, so
  // compare against the remaining capacity instead.
  if (device_begin_byte_offset > sizeBytes_ ||
      num_bytes > sizeBytes_ - device_begin_byte_offset) {
    return false;
  }
  // Byte-offset arithmetic requires a char* view of the opaque device pointer.
  void* begin = static_cast<char*>(data_) + device_begin_byte_offset;
  cudaSafeCall(cudaMemcpy(begin, host_ptr_arg, num_bytes, cudaMemcpyHostToDevice));
  cudaSafeCall(cudaDeviceSynchronize());
  return true;
}

void
pcl::gpu::DeviceMemory::download(void* host_ptr_arg) const
{
  // Copies the entire device buffer (sizeBytes_ bytes) to host memory.
  // cudaSafeCall reports CUDA errors (see its definition elsewhere).
  // NOTE(review): the explicit cudaDeviceSynchronize looks defensive —
  // presumably to guarantee completion/error visibility before returning.
  cudaSafeCall(cudaMemcpy(host_ptr_arg, data_, sizeBytes_, cudaMemcpyDeviceToHost));
  cudaSafeCall(cudaDeviceSynchronize());
}

bool
pcl::gpu::DeviceMemory::download(void* host_ptr_arg,
                                 std::size_t device_begin_byte_offset,
                                 std::size_t num_bytes) const
{
  // Overflow-safe bounds check: `device_begin_byte_offset + num_bytes` could
  // wrap around std::size_t and falsely pass a `> sizeBytes_` comparison, so
  // compare against the remaining capacity instead.
  if (device_begin_byte_offset > sizeBytes_ ||
      num_bytes > sizeBytes_ - device_begin_byte_offset) {
    return false;
  }
  // Byte-offset arithmetic requires a char* view of the opaque device pointer.
  const void* begin = static_cast<char*>(data_) + device_begin_byte_offset;
  cudaSafeCall(cudaMemcpy(host_ptr_arg, begin, num_bytes, cudaMemcpyDeviceToHost));
  cudaSafeCall(cudaDeviceSynchronize());
  return true;
}

void
pcl::gpu::DeviceMemory::swap(DeviceMemory& other_arg)
{
Expand Down