Skip to content

Commit

Permalink
[SYCL][COMPAT] Migrate currently unsupported memcpy_parameter overloa…
Browse files Browse the repository at this point in the history
…ds (#14039)

This PR adds `syclcompat::experimental::memcpy` and
`syclcompat::experimental::memcpy_async` taking `memcpy_parameter`
arguments which wrap either:
 - deprecated `sycl::image`
 - experimental bindless images

Since image support was decided to be out of scope for 2025.0, these
APIs are introduced for forward compatibility only, and currently throw
a `std::runtime_error`.

---------

Signed-off-by: Joe Todd <joe.todd@codeplay.com>
  • Loading branch information
joeatodd authored Jun 11, 2024
1 parent 6ac0a3f commit 9332d1a
Show file tree
Hide file tree
Showing 6 changed files with 603 additions and 38 deletions.
60 changes: 59 additions & 1 deletion sycl/doc/syclcompat/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ static void destroy_event(event_ptr event);
} // syclcompat
```
### Memory Allocation
### Memory Operations
This library provides interfaces to allocate memory to be accessed within kernel
functions and on the host. The `syclcompat::malloc` function allocates device
Expand Down Expand Up @@ -510,6 +510,64 @@ public:
} // syclcompat
```

The `syclcompat::experimental` namespace contains currently unsupported `memcpy` overloads which take a `syclcompat::experimental::memcpy_parameter` argument. These are included for forwards compatibility and currently throw a `std::runtime_error`.

```cpp
namespace syclcompat {
namespace experimental {
// Forward declarations for types relating to unsupported memcpy_parameter API:

enum memcpy_direction {
host_to_host,
host_to_device,
device_to_host,
device_to_device,
automatic
};

#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
class image_mem_wrapper;
#endif
class image_matrix;

/// Memory copy parameters for 2D/3D memory data.
struct memcpy_parameter {
struct data_wrapper {
pitched_data pitched{};
sycl::id<3> pos{};
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
experimental::image_mem_wrapper *image_bindless{nullptr};
#endif
image_matrix *image{nullptr};
};
data_wrapper from{};
data_wrapper to{};
sycl::range<3> size{};
syclcompat::detail::memcpy_direction direction{syclcompat::detail::memcpy_direction::automatic};
};

/// [UNSUPPORTED] Synchronously copies 2D/3D memory data specified by \p param .
/// The function will return after the copy is completed.
///
/// \param param Memory copy parameters.
/// \param q Queue to execute the copy task.
/// \returns no return value.
static inline void memcpy(const memcpy_parameter &param,
sycl::queue q = get_default_queue());

/// [UNSUPPORTED] Asynchronously copies 2D/3D memory data specified by \p param
/// . The return of the function does NOT guarantee the copy is completed.
///
/// \param param Memory copy parameters.
/// \param q Queue to execute the copy task.
/// \returns no return value.
static inline void memcpy_async(const memcpy_parameter &param,
sycl::queue q = get_default_queue());

} // namespace experimental
} // namespace syclcompat
```
Finally, the class `pitched_data`, which manages memory allocation for 3D
spaces, padded to avoid uncoalesced memory accesses.
Expand Down
178 changes: 143 additions & 35 deletions sycl/include/syclcompat/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,15 @@ template <typename AllocT> auto *local_mem() {
return As;
}

namespace detail {
namespace experimental {
enum memcpy_direction {
host_to_host,
host_to_device,
device_to_host,
device_to_device,
automatic
};
} // namespace detail
}

enum class memory_region {
global = 0, // device global memory
Expand Down Expand Up @@ -122,6 +122,42 @@ class pitched_data {
size_t _pitch, _x, _y;
};

namespace experimental {
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
class image_mem_wrapper;
namespace detail {
static sycl::event memcpy(const image_mem_wrapper *src,
const sycl::id<3> &src_id, pitched_data &dest,
const sycl::id<3> &dest_id,
const sycl::range<3> &copy_extend, sycl::queue q);
static sycl::event memcpy(const pitched_data src, const sycl::id<3> &src_id,
image_mem_wrapper *dest, const sycl::id<3> &dest_id,
const sycl::range<3> &copy_extend, sycl::queue q);
} // namespace detail
#endif
class image_matrix;
namespace detail {
static pitched_data to_pitched_data(image_matrix *image);
}

/// Memory copy parameters for 2D/3D memory data.
struct memcpy_parameter {
struct data_wrapper {
pitched_data pitched{};
sycl::id<3> pos{};
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
experimental::image_mem_wrapper *image_bindless{nullptr};
#endif
image_matrix *image{nullptr};
};
data_wrapper from{};
data_wrapper to{};
sycl::range<3> size{};
syclcompat::experimental::memcpy_direction direction{
syclcompat::experimental::memcpy_direction::automatic};
};
} // namespace experimental

namespace detail {

template <class T, memory_region Memory, size_t Dimension> class accessor;
Expand Down Expand Up @@ -263,21 +299,16 @@ static pointer_access_attribute get_pointer_attribute(sycl::queue q,
}
}

static memcpy_direction deduce_memcpy_direction(sycl::queue q, void *to_ptr,
const void *from_ptr) {
static experimental::memcpy_direction
deduce_memcpy_direction(sycl::queue q, void *to_ptr, const void *from_ptr) {
// table[to_attribute][from_attribute]
using namespace experimental; // for memcpy_direction
static const memcpy_direction
direction_table[static_cast<unsigned>(pointer_access_attribute::end)]
[static_cast<unsigned>(pointer_access_attribute::end)] = {
{memcpy_direction::host_to_host,
memcpy_direction::device_to_host,
memcpy_direction::host_to_host},
{memcpy_direction::host_to_device,
memcpy_direction::device_to_device,
memcpy_direction::device_to_device},
{memcpy_direction::host_to_host,
memcpy_direction::device_to_device,
memcpy_direction::device_to_device}};
{host_to_host, device_to_host, host_to_host},
{host_to_device, device_to_device, device_to_device},
{host_to_host, device_to_device, device_to_device}};
return direction_table[static_cast<unsigned>(get_pointer_attribute(
q, to_ptr))][static_cast<unsigned>(get_pointer_attribute(q, from_ptr))];
}
Expand All @@ -300,35 +331,36 @@ static inline size_t get_offset(sycl::id<3> id, size_t slice, size_t pitch) {
return slice * id.get(2) + pitch * id.get(1) + id.get(0);
}

// RAII for host pointer
class host_buffer {
void *_buf;
size_t _size;
sycl::queue _q;
const std::vector<sycl::event> &_deps; // free operation depends

public:
host_buffer(size_t size, sycl::queue q, const std::vector<sycl::event> &deps)
: _buf(std::malloc(size)), _size(size), _q(q), _deps(deps) {}
void *get_ptr() const { return _buf; }
size_t get_size() const { return _size; }
~host_buffer() {
if (_buf) {
_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(_deps);
cgh.host_task([buf = _buf] { std::free(buf); });
});
}
}
};

/// copy 3D matrix specified by \p size from 3D matrix specified by \p from_ptr
/// and \p from_range to another specified by \p to_ptr and \p to_range.
static inline std::vector<sycl::event>
memcpy(sycl::queue q, void *to_ptr, const void *from_ptr,
sycl::range<3> to_range, sycl::range<3> from_range, sycl::id<3> to_id,
sycl::id<3> from_id, sycl::range<3> size,
const std::vector<sycl::event> &dep_events = {}) {
// RAII for host pointer
class host_buffer {
void *_buf;
size_t _size;
sycl::queue _q;
const std::vector<sycl::event> &_deps; // free operation depends

public:
host_buffer(size_t size, sycl::queue q,
const std::vector<sycl::event> &deps)
: _buf(std::malloc(size)), _size(size), _q(q), _deps(deps) {}
void *get_ptr() const { return _buf; }
size_t get_size() const { return _size; }
~host_buffer() {
if (_buf) {
_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(_deps);
cgh.host_task([buf = _buf] { std::free(buf); });
});
}
}
};

std::vector<sycl::event> event_list;

size_t to_slice = to_range.get(1) * to_range.get(0);
Expand All @@ -343,6 +375,7 @@ memcpy(sycl::queue q, void *to_ptr, const void *from_ptr,
return {memcpy(q, to_surface, from_surface, to_slice * size.get(2),
dep_events)};
}
using namespace experimental; // for memcpy_direction
memcpy_direction direction = deduce_memcpy_direction(q, to_ptr, from_ptr);
size_t size_slice = size.get(1) * size.get(0);
switch (direction) {
Expand Down Expand Up @@ -448,6 +481,56 @@ static sycl::event combine_events(std::vector<sycl::event> &events,

} // namespace detail

namespace experimental {
namespace detail {
static inline std::vector<sycl::event>
memcpy(sycl::queue q, const experimental::memcpy_parameter &param) {
auto to = param.to.pitched;
auto from = param.from.pitched;
#ifdef SYCL_EXT_ONEAPI_BINDLESS_IMAGES
if (param.to.image_bindless != nullptr &&
param.from.image_bindless != nullptr) {
throw std::runtime_error(
"[SYCLcompat] memcpy: Unsupported bindless_image API.");
// TODO: Need change logic when sycl support image_mem to image_mem copy.
std::vector<sycl::event> event_list;
syclcompat::detail::host_buffer buf(param.size.size(), q, event_list);
to.set_data_ptr(buf.get_ptr());
experimental::detail::memcpy(param.from.image_bindless, param.from.pos, to,
sycl::id<3>(0, 0, 0), param.size, q);
from.set_data_ptr(buf.get_ptr());
event_list.push_back(experimental::detail::memcpy(
from, sycl::id<3>(0, 0, 0), param.to.image_bindless, param.to.pos,
param.size, q));
return event_list;
} else if (param.to.image_bindless != nullptr) {
throw std::runtime_error(
"[SYCLcompat] memcpy: Unsupported bindless_image API.");
return {experimental::detail::memcpy(from, param.from.pos,
param.to.image_bindless, param.to.pos,
param.size, q)};
} else if (param.from.image_bindless != nullptr) {
throw std::runtime_error(
"[SYCLcompat] memcpy: Unsupported bindless_image API.");
return {experimental::detail::memcpy(param.from.image_bindless,
param.from.pos, to, param.to.pos,
param.size, q)};
}
#endif
if (param.to.image != nullptr) {
throw std::runtime_error("[SYCLcompat] memcpy: Unsupported image API.");
to = experimental::detail::to_pitched_data(param.to.image);
}
if (param.from.image != nullptr) {
throw std::runtime_error("[SYCLcompat] memcpy: Unsupported image API.");
from = experimental::detail::to_pitched_data(param.from.image);
}
return syclcompat::detail::memcpy(q, to, param.to.pos, from, param.from.pos,
param.size);
}
} // namespace detail
} // namespace experimental

/// Allocate memory block on the device.
/// \param num_bytes Number of bytes to allocate.
/// \param q Queue to execute the allocate task.
Expand Down Expand Up @@ -757,6 +840,31 @@ static sycl::event inline fill_async(void *dev_ptr, const T &pattern,
return detail::fill(q, dev_ptr, pattern, count);
}

namespace experimental {

/// [UNSUPPORTED] Synchronously copies 2D/3D memory data specified by \p param .
/// The function will return after the copy is completed.
///
/// \param param Memory copy parameters.
/// \param q Queue to execute the copy task.
/// \returns no return value.
static inline void memcpy(const memcpy_parameter &param,
sycl::queue q = get_default_queue()) {
sycl::event::wait(syclcompat::experimental::detail::memcpy(q, param));
}

/// [UNSUPPORTED] Asynchronously copies 2D/3D memory data specified by \p param
/// . The return of the function does NOT guarantee the copy is completed.
///
/// \param param Memory copy parameters.
/// \param q Queue to execute the copy task.
/// \returns no return value.
static inline void memcpy_async(const memcpy_parameter &param,
sycl::queue q = get_default_queue()) {
syclcompat::experimental::detail::memcpy(q, param);
}
} // namespace experimental

/// Synchronously sets \p value to the first \p size bytes starting from \p
/// dev_ptr. The function will return after the memset operation is completed.
///
Expand Down
Loading

0 comments on commit 9332d1a

Please sign in to comment.