Fixes #565: Completed the implementation of a library kernel class - completing the support for libraries overall

CAVEAT: No example code to actually load and launch any cuLibraries
eyalroz committed Mar 1, 2024
1 parent 53d715b commit ed77753
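
Since the commit itself ships no example code (see the caveat above), here is a minimal, hypothetical usage sketch of how the pieces added here are meant to fit together. Nothing in it is part of the commit: how the library_t gets loaded is left out, and the kernel name "scale", its argument list, and the use of stream_t::context() are illustrative assumptions only.

// Hypothetical sketch, not part of this commit: `lib` is assumed to have been
// loaded already via the library.hpp API; kernel name and arguments are made up.
#include <cuda/api.hpp>

void example(const cuda::library_t& lib, const cuda::stream_t& stream,
    cuda::launch_configuration_t config, float* data, unsigned n)
{
    // A library kernel is not associated with any device or context
    cuda::library::kernel_t lib_kernel = lib.get_kernel("scale");

    // Either launch it directly - the new enqueue_launch overload for library
    // kernels takes care of contextualization -
    cuda::enqueue_launch(lib_kernel, stream, config, data, n);

    // ... or explicitly obtain a context-bound cuda::kernel_t first:
    cuda::kernel_t contextualized = lib_kernel.contextualize(stream.context());
    cuda::enqueue_launch(contextualized, stream, config, data, n);
}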
Showing 6 changed files with 354 additions and 32 deletions.
1 change: 1 addition & 0 deletions src/cuda/api.hpp
@@ -42,6 +42,7 @@
#include "api/module.hpp"
#if CUDA_VERSION >= 12000
#include "api/library.hpp"
#include "api/kernels/in_library.hpp"
#endif
#include "api/link.hpp"

38 changes: 36 additions & 2 deletions src/cuda/api/kernel_launch.hpp
@@ -43,6 +43,10 @@
#include "launch_configuration.hpp"
#include "kernel.hpp"
#include "kernels/apriori_compiled.hpp"
#if CUDA_VERSION >= 12000
#include "kernels/in_library.hpp"
#endif


#if CUDA_VERSION >= 9000
// The following is necessary for cudaLaunchCooperativeKernel
@@ -124,6 +128,7 @@ struct enqueue_launch_helper {

template<typename Kernel, typename... KernelParameters>
void enqueue_launch(
::std::integral_constant<bool, false>,
::std::integral_constant<bool, false>,
Kernel&& kernel_function,
const stream_t& stream,
@@ -132,6 +137,16 @@

template<typename Kernel, typename... KernelParameters>
void enqueue_launch(
::std::integral_constant<bool, true>,
::std::integral_constant<bool, false>,
Kernel&& kernel,
const stream_t& stream,
launch_configuration_t launch_configuration,
KernelParameters&&... parameters);

template<typename Kernel, typename... KernelParameters>
void enqueue_launch(
::std::integral_constant<bool, false>,
::std::integral_constant<bool, true>,
Kernel&& kernel,
const stream_t& stream,
@@ -308,12 +323,18 @@ void enqueue_launch(
static_assert(
detail_::all_true<::std::is_trivially_copy_constructible<detail_::kernel_parameter_decay_t<KernelParameters>>::value...>::value,
"All kernel parameter types must be of a trivially copy-constructible (decayed) type." );
static constexpr const bool wrapped_kernel = ::std::is_base_of<kernel_t, typename ::std::decay<Kernel>::type>::value;
static constexpr const bool wrapped_contextual_kernel = ::std::is_base_of<kernel_t, typename ::std::decay<Kernel>::type>::value;
#if CUDA_VERSION >= 12000
static constexpr const bool library_kernel = cuda::detail_::is_library_kernel<Kernel>::value;
#else
static constexpr const bool library_kernel = false;
#endif // CUDA_VERSION >= 12000
// We would have liked an "if constexpr" here, but that is unsupported by C++11, so we have to
// use tagged dispatch for the separate behavior for raw, wrapped-contextual and library kernels -
// although the enqueue_launch function for each of them will basically be just a one-liner :-(
detail_::enqueue_launch<Kernel, KernelParameters...>(
::std::integral_constant<bool, wrapped_kernel>{},
::std::integral_constant<bool, wrapped_contextual_kernel>{},
::std::integral_constant<bool, library_kernel>{},
::std::forward<Kernel>(kernel), stream, launch_configuration,
::std::forward<KernelParameters>(parameters)...);
}
@@ -338,19 +359,32 @@ void launch(
* Type of the container for the marshalled arguments; typically, this
* would be `span<const void*>` - but it can be an `::std::vector`, or
* have non-const `void*` elements etc.
* @param kernel
* A wrapped GPU kernel
* @param stream
* Proxy for the stream on which to enqueue the kernel launch; may be the
* default stream of a context.
* @param marshalled_arguments
* A container of `void` or `const void` pointers to the argument values
*/
///@{
template <typename SpanOfConstVoidPtrLike>
void launch_type_erased(
const kernel_t& kernel,
const stream_t& stream,
launch_configuration_t launch_configuration,
SpanOfConstVoidPtrLike marshalled_arguments);

#if CUDA_VERSION >= 12000
template <typename SpanOfConstVoidPtrLike>
void launch_type_erased(
const library::kernel_t& kernel,
const stream_t& stream,
launch_configuration_t launch_configuration,
SpanOfConstVoidPtrLike marshalled_arguments);
///@}
#endif // CUDA_VERSION >= 12000

} // namespace cuda

#endif // CUDA_API_WRAPPERS_KERNEL_LAUNCH_CUH_
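
The header also declares (for CUDA 12.0 and later) a type-erased launch path for library kernels. A minimal sketch of using it follows, with the same hypothetical kernel and arguments as above; per the documentation block in the diff, an ::std::vector of const void* is one acceptable container for the marshalled arguments.

// Hypothetical sketch, not part of this commit: launching a library kernel with
// type-erased (marshalled) arguments, i.e. pointers to each argument value.
#include <cuda/api.hpp>
#include <vector>

void type_erased_example(const cuda::library::kernel_t& lib_kernel,
    const cuda::stream_t& stream, cuda::launch_configuration_t config,
    float* data, unsigned n)
{
    ::std::vector<const void*> marshalled { &data, &n };
    cuda::launch_type_erased(lib_kernel, stream, config, marshalled);
}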
206 changes: 206 additions & 0 deletions src/cuda/api/kernels/in_library.hpp
@@ -0,0 +1,206 @@
/**
* @file
*
* @brief A wrapper class for compiled kernels in a loaded library,
* which are not associated with any specific device or context.
*/
#pragma once
#ifndef CUDA_API_WRAPPERS_IN_LIBRARY_KERNEL_HPP_
#define CUDA_API_WRAPPERS_IN_LIBRARY_KERNEL_HPP_

#include "../library.hpp"

namespace cuda {

///@cond
class kernel_t;
///@endcond

namespace detail_ {

template <typename Kernel>
struct is_library_kernel : ::std::is_same<typename ::std::decay<Kernel>::type, library::kernel_t> { };

} // namespace detail_

// TODO: Avoid the copy?
kernel_t contextualize(const library::kernel_t& kernel, const context_t& context);

namespace library {

///@cond
class kernel_t;
///@endcond

namespace kernel {

using handle_t = CUkernel;
using cuda::kernel::attribute_t;
using cuda::kernel::attribute_value_t;
// using cuda::kernel::apriori_compiled::attributes_t;

namespace detail_ {

// Note: library kernels never hold a PC refcount unit, nor do they own anything;
// only the library wrapper owns resources - and it is not associated with any individual kernel.
kernel_t wrap(library::handle_t library_handle, kernel::handle_t handle);

inline ::std::string identify(kernel::handle_t handle)
{
return "library kernel at " + cuda::detail_::ptr_as_hex(handle);
}

inline ::std::string identify(library::handle_t library_handle, kernel::handle_t handle)
{
return identify(handle) + " within " + library::detail_::identify(library_handle);
}

::std::string identify(const kernel_t &kernel);

inline ::std::pair<cuda::kernel::handle_t, status_t> contextualize_in_current_context(
const kernel::handle_t& library_kernel_handle)
{
cuda::kernel::handle_t contextualized_kernel_handle;
auto status = cuKernelGetFunction(&contextualized_kernel_handle, library_kernel_handle);
return {contextualized_kernel_handle, status};
}

inline cuda::kernel::handle_t contextualize(
const handle_t& kernel_handle,
const context::handle_t context_handle)
{
CAW_SET_SCOPE_CONTEXT(context_handle);
auto handle_and_status = contextualize_in_current_context(kernel_handle);
throw_if_error_lazy(handle_and_status.second, "Failed placing " + identify(kernel_handle) + " in "
+ context::detail_::identify(context_handle));
return handle_and_status.first;
}

inline attribute_value_t get_attribute(
handle_t library_kernel_handle,
device::id_t device_id,
kernel::attribute_t attribute)
{
attribute_value_t value;
auto status = cuKernelGetAttribute(&value, attribute, library_kernel_handle, device_id);
throw_if_error_lazy(status, ::std::string("Failed getting attribute ")
+ cuda::kernel::detail_::attribute_name(attribute) + " for " + identify(library_kernel_handle)
+ " on " + device::detail_::identify(device_id));
return value;
}

inline void set_attribute(
kernel::handle_t library_kernel_handle,
device::id_t device_id,
kernel::attribute_t attribute,
attribute_value_t value)
{
auto status = cuKernelSetAttribute(attribute, value, library_kernel_handle, device_id);
throw_if_error_lazy(status, ::std::string("Failed setting attribute ")
+ cuda::kernel::detail_::attribute_name(attribute) + " value to " + ::std::to_string(value)
+ " for " + identify(library_kernel_handle) + " on " + device::detail_::identify(device_id));
}

} // namespace detail_

attribute_value_t get_attribute(
const library::kernel_t& library_kernel,
kernel::attribute_t attribute,
const device_t& device);

inline void set_attribute(
const library::kernel_t& library_kernel,
kernel::attribute_t attribute,
const device_t& device,
attribute_value_t value);

} // namespace kernel

/**
* @brief A wrapper class for kernels residing in a loaded library (@ref library_t),
* which - unlike @ref cuda::kernel_t - are not associated with any device or context.
*/
class kernel_t {
public: // getters
kernel::handle_t handle() const noexcept { return handle_; }
library::handle_t library_handle() const noexcept { return library_handle_; }
library_t library() const noexcept { return library::detail_::wrap(library_handle_); }

public: // type_conversions

public: // non-mutators

#if CUDA_VERSION >= 12300
/**
* Return the kernel function name as registered within its library
*
* @note This may return a mangled name if the kernel function was not declared as having C linkage.
*/
const char* name() const
{
if (name_ != nullptr) { return name_; }
const char* result;
auto status = cuKernelGetName(&result, handle_);
throw_if_error_lazy(status, "Retrieving the name of " + kernel::detail_::identify(*this));
name_ = result;
return name_;
}
#endif
cuda::kernel_t contextualize(const context_t& context) const;

protected: // ctors & dtor
kernel_t(library::handle_t library_handle, kernel::handle_t handle)
:
library_handle_(library_handle), handle_(handle) {}

public: // ctors & dtor
kernel_t(const kernel_t &) = default;
kernel_t(kernel_t&& other) = default;

public: // friends
friend kernel_t kernel::detail_::wrap(library::handle_t, kernel::handle_t);

protected: // data members
library::handle_t library_handle_;
kernel::handle_t handle_;
mutable const char* name_ { nullptr }; // The name is cached after having been retrieved for the first time
}; // kernel_t

namespace kernel {
namespace detail_ {

inline kernel_t wrap(library::handle_t library_handle, kernel::handle_t handle)
{
return {library_handle, handle};
}

inline ::std::string identify(const kernel_t& library_kernel)
{
return identify(library_kernel.library_handle(), library_kernel.handle());
}

} // namespace detail_

inline kernel_t get(const library_t& library, const char* name)
{
auto kernel_handle = library::detail_::get_kernel(library.handle(), name);
return kernel::detail_::wrap(library.handle(), kernel_handle);
}

} // namespace kernel

} // namespace library

inline library::kernel_t library_t::get_kernel(const char* name) const
{
return library::kernel::get(*this, name);
}

inline library::kernel_t library_t::get_kernel(const ::std::string& name) const
{
return get_kernel(name.c_str());
}

} // namespace cuda

#endif // CUDA_API_WRAPPERS_IN_LIBRARY_KERNEL_HPP_
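
Finally, a sketch of the device-parameterized attribute query and the free-standing contextualize() declared in this header. The use of device_t::primary_context() and of the raw CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK enumerator are assumptions, not part of this commit.

// Hypothetical sketch, not part of this commit: querying a library kernel's
// attributes requires naming a device, since the kernel is not context-bound.
#include <cuda/api.hpp>

void attribute_example(const cuda::library_t& lib, const cuda::device_t& device)
{
    auto lib_kernel = lib.get_kernel("scale"); // illustrative kernel name
    auto max_threads = cuda::library::kernel::get_attribute(
        lib_kernel, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device);
    // To actually launch, the kernel can be bound to a context on that device:
    auto contextualized = cuda::contextualize(lib_kernel, device.primary_context());
    (void) max_threads; (void) contextualized;
}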
