Skip to content

Commit

Permalink
Fixes #575: Moved/renamed: apriori_compiled_kernel_t -> `kernel::ap…
Browse files Browse the repository at this point in the history
…riori_compiled_t`
  • Loading branch information
eyalroz committed Jan 27, 2024
1 parent 55ae1d6 commit 844c036
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 51 deletions.
8 changes: 4 additions & 4 deletions src/cuda/api/kernel_launch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ void enqueue_raw_kernel_launch_in_current_context(
// look at things.
detail_::collect_argument_addresses(argument_ptrs, ::std::forward<KernelParameters>(parameters)...);
#if CUDA_VERSION >= 11000
kernel::handle_t kernel_function_handle = kernel::detail_::get_handle( (const void*) kernel_function);
kernel::handle_t kernel_function_handle = kernel::apriori_compiled::detail_::get_handle( (const void*) kernel_function);
auto status = cuLaunchCooperativeKernel(
kernel_function_handle,
launch_configuration.dimensions.grid.x,
Expand Down Expand Up @@ -248,7 +248,7 @@ struct raw_kernel_typegen {

template<typename... KernelParameters>
typename detail_::raw_kernel_typegen<KernelParameters...>::type
unwrap(const apriori_compiled_kernel_t& kernel)
unwrap(const kernel::apriori_compiled_t& kernel)
{
using raw_kernel_t = typename detail_::raw_kernel_typegen<KernelParameters ...>::type;
return reinterpret_cast<raw_kernel_t>(const_cast<void *>(kernel.ptr()));
Expand All @@ -259,9 +259,9 @@ unwrap(const apriori_compiled_kernel_t& kernel)
namespace detail_ {

template<typename... KernelParameters>
struct enqueue_launch_helper<apriori_compiled_kernel_t, KernelParameters...> {
struct enqueue_launch_helper<kernel::apriori_compiled_t, KernelParameters...> {
void operator()(
const apriori_compiled_kernel_t& wrapped_kernel,
const kernel::apriori_compiled_t& wrapped_kernel,
const stream_t & stream,
launch_configuration_t launch_configuration,
KernelParameters &&... parameters) const;
Expand Down
50 changes: 27 additions & 23 deletions src/cuda/api/kernels/apriori_compiled.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@

namespace cuda {

namespace kernel {

///@cond
class device_t;
class apriori_compiled_kernel_t;
class apriori_compiled_t;
///@nocond

namespace kernel {
namespace apriori_compiled {

namespace detail_ {

Expand All @@ -46,16 +47,16 @@ inline handle_t get_handle(const void *kernel_function_ptr, const char* name = n
}
#endif

apriori_compiled_kernel_t wrap(
apriori_compiled_t wrap(
device::id_t device_id,
context::handle_t primary_context_handle,
kernel::handle_t f,
const void* ptr,
bool hold_primary_context_refcount_unit = false);


} // namespace detail_


#if ! CAN_GET_APRIORI_KERNEL_HANDLE
/**
* @brief a wrapper around `cudaFuncAttributes`, offering
Expand Down Expand Up @@ -299,13 +300,13 @@ inline grid::dimension_t max_active_blocks_per_multiprocessor(

} // namespace occupancy

} // namespace kernel
} // namespace apriori_compiled

/**
* @brief A subclass of the @ref `kernel_t` interface for kernels being
* functions marked as __global__ in source files and compiled apriori.
*/
class apriori_compiled_kernel_t final : public kernel_t {
class apriori_compiled_t final : public kernel_t {
public: // getters
const void *ptr() const noexcept { return ptr_; }
const void *get() const noexcept { return ptr_; }
Expand All @@ -316,7 +317,7 @@ class apriori_compiled_kernel_t final : public kernel_t {
public: // non-mutators

#if ! CAN_GET_APRIORI_KERNEL_HANDLE
kernel::attributes_t attributes() const;
apriori_compiled::attributes_t attributes() const;
void set_cache_preference(multiprocessor_cache_preference_t preference) const override;
void set_shared_memory_bank_size(multiprocessor_shared_memory_bank_size_option_t config) const override;

Expand Down Expand Up @@ -387,7 +388,7 @@ class apriori_compiled_kernel_t final : public kernel_t {
memory::shared::size_t dynamic_shared_memory_per_block,
bool disable_caching_override = false) const override
{
return kernel::occupancy::detail_::max_active_blocks_per_multiprocessor(
return apriori_compiled::occupancy::detail_::max_active_blocks_per_multiprocessor(
ptr(),
block_size_in_threads,
dynamic_shared_memory_per_block,
Expand All @@ -396,42 +397,43 @@ class apriori_compiled_kernel_t final : public kernel_t {
#endif // ! CAN_GET_APRIORI_KERNEL_HANDLE

protected: // ctors & dtor
apriori_compiled_kernel_t(device::id_t device_id, context::handle_t primary_context_handle,
apriori_compiled_t(device::id_t device_id, context::handle_t primary_context_handle,
kernel::handle_t handle, const void *f, bool hold_pc_refcount_unit)
: kernel_t(device_id, primary_context_handle, handle, hold_pc_refcount_unit), ptr_(f) {
// TODO: Consider checking whether this actually is a device function, at all and in this context
#ifndef NDEBUG
assert(f != nullptr && "Attempt to construct a kernel object for a nullptr kernel function pointer");
#endif
}
apriori_compiled_kernel_t(
apriori_compiled_t(
device::id_t device_id,
context::handle_t primary_context_handle,
const void *f,
bool hold_primary_context_refcount_unit)
: apriori_compiled_kernel_t(
: apriori_compiled_t(
device_id,
primary_context_handle,
kernel::detail_::get_handle(f),
apriori_compiled::detail_::get_handle(f),
f,
hold_primary_context_refcount_unit)
{ }

public: // ctors & dtor
apriori_compiled_kernel_t(const apriori_compiled_kernel_t&) = default;
apriori_compiled_kernel_t(apriori_compiled_kernel_t&&) = default;
apriori_compiled_t(const apriori_compiled_t&) = default;
apriori_compiled_t(apriori_compiled_t&&) = default;

public: // friends
friend apriori_compiled_kernel_t kernel::detail_::wrap(device::id_t, context::handle_t, kernel::handle_t, const void*, bool);
friend apriori_compiled_t apriori_compiled::detail_::wrap(device::id_t, context::handle_t, kernel::handle_t, const void*, bool);

protected: // data members
const void *const ptr_;
};
}; // class apriori_compiled_t

namespace apriori_compiled {

namespace kernel {
namespace detail_ {

inline apriori_compiled_kernel_t wrap(
inline apriori_compiled_t wrap(
device::id_t device_id,
context::handle_t primary_context_handle,
kernel::handle_t f,
Expand All @@ -442,7 +444,7 @@ inline apriori_compiled_kernel_t wrap(
}

#if ! CAN_GET_APRIORI_KERNEL_HANDLE
inline ::std::string identify(const apriori_compiled_kernel_t& kernel)
inline ::std::string identify(const apriori_compiled_t& kernel)
{
return "apriori-compiled kernel " + cuda::detail_::ptr_as_hex(kernel.ptr())
+ " in " + context::detail_::identify(kernel.context());
Expand All @@ -461,7 +463,7 @@ inline attribute_value_t get_attribute(const void* function_ptr, attribute_t att
inline void set_attribute(const void* function_ptr, attribute_t attribute, attribute_value_t value)
{
auto handle = detail_::get_handle(function_ptr);
return detail_::set_attribute_in_current_context(handle, attribute, value);
return kernel::detail_::set_attribute_in_current_context(handle, attribute, value);
}

inline attribute_value_t get_attribute(
Expand All @@ -484,15 +486,17 @@ inline void set_attribute(
}
#endif // CAN_GET_APRIORI_KERNEL_HANDLE

} // namespace apriori_compiled

/**
* @note The returned kernel proxy object will keep the device's primary
* context active while the kernel exists.
*/
template<typename KernelFunctionPtr>
apriori_compiled_kernel_t get(const device_t& device, KernelFunctionPtr function_ptr);
apriori_compiled_t get(const device_t& device, KernelFunctionPtr function_ptr);

template<typename KernelFunctionPtr>
apriori_compiled_kernel_t get(context_t context, KernelFunctionPtr function_ptr);
apriori_compiled_t get(context_t context, KernelFunctionPtr function_ptr);

} // namespace kernel

Expand Down
41 changes: 22 additions & 19 deletions src/cuda/api/multi_wrapper_impls/apriori_compiled_kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,25 @@

namespace cuda {

namespace kernel {

#if ! CAN_GET_APRIORI_KERNEL_HANDLE
#if defined(__CUDACC__)

// Unfortunately, the CUDA runtime API does not allow for computation of the grid parameters for maximum occupancy
// from code compiled with a host-side-only compiler! See cuda_runtime.h for details

inline kernel::attributes_t apriori_compiled_kernel_t::attributes() const
inline apriori_compiled::attributes_t apriori_compiled_t::attributes() const
{
// Note: assuming the primary context is active
CAW_SET_SCOPE_CONTEXT(context_handle_);
kernel::attributes_t function_attributes;
apriori_compiled::attributes_t function_attributes;
auto status = cudaFuncGetAttributes(&function_attributes, ptr_);
throw_if_error_lazy(status, "Failed obtaining attributes for a CUDA device function");
return function_attributes;
}

inline void apriori_compiled_kernel_t::set_cache_preference(multiprocessor_cache_preference_t preference) const
inline void apriori_compiled_t::set_cache_preference(multiprocessor_cache_preference_t preference) const
{
// Note: assuming the primary context is active
CAW_SET_SCOPE_CONTEXT(context_handle_);
Expand All @@ -41,7 +43,7 @@ inline void apriori_compiled_kernel_t::set_cache_preference(multiprocessor_cache
"CUDA device function");
}

inline void apriori_compiled_kernel_t::set_shared_memory_bank_size(
inline void apriori_compiled_t::set_shared_memory_bank_size(
multiprocessor_shared_memory_bank_size_option_t config) const
{
// Note: assuming the primary context is active
Expand All @@ -50,7 +52,7 @@ inline void apriori_compiled_kernel_t::set_shared_memory_bank_size(
throw_if_error_lazy(result, "Failed setting shared memory bank size to " + ::std::to_string(config));
}

inline void apriori_compiled_kernel_t::set_attribute(kernel::attribute_t attribute, kernel::attribute_value_t value) const
inline void apriori_compiled_t::set_attribute(attribute_t attribute, attribute_value_t value) const
{
// Note: assuming the primary context is active
CAW_SET_SCOPE_CONTEXT(context_handle_);
Expand All @@ -70,9 +72,9 @@ inline void apriori_compiled_kernel_t::set_attribute(kernel::attribute_t attribu
throw_if_error_lazy(result, "Setting CUDA device function attribute " + ::std::to_string(attribute) + " to value " + ::std::to_string(value));
}

inline kernel::attribute_value_t apriori_compiled_kernel_t::get_attribute(kernel::attribute_t attribute) const
inline attribute_value_t apriori_compiled_t::get_attribute(attribute_t attribute) const
{
kernel::attributes_t attrs = attributes();
apriori_compiled::attributes_t attrs = attributes();
switch(attribute) {
case CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK:
return attrs.maxThreadsPerBlock;
Expand All @@ -98,9 +100,9 @@ inline kernel::attribute_value_t apriori_compiled_kernel_t::get_attribute(kernel
throw cuda::runtime_error(status::not_supported,
::std::string("Attribute ") +
#ifdef NDEBUG
::std::to_string(static_cast<::std::underlying_type<kernel::attribute_t>::type>(attribute))
::std::to_string(static_cast<::std::underlying_type<attribute_t>::type>(attribute))
#else
kernel::detail_::attribute_name(attribute)
detail_::attribute_name(attribute)
#endif
+ " cannot be obtained for apriori-compiled kernels before CUDA version 11.0"
);
Expand All @@ -110,16 +112,15 @@ inline kernel::attribute_value_t apriori_compiled_kernel_t::get_attribute(kernel
#endif // defined(__CUDACC__)
#endif // ! CAN_GET_APRIORI_KERNEL_HANDLE

namespace kernel {

namespace apriori_compiled {

namespace detail_ {

template<typename KernelFunctionPtr>
apriori_compiled_kernel_t get(
::cuda::device::id_t device_id,
context::handle_t & primary_context_handle,
KernelFunctionPtr function_ptr)
apriori_compiled_t get(
device::id_t device_id,
context::handle_t & primary_context_handle,
KernelFunctionPtr function_ptr)
{
static_assert(
::std::is_pointer<KernelFunctionPtr>::value
Expand All @@ -137,6 +138,8 @@ apriori_compiled_kernel_t get(

} // namespace detail_

} // namespace apriori_compiled


/**
* @brief Obtain a wrapped kernel object corresponding to a "raw" kernel function
Expand All @@ -148,10 +151,10 @@ apriori_compiled_kernel_t get(
* context active while the kernel exists.
*/
template<typename KernelFunctionPtr>
apriori_compiled_kernel_t get(const device_t& device, KernelFunctionPtr function_ptr)
apriori_compiled_t get(const device_t &device, KernelFunctionPtr function_ptr)
{
auto primary_context_handle = device::primary_context::detail_::obtain_and_increase_refcount(device.id());
return detail_::get(device.id(), primary_context_handle, function_ptr);
return apriori_compiled::detail_::get(device.id(), primary_context_handle, function_ptr);
}

} // namespace kernel
Expand All @@ -160,9 +163,9 @@ namespace detail_ {

template<>
inline ::cuda::device::primary_context_t
get_implicit_primary_context<apriori_compiled_kernel_t>(apriori_compiled_kernel_t kernel)
get_implicit_primary_context<kernel::apriori_compiled_t>(kernel::apriori_compiled_t kernel)
{
const kernel_t& kernel_ = kernel;
const kernel_t &kernel_ = kernel;
return get_implicit_primary_context(kernel_);
}

Expand Down
1 change: 0 additions & 1 deletion src/cuda/api/multi_wrapper_impls/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#include "../pointer.hpp"
#include "../primary_context.hpp"
#include "../kernel.hpp"
#include "../current_context.hpp"

namespace cuda {

Expand Down
8 changes: 4 additions & 4 deletions src/cuda/api/multi_wrapper_impls/kernel_launch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ inline void validate_block_dimension_compatibility(
}

template<typename... KernelParameters>
void enqueue_launch_helper<apriori_compiled_kernel_t, KernelParameters...>::operator()(
const apriori_compiled_kernel_t& wrapped_kernel,
void enqueue_launch_helper<kernel::apriori_compiled_t, KernelParameters...>::operator()(
const kernel::apriori_compiled_t& wrapped_kernel,
const stream_t & stream,
launch_configuration_t launch_configuration,
KernelParameters &&... parameters) const
Expand Down Expand Up @@ -334,7 +334,7 @@ inline grid::composite_dimensions_t min_grid_params_for_max_occupancy(
} // namespace detail_

inline grid::composite_dimensions_t min_grid_params_for_max_occupancy(
const apriori_compiled_kernel_t& kernel,
const kernel::apriori_compiled_t& kernel,
memory::shared::size_t dynamic_shared_memory_size,
grid::block_dimension_t block_size_limit,
bool disable_caching_override)
Expand All @@ -345,7 +345,7 @@ inline grid::composite_dimensions_t min_grid_params_for_max_occupancy(

template <typename UnaryFunction>
grid::composite_dimensions_t min_grid_params_for_max_occupancy(
const apriori_compiled_kernel_t& kernel,
const kernel::apriori_compiled_t& kernel,
UnaryFunction block_size_to_dynamic_shared_mem_size,
grid::block_dimension_t block_size_limit,
bool disable_caching_override)
Expand Down

0 comments on commit 844c036

Please sign in to comment.