-
-
Notifications
You must be signed in to change notification settings - Fork 80
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Regards #565: Added support for CUDA libraries
Caveat: Not yet supporting library kernels as first-class citizens; they are currently only intermediate entities for obtaining context-associated kernels (CUfunction's)
- Loading branch information
Showing
2 changed files
with
390 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,387 @@ | ||
/** | ||
* @file | ||
* | ||
* @brief Wrappers for working with "libraries" of compiled CUDA code (which are similar | ||
* to modules, but not associated with any CUDA context). | ||
*/ | ||
#pragma once | ||
#ifndef CUDA_API_WRAPPERS_LIBRARY_HPP_ | ||
#define CUDA_API_WRAPPERS_LIBRARY_HPP_ | ||
|
||
#if CUDA_VERSION >= 12000 | ||
|
||
#include "module.hpp" | ||
|
||
#if __cplusplus >= 201703L | ||
#include <filesystem> | ||
#endif | ||
|
||
namespace cuda { | ||
|
||
///@cond | ||
class device_t; | ||
class context_t; | ||
class module_t; | ||
class library_t; | ||
class kernel_t; | ||
///@endcond | ||
|
||
namespace library { | ||
|
||
using handle_t = CUlibrary; | ||
|
||
namespace kernel { | ||
|
||
using handle_t = CUkernel; // Don't be confused; a context-associated kernel is a CUfunction :-( | ||
|
||
} // namespace kernel | ||
|
||
namespace detail_ { | ||
|
||
using option_t = CUlibraryOption; | ||
|
||
} // namespace detail_ | ||
|
||
class kernel_t; // A kernel stored within a library; strangely, a context-associated kernel is a CUfunction. | ||
|
||
namespace detail_ { | ||
|
||
inline library_t wrap( | ||
handle_t handle, | ||
bool take_ownership = false) noexcept; | ||
|
||
/// Produces a human-readable description of a library, given its raw handle:
/// the word "library" followed by the handle's value rendered as hexadecimal.
inline ::std::string identify(const library::handle_t &handle)
{
	const auto handle_as_hex = cuda::detail_::ptr_as_hex(handle);
	return ::std::string("library ") + handle_as_hex;
}
|
||
::std::string identify(const library_t &library); | ||
|
||
} // namespace detail_ | ||
|
||
/** | ||
* Create a CUDA driver library of compiled code from raw image data. | ||
* | ||
* @param[in] library_data the opaque, raw binary data for the library - in a contiguous container
* such as a span, a cuda::dynarray etc.
*/ | ||
///@{ | ||
template <typename ContiguousContainer, | ||
cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true > | ||
library_t create( | ||
ContiguousContainer library_data, | ||
optional<link::options_t> link_options, | ||
bool code_is_preserved); | ||
///@} | ||
|
||
} // namespace library | ||
|
||
memory::region_t get_global(const context_t& context, const library_t& library, const char* name); | ||
kernel_t get_kernel(const context_t& context, const library_t& library, const char* name); | ||
memory::region_t get_managed_region(const library_t& library, const char* name); | ||
|
||
namespace module { | ||
|
||
module_t create(const context_t& context, const library_t& library); | ||
module_t create(const library_t& library); | ||
|
||
} // namespace module | ||
|
||
void* get_unified_function(const context_t& context, const library_t& library, const char* symbol); | ||
|
||
/** | ||
* Wrapper class for a CUDA compiled code library (like a @ref module_t , but not associated | ||
* with a context) | ||
*/ | ||
class library_t { | ||
|
||
public: // getters | ||
|
||
library::handle_t handle() const { return handle_; } | ||
|
||
/** | ||
* Obtains an already-compiled kernel previously associated with | ||
* this library, in the current context. | ||
* | ||
* @param name The function name, in case of a C-style function, | ||
* or the mangled function signature, in case of a C++-style | ||
* function. | ||
* | ||
* @return An enqueable kernel proxy object for the requested kernel, | ||
* in the current context. | ||
*/ | ||
cuda::kernel_t get_kernel(const char* name) const | ||
{ | ||
return cuda::get_kernel(context::current::get(), *this, name); | ||
} | ||
|
||
cuda::kernel_t get_kernel(const ::std::string& name) const | ||
{ | ||
return get_kernel(name.c_str()); | ||
} | ||
|
||
memory::region_t get_global(const char* name) const | ||
{ | ||
return cuda::get_global(context::current::get(), *this, name); | ||
} | ||
|
||
memory::region_t get_global(const ::std::string& name) const | ||
{ | ||
return get_global(name.c_str()); | ||
} | ||
|
||
memory::region_t get_managed_region(const char* name) | ||
{ | ||
return cuda::get_managed_region(*this, name); | ||
} | ||
|
||
memory::region_t get_managed(const ::std::string& name) | ||
{ | ||
return get_managed(name.c_str()); | ||
} | ||
|
||
protected: // constructors | ||
|
||
library_t(library::handle_t handle, bool owning) noexcept | ||
: handle_(handle), owning_(owning) | ||
{ } | ||
|
||
public: // friendship | ||
|
||
friend library_t library::detail_::wrap(library::handle_t, bool) noexcept; | ||
|
||
public: // constructors and destructor | ||
|
||
library_t(const library_t&) = delete; | ||
|
||
library_t(library_t&& other) noexcept : library_t(other.handle_, other.owning_) | ||
{ | ||
other.owning_ = false; | ||
}; | ||
|
||
~library_t() noexcept(false) | ||
{ | ||
if (owning_) { | ||
auto status = cuLibraryUnload(handle_); | ||
throw_if_error_lazy(status, "Failed unloading " + library::detail_::identify(handle_)); | ||
} | ||
} | ||
|
||
public: // operators | ||
|
||
library_t& operator=(const library_t&) = delete; | ||
library_t& operator=(library_t&& other) noexcept | ||
{ | ||
::std::swap(handle_, other.handle_); | ||
::std::swap(owning_, other.owning_); | ||
return *this; | ||
} | ||
|
||
protected: // data members | ||
library::handle_t handle_; | ||
bool owning_; | ||
// this field is mutable only for enabling move construction; other | ||
// than in that case it must not be altered | ||
}; | ||
|
||
/**
 * Obtains the memory region of a global object defined in a library, as
 * instantiated for a specific context.
 *
 * @param context The context for which to look up the global object
 * @param library The library in which the global object is defined
 * @param name The name of the global object
 * @return the address and size of the global object
 */
inline memory::region_t get_global(const context_t& context, const library_t& library, const char* name)
{
	// The lookup is made against whichever context is current, so - like the other
	// context-taking getters in this file - make the requested context current
	// for the duration of this call.
	CAW_SET_SCOPE_CONTEXT(context.handle());
	CUdeviceptr dptr;
	size_t size;
	auto result = cuLibraryGetGlobal(&dptr, &size, library.handle(), name);
	throw_if_error_lazy(result,
		::std::string("Obtaining the memory address and size for the global object '") + name + "' from "
		+ library::detail_::identify(library) + " in context " + context::detail_::identify(context));
	return { memory::as_pointer(dptr), size };
	// Note: Nothing is holding a PC refcount unit here!
}
|
||
// Implement other get's | ||
|
||
/**
 * Obtains a context-associated kernel for a kernel stored within a library.
 *
 * @param context The context with which the resulting kernel is to be associated
 * @param library The library holding the kernel
 * @param name The name under which the kernel is looked up in the library
 * @return a kernel wrapper for the requested kernel, in @p context
 */
inline kernel_t get_kernel(const context_t& context, const library_t& library, const char* name)
{
	CAW_SET_SCOPE_CONTEXT(context.handle());

	// First, look up the library-level kernel (a CUkernel)...
	library::kernel::handle_t library_kernel_handle;
	auto lookup_status = cuLibraryGetKernel(&library_kernel_handle, library.handle(), name);
	throw_if_error_lazy(lookup_status, ::std::string("Failed obtaining kernel '") + name
		+ "' from " + library::detail_::identify(library));

	// ... then obtain the context-associated kernel (a CUfunction) from it
	kernel::handle_t function_handle;
	auto conversion_status = cuKernelGetFunction(&function_handle, library_kernel_handle);
	throw_if_error_lazy(conversion_status, ::std::string("Failed obtaining a context-associated kernel ")
		+ "from kernel '" + name + "' in " + library::detail_::identify(library));

	return kernel::wrap(
		context.device_id(),
		context.handle(),
		function_handle,
		do_hold_primary_context_refcount_unit);
}
|
||
/**
 * Obtains a managed memory region defined in a library.
 *
 * @param library The library in which the managed object is defined
 * @param name The name of the managed object
 * @return the address and size of the managed memory region
 */
inline memory::region_t get_managed_region(const library_t& library, const char* name)
{
	memory::device::address_t region_start;
	size_t region_size;
	// Both the start address and the size are output parameters of the driver call
	auto status = cuLibraryGetManaged(&region_start, &region_size, library.handle(), name);
	throw_if_error_lazy(status, ::std::string("Failed obtaining the managed memory region '") + name
		+ "' from " + library::detail_::identify(library));
	return { region_start, region_size };
}
|
||
namespace module { | ||
|
||
/**
 * Obtains a module, within a given context, from a (context-independent) library.
 *
 * @param context The context in which the module is to be obtained
 * @param library The library from which to obtain the module
 * @return a wrapper for the module obtained from the library
 */
inline module_t create(const context_t& context, const library_t& library)
{
	CAW_SET_SCOPE_CONTEXT(context.handle());
	module::handle_t new_handle;
	auto status = cuLibraryGetModule(&new_handle, library.handle());
	throw_if_error_lazy(status, ::std::string("Failed creating a module from ")
		+ library::detail_::identify(library) + " in " + context::detail_::identify(context));
	constexpr const bool is_owning { true };
	// NOTE(review): library_t, as defined in this file, has no link_options() member, so
	// the call below looks like it cannot compile. Confirm what module::detail_::wrap()
	// expects for this argument (perhaps a default-constructed link::options_t) - TODO.
	return module::detail_::wrap(context.device_id(), context.handle(), new_handle, library.link_options(),
		is_owning, do_hold_primary_context_refcount_unit);
	// TODO: We could consider adding a variant of this function taking a context&&, and using that
	// to decide whether or not to hold a PC refcount unit
}
|
||
} // namespace module | ||
|
||
inline void* get_unified_function(const context_t& context, const library_t& library, const char* symbol) | ||
{ | ||
CAW_SET_SCOPE_CONTEXT(context.handle()); | ||
void* function_ptr; | ||
auto status = cuLibraryGetUnifiedFunction(&function_ptr, library.handle(), symbol); | ||
throw_if_error_lazy(status, ::std::string("Failed obtaining a pointer for function '") + symbol | ||
+ "' from " + library::detail_::identify(library) + " in " + context::detail_::identify(context)); | ||
return function_ptr; | ||
} | ||
|
||
namespace library { | ||
|
||
namespace detail_ { | ||
|
||
template <typename Creator, typename DataSource, typename ErrorStringGenerator> | ||
library_t create( | ||
Creator creator, | ||
DataSource data_source, | ||
ErrorStringGenerator error_string_generator, | ||
const link::options_t& link_options = {}, | ||
bool code_is_preserved = false) | ||
{ | ||
handle_t new_lib_handle; | ||
auto raw_link_opts = marshal(link_options); | ||
struct { | ||
detail_::option_t options[1]; | ||
void* values[1]; | ||
unsigned count; | ||
} raw_opts = { { CU_LIBRARY_BINARY_IS_PRESERVED }, { &code_is_preserved }, 1 }; | ||
auto status = creator( | ||
&new_lib_handle, data_source, | ||
const_cast<link::option_t*>(raw_link_opts.options()), | ||
const_cast<void**>(raw_link_opts.values()), raw_link_opts.count(), | ||
raw_opts.options, raw_opts.values, raw_opts.count | ||
); | ||
throw_if_error_lazy(status, | ||
::std::string("Failed loading a compiled CUDA code library from ") + error_string_generator()); | ||
bool do_take_ownership{true}; | ||
return detail_::wrap(new_lib_handle, do_take_ownership); | ||
} | ||
|
||
} // namespace detail_ | ||
|
||
/** | ||
* Load a library from an appropriate compiled or semi-compiled file, allocating all | ||
* relevant resources for it. | ||
* | ||
* @param path of a cubin, PTX, or fatbin file constituting the module to be loaded. | ||
* @return the loaded library | ||
* | ||
* @note this covers cuModuleLoadFatBinary() even though that's not directly used | ||
* | ||
* @todo: When switching to the C++17 standard, use string_view's instead of the const char* | ||
*/ | ||
///@{ | ||
inline library_t load_from_file( | ||
const char* path, | ||
const link::options_t& link_options = {}, | ||
bool code_is_preserved = false) | ||
{ | ||
return detail_::create( | ||
cuLibraryLoadFromFile, path, | ||
[path]() { return ::std::string("file ") + path; }, | ||
link_options, code_is_preserved); | ||
} | ||
|
||
inline library_t load_from_file( | ||
const ::std::string& path, | ||
const link::options_t& link_options = {}, | ||
bool code_is_preserved = false) | ||
{ | ||
return load_from_file(path.c_str(), link_options, code_is_preserved); | ||
} | ||
|
||
#if __cplusplus >= 201703L | ||
|
||
inline library_t load_from_file( | ||
const ::std::filesystem::path& path, | ||
const link::options_t& link_options = {}, | ||
bool code_is_preserved = false) | ||
{ | ||
return load_from_file(path.c_str(), link_options, code_is_preserved); | ||
} | ||
|
||
#endif | ||
///@} | ||
|
||
namespace detail_ { | ||
|
||
/**
 * Wraps a raw library handle in a @ref library_t proxy object.
 *
 * @param handle The raw handle of the library to wrap
 * @param take_ownership Whether the wrapper is to unload the library when destroyed
 */
inline library_t wrap(handle_t handle, bool take_ownership) noexcept
{
	return library_t(handle, take_ownership);
}
|
||
} // namespace detail_ | ||
|
||
/** | ||
* Creates a new module in a context using raw compiled code | ||
* | ||
* @param module_data The raw compiled code for the module. | ||
* @param link_options Potential options for the PTX compilation and device linking of the code. | ||
* @param code_is_preserved See @ref | ||
*/ | ||
inline library_t create( | ||
const void* module_data, | ||
const link::options_t& link_options = {}, | ||
bool code_is_preserved = false) | ||
{ | ||
return detail_::create( | ||
cuLibraryLoadData, module_data, | ||
[module_data]() { return ::std::string("data at ") + cuda::detail_::ptr_as_hex(module_data); }, | ||
link_options, code_is_preserved); | ||
} | ||
|
||
|
||
// TODO: Use an optional to reduce the number of functions here... when the | ||
// library starts requiring C++14. | ||
|
||
namespace detail_ { | ||
|
||
inline ::std::string identify(const library_t& library) | ||
{ | ||
return identify(library.handle()); | ||
} | ||
|
||
} // namespace detail_ | ||
|
||
template <typename ContiguousContainer, | ||
cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> > | ||
library_t create( | ||
ContiguousContainer library_data, | ||
optional<link::options_t> link_options, | ||
bool code_is_preserved) | ||
{ | ||
return create(library_data.data(), link_options, code_is_preserved); | ||
} | ||
|
||
} // namespace library | ||
|
||
} // namespace cuda | ||
|
||
#endif // CUDA_VERSION >= 12000 | ||
|
||
#endif // CUDA_API_WRAPPERS_LIBRARY_HPP_ |