Skip to content

Commit

Permalink
Regards #565: Added support for CUDA libraries
Browse files Browse the repository at this point in the history
Caveat: Not yet supporting library kernels as first-class citizens; they are currently only intermediate entities for obtaining context-associated kernels (CUfunction's)
  • Loading branch information
eyalroz committed Jan 27, 2024
1 parent 0d0731c commit b480d56
Show file tree
Hide file tree
Showing 2 changed files with 390 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/cuda/api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
#include "api/event.hpp"
#include "api/kernel.hpp"
#include "api/module.hpp"
#if CUDA_VERSION >= 12000
#include "api/library.hpp"
#endif
#include "api/link.hpp"

#include "api/current_device.hpp"
Expand Down
387 changes: 387 additions & 0 deletions src/cuda/api/library.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,387 @@
/**
* @file
*
* @brief Wrappers for working with "libraries" of compiled CUDA code (which are similar
* to modules, but not associated with any CUDA context).
*/
#pragma once
#ifndef CUDA_API_WRAPPERS_LIBRARY_HPP_
#define CUDA_API_WRAPPERS_LIBRARY_HPP_

#if CUDA_VERSION >= 12000

#include "module.hpp"

#if __cplusplus >= 201703L
#include <filesystem>
#endif

namespace cuda {

///@cond
class device_t;
class context_t;
class module_t;
class library_t;
class kernel_t;
///@endcond

namespace library {

/// Raw CUDA driver handle of a (context-less) library of compiled code
using handle_t = CUlibrary;

namespace kernel {

using handle_t = CUkernel; // Don't be confused; a context-associated kernel is a CUfunction :-(

} // namespace kernel

namespace detail_ {

/// Raw CUDA driver type of the options passed when loading a library
using option_t = CUlibraryOption;

} // namespace detail_

class kernel_t; // A kernel stored within a library; strangely, a context-associated kernel is a CUfunction.

namespace detail_ {

// Construct a library_t proxy object around an existing raw library handle;
// when take_ownership is true, the proxy will unload the library on destruction
inline library_t wrap(
	handle_t handle,
	bool take_ownership = false) noexcept;

// A string identifying a library by its raw handle, for use in error messages
inline ::std::string identify(const library::handle_t &handle)
{
	return ::std::string("library ") + cuda::detail_::ptr_as_hex(handle);
}

// Same, but taking the proxy object (defined after library_t below)
::std::string identify(const library_t &library);

} // namespace detail_

/**
 * Create a CUDA driver library of compiled code from raw image data.
 *
 * @param[in] module_data the opaque, raw binary data for the module - in a contiguous container
 * such as a span, a cuda::dynarray etc..
 */
///@{
template <typename ContiguousContainer,
	cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true >
library_t create(
	ContiguousContainer library_data,
	optional<link::options_t> link_options,
	bool code_is_preserved);
///@}

} // namespace library

// Freestanding counterparts of library_t's member getters; defined later in this file

/// Obtain the named global object (base address and size) in a library, w.r.t. the given context
memory::region_t get_global(const context_t& context, const library_t& library, const char* name);

/// Obtain a context-associated kernel (a CUfunction) for a library-stored kernel
kernel_t get_kernel(const context_t& context, const library_t& library, const char* name);

/// Obtain the named managed-memory region belonging to a library
memory::region_t get_managed_region(const library_t& library, const char* name);

namespace module {

/// Create a context-associated module from the compiled code of a library
module_t create(const context_t& context, const library_t& library);
module_t create(const library_t& library);

} // namespace module

/// Obtain a pointer to a "unified function" in a library, for the given context
void* get_unified_function(const context_t& context, const library_t& library, const char* symbol);

/**
 * Wrapper class for a CUDA compiled code library (like a @ref module_t , but not associated
 * with a context)
 *
 * @note This is a proxy class: it holds (and possibly owns) the raw library handle;
 * it does not itself hold a copy of the compiled code.
 */
class library_t {

public: // getters

	/// The raw CUDA driver handle of this library
	library::handle_t handle() const { return handle_; }

	/**
	 * Obtains an already-compiled kernel previously associated with
	 * this library, in the current context.
	 *
	 * @param name The function name, in case of a C-style function,
	 * or the mangled function signature, in case of a C++-style
	 * function.
	 *
	 * @return An enqueable kernel proxy object for the requested kernel,
	 * in the current context.
	 */
	cuda::kernel_t get_kernel(const char* name) const
	{
		return cuda::get_kernel(context::current::get(), *this, name);
	}

	/// @copydoc get_kernel(const char*) const
	cuda::kernel_t get_kernel(const ::std::string& name) const
	{
		return get_kernel(name.c_str());
	}

	/// Obtains the named global object (its base address and size) within
	/// this library, in the current context
	memory::region_t get_global(const char* name) const
	{
		return cuda::get_global(context::current::get(), *this, name);
	}

	memory::region_t get_global(const ::std::string& name) const
	{
		return get_global(name.c_str());
	}

	/// Obtains the named managed memory region belonging to this library
	memory::region_t get_managed_region(const char* name) const
	{
		return cuda::get_managed_region(*this, name);
	}

	memory::region_t get_managed(const ::std::string& name) const
	{
		// Fix: this previously called get_managed(name.c_str()), which converted the
		// C string right back into an std::string and recursed without bound; it must
		// delegate to the const-char* getter instead
		return get_managed_region(name.c_str());
	}

protected: // constructors

	library_t(library::handle_t handle, bool owning) noexcept
	: handle_(handle), owning_(owning)
	{ }

public: // friendship

	friend library_t library::detail_::wrap(library::handle_t, bool) noexcept;

public: // constructors and destructor

	library_t(const library_t&) = delete;

	// Moving transfers ownership; the moved-from proxy keeps the handle but
	// will no longer unload it
	library_t(library_t&& other) noexcept : library_t(other.handle_, other.owning_)
	{
		other.owning_ = false;
	}

	// noexcept(false): unloading can fail, and this library reports such
	// failures by throwing (even from destructors)
	~library_t() noexcept(false)
	{
		if (owning_) {
			auto status = cuLibraryUnload(handle_);
			throw_if_error_lazy(status, "Failed unloading " + library::detail_::identify(handle_));
		}
	}

public: // operators

	library_t& operator=(const library_t&) = delete;
	library_t& operator=(library_t&& other) noexcept
	{
		::std::swap(handle_, other.handle_);
		::std::swap(owning_, other.owning_);
		return *this;
	}

protected: // data members
	library::handle_t handle_;
	bool owning_;
	// this field is mutable only for enabling move construction; other
	// than in that case it must not be altered
};

/**
 * Obtain the base address and size of a named global object in a library,
 * with respect to a given context.
 *
 * @note the returned region is usable only within @p context.
 */
inline memory::region_t get_global(const context_t& context, const library_t& library, const char* name)
{
	// cuLibraryGetGlobal resolves the global with respect to the _current_ context,
	// so we must first activate the context we were handed (the sibling get_kernel
	// and get_unified_function already do this)
	CAW_SET_SCOPE_CONTEXT(context.handle());
	CUdeviceptr dptr;
	size_t size;
	auto result = cuLibraryGetGlobal(&dptr, &size, library.handle(), name);
	throw_if_error_lazy(result,
		::std::string("Obtaining the memory address and size for the global object '") + name + "' from "
		+ library::detail_::identify(library) + " in context " + context::detail_::identify(context));
	return { memory::as_pointer(dptr), size };
	// Note: Nothing is holding a PC refcount unit here!
}

// Implement other get's

/**
 * Obtain an enqueable, context-associated kernel proxy for a kernel stored
 * in a library, within a given context.
 */
inline kernel_t get_kernel(const context_t& context, const library_t& library, const char* name)
{
	CAW_SET_SCOPE_CONTEXT(context.handle());
	// First, locate the library-scoped kernel entity (a CUkernel)...
	library::kernel::handle_t library_kernel_handle;
	auto status = cuLibraryGetKernel(&library_kernel_handle, library.handle(), name);
	throw_if_error_lazy(status, ::std::string("Failed obtaining kernel '") + name
		+ "' from " + library::detail_::identify(library));
	// ... then obtain from it the context-associated kernel (a CUfunction)
	kernel::handle_t function_handle;
	status = cuKernelGetFunction(&function_handle, library_kernel_handle);
	throw_if_error_lazy(status, ::std::string("Failed obtaining a context-associated kernel ")
		+ "from kernel '" + name + "' in " + library::detail_::identify(library));
	return kernel::wrap(
		context.device_id(), context.handle(), function_handle,
		do_hold_primary_context_refcount_unit);
}

/**
 * Obtain the named managed-memory region belonging to a library.
 */
inline memory::region_t get_managed_region(const library_t& library, const char* name)
{
	memory::device::address_t region_start;
	size_t region_size;
	auto status = cuLibraryGetManaged(&region_start, &region_size, library.handle(), name);
	throw_if_error_lazy(status, ::std::string("Failed obtaining the managed memory region '") + name
		+ "' from " + library::detail_::identify(library));
	// Convert the raw device address into a pointer, as get_global() does;
	// region_t is pointer-based, not address-based
	return { memory::as_pointer(region_start), region_size };
}

namespace module {

/**
 * Create a context-associated module from the compiled code of a library.
 *
 * @param context the context in which the module is to be usable
 * @param library the (context-less) library whose code will back the module
 */
inline module_t create(const context_t& context, const library_t& library)
{
	CAW_SET_SCOPE_CONTEXT(context.handle());
	module::handle_t new_handle;
	auto status = cuLibraryGetModule(&new_handle, library.handle());
	// Fix: the previous message read "Failed creating a module '' from ..." due to
	// a stray quote and a unary + on the following string literal
	throw_if_error_lazy(status, ::std::string("Failed creating a module from ")
		+ library::detail_::identify(library) + " in " + context::detail_::identify(context));
	constexpr const bool is_owning { true };
	// NOTE(review): library_t does not declare a link_options() getter in this header -
	// confirm it is declared elsewhere, or record the options at library load time
	return module::detail_::wrap(context.device_id(), context.handle(), new_handle, library.link_options(),
		is_owning, do_hold_primary_context_refcount_unit);
	// TODO: We could consider adding a variant of this function taking a context&&, and using that
	// to decide whether or not to hold a PC refcount unit
}

/**
 * Create a module from the compiled code of a library, in the current context.
 *
 * @note this provides the definition for the single-argument overload declared
 * earlier in this header, which previously had no definition at all.
 */
inline module_t create(const library_t& library)
{
	return create(context::current::get(), library);
}

} // namespace module

/**
 * Obtain a pointer for a "unified function" in a library, usable within
 * a given context.
 */
inline void* get_unified_function(const context_t& context, const library_t& library, const char* symbol)
{
	CAW_SET_SCOPE_CONTEXT(context.handle());
	void* result;
	auto status = cuLibraryGetUnifiedFunction(&result, library.handle(), symbol);
	throw_if_error_lazy(status, ::std::string("Failed obtaining a pointer for function '") + symbol
		+ "' from " + library::detail_::identify(library) + " in " + context::detail_::identify(context));
	return result;
}

namespace library {

namespace detail_ {

// Common implementation for loading a library either from a file or from an
// in-memory image: `creator` is the driver call (cuLibraryLoadFromFile or
// cuLibraryLoadData), `data_source` its second argument (path or image pointer),
// and `error_string_generator` lazily describes the source for error messages.
template <typename Creator, typename DataSource, typename ErrorStringGenerator>
library_t create(
	Creator creator,
	DataSource data_source,
	ErrorStringGenerator error_string_generator,
	const link::options_t& link_options = {},
	bool code_is_preserved = false)
{
	handle_t new_lib_handle;
	// Marshal the link options into the raw (option-array, value-array, count)
	// triple which the driver's JIT-options parameters expect
	auto raw_link_opts = marshal(link_options);
	// A single library-load option: whether the binary is preserved
	struct {
		detail_::option_t options[1];
		void* values[1];
		unsigned count;
	} raw_opts = { { CU_LIBRARY_BINARY_IS_PRESERVED }, { &code_is_preserved }, 1 };
	// NOTE(review): the option value is passed as a pointer to the local bool
	// parameter - confirm this matches the driver's value-passing convention
	// for CU_LIBRARY_BINARY_IS_PRESERVED; the pointer is only read during the
	// call itself, so the local's lifetime suffices
	auto status = creator(
		&new_lib_handle, data_source,
		const_cast<link::option_t*>(raw_link_opts.options()),
		const_cast<void**>(raw_link_opts.values()), raw_link_opts.count(),
		raw_opts.options, raw_opts.values, raw_opts.count
	);
	throw_if_error_lazy(status,
		::std::string("Failed loading a compiled CUDA code library from ") + error_string_generator());
	bool do_take_ownership{true};
	// The proxy we return owns the handle and will unload the library on destruction
	return detail_::wrap(new_lib_handle, do_take_ownership);
}

} // namespace detail_

/**
* Load a library from an appropriate compiled or semi-compiled file, allocating all
* relevant resources for it.
*
* @param path of a cubin, PTX, or fatbin file constituting the module to be loaded.
* @return the loaded library
*
* @note this covers cuModuleLoadFatBinary() even though that's not directly used
*
* @todo: When switching to the C++17 standard, use string_view's instead of the const char*
*/
///@{
inline library_t load_from_file(
	const char* path,
	const link::options_t& link_options = {},
	bool code_is_preserved = false)
{
	// Describe the code source lazily, for use only if loading fails
	auto describe_source = [path]() { return ::std::string("file ") + path; };
	return detail_::create(
		cuLibraryLoadFromFile, path, describe_source,
		link_options, code_is_preserved);
}

inline library_t load_from_file(
	const ::std::string& path,
	const link::options_t& link_options = {},
	bool code_is_preserved = false)
{
	// Delegate to the C-string overload
	const char* path_cstr = path.c_str();
	return load_from_file(path_cstr, link_options, code_is_preserved);
}

#if __cplusplus >= 201703L

inline library_t load_from_file(
	const ::std::filesystem::path& path,
	const link::options_t& link_options = {},
	bool code_is_preserved = false)
{
	// Fix: path::c_str() yields const wchar_t* on Windows, which cannot bind to
	// the const char* overload; convert through a narrow string instead
	return load_from_file(path.string(), link_options, code_is_preserved);
}

#endif
///@}

namespace detail_ {

// Construct a library_t proxy around an existing raw handle; ownership of the
// handle (i.e. the responsibility to unload) is transferred iff take_ownership
inline library_t wrap(handle_t handle, bool take_ownership) noexcept
{
	return library_t(handle, take_ownership);
}

} // namespace detail_

/**
 * Loads a library of compiled CUDA code from a raw image in (host) memory.
 *
 * @param module_data The raw compiled code image; note that despite the
 *     parameter's name, this function creates a context-less _library_,
 *     not a (context-associated) module.
 * @param link_options Potential options for the PTX compilation and device
 *     linking of the code.
 * @param code_is_preserved When true, requests that the driver preserve the
 *     library's binary (the CU_LIBRARY_BINARY_IS_PRESERVED load option).
 * @return a proxy object owning the newly-loaded library
 */
inline library_t create(
	const void* module_data,
	const link::options_t& link_options = {},
	bool code_is_preserved = false)
{
	return detail_::create(
		cuLibraryLoadData, module_data,
		[module_data]() { return ::std::string("data at ") + cuda::detail_::ptr_as_hex(module_data); },
		link_options, code_is_preserved);
}


// TODO: Use an optional to reduce the number of functions here... when the
// library starts requiring C++14.

namespace detail_ {

// Identify a library proxy for error messages, by way of its raw handle
inline ::std::string identify(const library_t& library)
{
	auto raw_handle = library.handle();
	return identify(raw_handle);
}

} // namespace detail_

/**
 * Create a CUDA driver library of compiled code from raw image data held in
 * a contiguous container (a span, a cuda::dynarray etc.).
 *
 * @param library_data container holding the raw, opaque compiled-code image
 * @param link_options optional PTX-compilation/linking options; when absent,
 *     the driver's defaults are used
 * @param code_is_preserved when true, request that the driver preserve the
 *     library binary
 */
template <typename ContiguousContainer,
	cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> >
library_t create(
	ContiguousContainer library_data,
	optional<link::options_t> link_options,
	bool code_is_preserved)
{
	// Fix: an optional<link::options_t> does not convert implicitly to the
	// const link::options_t& taken by the raw-pointer overload - unwrap it
	return create(
		library_data.data(),
		link_options ? *link_options : link::options_t{},
		code_is_preserved);
}

} // namespace library

} // namespace cuda

#endif // CUDA_VERSION >= 12000

#endif // CUDA_API_WRAPPERS_LIBRARY_HPP_

0 comments on commit b480d56

Please sign in to comment.