From 6edb860ce9a4189c5c28fa4c2b1f5bc8bbe34a19 Mon Sep 17 00:00:00 2001 From: pciolkosz Date: Wed, 6 Nov 2024 11:24:09 -0800 Subject: [PATCH] [CUDAX] Rename memory resource and memory pool from async to device (#2710) * Rename the type * Update tests * Rename async memory pool * Rename the tests * Change name in the docs * Generalise the memory_pool_properties name * Fix docs --------- Co-authored-by: Michael Schellenberger Costa --- ...memory_pool.cuh => device_memory_pool.cuh} | 98 ++++++------- ...esource.cuh => device_memory_resource.cuh} | 130 +++++++++--------- .../cuda/experimental/memory_resource.cuh | 4 +- cudax/test/CMakeLists.txt | 4 +- cudax/test/algorithm/copy.cu | 2 +- .../containers/uninitialized_async_buffer.cu | 26 ++-- ...c_memory_pool.cu => device_memory_pool.cu} | 50 +++---- ..._resource.cu => device_memory_resource.cu} | 76 +++++----- docs/cudax/memory_resource.rst | 10 +- 9 files changed, 200 insertions(+), 200 deletions(-) rename cudax/include/cuda/experimental/__memory_resource/{async_memory_pool.cuh => device_memory_pool.cuh} (79%) rename cudax/include/cuda/experimental/__memory_resource/{async_memory_resource.cuh => device_memory_resource.cuh} (76%) rename cudax/test/memory_resource/{async_memory_pool.cu => device_memory_pool.cu} (90%) rename cudax/test/memory_resource/{async_memory_resource.cu => device_memory_resource.cu} (85%) diff --git a/cudax/include/cuda/experimental/__memory_resource/async_memory_pool.cuh b/cudax/include/cuda/experimental/__memory_resource/device_memory_pool.cuh similarity index 79% rename from cudax/include/cuda/experimental/__memory_resource/async_memory_pool.cuh rename to cudax/include/cuda/experimental/__memory_resource/device_memory_pool.cuh index 5cb0b841c49..93db61e0d02 100644 --- a/cudax/include/cuda/experimental/__memory_resource/async_memory_pool.cuh +++ b/cudax/include/cuda/experimental/__memory_resource/device_memory_pool.cuh @@ -8,8 +8,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _CUDAX__MEMORY_RESOURCE_CUDA_MEMORY_POOL -#define _CUDAX__MEMORY_RESOURCE_CUDA_MEMORY_POOL +#ifndef _CUDAX__MEMORY_RESOURCE_DEVICE_MEMORY_POOL +#define _CUDAX__MEMORY_RESOURCE_DEVICE_MEMORY_POOL #include @@ -42,7 +42,7 @@ # if _CCCL_STD_VER >= 2014 //! @file -//! The \c async_memory_pool class provides a wrapper around a `cudaMempool_t`. +//! The \c device_memory_pool class provides a wrapper around a `cudaMempool_t`. namespace cuda::experimental::mr { @@ -105,20 +105,20 @@ enum class cudaMemAllocationHandleType cudaMemHandleTypeFabric = 0x8, ///< Allows a fabric handle to be used for exporting. (cudaMemFabricHandle_t) }; -//! @brief \c async_memory_pool_properties is a wrapper around properties passed to \c async_memory_pool to create a +//! @brief \c memory_pool_properties is a wrapper around properties passed to \c device_memory_pool to create a //! cudaMemPool_t. -struct async_memory_pool_properties +struct memory_pool_properties { size_t initial_pool_size = 0; size_t release_threshold = 0; cudaMemAllocationHandleType allocation_handle_type = cudaMemAllocationHandleType::cudaMemHandleTypeNone; }; -//! @brief \c async_memory_pool is an owning wrapper around a +//! @brief \c device_memory_pool is an owning wrapper around a //! cudaMemPool_t. //! -//! It handles creation and destruction of the underlying pool utilizing the provided \c async_memory_pool_properties. -class async_memory_pool +//! It handles creation and destruction of the underlying pool utilizing the provided \c memory_pool_properties. +class device_memory_pool { private: ::cudaMemPool_t __pool_handle_ = nullptr; @@ -164,10 +164,10 @@ private: //! @throws cuda_error If the creation of the CUDA memory pool failed. //! @returns The created CUDA memory pool. _CCCL_NODISCARD static cudaMemPool_t - __create_cuda_mempool(const int __device_id, async_memory_pool_properties __properties) noexcept + __create_cuda_mempool(const int __device_id, memory_pool_properties __properties) noexcept { ::cuda::experimental::mr::__device_supports_stream_ordered_allocations(__device_id); - async_memory_pool::__cuda_supports_export_handle_type(__device_id, __properties.allocation_handle_type); + device_memory_pool::__cuda_supports_export_handle_type(__device_id, __properties.allocation_handle_type); ::cudaMemPoolProps __pool_properties{}; __pool_properties.allocType = ::cudaMemAllocationTypePinned; @@ -210,13 +210,13 @@ private: void* __ptr{nullptr}; _CCCL_TRY_CUDA_API( ::cudaMallocAsync, - "async_memory_pool failed to allocate the initial pool size", + "device_memory_pool failed to allocate the initial pool size", &__ptr, __properties.initial_pool_size, __temp_stream.get()); _CCCL_ASSERT_CUDA_API( - ::cudaFreeAsync, "async_memory_pool failed to free the initial pool allocation", __ptr, __temp_stream.get()); + ::cudaFreeAsync, "device_memory_pool failed to free the initial pool allocation", __ptr, __temp_stream.get()); } return __cuda_pool_handle; } @@ -224,36 +224,36 @@ private: struct __from_handle_t {}; - //! @brief Constructs a \c async_memory_pool from a handle taking ownership of the pool + //! @brief Constructs a \c device_memory_pool from a handle taking ownership of the pool //! @param __handle The handle to the existing pool - explicit async_memory_pool(__from_handle_t, ::cudaMemPool_t __handle) noexcept + explicit device_memory_pool(__from_handle_t, ::cudaMemPool_t __handle) noexcept : __pool_handle_(__handle) {} public: - //! @brief Constructs a \c async_memory_pool with the optionally specified initial pool size and release threshold. + //! @brief Constructs a \c device_memory_pool with the optionally specified initial pool size and release threshold. //! If the pool size grows beyond the release threshold, unused memory held by the pool will be released at the next //! synchronization event. //! @throws cuda_error if the CUDA version does not support ``cudaMallocAsync``. //! @param __device_id The device id of the device the stream pool is constructed on. //! @param __pool_properties Optional, additional properties of the pool to be created. - explicit async_memory_pool(const ::cuda::experimental::device_ref __device_id, - async_memory_pool_properties __properties = {}) + explicit device_memory_pool(const ::cuda::experimental::device_ref __device_id, + memory_pool_properties __properties = {}) : __pool_handle_(__create_cuda_mempool(__device_id.get(), __properties)) {} //! @brief Disables construction from a plain `cudaMemPool_t`. We want to ensure clean ownership semantics. - async_memory_pool(::cudaMemPool_t) = delete; + device_memory_pool(::cudaMemPool_t) = delete; - async_memory_pool(async_memory_pool const&) = delete; - async_memory_pool(async_memory_pool&&) = delete; - async_memory_pool& operator=(async_memory_pool const&) = delete; - async_memory_pool& operator=(async_memory_pool&&) = delete; + device_memory_pool(device_memory_pool const&) = delete; + device_memory_pool(device_memory_pool&&) = delete; + device_memory_pool& operator=(device_memory_pool const&) = delete; + device_memory_pool& operator=(device_memory_pool&&) = delete; - //! @brief Destroys the \c async_memory_pool by releasing the internal ``cudaMemPool_t``. - ~async_memory_pool() noexcept + //! @brief Destroys the \c device_memory_pool by releasing the internal ``cudaMemPool_t``. + ~device_memory_pool() noexcept { - _CCCL_ASSERT_CUDA_API(::cudaMemPoolDestroy, "~async_memory_pool() failed to destroy pool", __pool_handle_); + _CCCL_ASSERT_CUDA_API(::cudaMemPoolDestroy, "~device_memory_pool() failed to destroy pool", __pool_handle_); } //! @brief Tries to release memory. @@ -263,7 +263,7 @@ public: void trim_to(const size_t __min_bytes_to_keep) { _CCCL_TRY_CUDA_API(::cudaMemPoolTrimTo, - "Failed to call cudaMemPoolTrimTo in async_memory_pool::trim_to", + "Failed to call cudaMemPoolTrimTo in device_memory_pool::trim_to", __pool_handle_, __min_bytes_to_keep); } @@ -276,7 +276,7 @@ public: size_t __value = 0; _CCCL_TRY_CUDA_API( ::cudaMemPoolGetAttribute, - "Failed to call cudaMemPoolSetAttribute in async_memory_pool::get_attribute", + "Failed to call cudaMemPoolSetAttribute in device_memory_pool::get_attribute", __pool_handle_, __attr, static_cast(&__value)); @@ -291,18 +291,18 @@ public: { if (__attr == ::cudaMemPoolAttrReservedMemCurrent || __attr == cudaMemPoolAttrUsedMemCurrent) { - _CUDA_VSTD_NOVERSION::__throw_invalid_argument("Invalid attribute passed to async_memory_pool::set_attribute."); + _CUDA_VSTD_NOVERSION::__throw_invalid_argument("Invalid attribute passed to device_memory_pool::set_attribute."); } else if ((__attr == ::cudaMemPoolAttrReservedMemHigh || __attr == cudaMemPoolAttrUsedMemHigh) && __value != 0) { _CUDA_VSTD_NOVERSION::__throw_invalid_argument( - "async_memory_pool::set_attribute: It is illegal to set this " + "device_memory_pool::set_attribute: It is illegal to set this " "attribute to a non-zero value."); } _CCCL_TRY_CUDA_API( ::cudaMemPoolSetAttribute, - "Failed to call cudaMemPoolSetAttribute in async_memory_pool::set_attribute", + "Failed to call cudaMemPoolSetAttribute in device_memory_pool::set_attribute", __pool_handle_, __attr, static_cast(&__value)); @@ -355,17 +355,17 @@ public: return ::cuda::experimental::mr::__mempool_get_access(__pool_handle_, __device); } - //! @brief Equality comparison with another \c async_memory_pool. + //! @brief Equality comparison with another \c device_memory_pool. //! @returns true if the stored ``cudaMemPool_t`` are equal. - _CCCL_NODISCARD constexpr bool operator==(async_memory_pool const& __rhs) const noexcept + _CCCL_NODISCARD constexpr bool operator==(device_memory_pool const& __rhs) const noexcept { return __pool_handle_ == __rhs.__pool_handle_; } # if _CCCL_STD_VER <= 2017 - //! @brief Inequality comparison with another \c async_memory_pool. + //! @brief Inequality comparison with another \c device_memory_pool. //! @returns true if the stored ``cudaMemPool_t`` are not equal. - _CCCL_NODISCARD constexpr bool operator!=(async_memory_pool const& __rhs) const noexcept + _CCCL_NODISCARD constexpr bool operator!=(device_memory_pool const& __rhs) const noexcept { return __pool_handle_ != __rhs.__pool_handle_; } @@ -374,26 +374,26 @@ public: //! @brief Equality comparison with a \c cudaMemPool_t. //! @param __rhs A \c cudaMemPool_t. //! @returns true if the stored ``cudaMemPool_t`` is equal to \p __rhs. - _CCCL_NODISCARD_FRIEND constexpr bool operator==(async_memory_pool const& __lhs, ::cudaMemPool_t __rhs) noexcept + _CCCL_NODISCARD_FRIEND constexpr bool operator==(device_memory_pool const& __lhs, ::cudaMemPool_t __rhs) noexcept { return __lhs.__pool_handle_ == __rhs; } # if _CCCL_STD_VER <= 2017 - //! @copydoc async_memory_pool::operator==(async_memory_pool const&, ::cudaMemPool_t) - _CCCL_NODISCARD_FRIEND constexpr bool operator==(::cudaMemPool_t __lhs, async_memory_pool const& __rhs) noexcept + //! @copydoc device_memory_pool::operator==(device_memory_pool const&, ::cudaMemPool_t) + _CCCL_NODISCARD_FRIEND constexpr bool operator==(::cudaMemPool_t __lhs, device_memory_pool const& __rhs) noexcept { return __rhs.__pool_handle_ == __lhs; } - //! @copydoc async_memory_pool::operator==(async_memory_pool const&, ::cudaMemPool_t) - _CCCL_NODISCARD_FRIEND constexpr bool operator!=(async_memory_pool const& __lhs, ::cudaMemPool_t __rhs) noexcept + //! @copydoc device_memory_pool::operator==(device_memory_pool const&, ::cudaMemPool_t) + _CCCL_NODISCARD_FRIEND constexpr bool operator!=(device_memory_pool const& __lhs, ::cudaMemPool_t __rhs) noexcept { return __lhs.__pool_handle_ != __rhs; } - //! @copydoc async_memory_pool::operator==(async_memory_pool const&, ::cudaMemPool_t) - _CCCL_NODISCARD_FRIEND constexpr bool operator!=(::cudaMemPool_t __lhs, async_memory_pool const& __rhs) noexcept + //! @copydoc device_memory_pool::operator==(device_memory_pool const&, ::cudaMemPool_t) + _CCCL_NODISCARD_FRIEND constexpr bool operator!=(::cudaMemPool_t __lhs, device_memory_pool const& __rhs) noexcept { return __rhs.__pool_handle_ != __lhs; } @@ -405,23 +405,23 @@ public: return __pool_handle_; } - //! @brief Construct an `async_memory_pool` object from a native `cudaMemPool_t` handle. + //! @brief Construct an `device_memory_pool` object from a native `cudaMemPool_t` handle. //! //! @param __handle The native handle //! - //! @return The constructed `async_memory_pool` object + //! @return The constructed `device_memory_pool` object //! - //! @note The constructed `async_memory_pool` object takes ownership of the native handle. - _CCCL_NODISCARD static async_memory_pool from_native_handle(::cudaMemPool_t __handle) noexcept + //! @note The constructed `device_memory_pool` object takes ownership of the native handle. + _CCCL_NODISCARD static device_memory_pool from_native_handle(::cudaMemPool_t __handle) noexcept { - return async_memory_pool(__from_handle_t{}, __handle); + return device_memory_pool(__from_handle_t{}, __handle); } // Disallow construction from an `int`, e.g., `0`. - static async_memory_pool from_native_handle(int) = delete; + static device_memory_pool from_native_handle(int) = delete; // Disallow construction from `nullptr`. - static async_memory_pool from_native_handle(_CUDA_VSTD::nullptr_t) = delete; + static device_memory_pool from_native_handle(_CUDA_VSTD::nullptr_t) = delete; }; } // namespace cuda::experimental::mr @@ -430,4 +430,4 @@ public: #endif // !_CCCL_COMPILER_MSVC_2017 && !_CCCL_CUDACC_BELOW_11_2 -#endif // _CUDAX__MEMORY_RESOURCE_CUDA_MEMORY_POOL +#endif // _CUDAX__MEMORY_RESOURCE_DEVICE_MEMORY_POOL diff --git a/cudax/include/cuda/experimental/__memory_resource/async_memory_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/device_memory_resource.cuh similarity index 76% rename from cudax/include/cuda/experimental/__memory_resource/async_memory_resource.cuh rename to cudax/include/cuda/experimental/__memory_resource/device_memory_resource.cuh index 44507f7ee91..66986765307 100644 --- a/cudax/include/cuda/experimental/__memory_resource/async_memory_resource.cuh +++ b/cudax/include/cuda/experimental/__memory_resource/device_memory_resource.cuh @@ -8,8 +8,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _CUDAX__MEMORY_RESOURCE_CUDA_ASYNC_MEMORY_RESOURCE -#define _CUDAX__MEMORY_RESOURCE_CUDA_ASYNC_MEMORY_RESOURCE +#ifndef _CUDAX__MEMORY_RESOURCE_CUDA_DEVICE_MEMORY_RESOURCE +#define _CUDAX__MEMORY_RESOURCE_CUDA_DEVICE_MEMORY_RESOURCE #include @@ -39,19 +39,19 @@ # include # include -# include +# include # include # if _CCCL_STD_VER >= 2014 //! @file -//! The \c async_memory_pool class provides an asynchronous memory resource that allocates device memory in stream +//! The \c device_memory_pool class provides an asynchronous memory resource that allocates device memory in stream //! order. namespace cuda::experimental::mr { -//! @brief global stream to synchronize in the synchronous interface of \c async_memory_resource -inline ::cuda::stream_ref __async_memory_resource_sync_stream() +//! @brief global stream to synchronize in the synchronous interface of \c device_memory_resource +inline ::cuda::stream_ref __device_memory_resource_sync_stream() { static ::cuda::experimental::stream __stream{}; return __stream; @@ -63,17 +63,17 @@ inline ::cuda::stream_ref __async_memory_resource_sync_stream() //! Stream ordered memory resource //! ------------------------------ //! -//! ``async_memory_resource`` uses `cudaMallocFromPoolAsync / cudaFreeAsync +//! ``device_memory_resource`` uses `cudaMallocFromPoolAsync / cudaFreeAsync //! `__ for allocation/deallocation. A -//! ``async_memory_resource`` is a thin wrapper around a \c cudaMemPool_t. +//! ``device_memory_resource`` is a thin wrapper around a \c cudaMemPool_t. //! //! .. warning:: //! -//! ``async_memory_resource`` does not own the pool and it is the responsibility of the user to ensure that the -//! lifetime of the pool exceeds the lifetime of the ``async_memory_resource``. +//! ``device_memory_resource`` does not own the pool and it is the responsibility of the user to ensure that the +//! lifetime of the pool exceeds the lifetime of the ``device_memory_resource``. //! //! @endrst -class async_memory_resource +class device_memory_resource { private: ::cudaMemPool_t __pool_; @@ -101,30 +101,30 @@ private: } public: - //! @brief Default constructs the async_memory_resource using the default \c cudaMemPool_t of the default device. + //! @brief Default constructs the device_memory_resource using the default \c cudaMemPool_t of the default device. //! @throws cuda_error if retrieving the default \c cudaMemPool_t fails. - async_memory_resource() + device_memory_resource() : __pool_(__get_default_mem_pool(0)) {} - //! @brief Constructs a async_memory_resource using the default \c cudaMemPool_t of a given device. + //! @brief Constructs a device_memory_resource using the default \c cudaMemPool_t of a given device. //! @throws cuda_error if retrieving the default \c cudaMemPool_t fails. - explicit async_memory_resource(::cuda::experimental::device_ref __device) + explicit device_memory_resource(::cuda::experimental::device_ref __device) : __pool_(__get_default_mem_pool(__device.get())) {} - async_memory_resource(int) = delete; - async_memory_resource(_CUDA_VSTD::nullptr_t) = delete; + device_memory_resource(int) = delete; + device_memory_resource(_CUDA_VSTD::nullptr_t) = delete; - //! @brief Constructs the async_memory_resource from a \c cudaMemPool_t. + //! @brief Constructs the device_memory_resource from a \c cudaMemPool_t. //! @param __pool The \c cudaMemPool_t used to allocate memory. - explicit async_memory_resource(::cudaMemPool_t __pool) noexcept + explicit device_memory_resource(::cudaMemPool_t __pool) noexcept : __pool_(__pool) {} - //! @brief Constructs the async_memory_resource from a \c async_memory_pool by calling get(). - //! @param __pool The \c async_memory_pool used to allocate memory. - explicit async_memory_resource(async_memory_pool& __pool) noexcept + //! @brief Constructs the device_memory_resource from a \c device_memory_pool by calling get(). + //! @param __pool The \c device_memory_pool used to allocate memory. + explicit device_memory_resource(device_memory_pool& __pool) noexcept : __pool_(__pool.get()) {} @@ -141,18 +141,18 @@ public: { _CUDA_VSTD_NOVERSION::__throw_invalid_argument( "Invalid alignment passed to " - "async_memory_resource::allocate_async."); + "device_memory_resource::allocate_async."); } void* __ptr{nullptr}; _CCCL_TRY_CUDA_API( ::cudaMallocFromPoolAsync, - "async_memory_resource::allocate failed to allocate with cudaMallocFromPoolAsync", + "device_memory_resource::allocate failed to allocate with cudaMallocFromPoolAsync", &__ptr, __bytes, __pool_, - __async_memory_resource_sync_stream().get()); - __async_memory_resource_sync_stream().wait(); + __device_memory_resource_sync_stream().get()); + __device_memory_resource_sync_stream().wait(); return __ptr; } @@ -164,10 +164,10 @@ public: //! properly synchronize all relevant streams before calling `deallocate`. void deallocate(void* __ptr, const size_t, const size_t __alignment = _CUDA_VMR::default_cuda_malloc_alignment) { - _CCCL_ASSERT(__is_valid_alignment(__alignment), "Invalid alignment passed to async_memory_resource::deallocate."); + _CCCL_ASSERT(__is_valid_alignment(__alignment), "Invalid alignment passed to device_memory_resource::deallocate."); _CCCL_ASSERT_CUDA_API( - ::cudaFreeAsync, "async_memory_resource::deallocate failed", __ptr, __async_memory_resource_sync_stream().get()); - __async_memory_resource_sync_stream().wait(); + ::cudaFreeAsync, "device_memory_resource::deallocate failed", __ptr, __device_memory_resource_sync_stream().get()); + __device_memory_resource_sync_stream().wait(); (void) __alignment; } @@ -184,7 +184,7 @@ public: { _CUDA_VSTD_NOVERSION::__throw_invalid_argument( "Invalid alignment passed to " - "async_memory_resource::allocate_async."); + "device_memory_resource::allocate_async."); } return allocate_async(__bytes, __stream); @@ -200,7 +200,7 @@ public: void* __ptr{nullptr}; _CCCL_TRY_CUDA_API( ::cudaMallocFromPoolAsync, - "async_memory_resource::allocate_async failed to allocate with cudaMallocFromPoolAsync", + "device_memory_resource::allocate_async failed to allocate with cudaMallocFromPoolAsync", &__ptr, __bytes, __pool_, @@ -220,7 +220,7 @@ public: void deallocate_async(void* __ptr, const size_t __bytes, const size_t __alignment, const ::cuda::stream_ref __stream) { // We need to ensure that the provided alignment matches the minimal provided alignment - _CCCL_ASSERT(__is_valid_alignment(__alignment), "Invalid alignment passed to async_memory_resource::deallocate."); + _CCCL_ASSERT(__is_valid_alignment(__alignment), "Invalid alignment passed to device_memory_resource::deallocate."); deallocate_async(__ptr, __bytes, __stream); (void) __alignment; } @@ -235,7 +235,7 @@ public: //! It is the caller's responsibility to properly synchronize all relevant streams before calling `deallocate_async`. void deallocate_async(void* __ptr, size_t, const ::cuda::stream_ref __stream) { - _CCCL_ASSERT_CUDA_API(::cudaFreeAsync, "async_memory_resource::deallocate_async failed", __ptr, __stream.get()); + _CCCL_ASSERT_CUDA_API(::cudaFreeAsync, "device_memory_resource::deallocate_async failed", __ptr, __stream.get()); } //! @brief Enable peer access to memory allocated through this memory resource by the supplied devices @@ -294,34 +294,34 @@ public: return ::cuda::experimental::mr::__mempool_get_access(__pool_, __device); } - //! @brief Equality comparison with another async_memory_resource. + //! @brief Equality comparison with another device_memory_resource. //! @returns true if underlying \c cudaMemPool_t are equal. - _CCCL_NODISCARD constexpr bool operator==(async_memory_resource const& __rhs) const noexcept + _CCCL_NODISCARD constexpr bool operator==(device_memory_resource const& __rhs) const noexcept { return __pool_ == __rhs.__pool_; } # if _CCCL_STD_VER <= 2017 - //! @brief Inequality comparison with another \c async_memory_resource. + //! @brief Inequality comparison with another \c device_memory_resource. //! @returns true if underlying \c cudaMemPool_t are inequal. - _CCCL_NODISCARD constexpr bool operator!=(async_memory_resource const& __rhs) const noexcept + _CCCL_NODISCARD constexpr bool operator!=(device_memory_resource const& __rhs) const noexcept { return __pool_ != __rhs.__pool_; } # endif // _CCCL_STD_VER <= 2017 # if _CCCL_STD_VER >= 2020 - //! @brief Equality comparison between a \c async_memory_resource and another resource. + //! @brief Equality comparison between a \c device_memory_resource and another resource. //! @param __rhs The resource to compare to. //! @returns If the underlying types are equality comparable, returns the result of equality comparison of both //! resources. Otherwise, returns false. _LIBCUDACXX_TEMPLATE(class _Resource) - _LIBCUDACXX_REQUIRES((_CUDA_VMR::__different_resource) ) + _LIBCUDACXX_REQUIRES((_CUDA_VMR::__different_resource) ) _CCCL_NODISCARD bool operator==(_Resource const& __rhs) const noexcept { if constexpr (has_property<_Resource, _CUDA_VMR::device_accessible>) { - return _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast(this)} + return _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast(this)} == _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast<_Resource&>(__rhs)}; } else @@ -331,68 +331,68 @@ public: } # else // ^^^ C++20 ^^^ / vvv C++17 template - _CCCL_NODISCARD_FRIEND auto operator==(async_memory_resource const& __lhs, _Resource const& __rhs) noexcept - _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource&& + _CCCL_NODISCARD_FRIEND auto operator==(device_memory_resource const& __lhs, _Resource const& __rhs) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource&& has_property<_Resource, _CUDA_VMR::device_accessible>) { - return _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast(__lhs)} + return _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast(__lhs)} == _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast<_Resource&>(__rhs)}; } template - _CCCL_NODISCARD_FRIEND auto operator==(async_memory_resource const&, _Resource const&) noexcept - _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource + _CCCL_NODISCARD_FRIEND auto operator==(device_memory_resource const&, _Resource const&) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource && !has_property<_Resource, _CUDA_VMR::device_accessible>) { return false; } template - _CCCL_NODISCARD_FRIEND auto operator==(_Resource const& __rhs, async_memory_resource const& __lhs) noexcept - _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource&& + _CCCL_NODISCARD_FRIEND auto operator==(_Resource const& __rhs, device_memory_resource const& __lhs) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource&& has_property<_Resource, _CUDA_VMR::device_accessible>) { - return _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast(__lhs)} + return _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast(__lhs)} == _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast<_Resource&>(__rhs)}; } template - _CCCL_NODISCARD_FRIEND auto operator==(_Resource const&, async_memory_resource const&) noexcept - _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource + _CCCL_NODISCARD_FRIEND auto operator==(_Resource const&, device_memory_resource const&) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource && !has_property<_Resource, _CUDA_VMR::device_accessible>) { return false; } template - _CCCL_NODISCARD_FRIEND auto operator!=(async_memory_resource const& __lhs, _Resource const& __rhs) noexcept - _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource&& + _CCCL_NODISCARD_FRIEND auto operator!=(device_memory_resource const& __lhs, _Resource const& __rhs) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource&& has_property<_Resource, _CUDA_VMR::device_accessible>) { - return _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast(__lhs)} + return _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast(__lhs)} != _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast<_Resource&>(__rhs)}; } template - _CCCL_NODISCARD_FRIEND auto operator!=(async_memory_resource const&, _Resource const&) noexcept - _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource + _CCCL_NODISCARD_FRIEND auto operator!=(device_memory_resource const&, _Resource const&) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource && !has_property<_Resource, _CUDA_VMR::device_accessible>) { return true; } template - _CCCL_NODISCARD_FRIEND auto operator!=(_Resource const& __rhs, async_memory_resource const& __lhs) noexcept - _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource&& + _CCCL_NODISCARD_FRIEND auto operator!=(_Resource const& __rhs, device_memory_resource const& __lhs) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource&& has_property<_Resource, _CUDA_VMR::device_accessible>) { - return _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast(__lhs)} + return _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast(__lhs)} != _CUDA_VMR::resource_ref<_CUDA_VMR::device_accessible>{const_cast<_Resource&>(__rhs)}; } template - _CCCL_NODISCARD_FRIEND auto operator!=(_Resource const&, async_memory_resource const&) noexcept - _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource + _CCCL_NODISCARD_FRIEND auto operator!=(_Resource const&, device_memory_resource const&) noexcept + _LIBCUDACXX_TRAILING_REQUIRES(bool)(_CUDA_VMR::__different_resource && !has_property<_Resource, _CUDA_VMR::device_accessible>) { return true; @@ -406,12 +406,12 @@ public: } # ifndef DOXYGEN_SHOULD_SKIP_THIS // Doxygen cannot handle the friend function - //! @brief Enables the \c device_accessible property for \c async_memory_resource. - //! @relates async_memory_resource - friend constexpr void get_property(async_memory_resource const&, _CUDA_VMR::device_accessible) noexcept {} + //! @brief Enables the \c device_accessible property for \c device_memory_resource. + //! @relates device_memory_resource + friend constexpr void get_property(device_memory_resource const&, _CUDA_VMR::device_accessible) noexcept {} # endif // DOXYGEN_SHOULD_SKIP_THIS }; -static_assert(_CUDA_VMR::resource_with, ""); +static_assert(_CUDA_VMR::resource_with, ""); } // namespace cuda::experimental::mr @@ -419,4 +419,4 @@ static_assert(_CUDA_VMR::resource_with -#include -#include +#include +#include #include #endif // __CUDAX_MEMORY_RESOURCE___ diff --git a/cudax/test/CMakeLists.txt b/cudax/test/CMakeLists.txt index 38f826c0b4e..9e9a1831946 100644 --- a/cudax/test/CMakeLists.txt +++ b/cudax/test/CMakeLists.txt @@ -104,8 +104,8 @@ foreach(cn_target IN LISTS cudax_TARGETS) cudax_add_catch2_test(test_target memory_resource ${cn_target} memory_resource/any_async_resource.cu memory_resource/any_resource.cu - memory_resource/async_memory_pool.cu - memory_resource/async_memory_resource.cu + memory_resource/device_memory_pool.cu + memory_resource/device_memory_resource.cu memory_resource/shared_resource.cu ) diff --git a/cudax/test/algorithm/copy.cu b/cudax/test/algorithm/copy.cu index 0066f4feba9..07eabba32e6 100644 --- a/cudax/test/algorithm/copy.cu +++ b/cudax/test/algorithm/copy.cu @@ -16,7 +16,7 @@ TEST_CASE("Copy", "[data_manipulation]") SECTION("Device resource") { - cudax::mr::async_memory_resource device_resource; + cudax::mr::device_memory_resource device_resource; std::vector host_vector(buffer_size); { diff --git a/cudax/test/containers/uninitialized_async_buffer.cu b/cudax/test/containers/uninitialized_async_buffer.cu index 83a3c8515d0..8803c510279 100644 --- a/cudax/test/containers/uninitialized_async_buffer.cu +++ b/cudax/test/containers/uninitialized_async_buffer.cu @@ -52,7 +52,7 @@ TEMPLATE_TEST_CASE( static_assert(!cuda::std::is_copy_constructible::value, ""); static_assert(!cuda::std::is_copy_assignable::value, ""); - cuda::experimental::mr::async_memory_resource resource{}; + cuda::experimental::mr::device_memory_resource resource{}; cuda::experimental::stream stream{}; SECTION("construction") @@ -168,50 +168,50 @@ TEMPLATE_TEST_CASE( // A test resource that keeps track of the number of resources are // currently alive. -struct test_async_memory_resource : cudax::mr::async_memory_resource +struct test_async_device_memory_resource : cudax::mr::device_memory_resource { static int count; - test_async_memory_resource() + test_async_device_memory_resource() { ++count; } - test_async_memory_resource(const test_async_memory_resource& other) - : cudax::mr::async_memory_resource{other} + test_async_device_memory_resource(const test_async_device_memory_resource& other) + : cudax::mr::device_memory_resource{other} { ++count; } - ~test_async_memory_resource() + ~test_async_device_memory_resource() { --count; } }; -int test_async_memory_resource::count = 0; +int test_async_device_memory_resource::count = 0; TEST_CASE("uninitialized_async_buffer's memory resource does not dangle", "[container]") { cuda::experimental::stream stream{}; cudax::uninitialized_async_buffer buffer{ - cudax::mr::async_memory_resource{}, stream, 0}; + cudax::mr::device_memory_resource{}, stream, 0}; { - CHECK(test_async_memory_resource::count == 0); + CHECK(test_async_device_memory_resource::count == 0); cudax::uninitialized_async_buffer src_buffer{ - test_async_memory_resource{}, stream, 1024}; + test_async_device_memory_resource{}, stream, 1024}; - CHECK(test_async_memory_resource::count == 1); + CHECK(test_async_device_memory_resource::count == 1); cudax::uninitialized_async_buffer dst_buffer{ src_buffer.get_resource(), stream, 1024}; - CHECK(test_async_memory_resource::count == 2); + CHECK(test_async_device_memory_resource::count == 2); buffer = ::cuda::std::move(dst_buffer); } - CHECK(test_async_memory_resource::count == 1); + CHECK(test_async_device_memory_resource::count == 1); } diff --git a/cudax/test/memory_resource/async_memory_pool.cu b/cudax/test/memory_resource/device_memory_pool.cu similarity index 90% rename from cudax/test/memory_resource/async_memory_pool.cu rename to cudax/test/memory_resource/device_memory_pool.cu index 019fb239e9f..dc51a27b7e9 100644 --- a/cudax/test/memory_resource/async_memory_pool.cu +++ b/cudax/test/memory_resource/device_memory_pool.cu @@ -22,7 +22,7 @@ #include namespace cudax = cuda::experimental; -using pool = cudax::mr::async_memory_pool; +using pool = cudax::mr::device_memory_pool; static_assert(!cuda::std::is_trivial::value, ""); static_assert(!cuda::std::is_trivially_default_constructible::value, ""); static_assert(!cuda::std::is_default_constructible::value, ""); @@ -69,7 +69,7 @@ static bool ensure_export_handle(::cudaMemPool_t pool, const ::cudaMemAllocation return allocation_handle == ::cudaMemHandleTypeNone ? status == ::cudaErrorInvalidValue : status == ::cudaSuccess; } -TEST_CASE("async_memory_pool construction", "[memory_resource]") +TEST_CASE("device_memory_pool construction", "[memory_resource]") { int current_device{}; { @@ -89,10 +89,10 @@ TEST_CASE("async_memory_pool construction", "[memory_resource]") current_device); } - using memory_pool = cudax::mr::async_memory_pool; + using memory_pool = cudax::mr::device_memory_pool; SECTION("Construct from device id") { - cudax::mr::async_memory_pool from_device{current_device}; + cudax::mr::device_memory_pool from_device{current_device}; ::cudaMemPool_t get = from_device.get(); CHECK(get != current_default_pool); @@ -109,7 +109,7 @@ TEST_CASE("async_memory_pool construction", "[memory_resource]") SECTION("Construct with empty properties") { - cudax::mr::async_memory_pool_properties props{}; + cudax::mr::memory_pool_properties props{}; memory_pool from_defaulted_properties{current_device, props}; ::cudaMemPool_t get = from_defaulted_properties.get(); @@ -127,7 +127,7 @@ TEST_CASE("async_memory_pool construction", "[memory_resource]") SECTION("Construct with initial pool size") { - cudax::mr::async_memory_pool_properties props = {42, 20}; + cudax::mr::memory_pool_properties props = {42, 20}; memory_pool with_threshold{current_device, props}; ::cudaMemPool_t get = with_threshold.get(); @@ -147,7 +147,7 @@ TEST_CASE("async_memory_pool construction", "[memory_resource]") #if !defined(_CCCL_CUDACC_BELOW_11_2) SECTION("Construct with allocation handle") { - cudax::mr::async_memory_pool_properties props = { + cudax::mr::memory_pool_properties props = { 42, 20, cudax::mr::cudaMemAllocationHandleType::cudaMemHandleTypePosixFileDescriptor}; memory_pool with_allocation_handle{current_device, props}; @@ -175,12 +175,12 @@ TEST_CASE("async_memory_pool construction", "[memory_resource]") ::cudaMemPool_t new_pool{}; _CCCL_TRY_CUDA_API(::cudaMemPoolCreate, "Failed to call cudaMemPoolCreate", &new_pool, &pool_properties); - cudax::mr::async_memory_pool from_handle = cudax::mr::async_memory_pool::from_native_handle(new_pool); + cudax::mr::device_memory_pool from_handle = cudax::mr::device_memory_pool::from_native_handle(new_pool); CHECK(from_handle.get() == new_pool); } } -TEST_CASE("async_memory_pool comparison", "[memory_resource]") +TEST_CASE("device_memory_pool comparison", "[memory_resource]") { int current_device{}; { @@ -200,9 +200,9 @@ TEST_CASE("async_memory_pool comparison", "[memory_resource]") current_device); } - cudax::mr::async_memory_pool first{current_device}; - { // comparison against a plain async_memory_pool - cudax::mr::async_memory_pool second{current_device}; + cudax::mr::device_memory_pool first{current_device}; + { // comparison against a plain device_memory_pool + cudax::mr::device_memory_pool second{current_device}; CHECK(first == first); CHECK(first != second); } @@ -215,7 +215,7 @@ TEST_CASE("async_memory_pool comparison", "[memory_resource]") } } -TEST_CASE("async_memory_pool accessors", "[memory_resource]") +TEST_CASE("device_memory_pool accessors", "[memory_resource]") { int current_device{}; { @@ -235,9 +235,9 @@ TEST_CASE("async_memory_pool accessors", "[memory_resource]") current_device); } - SECTION("async_memory_pool::set_attribute") + SECTION("device_memory_pool::set_attribute") { - cudax::mr::async_memory_pool pool{current_device}; + cudax::mr::device_memory_pool pool{current_device}; { // cudaMemPoolReuseFollowEventDependencies // Get the attribute value @@ -300,7 +300,7 @@ TEST_CASE("async_memory_pool accessors", "[memory_resource]") } // prime the pool to a given size - cudax::mr::async_memory_resource resource{pool}; + cudax::mr::device_memory_resource resource{pool}; cudax::stream stream{}; // Allocate a buffer to prime @@ -327,7 +327,7 @@ TEST_CASE("async_memory_pool accessors", "[memory_resource]") catch (::std::invalid_argument& err) { CHECK(strcmp(err.what(), - "async_memory_pool::set_attribute: It is illegal to set this attribute to a non-zero value.") + "device_memory_pool::set_attribute: It is illegal to set this attribute to a non-zero value.") == 0); } catch (...) @@ -356,7 +356,7 @@ TEST_CASE("async_memory_pool accessors", "[memory_resource]") catch (::std::invalid_argument& err) { CHECK(strcmp(err.what(), - "async_memory_pool::set_attribute: It is illegal to set this attribute to a non-zero value.") + "device_memory_pool::set_attribute: It is illegal to set this attribute to a non-zero value.") == 0); } catch (...) @@ -382,7 +382,7 @@ TEST_CASE("async_memory_pool accessors", "[memory_resource]") } catch (::std::invalid_argument& err) { - CHECK(strcmp(err.what(), "Invalid attribute passed to async_memory_pool::set_attribute.") == 0); + CHECK(strcmp(err.what(), "Invalid attribute passed to device_memory_pool::set_attribute.") == 0); } catch (...) { @@ -402,7 +402,7 @@ TEST_CASE("async_memory_pool accessors", "[memory_resource]") } catch (::std::invalid_argument& err) { - CHECK(strcmp(err.what(), "Invalid attribute passed to async_memory_pool::set_attribute.") == 0); + CHECK(strcmp(err.what(), "Invalid attribute passed to device_memory_pool::set_attribute.") == 0); } catch (...) { @@ -415,11 +415,11 @@ TEST_CASE("async_memory_pool accessors", "[memory_resource]") stream.wait(); } - SECTION("async_memory_pool::trim_to") + SECTION("device_memory_pool::trim_to") { - cudax::mr::async_memory_pool pool{current_device}; + cudax::mr::device_memory_pool pool{current_device}; // prime the pool to a given size - cudax::mr::async_memory_resource resource{pool}; + cudax::mr::device_memory_resource resource{pool}; cudax::stream stream{}; // Allocate 2 buffers @@ -469,14 +469,14 @@ TEST_CASE("async_memory_pool accessors", "[memory_resource]") CHECK(still_no_backing == 0); } - SECTION("async_memory_pool::enable_peer_access") + SECTION("device_memory_pool::enable_peer_access") { if (cudax::devices.size() > 1) { auto peers = cudax::devices[0].get_peers(); if (peers.size() > 0) { - cudax::mr::async_memory_pool pool{cudax::devices[0]}; + cudax::mr::device_memory_pool pool{cudax::devices[0]}; CUDAX_CHECK(pool.is_accessible_from(cudax::devices[0])); pool.enable_peer_access(peers); diff --git a/cudax/test/memory_resource/async_memory_resource.cu b/cudax/test/memory_resource/device_memory_resource.cu similarity index 85% rename from cudax/test/memory_resource/async_memory_resource.cu rename to cudax/test/memory_resource/device_memory_resource.cu index 5cf6b97c38f..9bf06b44634 100644 --- a/cudax/test/memory_resource/async_memory_resource.cu +++ b/cudax/test/memory_resource/device_memory_resource.cu @@ -21,15 +21,15 @@ namespace cudax = cuda::experimental; -static_assert(!cuda::std::is_trivial::value, ""); -static_assert(!cuda::std::is_trivially_default_constructible::value, ""); -static_assert(cuda::std::is_default_constructible::value, ""); -static_assert(cuda::std::is_copy_constructible::value, ""); -static_assert(cuda::std::is_move_constructible::value, ""); -static_assert(cuda::std::is_copy_assignable::value, ""); -static_assert(cuda::std::is_move_assignable::value, ""); -static_assert(cuda::std::is_trivially_destructible::value, ""); -static_assert(!cuda::std::is_empty::value, ""); +static_assert(!cuda::std::is_trivial::value, ""); +static_assert(!cuda::std::is_trivially_default_constructible::value, ""); +static_assert(cuda::std::is_default_constructible::value, ""); +static_assert(cuda::std::is_copy_constructible::value, ""); +static_assert(cuda::std::is_move_constructible::value, ""); +static_assert(cuda::std::is_copy_assignable::value, ""); +static_assert(cuda::std::is_move_assignable::value, ""); +static_assert(cuda::std::is_trivially_destructible::value, ""); +static_assert(!cuda::std::is_empty::value, ""); static bool ensure_release_threshold(::cudaMemPool_t pool, const size_t expected_threshold) { @@ -67,7 +67,7 @@ static bool ensure_export_handle(::cudaMemPool_t pool, const ::cudaMemAllocation return allocation_handle == ::cudaMemHandleTypeNone ? status == ::cudaErrorInvalidValue : status == ::cudaSuccess; } -TEST_CASE("async_memory_resource construction", "[memory_resource]") +TEST_CASE("device_memory_resource construction", "[memory_resource]") { int current_device{}; { @@ -87,7 +87,7 @@ TEST_CASE("async_memory_resource construction", "[memory_resource]") current_device); } - using async_resource = cuda::experimental::mr::async_memory_resource; + using async_resource = cuda::experimental::mr::device_memory_resource; SECTION("Default construction") { { @@ -99,7 +99,7 @@ TEST_CASE("async_memory_resource construction", "[memory_resource]") void* ptr{nullptr}; _CCCL_TRY_CUDA_API( ::cudaMallocAsync, - "Failed to allocate with pool passed to cuda::experimental::mr::async_memory_resource", + "Failed to allocate with pool passed to cuda::experimental::mr::device_memory_resource", &ptr, 42, current_default_pool, @@ -108,7 +108,7 @@ TEST_CASE("async_memory_resource construction", "[memory_resource]") _CCCL_ASSERT_CUDA_API( ::cudaFreeAsync, - "Failed to deallocate with pool passed to cuda::experimental::mr::async_memory_resource", + "Failed to deallocate with pool passed to cuda::experimental::mr::device_memory_resource", ptr, ::cudaStream_t{0}); } @@ -133,7 +133,7 @@ TEST_CASE("async_memory_resource construction", "[memory_resource]") void* ptr{nullptr}; _CCCL_TRY_CUDA_API( ::cudaMallocAsync, - "Failed to allocate with pool passed to cuda::experimental::mr::async_memory_resource", + "Failed to allocate with pool passed to cuda::experimental::mr::device_memory_resource", &ptr, 42, current_default_pool, @@ -142,17 +142,17 @@ TEST_CASE("async_memory_resource construction", "[memory_resource]") _CCCL_ASSERT_CUDA_API( ::cudaFreeAsync, - "Failed to deallocate with pool passed to cuda::experimental::mr::async_memory_resource", + "Failed to deallocate with pool passed to cuda::experimental::mr::device_memory_resource", ptr, ::cudaStream_t{0}); } SECTION("Construct with initial pool size") { - cuda::experimental::mr::async_memory_pool_properties props = { + cuda::experimental::mr::memory_pool_properties props = { 42, }; - cuda::experimental::mr::async_memory_pool pool{current_device, props}; + cuda::experimental::mr::device_memory_pool pool{current_device, props}; async_resource from_initial_pool_size{pool}; ::cudaMemPool_t get = from_initial_pool_size.get(); @@ -170,11 +170,11 @@ TEST_CASE("async_memory_resource construction", "[memory_resource]") SECTION("Construct with release threshold") { - cuda::experimental::mr::async_memory_pool_properties props = { + cuda::experimental::mr::memory_pool_properties props = { 42, 20, }; - cuda::experimental::mr::async_memory_pool pool{current_device, props}; + cuda::experimental::mr::device_memory_pool pool{current_device, props}; async_resource with_threshold{pool}; ::cudaMemPool_t get = with_threshold.get(); @@ -194,12 +194,12 @@ TEST_CASE("async_memory_resource construction", "[memory_resource]") #if !defined(_CCCL_CUDACC_BELOW_11_2) SECTION("Construct with allocation handle") { - cuda::experimental::mr::async_memory_pool_properties props = { + cuda::experimental::mr::memory_pool_properties props = { 42, 20, cuda::experimental::mr::cudaMemAllocationHandleType::cudaMemHandleTypePosixFileDescriptor, }; - cuda::experimental::mr::async_memory_pool pool{current_device, props}; + cuda::experimental::mr::device_memory_pool pool{current_device, props}; async_resource with_allocation_handle{pool}; ::cudaMemPool_t get = with_allocation_handle.get(); @@ -226,9 +226,9 @@ static void ensure_device_ptr(void* ptr) CHECK(attributes.type == cudaMemoryTypeDevice); } -TEST_CASE("async_memory_resource allocation", "[memory_resource]") +TEST_CASE("device_memory_resource allocation", "[memory_resource]") { - cuda::experimental::mr::async_memory_resource res{}; + cuda::experimental::mr::device_memory_resource res{}; { // allocate / deallocate auto* ptr = res.allocate(42); @@ -397,21 +397,21 @@ static_assert(cuda::mr::async_resource static_assert(cuda::mr::async_resource_with, cuda::mr::device_accessible>, ""); -TEST_CASE("async_memory_resource comparison", "[memory_resource]") +TEST_CASE("device_memory_resource comparison", "[memory_resource]") { int current_device{}; { _CCCL_TRY_CUDA_API(::cudaGetDevice, "Failed to query current device with cudaGetDevice.", ¤t_device); } - cuda::experimental::mr::async_memory_resource first{}; - { // comparison against a plain async_memory_resource - cuda::experimental::mr::async_memory_resource second{}; + cuda::experimental::mr::device_memory_resource first{}; + { // comparison against a plain device_memory_resource + cuda::experimental::mr::device_memory_resource second{}; CHECK(first == second); CHECK(!(first != second)); } - { // comparison against a plain async_memory_resource with a different pool + { // comparison against a plain device_memory_resource with a different pool cudaMemPool_t cuda_pool_handle{}; { ::cudaMemPoolProps pool_properties{}; @@ -421,13 +421,13 @@ TEST_CASE("async_memory_resource comparison", "[memory_resource]") pool_properties.location.id = current_device; _CCCL_TRY_CUDA_API(::cudaMemPoolCreate, "Failed to call cudaMemPoolCreate", &cuda_pool_handle, &pool_properties); } - cuda::experimental::mr::async_memory_resource second{cuda_pool_handle}; + cuda::experimental::mr::device_memory_resource second{cuda_pool_handle}; CHECK(first != second); CHECK(!(first == second)); } - { // comparison against a async_memory_resource wrapped inside a resource_ref - cuda::experimental::mr::async_memory_resource second{}; + { // comparison against a device_memory_resource wrapped inside a resource_ref + cuda::experimental::mr::device_memory_resource second{}; cuda::mr::resource_ref second_ref{second}; CHECK(first == second_ref); CHECK(!(first != second_ref)); @@ -435,8 +435,8 @@ TEST_CASE("async_memory_resource comparison", "[memory_resource]") CHECK(!(second_ref != first)); } - { // comparison against a async_memory_resource wrapped inside a async_resource_ref - cuda::experimental::mr::async_memory_resource second{}; + { // comparison against a device_memory_resource wrapped inside a async_resource_ref + cuda::experimental::mr::device_memory_resource second{}; cuda::mr::async_resource_ref second_ref{second}; CHECK(first == second_ref); @@ -481,8 +481,8 @@ TEST_CASE("Async memory resource peer access") auto peers = cudax::devices[0].get_peers(); if (peers.size() > 0) { - cudax::mr::async_memory_pool pool{cudax::devices[0]}; - cudax::mr::async_memory_resource resource{pool}; + cudax::mr::device_memory_pool pool{cudax::devices[0]}; + cudax::mr::device_memory_resource resource{pool}; cudax::stream stream{peers.front()}; CUDAX_CHECK(resource.is_accessible_from(cudax::devices[0])); @@ -503,7 +503,7 @@ TEST_CASE("Async memory resource peer access") CUDAX_CHECK(resource.is_accessible_from(peers.front())); allocate_and_check_access(resource); - cudax::mr::async_memory_resource another_resource{pool}; + cudax::mr::device_memory_resource another_resource{pool}; CUDAX_CHECK(another_resource.is_accessible_from(peers.front())); allocate_and_check_access(another_resource); @@ -527,8 +527,8 @@ TEST_CASE("Async memory resource peer access") resource.enable_peer_access(peers); // Check the resource using the default pool - cudax::mr::async_memory_resource default_pool_resource{}; - cudax::mr::async_memory_resource another_default_pool_resource{}; + cudax::mr::device_memory_resource default_pool_resource{}; + cudax::mr::device_memory_resource another_default_pool_resource{}; default_pool_resource.enable_peer_access(peers.front()); diff --git a/docs/cudax/memory_resource.rst b/docs/cudax/memory_resource.rst index e37c16c30a1..dc3f5a8f82b 100644 --- a/docs/cudax/memory_resource.rst +++ b/docs/cudax/memory_resource.rst @@ -8,10 +8,10 @@ Memory Resources :maxdepth: 3 ${repo_docs_api_path}/*any__resource* - ${repo_docs_api_path}/enum*async__memory__pool* - ${repo_docs_api_path}/struct*async__memory__pool__properties* - ${repo_docs_api_path}/class*async__memory__pool* - ${repo_docs_api_path}/class*async__memory__resource* + ${repo_docs_api_path}/enum*device__memory__pool* + ${repo_docs_api_path}/struct*memory__pool__properties* + ${repo_docs_api_path}/class*device__memory__pool* + ${repo_docs_api_path}/class*device__memory__resource* ${repo_docs_api_path}/*shared__resource* The ```` header provides: @@ -19,7 +19,7 @@ The ```` header provides: :ref:`any_async_resource ` type erased memory resources similar to ``std::any``. In contrast to :ref:`resource_ref ` they own the contained resource. - - :ref:`async_memory_resource ` A standard C++ interface for *heterogeneous*, + - :ref:`device_memory_resource ` A standard C++ interface for *heterogeneous*, *stream-ordered* memory allocation tailored to the needs of CUDA C++ developers. This design builds off of the success of the `RAPIDS Memory Manager (RMM) `__ project and evolves the design based on lessons learned.