From 3c9b9bd28452357c90883dcf73342020f3d2ecf0 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Mon, 24 Feb 2020 15:02:30 -0500 Subject: [PATCH] Add managed_memory_pointer that is compatible with STL. The existing `cuda::pointer` uses a fancy reference that overloads `operator&`, and some STL implementations misbehave when that operator does not return the actual memory address of the object. Since universal_memory_resource allocates memory that works on both host and device, we need to be able to use these types with stl containers, such as std::vector, std::unique_ptr, etc. This patch adds a managed_pointer implementation that behaves like `cuda::pointer`, but returns a regular c++ reference, allowing the thrust universal allocator to work with STL containers. --- testing/cuda/managed_memory_pointer.cu | 141 +++++++++++++ testing/cuda/managed_memory_pointer.mk | 1 + testing/vector.cu | 15 +- thrust/detail/pointer.inl | 36 +++- thrust/detail/vector_base.inl | 4 +- thrust/mr/allocator.h | 2 + .../cuda/detail/managed_memory_pointer.h | 195 ++++++++++++++++++ thrust/system/cuda/memory_resource.h | 3 +- 8 files changed, 385 insertions(+), 12 deletions(-) create mode 100644 testing/cuda/managed_memory_pointer.cu create mode 100644 testing/cuda/managed_memory_pointer.mk create mode 100644 thrust/system/cuda/detail/managed_memory_pointer.h diff --git a/testing/cuda/managed_memory_pointer.cu b/testing/cuda/managed_memory_pointer.cu new file mode 100644 index 000000000..46a2191fa --- /dev/null +++ b/testing/cuda/managed_memory_pointer.cu @@ -0,0 +1,141 @@ +#include + +#if THRUST_CPP_DIALECT >= 2011 + +# include + +# include +# include +# include +# include + +# include +# include + +namespace +{ + +template +using allocator = + thrust::mr::stateless_resource_allocator; + +// The managed_memory_pointer class should be identified as a +// contiguous_iterator +THRUST_STATIC_ASSERT( + thrust::is_contiguous_iterator::pointer>::value); + +template +struct some_object { + some_object(T data) + : m_data(data) + {} + + void setter(T data) { m_data = data; } + T getter() const { return m_data; } + +private: + T m_data; +}; + +} // namespace + +template +void TestAllocateUnique() +{ + // Simple test to ensure that pointers created with universal_memory_resource + // can be dereferenced and used with STL code. This is necessary as some + // STL implementations break when using fancy references that overload + // `operator&`, so universal_memory_resource uses a special pointer type that + // returns regular C++ references that can be safely used host-side. + + // These operations fail to compile with fancy references: + auto pRaw = thrust::allocate_unique(allocator{}, 42); + auto pObj = + thrust::allocate_unique >(allocator >{}, 42); + + static_assert( + std::is_same >::value, + "Unexpected pointer returned from unique_ptr::get."); + static_assert( + std::is_same > >::value, + "Unexpected pointer returned from unique_ptr::get."); + + ASSERT_EQUAL(*pRaw, T(42)); + ASSERT_EQUAL(*pRaw.get(), T(42)); + ASSERT_EQUAL(pObj->getter(), T(42)); + ASSERT_EQUAL((*pObj).getter(), T(42)); + ASSERT_EQUAL(pObj.get()->getter(), T(42)); + ASSERT_EQUAL((*pObj.get()).getter(), T(42)); +} +DECLARE_GENERIC_UNITTEST(TestAllocateUnique); + +template +void TestIterationRaw() +{ + auto array = thrust::allocate_unique_n(allocator{}, 6, 42); + + static_assert( + std::is_same >::value, + "Unexpected pointer returned from unique_ptr::get."); + + for (auto iter = array.get(), end = array.get() + 6; iter < end; ++iter) + { + ASSERT_EQUAL(*iter, T(42)); + ASSERT_EQUAL(*iter.get(), T(42)); + } +} +DECLARE_GENERIC_UNITTEST(TestIterationRaw); + +template +void TestIterationObj() +{ + auto array = + thrust::allocate_unique_n >(allocator >{}, + 6, + 42); + + static_assert( + std::is_same > >::value, + "Unexpected pointer returned from unique_ptr::get."); + + for (auto iter = array.get(), end = array.get() + 6; iter < end; ++iter) + { + ASSERT_EQUAL(iter->getter(), T(42)); + ASSERT_EQUAL((*iter).getter(), T(42)); + ASSERT_EQUAL(iter.get()->getter(), T(42)); + ASSERT_EQUAL((*iter.get()).getter(), T(42)); + } +} +DECLARE_GENERIC_UNITTEST(TestIterationObj); + +template +void TestStdVector() +{ + // Verify that a std::vector using the universal allocator will work with + // STL algorithms. + std::vector > v0; + + static_assert( + std::is_same::type::pointer, + thrust::system::cuda::detail::managed_memory_pointer< + T > >::value, + "Unexpected pointer returned from unique_ptr::get."); + + v0.resize(6); + std::iota(v0.begin(), v0.end(), 0); + ASSERT_EQUAL(v0[0], T(0)); + ASSERT_EQUAL(v0[1], T(1)); + ASSERT_EQUAL(v0[2], T(2)); + ASSERT_EQUAL(v0[3], T(3)); + ASSERT_EQUAL(v0[4], T(4)); + ASSERT_EQUAL(v0[5], T(5)); +} +DECLARE_GENERIC_UNITTEST(TestStdVector); + +#endif // C++11 diff --git a/testing/cuda/managed_memory_pointer.mk b/testing/cuda/managed_memory_pointer.mk new file mode 100644 index 000000000..7d930481e --- /dev/null +++ b/testing/cuda/managed_memory_pointer.mk @@ -0,0 +1 @@ +CUDACC_FLAGS += -rdc=true diff --git a/testing/vector.cu b/testing/vector.cu index 28db257d8..ed39d0edf 100644 --- a/testing/vector.cu +++ b/testing/vector.cu @@ -52,24 +52,27 @@ DECLARE_VECTOR_UNITTEST(TestVectorFrontBack); template void TestVectorData(void) { + typedef typename Vector::pointer PointerT; + typedef typename Vector::const_pointer PointerConstT; + Vector v(3); v[0] = 0; v[1] = 1; v[2] = 2; ASSERT_EQUAL(0, *v.data()); ASSERT_EQUAL(1, *(v.data() + 1)); ASSERT_EQUAL(2, *(v.data() + 2)); - ASSERT_EQUAL(&v.front(), v.data()); - ASSERT_EQUAL(&*v.begin(), v.data()); - ASSERT_EQUAL(&v[0], v.data()); + ASSERT_EQUAL(PointerT(&v.front()), v.data()); + ASSERT_EQUAL(PointerT(&*v.begin()), v.data()); + ASSERT_EQUAL(PointerT(&v[0]), v.data()); const Vector &c_v = v; ASSERT_EQUAL(0, *c_v.data()); ASSERT_EQUAL(1, *(c_v.data() + 1)); ASSERT_EQUAL(2, *(c_v.data() + 2)); - ASSERT_EQUAL(&c_v.front(), c_v.data()); - ASSERT_EQUAL(&*c_v.begin(), c_v.data()); - ASSERT_EQUAL(&c_v[0], c_v.data()); + ASSERT_EQUAL(PointerConstT(&c_v.front()), c_v.data()); + ASSERT_EQUAL(PointerConstT(&*c_v.begin()), c_v.data()); + ASSERT_EQUAL(PointerConstT(&c_v[0]), c_v.data()); } DECLARE_VECTOR_UNITTEST(TestVectorData); diff --git a/thrust/detail/pointer.inl b/thrust/detail/pointer.inl index 66e7cdf36..464c3579e 100644 --- a/thrust/detail/pointer.inl +++ b/thrust/detail/pointer.inl @@ -16,6 +16,7 @@ #include #include +#include namespace thrust @@ -109,14 +110,43 @@ template return static_cast(*this); } // end pointer::operator= +namespace detail +{ + +// Implementation for dereference() when Reference is Element&, +// e.g. cuda's managed_memory_pointer +template +__host__ __device__ +Reference pointer_dereference_impl(const Derived& ptr, + thrust::detail::true_type /* is_cpp_ref */) +{ + return *ptr.get(); +} + +// Implementation for pointers with proxy references: +template +__host__ __device__ +Reference pointer_dereference_impl(const Derived& ptr, + thrust::detail::false_type /* is_cpp_ref */) +{ + return Reference(ptr); +} + +} // namespace detail template __host__ __device__ typename pointer::super_t::reference - pointer - ::dereference() const + pointer + ::dereference() const { - return typename super_t::reference(static_cast(*this)); + // Need to handle cpp refs and fancy refs differently: + typedef typename super_t::reference RefT; + typedef typename thrust::detail::is_reference::type IsCppRef; + + const derived_type& derivedPtr = static_cast(*this); + + return detail::pointer_dereference_impl(derivedPtr, IsCppRef()); } // end pointer::dereference diff --git a/thrust/detail/vector_base.inl b/thrust/detail/vector_base.inl index 77fd4e7de..9d5511e26 100644 --- a/thrust/detail/vector_base.inl +++ b/thrust/detail/vector_base.inl @@ -540,7 +540,7 @@ template vector_base ::data(void) { - return &front(); + return pointer(&front()); } // end vector_base::data() template @@ -548,7 +548,7 @@ template vector_base ::data(void) const { - return &front(); + return const_pointer(&front()); } // end vector_base::data() template diff --git a/thrust/mr/allocator.h b/thrust/mr/allocator.h index 8315f5fce..7645759ea 100644 --- a/thrust/mr/allocator.h +++ b/thrust/mr/allocator.h @@ -22,6 +22,7 @@ #include +#include #include #include @@ -93,6 +94,7 @@ class allocator : private validator * * \returns the maximum value of \p std::size_t, divided by the size of \p T. */ + __thrust_exec_check_disable__ __host__ __device__ size_type max_size() const { diff --git a/thrust/system/cuda/detail/managed_memory_pointer.h b/thrust/system/cuda/detail/managed_memory_pointer.h new file mode 100644 index 000000000..c6a4c9756 --- /dev/null +++ b/thrust/system/cuda/detail/managed_memory_pointer.h @@ -0,0 +1,195 @@ +/* + * Copyright 2020 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +namespace thrust +{ +namespace system +{ +namespace cuda +{ +namespace detail +{ + +// forward decl for iterator traits: +template +class managed_memory_pointer; + +} // end namespace detail +} // end namespace cuda +} // end namespace system + +// Specialize iterator traits to define `pointer` to something meaningful. +template +struct iterator_traits > > { +private: + typedef thrust::pointer< + Element, + Tag, + Reference, + thrust::system::cuda::detail::managed_memory_pointer > + ptr; + +public: + typedef typename ptr::iterator_category iterator_category; + typedef typename ptr::value_type value_type; + typedef typename ptr::difference_type difference_type; + typedef Element* pointer; + typedef typename ptr::reference reference; +}; // end iterator_traits + +namespace system +{ +namespace cuda +{ +namespace detail +{ + +/*! A version of thrust::cuda_cub::pointer that uses c++ references instead + * of thrust::cuda::reference. This is to allow managed memory pointers to + * be used with host-side code in standard libraries that are not compatible + * with proxy references. + */ +template +class managed_memory_pointer + : public thrust::pointer< + T, + thrust::cuda_cub::tag, + typename thrust::detail::add_reference::type, + thrust::system::cuda::detail::managed_memory_pointer > +{ +private: + typedef thrust::pointer< + T, + thrust::cuda_cub::tag, + typename thrust::detail::add_reference::type, + thrust::system::cuda::detail::managed_memory_pointer > + super_t; + +public: + typedef typename super_t::raw_pointer pointer; + + /*! \p managed_memory_pointer's no-argument constructor initializes its + * encapsulated pointer to \c 0. + */ + __host__ __device__ managed_memory_pointer() + : super_t() + {} + +#if THRUST_CPP_DIALECT >= 2011 + // NOTE: This is needed so that Thrust smart pointers can be used in + // `std::unique_ptr`. + __host__ __device__ managed_memory_pointer(decltype(nullptr)) + : super_t(nullptr) + {} +#endif + + /*! This constructor allows construction of a from a + * T*. + * + * \param ptr A raw pointer to copy from, presumed to point to a location + * in memory accessible by the \p cuda system. \tparam OtherT \p OtherT + * shall be convertible to \p T. + */ + template + __host__ __device__ explicit managed_memory_pointer(OtherT* ptr) + : super_t(ptr) + {} + + /*! This constructor allows construction from another pointer-like object + * with related type. + * + * \param other The \p OtherPointer to copy. + * \tparam OtherPointer The system tag associated with \p OtherPointer + * shall be convertible to \p thrust::system::cuda::tag and its element + * type shall be convertible to \p T. + */ + template + __host__ __device__ managed_memory_pointer( + const OtherPointer& other, + typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + managed_memory_pointer>::type* = 0) + : super_t(other) + {} + + /*! This constructor allows construction from another pointer-like object + * with \p void type. + * + * \param other The \p OtherPointer to copy. + * \tparam OtherPointer The system tag associated with \p OtherPointer + * shall be convertible to \p thrust::system::cuda::tag and its element + * type shall be \p void. + */ + template + __host__ __device__ explicit managed_memory_pointer( + const OtherPointer& other, + typename thrust::detail::enable_if_void_pointer_is_system_convertible< + OtherPointer, + managed_memory_pointer>::type* = 0) + : super_t(other) + {} + + /*! Assignment operator allows assigning from another pointer-like object + * with related type. + * + * \param other The other pointer-like object to assign from. + * \tparam OtherPointer The system tag associated with \p OtherPointer + * shall be convertible to \p thrust::system::cuda::tag and its element + * type shall be convertible to \p T. + */ + template + __host__ __device__ typename thrust::detail::enable_if_pointer_is_convertible< + OtherPointer, + managed_memory_pointer, + managed_memory_pointer&>::type + operator=(const OtherPointer& other) + { + return super_t::operator=(other); + } + +#if THRUST_CPP_DIALECT >= 2011 + // NOTE: This is needed so that Thrust smart pointers can be used in + // `std::unique_ptr`. + __host__ __device__ managed_memory_pointer& operator=(decltype(nullptr)) + { + super_t::operator=(nullptr); + return *this; + } +#endif + + __host__ __device__ + pointer operator->() const + { + return this->get(); + } + +}; // class managed_memory_pointer + +} // namespace detail +} // namespace cuda +} // namespace system +} // namespace thrust diff --git a/thrust/system/cuda/memory_resource.h b/thrust/system/cuda/memory_resource.h index 1e2896ffe..2298981f7 100644 --- a/thrust/system/cuda/memory_resource.h +++ b/thrust/system/cuda/memory_resource.h @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -86,7 +87,7 @@ namespace detail thrust::cuda::pointer > device_memory_resource; typedef detail::cuda_memory_resource > + detail::managed_memory_pointer > managed_memory_resource; typedef detail::cuda_memory_resource