PODVector Updates (#3425)
Remove the deprecated and unused PolymorphicAllocator; it has been replaced by PolymorphicArenaAllocator.

Restrict PODVector's Allocator to std::allocator and AMReX's various Arena-based allocators. This simplifies the implementation of PODVector, because std::allocator is stateless and the Arena-based allocators remain simple even when polymorphic.

Fix a few issues with PODVectors that use a PolymorphicArenaAllocator. For example, the copy assignment operator should copy the Allocator, and the copy constructor should account for the possibility that the other PODVector uses a different type of Arena.
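
For instance (a hedged sketch, not code from this PR; the allocator-taking PODVector constructor is assumed to mirror std::vector's equivalent):

    // Illustration of the intended copy semantics. The allocator-taking
    // constructor is an assumption modeled on std::vector.
    #include <AMReX_GpuAllocators.H>
    #include <AMReX_PODVector.H>

    void copy_semantics_sketch ()
    {
        using Alloc = amrex::PolymorphicArenaAllocator<int>;
        amrex::PODVector<int,Alloc> a(Alloc{amrex::The_Pinned_Arena()});
        amrex::PODVector<int,Alloc> b;     // allocator defaults to The_Arena()
        b = a;   // copy assignment copies the Allocator: b now uses the pinned Arena
        amrex::PODVector<int,Alloc> c(a);  // copy ctor must cope with a's Arena type
    }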

Add placeholders for potentially growing and shrinking the memory allocation in-place; these will be implemented in a follow-up PR.

Update PODVector's growth strategy, which should help reduce memory consumption (a sketch of the new rules follows the list below).

* Always try to grow in-place first.

* For constructors, assign, operator=, resize, and reserve, allocate exactly the specified size, with no extra capacity.

* For push_back and emplace_back, when growth is needed, grow the capacity by a factor of 1.5 by default.

* For insert, when growth is needed, grow the capacity either by that factor (1.5 by default) or to the new size, whichever is greater.
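
A minimal sketch of these rules (illustrative only; the names and integer arithmetic here are not the actual PODVector internals):

    #include <algorithm>
    #include <cstddef>

    constexpr std::size_t GROW_NUM = 3, GROW_DEN = 2;  // default factor of 1.5

    // push_back / emplace_back: multiply the current capacity by the factor.
    std::size_t new_capacity_push (std::size_t cap)
    {
        return std::max<std::size_t>(cap*GROW_NUM/GROW_DEN, cap+1);
    }

    // insert: factor-grown capacity or the requested size, whichever is greater.
    std::size_t new_capacity_insert (std::size_t cap, std::size_t new_size)
    {
        return std::max(cap*GROW_NUM/GROW_DEN, new_size);
    }

    // constructors, assign, operator=, resize, reserve: exactly the requested size.
    std::size_t new_capacity_exact (std::size_t new_size) { return new_size; }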
WeiqunZhang authored Jul 17, 2023
1 parent 01b750d commit 10b6cb2
Showing 8 changed files with 586 additions and 619 deletions.
2 changes: 1 addition & 1 deletion Src/AmrCore/AMReX_TagBox.cpp
@@ -492,7 +492,7 @@ TagBoxArray::local_collate_gpu (Gpu::PinnedVector<IntVect>& v) const
Gpu::dtoh_memcpy(hv_ntags.data(), dv_ntags.data(), ntotblocks*sizeof(int));

Gpu::PinnedVector<int> hv_tags_offset(ntotblocks+1);
hv_tags_offset[0] = 0;
if (! hv_tags_offset.empty()) { hv_tags_offset[0] = 0; }
std::partial_sum(hv_ntags.begin(), hv_ntags.end(), hv_tags_offset.begin()+1);
int ntotaltags = hv_tags_offset.back();

21 changes: 21 additions & 0 deletions Src/Base/AMReX_Arena.H
@@ -7,6 +7,7 @@
#include <cstddef>
#include <cstdlib>
#include <limits>
#include <utility>

namespace amrex {

@@ -100,6 +101,26 @@ public:
* \return a pointer to the allocated memory
*/
[[nodiscard]] virtual void* alloc (std::size_t sz) = 0;

/**
 * Try to allocate in-place by extending the capacity of the given pointer.
 * Returns the (possibly new) pointer and the size of its allocation in bytes.
 */
[[nodiscard]] virtual std::pair<void*,std::size_t>
alloc_in_place (void* /*pt*/, std::size_t /*szmin*/, std::size_t szmax)
{
auto* p = alloc(szmax);
return std::make_pair(p, szmax);
}

/**
 * Try to shrink the allocation in-place.
 */
[[nodiscard]] virtual void*
shrink_in_place (void* /*pt*/, std::size_t sz)
{
return alloc(sz);
}

/**
* \brief A pure virtual function for deleting the arena pointed to by pt
*/
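
A hedged sketch of how a caller might use these hooks (the copy-and-free fallback is inferred from the default implementation above, which simply allocates szmax bytes afresh):

    #include <AMReX_Arena.H>
    #include <cstring>
    #include <utility>

    // Grow a buffer to at least min_sz bytes (preferring max_sz), moving the
    // data only when the arena could not extend the allocation in place.
    std::pair<void*,std::size_t>
    grow_buffer (amrex::Arena* ar, void* p, std::size_t used_sz,
                 std::size_t min_sz, std::size_t max_sz)
    {
        auto pn = ar->alloc_in_place(p, min_sz, max_sz);
        if (pn.first != p) {   // fresh allocation: relocate the data, free the old block
            std::memcpy(pn.first, p, used_sz);
            ar->free(p);
        }
        return pn;             // {pointer, capacity obtained in bytes}
    }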
265 changes: 116 additions & 149 deletions Src/Base/AMReX_GpuAllocators.H
@@ -20,205 +20,156 @@
namespace amrex {

template <typename T>
struct RunOnGpu : std::false_type {};

template <typename T>
struct IsPolymorphicArenaAllocator : std::false_type {};

struct ArenaAllocatorTraits {
typedef std::true_type propagate_on_container_copy_assignment;
typedef std::true_type propagate_on_container_move_assignment;
typedef std::true_type propagate_on_container_swap;
typedef std::true_type is_always_equal;
struct FatPtr
{
T* m_ptr = nullptr;
std::size_t m_size = 0;
[[nodiscard]] constexpr T* ptr () const noexcept { return m_ptr; }
[[nodiscard]] constexpr std::size_t size () const noexcept { return m_size; }
};

template<typename T>
class ArenaAllocator
: public ArenaAllocatorTraits
template <class T, class AR>
struct ArenaAllocatorBase
{
public :

using value_type = T;
using arena_wrapper_type = AR;

inline value_type* allocate(std::size_t n)
{
value_type* result = nullptr;
result = (value_type*) The_Arena()->alloc(n * sizeof(T));
return result;
}
constexpr ArenaAllocatorBase () = default;
explicit constexpr ArenaAllocatorBase (AR a_ar) : m_ar(a_ar) {}

inline void deallocate(value_type* ptr, std::size_t)
[[nodiscard]] T* allocate (std::size_t n)
{
if (ptr != nullptr) { The_Arena()->free(ptr); }
return (T*) arena()->alloc(n * sizeof(T));
}
};

template<typename T>
class DeviceArenaAllocator
: public ArenaAllocatorTraits
{
public :

using value_type = T;

inline value_type* allocate(std::size_t n)
[[nodiscard]] FatPtr<T>
allocate_in_place (T* p, std::size_t nmin, std::size_t nmax)
{
value_type* result = nullptr;
result = (value_type*) The_Device_Arena()->alloc(n * sizeof(T));
return result;
auto pn = arena()->alloc_in_place(p, nmin*sizeof(T), nmax*sizeof(T));
return FatPtr<T>{(T*)pn.first, pn.second/sizeof(T)};
}

inline void deallocate(value_type* ptr, std::size_t)
[[nodiscard]] T*
shrink_in_place (T* p, std::size_t n)
{
if (ptr != nullptr) { The_Device_Arena()->free(ptr); }
return (T*) arena()->shrink_in_place(p,n*sizeof(T));
}
};

template<typename T>
class PinnedArenaAllocator
: public ArenaAllocatorTraits
{
public :

using value_type = T;

inline value_type* allocate(std::size_t n)
void deallocate (T* ptr, std::size_t)
{
value_type* result = nullptr;
result = (value_type*) The_Pinned_Arena()->alloc(n * sizeof(T));
return result;
if (ptr != nullptr) { arena()->free(ptr); }
}

inline void deallocate(value_type* ptr, std::size_t)
{
if (ptr != nullptr) { The_Pinned_Arena()->free(ptr); }
[[nodiscard]] Arena* arena () const noexcept {
return m_ar.arena();
}
};

template<typename T>
class ManagedArenaAllocator
: public ArenaAllocatorTraits
{
public :

using value_type = T;
private:
AR m_ar{};
};

inline value_type* allocate(std::size_t n)
{
value_type* result = nullptr;
result = (value_type*) The_Managed_Arena()->alloc(n * sizeof(T));
return result;
}

inline void deallocate(value_type* ptr, std::size_t)
{
if (ptr != nullptr) { The_Managed_Arena()->free(ptr); }
struct ArenaWrapper {
[[nodiscard]] static Arena* arena () noexcept {
return The_Arena();
}
};

template<typename T>
class AsyncArenaAllocator
: public ArenaAllocatorTraits
{
public :

using value_type = T;

inline value_type* allocate(std::size_t n)
{
value_type* result = nullptr;
result = (value_type*) The_Async_Arena()->alloc(n * sizeof(T));
return result;
struct DeviceArenaWrapper {
[[nodiscard]] static Arena* arena () noexcept {
return The_Device_Arena();
}
};

inline void deallocate(value_type* ptr, std::size_t)
{
if (ptr != nullptr) { The_Async_Arena()->free(ptr); }
struct PinnedArenaWrapper {
[[nodiscard]] static Arena* arena () noexcept {
return The_Pinned_Arena();
}
};

template<typename T>
class PolymorphicArenaAllocator
: public ArenaAllocatorTraits
{
public :

using value_type = T;

inline value_type* allocate(std::size_t n)
{
value_type* result = nullptr;
result = (value_type*) arena()->alloc(n * sizeof(T));
return result;
struct ManagedArenaWrapper {
[[nodiscard]] static Arena* arena () noexcept {
return The_Managed_Arena();
}
};

inline void deallocate(value_type* ptr, std::size_t)
{
if (ptr != nullptr) { arena()->free(ptr); }
struct AsyncArenaWrapper {
[[nodiscard]] static Arena* arena () noexcept {
return The_Async_Arena();
}
};

struct PolymorphicArenaWrapper {
constexpr PolymorphicArenaWrapper () = default;
explicit constexpr PolymorphicArenaWrapper (Arena* a_arena)
: m_arena(a_arena) {}
[[nodiscard]] Arena* arena () const noexcept {
return (m_arena) ? m_arena : The_Arena();
}

Arena* m_arena = nullptr;
};

template<typename T>
class PolymorphicAllocator
class ArenaAllocator
: public ArenaAllocatorBase<T,ArenaWrapper>
{
public :

using value_type = T;
};

PolymorphicAllocator () : m_use_gpu_aware_mpi(ParallelDescriptor::UseGpuAwareMpi()) {}
template<typename T>
class DeviceArenaAllocator
: public ArenaAllocatorBase<T,DeviceArenaWrapper>
{
};

inline value_type* allocate(std::size_t n)
{
value_type* result = nullptr;
if (m_use_gpu_aware_mpi)
{
result = (value_type*) The_Arena()->alloc(n * sizeof(T));
}
else
{
result = (value_type*) The_Pinned_Arena()->alloc(n * sizeof(T));
}
return result;
}
template<typename T>
class PinnedArenaAllocator
: public ArenaAllocatorBase<T,PinnedArenaWrapper>
{
};

inline void deallocate(value_type* ptr, std::size_t)
{
if (ptr != nullptr)
{
if (m_use_gpu_aware_mpi)
{
The_Arena()->free(ptr);
}
else
{
The_Pinned_Arena()->free(ptr);
}
}
}
template<typename T>
class ManagedArenaAllocator
: public ArenaAllocatorBase<T,ManagedArenaWrapper>
{
};

bool m_use_gpu_aware_mpi;
template<typename T>
class AsyncArenaAllocator
: public ArenaAllocatorBase<T,AsyncArenaWrapper>
{
};

template <class U, class V>
friend bool
operator== (PolymorphicAllocator<U> const& a, PolymorphicAllocator<V> const& b) noexcept
template<typename T>
class PolymorphicArenaAllocator
: public ArenaAllocatorBase<T,PolymorphicArenaWrapper>
{
public :
constexpr PolymorphicArenaAllocator () = default;
explicit constexpr PolymorphicArenaAllocator (Arena* a_arena)
: ArenaAllocatorBase<T,PolymorphicArenaWrapper>
(PolymorphicArenaWrapper(a_arena))
{}
void setArena (Arena* a_ar) noexcept
{
return a.m_use_gpu_aware_mpi == b.m_use_gpu_aware_mpi;
*this = PolymorphicArenaAllocator<T>(a_ar);
}
};

template <class U, class V>
friend bool
operator!= (PolymorphicAllocator<U> const& a, PolymorphicAllocator<V> const& b) noexcept
{
return a.m_use_gpu_aware_mpi != b.m_use_gpu_aware_mpi;
}
template <typename T>
struct RunOnGpu : std::false_type {};

};
template <class T, class Enable = void>
struct IsArenaAllocator : std::false_type {};
//
template <class T>
struct IsArenaAllocator
<T,std::enable_if_t<std::is_base_of
<ArenaAllocatorBase<typename T::value_type,
typename T::arena_wrapper_type>,
T>::value>>
: std::true_type {};

template <typename T>
struct IsPolymorphicArenaAllocator : std::false_type {};

#ifdef AMREX_USE_GPU
template <typename T>
@@ -246,6 +197,22 @@ namespace amrex {
using DefaultAllocator = std::allocator<T>;
#endif // AMREX_USE_GPU

template <typename A1, typename A2,
std::enable_if_t<IsArenaAllocator<A1>::value &&
IsArenaAllocator<A2>::value, int> = 0>
bool operator== (A1 const& a1, A2 const& a2)
{
return a1.arena() == a2.arena();
}

template <typename A1, typename A2,
std::enable_if_t<IsArenaAllocator<A1>::value &&
IsArenaAllocator<A2>::value, int> = 0>
bool operator!= (A1 const& a1, A2 const& a2)
{
return a1.arena() != a2.arena();
}

} // namespace amrex

#endif // AMREX_GPUALLOCATORS_H_
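
After the refactor, every Arena-backed allocator exposes arena(), the new IsArenaAllocator trait detects such types, and equality means "same Arena". A brief sketch (assumes amrex::Initialize has been called; not code from this PR):

    #include <AMReX_GpuAllocators.H>
    #include <memory>

    static_assert(amrex::IsArenaAllocator<amrex::ArenaAllocator<double>>::value);
    static_assert(!amrex::IsArenaAllocator<std::allocator<double>>::value);

    bool equality_sketch ()
    {
        amrex::PinnedArenaAllocator<int> pinned;     // always The_Pinned_Arena()
        amrex::PolymorphicArenaAllocator<int> poly;  // defaults to The_Arena()
        bool different = (poly != pinned);           // true: different Arenas
        poly.setArena(amrex::The_Pinned_Arena());
        return different && (poly == pinned);        // now equal: same Arena
    }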
13 changes: 0 additions & 13 deletions Src/Base/AMReX_GpuContainers.H
@@ -61,16 +61,6 @@ namespace amrex::Gpu {
template <class T>
using HostVector = PinnedVector<T>;

/**
* \brief The behavior of PolymorphicVector changes depending on
* the amrex.use_gpu_aware_mpi runtime flag. If the flag is true,
* this vector will use device memory. If it is false, this Vector
* will use pinned memory.
*
*/
template <class T>
using PolymorphicVector = PODVector<T, PolymorphicAllocator<T> >;

/**
* \brief This is identical to ManagedVector<T>. The ManagedDeviceVector
* form is deprecated and will be removed in a future release.
@@ -101,9 +91,6 @@ namespace amrex::Gpu {

template <class T>
using AsyncVector = PODVector<T>;

template <class T>
using PolymorphicVector = PODVector<T>;
#endif

struct HostToDevice {};
