PODVector Updates (#3425)
Remove the deprecated and unused PolymorphicAllocator; it has been replaced by PolymorphicArenaAllocator.

Restrict PODVector's Allocator to std::allocator and AMReX's various Arena-based allocators. This simplifies the implementation of PODVector, because std::allocator is stateless and the Arena-based allocators remain simple even when polymorphic.

Fix a few issues with PODVectors that use a PolymorphicArenaAllocator. For example, the copy assignment operator should copy the Allocator, and the copy constructor should account for the possibility that the other PODVector uses a different type of Arena.
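
For instance (a hedged sketch, not code from this PR; the allocator-taking PODVector constructor is assumed to mirror std::vector's equivalent):

    // Illustration of the intended copy semantics. The allocator-taking
    // constructor is an assumption modeled on std::vector.
    #include <AMReX_GpuAllocators.H>
    #include <AMReX_PODVector.H>

    void copy_semantics_sketch ()
    {
        using Alloc = amrex::PolymorphicArenaAllocator<int>;
        amrex::PODVector<int,Alloc> a(Alloc{amrex::The_Pinned_Arena()});
        amrex::PODVector<int,Alloc> b;     // allocator defaults to The_Arena()
        b = a;   // copy assignment copies the Allocator: b now uses the pinned Arena
        amrex::PODVector<int,Alloc> c(a);  // copy ctor must cope with a's Arena type
    }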

Add placeholders for potentially growing and shrinking the memory allocation in-place; these will be implemented in a follow-up PR.

Update PODVector's growth strategy, which should help reduce memory consumption (a sketch of the new rules follows the list below).

* Always try to grow in-place first.

* For constructors, assign, operator=, resize, and reserve, allocate exactly the specified size, with no extra capacity.

* For push_back and emplace_back, when growth is needed, grow the capacity by a factor of 1.5 by default.

* For insert, when growth is needed, grow the capacity either by that factor (1.5 by default) or to the new size, whichever is greater.
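
A minimal sketch of these rules (illustrative only; the names and integer arithmetic here are not the actual PODVector internals):

    #include <algorithm>
    #include <cstddef>

    constexpr std::size_t GROW_NUM = 3, GROW_DEN = 2;  // default factor of 1.5

    // push_back / emplace_back: multiply the current capacity by the factor.
    std::size_t new_capacity_push (std::size_t cap)
    {
        return std::max<std::size_t>(cap*GROW_NUM/GROW_DEN, cap+1);
    }

    // insert: factor-grown capacity or the requested size, whichever is greater.
    std::size_t new_capacity_insert (std::size_t cap, std::size_t new_size)
    {
        return std::max(cap*GROW_NUM/GROW_DEN, new_size);
    }

    // constructors, assign, operator=, resize, reserve: exactly the requested size.
    std::size_t new_capacity_exact (std::size_t new_size) { return new_size; }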
WeiqunZhang authored Jul 17, 2023
1 parent 01b750d commit 10b6cb2
Showing 8 changed files with 586 additions and 619 deletions.
2 changes: 1 addition & 1 deletion Src/AmrCore/AMReX_TagBox.cpp
@@ -492,7 +492,7 @@ TagBoxArray::local_collate_gpu (Gpu::PinnedVector<IntVect>& v) const
Gpu::dtoh_memcpy(hv_ntags.data(), dv_ntags.data(), ntotblocks*sizeof(int));

Gpu::PinnedVector<int> hv_tags_offset(ntotblocks+1);
hv_tags_offset[0] = 0;
if (! hv_tags_offset.empty()) { hv_tags_offset[0] = 0; }
std::partial_sum(hv_ntags.begin(), hv_ntags.end(), hv_tags_offset.begin()+1);
int ntotaltags = hv_tags_offset.back();

21 changes: 21 additions & 0 deletions Src/Base/AMReX_Arena.H
@@ -7,6 +7,7 @@
#include <cstddef>
#include <cstdlib>
#include <limits>
#include <utility>

namespace amrex {

@@ -100,6 +101,26 @@ public:
* \return a pointer to the allocated memory
*/
[[nodiscard]] virtual void* alloc (std::size_t sz) = 0;

/**
 * Try to allocate in-place by extending the capacity of the given pointer.
 * Returns the (possibly new) pointer and the size of its allocation in bytes.
 */
[[nodiscard]] virtual std::pair<void*,std::size_t>
alloc_in_place (void* /*pt*/, std::size_t /*szmin*/, std::size_t szmax)
{
auto* p = alloc(szmax);
return std::make_pair(p, szmax);
}

/**
 * Try to shrink the allocation in-place.
 */
[[nodiscard]] virtual void*
shrink_in_place (void* /*pt*/, std::size_t sz)
{
return alloc(sz);
}

/**
* \brief A pure virtual function for deleting the arena pointed to by pt
*/
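
A hedged sketch of how a caller might use these hooks (the copy-and-free fallback is inferred from the default implementation above, which simply allocates szmax bytes afresh):

    #include <AMReX_Arena.H>
    #include <cstring>
    #include <utility>

    // Grow a buffer to at least min_sz bytes (preferring max_sz), moving the
    // data only when the arena could not extend the allocation in place.
    std::pair<void*,std::size_t>
    grow_buffer (amrex::Arena* ar, void* p, std::size_t used_sz,
                 std::size_t min_sz, std::size_t max_sz)
    {
        auto pn = ar->alloc_in_place(p, min_sz, max_sz);
        if (pn.first != p) {   // fresh allocation: relocate the data, free the old block
            std::memcpy(pn.first, p, used_sz);
            ar->free(p);
        }
        return pn;             // {pointer, capacity obtained in bytes}
    }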
265 changes: 116 additions & 149 deletions Src/Base/AMReX_GpuAllocators.H
@@ -20,205 +20,156 @@
namespace amrex {

template <typename T>
struct RunOnGpu : std::false_type {};

template <typename T>
struct IsPolymorphicArenaAllocator : std::false_type {};

struct ArenaAllocatorTraits {
typedef std::true_type propagate_on_container_copy_assignment;
typedef std::true_type propagate_on_container_move_assignment;
typedef std::true_type propagate_on_container_swap;
typedef std::true_type is_always_equal;
struct FatPtr
{
T* m_ptr = nullptr;
std::size_t m_size = 0;
[[nodiscard]] constexpr T* ptr () const noexcept { return m_ptr; }
[[nodiscard]] constexpr std::size_t size () const noexcept { return m_size; }
};

template<typename T>
class ArenaAllocator
: public ArenaAllocatorTraits
template <class T, class AR>
struct ArenaAllocatorBase
{
public :

using value_type = T;
using arena_wrapper_type = AR;

inline value_type* allocate(std::size_t n)
{
value_type* result = nullptr;
result = (value_type*) The_Arena()->alloc(n * sizeof(T));
return result;
}
constexpr ArenaAllocatorBase () = default;
explicit constexpr ArenaAllocatorBase (AR a_ar) : m_ar(a_ar) {}

inline void deallocate(value_type* ptr, std::size_t)
[[nodiscard]] T* allocate (std::size_t n)
{
if (ptr != nullptr) { The_Arena()->free(ptr); }
return (T*) arena()->alloc(n * sizeof(T));
}
};

template<typename T>
class DeviceArenaAllocator
: public ArenaAllocatorTraits
{
public :

using value_type = T;

inline value_type* allocate(std::size_t n)
[[nodiscard]] FatPtr<T>
allocate_in_place (T* p, std::size_t nmin, std::size_t nmax)
{
value_type* result = nullptr;
result = (value_type*) The_Device_Arena()->alloc(n * sizeof(T));
return result;
auto pn = arena()->alloc_in_place(p, nmin*sizeof(T), nmax*sizeof(T));
return FatPtr<T>{(T*)pn.first, pn.second/sizeof(T)};
}

inline void deallocate(value_type* ptr, std::size_t)
[[nodiscard]] T*
shrink_in_place (T* p, std::size_t n)
{
if (ptr != nullptr) { The_Device_Arena()->free(ptr); }
return (T*) arena()->shrink_in_place(p,n*sizeof(T));
}
};

template<typename T>
class PinnedArenaAllocator
: public ArenaAllocatorTraits
{
public :

using value_type = T;

inline value_type* allocate(std::size_t n)
void deallocate (T* ptr, std::size_t)
{
value_type* result = nullptr;
result = (value_type*) The_Pinned_Arena()->alloc(n * sizeof(T));
return result;
if (ptr != nullptr) { arena()->free(ptr); }
}

inline void deallocate(value_type* ptr, std::size_t)
{
if (ptr != nullptr) { The_Pinned_Arena()->free(ptr); }
[[nodiscard]] Arena* arena () const noexcept {
return m_ar.arena();
}
};

template<typename T>
class ManagedArenaAllocator
: public ArenaAllocatorTraits
{
public :

using value_type = T;
private:
AR m_ar{};
};

inline value_type* allocate(std::size_t n)
{
value_type* result = nullptr;
result = (value_type*) The_Managed_Arena()->alloc(n * sizeof(T));
return result;
}

inline void deallocate(value_type* ptr, std::size_t)
{
if (ptr != nullptr) { The_Managed_Arena()->free(ptr); }
struct ArenaWrapper {
[[nodiscard]] static Arena* arena () noexcept {
return The_Arena();
}
};

template<typename T>
class AsyncArenaAllocator
: public ArenaAllocatorTraits
{
public :

using value_type = T;

inline value_type* allocate(std::size_t n)
{
value_type* result = nullptr;
result = (value_type*) The_Async_Arena()->alloc(n * sizeof(T));
return result;
struct DeviceArenaWrapper {
[[nodiscard]] static Arena* arena () noexcept {
return The_Device_Arena();
}
};

inline void deallocate(value_type* ptr, std::size_t)
{
if (ptr != nullptr) { The_Async_Arena()->free(ptr); }
struct PinnedArenaWrapper {
[[nodiscard]] static Arena* arena () noexcept {
return The_Pinned_Arena();
}
};

template<typename T>
class PolymorphicArenaAllocator
: public ArenaAllocatorTraits
{
public :

using value_type = T;

inline value_type* allocate(std::size_t n)
{
value_type* result = nullptr;
result = (value_type*) arena()->alloc(n * sizeof(T));
return result;
struct ManagedArenaWrapper {
[[nodiscard]] static Arena* arena () noexcept {
return The_Managed_Arena();
}
};

inline void deallocate(value_type* ptr, std::size_t)
{
if (ptr != nullptr) { arena()->free(ptr); }
struct AsyncArenaWrapper {
[[nodiscard]] static Arena* arena () noexcept {
return The_Async_Arena();
}
};

struct PolymorphicArenaWrapper {
constexpr PolymorphicArenaWrapper () = default;
explicit constexpr PolymorphicArenaWrapper (Arena* a_arena)
: m_arena(a_arena) {}
[[nodiscard]] Arena* arena () const noexcept {
return (m_arena) ? m_arena : The_Arena();
}

Arena* m_arena = nullptr;
};

template<typename T>
class PolymorphicAllocator
class ArenaAllocator
: public ArenaAllocatorBase<T,ArenaWrapper>
{
public :

using value_type = T;
};

PolymorphicAllocator () : m_use_gpu_aware_mpi(ParallelDescriptor::UseGpuAwareMpi()) {}
template<typename T>
class DeviceArenaAllocator
: public ArenaAllocatorBase<T,DeviceArenaWrapper>
{
};

inline value_type* allocate(std::size_t n)
{
value_type* result = nullptr;
if (m_use_gpu_aware_mpi)
{
result = (value_type*) The_Arena()->alloc(n * sizeof(T));
}
else
{
result = (value_type*) The_Pinned_Arena()->alloc(n * sizeof(T));
}
return result;
}
template<typename T>
class PinnedArenaAllocator
: public ArenaAllocatorBase<T,PinnedArenaWrapper>
{
};

inline void deallocate(value_type* ptr, std::size_t)
{
if (ptr != nullptr)
{
if (m_use_gpu_aware_mpi)
{
The_Arena()->free(ptr);
}
else
{
The_Pinned_Arena()->free(ptr);
}
}
}
template<typename T>
class ManagedArenaAllocator
: public ArenaAllocatorBase<T,ManagedArenaWrapper>
{
};

bool m_use_gpu_aware_mpi;
template<typename T>
class AsyncArenaAllocator
: public ArenaAllocatorBase<T,AsyncArenaWrapper>
{
};

template <class U, class V>
friend bool
operator== (PolymorphicAllocator<U> const& a, PolymorphicAllocator<V> const& b) noexcept
template<typename T>
class PolymorphicArenaAllocator
: public ArenaAllocatorBase<T,PolymorphicArenaWrapper>
{
public :
constexpr PolymorphicArenaAllocator () = default;
explicit constexpr PolymorphicArenaAllocator (Arena* a_arena)
: ArenaAllocatorBase<T,PolymorphicArenaWrapper>
(PolymorphicArenaWrapper(a_arena))
{}
void setArena (Arena* a_ar) noexcept
{
return a.m_use_gpu_aware_mpi == b.m_use_gpu_aware_mpi;
*this = PolymorphicArenaAllocator<T>(a_ar);
}
};

template <class U, class V>
friend bool
operator!= (PolymorphicAllocator<U> const& a, PolymorphicAllocator<V> const& b) noexcept
{
return a.m_use_gpu_aware_mpi != b.m_use_gpu_aware_mpi;
}
template <typename T>
struct RunOnGpu : std::false_type {};

};
template <class T, class Enable = void>
struct IsArenaAllocator : std::false_type {};
//
template <class T>
struct IsArenaAllocator
<T,std::enable_if_t<std::is_base_of
<ArenaAllocatorBase<typename T::value_type,
typename T::arena_wrapper_type>,
T>::value>>
: std::true_type {};

template <typename T>
struct IsPolymorphicArenaAllocator : std::false_type {};

#ifdef AMREX_USE_GPU
template <typename T>
@@ -246,6 +197,22 @@ namespace amrex {
using DefaultAllocator = std::allocator<T>;
#endif // AMREX_USE_GPU

template <typename A1, typename A2,
std::enable_if_t<IsArenaAllocator<A1>::value &&
IsArenaAllocator<A2>::value, int> = 0>
bool operator== (A1 const& a1, A2 const& a2)
{
return a1.arena() == a2.arena();
}

template <typename A1, typename A2,
std::enable_if_t<IsArenaAllocator<A1>::value &&
IsArenaAllocator<A2>::value, int> = 0>
bool operator!= (A1 const& a1, A2 const& a2)
{
return a1.arena() != a2.arena();
}

} // namespace amrex

#endif // AMREX_GPUALLOCATORS_H_
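
After the refactor, every Arena-backed allocator exposes arena(), the new IsArenaAllocator trait detects such types, and equality means "same Arena". A brief sketch (assumes amrex::Initialize has been called; not code from this PR):

    #include <AMReX_GpuAllocators.H>
    #include <memory>

    static_assert(amrex::IsArenaAllocator<amrex::ArenaAllocator<double>>::value);
    static_assert(!amrex::IsArenaAllocator<std::allocator<double>>::value);

    bool equality_sketch ()
    {
        amrex::PinnedArenaAllocator<int> pinned;     // always The_Pinned_Arena()
        amrex::PolymorphicArenaAllocator<int> poly;  // defaults to The_Arena()
        bool different = (poly != pinned);           // true: different Arenas
        poly.setArena(amrex::The_Pinned_Arena());
        return different && (poly == pinned);        // now equal: same Arena
    }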
13 changes: 0 additions & 13 deletions Src/Base/AMReX_GpuContainers.H
@@ -61,16 +61,6 @@ namespace amrex::Gpu {
template <class T>
using HostVector = PinnedVector<T>;

/**
* \brief The behavior of PolymorphicVector changes depending on
* the amrex.use_gpu_aware_mpi runtime flag. If the flag is true,
* this vector will use device memory. If it is false, this Vector
* will use pinned memory.
*
*/
template <class T>
using PolymorphicVector = PODVector<T, PolymorphicAllocator<T> >;

/**
* \brief This is identical to ManagedVector<T>. The ManagedDeviceVector
* form is deprecated and will be removed in a future release.
@@ -101,9 +91,6 @@ namespace amrex::Gpu {

template <class T>
using AsyncVector = PODVector<T>;

template <class T>
using PolymorphicVector = PODVector<T>;
#endif

struct HostToDevice {};
