From 34d5508c8a11272d26fb0768c43732d0c8f267ff Mon Sep 17 00:00:00 2001 From: AlexanderSinn Date: Tue, 27 Aug 2024 12:45:48 +0200 Subject: [PATCH 1/3] TinyProfiler with BArena and PArena --- Src/Base/AMReX_Arena.H | 32 +++++++++++++++++++++++- Src/Base/AMReX_Arena.cpp | 44 +++++++++++++++++++++++++++++++-- Src/Base/AMReX_BArena.cpp | 5 +++- Src/Base/AMReX_CArena.H | 12 --------- Src/Base/AMReX_CArena.cpp | 39 ++++++----------------------- Src/Base/AMReX_PArena.cpp | 2 ++ Src/Base/AMReX_TinyProfiler.H | 2 +- Src/Base/AMReX_TinyProfiler.cpp | 5 ++-- 8 files changed, 91 insertions(+), 50 deletions(-) diff --git a/Src/Base/AMReX_Arena.H b/Src/Base/AMReX_Arena.H index 2a6cbb25a08..52ebd2878f5 100644 --- a/Src/Base/AMReX_Arena.H +++ b/Src/Base/AMReX_Arena.H @@ -4,9 +4,21 @@ #include #include + +#ifdef AMREX_TINY_PROFILING +#include +#else +namespace amrex { + struct MemStat {}; +} +#endif + #include #include #include +#include +#include +#include #include namespace amrex { @@ -156,7 +168,7 @@ public: * \brief Add this Arena to the list of Arenas that are profiled by TinyProfiler. * \param memory_name The name of this arena in the TinyProfiler output. */ - virtual void registerForProfiling (const std::string& memory_name); + void registerForProfiling (const std::string& memory_name); #ifdef AMREX_USE_GPU //! Is this GPU stream ordered memory allocator? @@ -199,6 +211,24 @@ protected: virtual std::size_t freeUnused_protected () { return 0; } void* allocate_system (std::size_t nbytes); void deallocate_system (void* p, std::size_t nbytes); + + struct ArenaProfiler { + //! If this arena is profiled by TinyProfiler + bool m_do_profiling = false; + //! Mutex for the profiling + std::mutex m_arena_profiler_mutex; + //! Data structure used for profiling with TinyProfiler + std::map m_profiling_stats; + //! Track the currently allocated memory, not used by CArena + std::unordered_map> m_currently_allocated; + + ~ArenaProfiler (); + + void alloc (void* ptr, std::size_t nbytes); + + void free (void* ptr); + + } m_profiler; }; } diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp index ce4ece3b643..394a927cb23 100644 --- a/Src/Base/AMReX_Arena.cpp +++ b/Src/Base/AMReX_Arena.cpp @@ -117,9 +117,13 @@ Arena::hasFreeDeviceMemory (std::size_t) } void -Arena::registerForProfiling (const std::string&) +Arena::registerForProfiling ([[maybe_unused]] const std::string& memory_name) { - amrex::Abort("Profiling is not implemented for this type of Arena"); +#ifdef AMREX_TINY_PROFILING + AMREX_ALWAYS_ASSERT(m_profiler.m_do_profiling == false); + m_profiler.m_do_profiling = + TinyProfiler::RegisterArena(memory_name, m_profiler.m_profiling_stats); +#endif } std::size_t @@ -330,6 +334,7 @@ Arena::Initialize () } the_async_arena = new PArena(the_async_arena_release_threshold); + the_async_arena->registerForProfiling("Async Memory"); #ifdef AMREX_USE_GPU if (the_arena->isDevice()) { @@ -403,6 +408,7 @@ Arena::Initialize () } the_cpu_arena = The_BArena(); + the_cpu_arena->registerForProfiling("Cpu Memory"); // Initialize the null arena auto* null_arena = The_Null_Arena(); @@ -654,4 +660,38 @@ The_Comms_Arena () } } +Arena::ArenaProfiler::~ArenaProfiler () { +#ifdef AMREX_TINY_PROFILING + if (m_do_profiling) { + TinyProfiler::DeregisterArena(m_profiling_stats); + } +#endif +} + +void Arena::ArenaProfiler::alloc ([[maybe_unused]] void* ptr, [[maybe_unused]] std::size_t nbytes) { +#ifdef AMREX_TINY_PROFILING + if (m_do_profiling) { + std::lock_guard lock(m_arena_profiler_mutex); + MemStat* stat = TinyProfiler::memory_alloc(nbytes, m_profiling_stats); + if (stat) { + m_currently_allocated.insert({ptr, {stat, nbytes}}); + } + } +#endif +} + +void Arena::ArenaProfiler::free ([[maybe_unused]] void* ptr) { +#ifdef AMREX_TINY_PROFILING + if (m_do_profiling) { + std::lock_guard lock(m_arena_profiler_mutex); + auto it = m_currently_allocated.find(ptr); + if (it != m_currently_allocated.end()) { + auto [stat, nbytes] = it->second; + TinyProfiler::memory_free(nbytes, stat); + m_currently_allocated.erase(it); + } + } +#endif +} + } diff --git a/Src/Base/AMReX_BArena.cpp b/Src/Base/AMReX_BArena.cpp index c22affa687a..6fa75684039 100644 --- a/Src/Base/AMReX_BArena.cpp +++ b/Src/Base/AMReX_BArena.cpp @@ -3,13 +3,16 @@ void* amrex::BArena::alloc (std::size_t sz_) { - return std::malloc(sz_); + void* pt = std::malloc(sz_); + m_profiler.alloc(pt, sz_); + return pt; } void amrex::BArena::free (void* pt) { std::free(pt); + m_profiler.free(pt); } bool diff --git a/Src/Base/AMReX_CArena.H b/Src/Base/AMReX_CArena.H index 9547bc92f21..bc46d008241 100644 --- a/Src/Base/AMReX_CArena.H +++ b/Src/Base/AMReX_CArena.H @@ -16,8 +16,6 @@ namespace amrex { -struct MemStat; - /** * \brief A Concrete Class for Dynamic Memory Management using first fit. * This is a coalescing memory manager. It allocates (possibly) large @@ -75,12 +73,6 @@ public: */ [[nodiscard]] bool hasFreeDeviceMemory (std::size_t sz) final; - /** - * \brief Add this Arena to the list of Arenas that are profiled by TinyProfiler. - * \param memory_name The name of this arena in the TinyProfiler output. - */ - void registerForProfiling (const std::string& memory_name) final; - //! The current amount of heap space used by the CArena object. std::size_t heap_space_used () const noexcept; @@ -191,10 +183,6 @@ protected: std::size_t m_used{0}; //! The amount of memory given out via alloc(). std::size_t m_actually_used{0}; - //! If this arena is profiled by TinyProfiler - bool m_do_profiling = false; - //! Data structure used for profiling with TinyProfiler - std::map m_profiling_stats; std::mutex carena_mutex; diff --git a/Src/Base/AMReX_CArena.cpp b/Src/Base/AMReX_CArena.cpp index 42987f47a86..bc5297f4d52 100644 --- a/Src/Base/AMReX_CArena.cpp +++ b/Src/Base/AMReX_CArena.cpp @@ -5,14 +5,6 @@ #include #include -#ifdef AMREX_TINY_PROFILING -#include -#else -namespace amrex { - struct MemStat {}; -} -#endif - #include #include #include @@ -32,12 +24,6 @@ CArena::~CArena () for (auto const& a : m_alloc) { deallocate_system(a.first, a.second); } - -#ifdef AMREX_TINY_PROFILING - if (m_do_profiling) { - TinyProfiler::DeregisterArena(m_profiling_stats); - } -#endif } void* @@ -53,8 +39,8 @@ CArena::alloc_protected (std::size_t nbytes) { MemStat* stat = nullptr; #ifdef AMREX_TINY_PROFILING - if (m_do_profiling) { - stat = TinyProfiler::memory_alloc(nbytes, m_profiling_stats); + if (m_profiler.m_do_profiling) { + stat = TinyProfiler::memory_alloc(nbytes, m_profiler.m_profiling_stats); } #endif @@ -173,10 +159,10 @@ CArena::alloc_in_place (void* pt, std::size_t szmin, std::size_t szmax) free_node.size(left_size); } #ifdef AMREX_TINY_PROFILING - if (m_do_profiling) { + if (m_profiler.m_do_profiling) { TinyProfiler::memory_free(busy_it->size(), busy_it->mem_stat()); auto* stat = TinyProfiler::memory_alloc(new_size, - m_profiling_stats); + m_profiler.m_profiling_stats); const_cast(*busy_it).mem_stat(stat); } #endif @@ -186,10 +172,10 @@ CArena::alloc_in_place (void* pt, std::size_t szmin, std::size_t szmax) } else if (total_size >= szmin) { m_freelist.erase(next_it); #ifdef AMREX_TINY_PROFILING - if (m_do_profiling) { + if (m_profiler.m_do_profiling) { TinyProfiler::memory_free(busy_it->size(), busy_it->mem_stat()); auto* stat = TinyProfiler::memory_alloc(total_size, - m_profiling_stats); + m_profiler.m_profiling_stats); const_cast(*busy_it).mem_stat(stat); } #endif @@ -255,9 +241,9 @@ CArena::shrink_in_place (void* pt, std::size_t new_size) m_actually_used -= leftover_size; #ifdef AMREX_TINY_PROFILING - if (m_do_profiling) { + if (m_profiler.m_do_profiling) { TinyProfiler::memory_free(old_size, busy_it->mem_stat()); - auto* stat = TinyProfiler::memory_alloc(new_size, m_profiling_stats); + auto* stat = TinyProfiler::memory_alloc(new_size, m_profiler.m_profiling_stats); const_cast(*busy_it).mem_stat(stat); } #endif @@ -431,15 +417,6 @@ CArena::hasFreeDeviceMemory (std::size_t sz) } } -void -CArena::registerForProfiling ([[maybe_unused]] const std::string& memory_name) -{ -#ifdef AMREX_TINY_PROFILING - m_do_profiling = true; - TinyProfiler::RegisterArena(memory_name, m_profiling_stats); -#endif -} - std::size_t CArena::heap_space_used () const noexcept { diff --git a/Src/Base/AMReX_PArena.cpp b/Src/Base/AMReX_PArena.cpp index 36155f3d32c..94100a5155d 100644 --- a/Src/Base/AMReX_PArena.cpp +++ b/Src/Base/AMReX_PArena.cpp @@ -62,6 +62,7 @@ PArena::alloc (std::size_t nbytes) AMREX_HIP_SAFE_CALL(hipMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream()));, AMREX_CUDA_SAFE_CALL(cudaMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream())); ) + m_profiler.alloc(p, nbytes); return p; } else #endif @@ -97,6 +98,7 @@ PArena::free (void* p) AMREX_HIP_SAFE_CALL(hipFreeAsync(p, Gpu::gpuStream()));, AMREX_CUDA_SAFE_CALL(cudaFreeAsync(p, Gpu::gpuStream())); ) + m_profiler.free(p); } else #endif { diff --git a/Src/Base/AMReX_TinyProfiler.H b/Src/Base/AMReX_TinyProfiler.H index 0228949beb1..9a0731175bd 100644 --- a/Src/Base/AMReX_TinyProfiler.H +++ b/Src/Base/AMReX_TinyProfiler.H @@ -57,7 +57,7 @@ public: static void MemoryInitialize () noexcept; static void MemoryFinalize (bool bFlushing = false) noexcept; - static void RegisterArena (const std::string& memory_name, + static bool RegisterArena (const std::string& memory_name, std::map& memstats) noexcept; static void DeregisterArena (std::map& memstats) noexcept; diff --git a/Src/Base/AMReX_TinyProfiler.cpp b/Src/Base/AMReX_TinyProfiler.cpp index db922745784..fab9e38aa82 100644 --- a/Src/Base/AMReX_TinyProfiler.cpp +++ b/Src/Base/AMReX_TinyProfiler.cpp @@ -490,14 +490,15 @@ TinyProfiler::MemoryFinalize (bool bFlushing) noexcept if(os) { os->precision(oldprec); } } -void +bool TinyProfiler::RegisterArena (const std::string& memory_name, std::map& memstats) noexcept { - if (!memprof_enabled) { return; } + if (!memprof_enabled) { return false; } all_memstats.push_back(&memstats); all_memnames.push_back(memory_name); + return true; } void From 020166bc48f62720d9f8bdcd7d3a56304b0249f9 Mon Sep 17 00:00:00 2001 From: AlexanderSinn Date: Tue, 27 Aug 2024 13:03:16 +0200 Subject: [PATCH 2/3] fix use after free --- Src/Base/AMReX_BArena.cpp | 2 +- Src/Base/AMReX_PArena.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Src/Base/AMReX_BArena.cpp b/Src/Base/AMReX_BArena.cpp index 6fa75684039..cfa49ea95f0 100644 --- a/Src/Base/AMReX_BArena.cpp +++ b/Src/Base/AMReX_BArena.cpp @@ -11,8 +11,8 @@ amrex::BArena::alloc (std::size_t sz_) void amrex::BArena::free (void* pt) { - std::free(pt); m_profiler.free(pt); + std::free(pt); } bool diff --git a/Src/Base/AMReX_PArena.cpp b/Src/Base/AMReX_PArena.cpp index 94100a5155d..da445171ceb 100644 --- a/Src/Base/AMReX_PArena.cpp +++ b/Src/Base/AMReX_PArena.cpp @@ -94,11 +94,11 @@ PArena::free (void* p) #if defined (AMREX_GPU_STREAM_ALLOC_SUPPORT) if (Gpu::Device::memoryPoolsSupported()) { + m_profiler.free(p); AMREX_HIP_OR_CUDA( AMREX_HIP_SAFE_CALL(hipFreeAsync(p, Gpu::gpuStream()));, AMREX_CUDA_SAFE_CALL(cudaFreeAsync(p, Gpu::gpuStream())); ) - m_profiler.free(p); } else #endif { From f1cf67d8f34ff04c12986edc149e672c994c2fdd Mon Sep 17 00:00:00 2001 From: AlexanderSinn Date: Tue, 27 Aug 2024 13:31:39 +0200 Subject: [PATCH 3/3] try fix clang tidy --- Src/Base/AMReX_Arena.H | 9 +++++++-- Src/Base/AMReX_Arena.cpp | 5 +++-- Src/Base/AMReX_BArena.cpp | 4 ++-- Src/Base/AMReX_PArena.cpp | 4 ++-- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/Src/Base/AMReX_Arena.H b/Src/Base/AMReX_Arena.H index 52ebd2878f5..51b5d983fdf 100644 --- a/Src/Base/AMReX_Arena.H +++ b/Src/Base/AMReX_Arena.H @@ -223,10 +223,15 @@ protected: std::unordered_map> m_currently_allocated; ~ArenaProfiler (); + ArenaProfiler () noexcept = default; + ArenaProfiler (const ArenaProfiler& rhs) = delete; + ArenaProfiler (ArenaProfiler&& rhs) = delete; + ArenaProfiler& operator= (const ArenaProfiler& rhs) = delete; + ArenaProfiler& operator= (ArenaProfiler&& rhs) = delete; - void alloc (void* ptr, std::size_t nbytes); + void profile_alloc (void* ptr, std::size_t nbytes); - void free (void* ptr); + void profile_free (void* ptr); } m_profiler; }; diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp index 394a927cb23..c2de5464574 100644 --- a/Src/Base/AMReX_Arena.cpp +++ b/Src/Base/AMReX_Arena.cpp @@ -668,7 +668,8 @@ Arena::ArenaProfiler::~ArenaProfiler () { #endif } -void Arena::ArenaProfiler::alloc ([[maybe_unused]] void* ptr, [[maybe_unused]] std::size_t nbytes) { +void Arena::ArenaProfiler::profile_alloc ([[maybe_unused]] void* ptr, + [[maybe_unused]] std::size_t nbytes) { #ifdef AMREX_TINY_PROFILING if (m_do_profiling) { std::lock_guard lock(m_arena_profiler_mutex); @@ -680,7 +681,7 @@ void Arena::ArenaProfiler::alloc ([[maybe_unused]] void* ptr, [[maybe_unused]] s #endif } -void Arena::ArenaProfiler::free ([[maybe_unused]] void* ptr) { +void Arena::ArenaProfiler::profile_free ([[maybe_unused]] void* ptr) { #ifdef AMREX_TINY_PROFILING if (m_do_profiling) { std::lock_guard lock(m_arena_profiler_mutex); diff --git a/Src/Base/AMReX_BArena.cpp b/Src/Base/AMReX_BArena.cpp index cfa49ea95f0..054e64b854c 100644 --- a/Src/Base/AMReX_BArena.cpp +++ b/Src/Base/AMReX_BArena.cpp @@ -4,14 +4,14 @@ void* amrex::BArena::alloc (std::size_t sz_) { void* pt = std::malloc(sz_); - m_profiler.alloc(pt, sz_); + m_profiler.profile_alloc(pt, sz_); return pt; } void amrex::BArena::free (void* pt) { - m_profiler.free(pt); + m_profiler.profile_free(pt); std::free(pt); } diff --git a/Src/Base/AMReX_PArena.cpp b/Src/Base/AMReX_PArena.cpp index da445171ceb..bbe2717ab07 100644 --- a/Src/Base/AMReX_PArena.cpp +++ b/Src/Base/AMReX_PArena.cpp @@ -62,7 +62,7 @@ PArena::alloc (std::size_t nbytes) AMREX_HIP_SAFE_CALL(hipMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream()));, AMREX_CUDA_SAFE_CALL(cudaMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream())); ) - m_profiler.alloc(p, nbytes); + m_profiler.profile_alloc(p, nbytes); return p; } else #endif @@ -94,7 +94,7 @@ PArena::free (void* p) #if defined (AMREX_GPU_STREAM_ALLOC_SUPPORT) if (Gpu::Device::memoryPoolsSupported()) { - m_profiler.free(p); + m_profiler.profile_free(p); AMREX_HIP_OR_CUDA( AMREX_HIP_SAFE_CALL(hipFreeAsync(p, Gpu::gpuStream()));, AMREX_CUDA_SAFE_CALL(cudaFreeAsync(p, Gpu::gpuStream()));