Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TinyProfiler with BArena and PArena #4113

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion Src/Base/AMReX_Arena.H
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,21 @@

#include <AMReX_BLassert.H>
#include <AMReX_INT.H>

#ifdef AMREX_TINY_PROFILING
#include <AMReX_TinyProfiler.H>
#else
namespace amrex {
//! Empty stand-in for MemStat, used when AMREX_TINY_PROFILING is not defined
//! and AMReX_TinyProfiler.H is therefore not included.
struct MemStat {};
}
#endif

#include <cstddef>
#include <cstdlib>
#include <limits>
#include <map>
#include <mutex>
#include <unordered_map>
#include <utility>

namespace amrex {
Expand Down Expand Up @@ -156,7 +168,7 @@ public:
* \brief Add this Arena to the list of Arenas that are profiled by TinyProfiler.
* \param memory_name The name of this arena in the TinyProfiler output.
*/
virtual void registerForProfiling (const std::string& memory_name);
void registerForProfiling (const std::string& memory_name);

#ifdef AMREX_USE_GPU
//! Is this GPU stream ordered memory allocator?
Expand Down Expand Up @@ -199,6 +211,29 @@ protected:
virtual std::size_t freeUnused_protected () { return 0; }
void* allocate_system (std::size_t nbytes);
void deallocate_system (void* p, std::size_t nbytes);

/**
 * \brief Per-arena bookkeeping that feeds allocation statistics to TinyProfiler.
 *
 * Instances can be neither copied nor moved.
 */
struct ArenaProfiler {
    //! Whether this arena is profiled by TinyProfiler
    bool m_do_profiling = false;
    //! Serializes the bookkeeping done in profile_alloc() and profile_free()
    std::mutex m_arena_profiler_mutex;
    //! Statistics container shared with TinyProfiler
    std::map<std::string, MemStat> m_profiling_stats;
    //! Live allocations: pointer -> (statistics entry, size in bytes); not used by CArena
    std::unordered_map<void*, std::pair<MemStat*, std::size_t>> m_currently_allocated;

    ArenaProfiler () noexcept = default;
    ArenaProfiler (const ArenaProfiler&) = delete;
    ArenaProfiler (ArenaProfiler&&) = delete;
    ArenaProfiler& operator= (const ArenaProfiler&) = delete;
    ArenaProfiler& operator= (ArenaProfiler&&) = delete;
    ~ArenaProfiler ();

    //! Record an allocation of nbytes bytes located at ptr.
    void profile_alloc (void* ptr, std::size_t nbytes);

    //! Record that the allocation located at ptr has been released.
    void profile_free (void* ptr);

} m_profiler;
};

}
Expand Down
45 changes: 43 additions & 2 deletions Src/Base/AMReX_Arena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,13 @@ Arena::hasFreeDeviceMemory (std::size_t)
}

// NOTE(review): this span looks like a diff hunk with removed and added lines
// interleaved; the unnamed-parameter signature and the amrex::Abort call appear
// to be the two removed lines. Confirm against the merged file.
//
// Registers this arena with TinyProfiler under the name memory_name. The body
// is compiled only when AMREX_TINY_PROFILING is defined.
void
Arena::registerForProfiling (const std::string&)
Arena::registerForProfiling ([[maybe_unused]] const std::string& memory_name)
{
amrex::Abort("Profiling is not implemented for this type of Arena");
#ifdef AMREX_TINY_PROFILING
// An arena that is already being profiled must not be registered again.
AMREX_ALWAYS_ASSERT(m_profiler.m_do_profiling == false);
// The flag takes RegisterArena's result, so it stays false when
// registration did not happen.
m_profiler.m_do_profiling =
TinyProfiler::RegisterArena(memory_name, m_profiler.m_profiling_stats);
#endif
}

std::size_t
Expand Down Expand Up @@ -330,6 +334,7 @@ Arena::Initialize ()
}

the_async_arena = new PArena(the_async_arena_release_threshold);
the_async_arena->registerForProfiling("Async Memory");

#ifdef AMREX_USE_GPU
if (the_arena->isDevice()) {
Expand Down Expand Up @@ -403,6 +408,7 @@ Arena::Initialize ()
}

the_cpu_arena = The_BArena();
the_cpu_arena->registerForProfiling("Cpu Memory");

// Initialize the null arena
auto* null_arena = The_Null_Arena();
Expand Down Expand Up @@ -654,4 +660,39 @@ The_Comms_Arena ()
}
}

//! Withdraw this arena's statistics from TinyProfiler if profiling was active.
Arena::ArenaProfiler::~ArenaProfiler ()
{
#ifdef AMREX_TINY_PROFILING
    // Without active profiling there is nothing to deregister.
    if (!m_do_profiling) { return; }
    TinyProfiler::DeregisterArena(m_profiling_stats);
#endif
}

/**
 * \brief Report an allocation to TinyProfiler and remember it for profile_free().
 *
 * Does nothing unless AMREX_TINY_PROFILING is defined and this arena is
 * being profiled.
 *
 * \param ptr    Address returned by the underlying allocator; a null pointer is ignored.
 * \param nbytes Size of the allocation in bytes.
 */
void Arena::ArenaProfiler::profile_alloc ([[maybe_unused]] void* ptr,
                                          [[maybe_unused]] std::size_t nbytes) {
#ifdef AMREX_TINY_PROFILING
    // A null pointer means no memory was handed out, so it must not be charged
    // to the statistics. It could also not be tracked reliably: insert() keeps
    // an existing entry, so a second null allocation would be counted by
    // memory_alloc() but never released by profile_free().
    if (m_do_profiling && ptr != nullptr) {
        std::lock_guard<std::mutex> lock(m_arena_profiler_mutex);
        MemStat* stat = TinyProfiler::memory_alloc(nbytes, m_profiling_stats);
        if (stat) {
            // Keep the statistics entry and size so profile_free() can undo
            // exactly this allocation.
            m_currently_allocated.insert({ptr, {stat, nbytes}});
        }
    }
#endif
}

/**
 * \brief Report to TinyProfiler that a tracked allocation has been released.
 *
 * Pointers that were never recorded by profile_alloc() are ignored.
 *
 * \param ptr Address that is about to be released.
 */
void Arena::ArenaProfiler::profile_free ([[maybe_unused]] void* ptr)
{
#ifdef AMREX_TINY_PROFILING
    if (!m_do_profiling) { return; }

    std::lock_guard<std::mutex> guard(m_arena_profiler_mutex);

    const auto found = m_currently_allocated.find(ptr);
    if (found == m_currently_allocated.end()) { return; }

    // Copy the record before the entry is erased.
    MemStat* const stat = found->second.first;
    const std::size_t nbytes = found->second.second;

    TinyProfiler::memory_free(nbytes, stat);
    m_currently_allocated.erase(found);
#endif
}

}
5 changes: 4 additions & 1 deletion Src/Base/AMReX_BArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
// NOTE(review): this span looks like a diff hunk with the removed line
// `return std::malloc(sz_);` shown next to the three lines that replace it.
// Confirm against the merged file.
//
// Allocates sz_ bytes with std::malloc and reports the allocation to the
// arena's profiler before returning the pointer.
void*
amrex::BArena::alloc (std::size_t sz_)
{
return std::malloc(sz_);
void* pt = std::malloc(sz_);
// The result of std::malloc is passed on to the profiler unchecked.
m_profiler.profile_alloc(pt, sz_);
return pt;
}

// Reports the release of pt to the arena's profiler, then returns the memory
// with std::free.
void
amrex::BArena::free (void* pt)
{
// The profiler record is dropped before the memory is released.
// NOTE(review): presumably this ordering keeps the address from being handed
// out again while the old record still exists. Confirm.
m_profiler.profile_free(pt);
std::free(pt);
}

Expand Down
12 changes: 0 additions & 12 deletions Src/Base/AMReX_CArena.H
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

namespace amrex {

struct MemStat;

/**
* \brief A Concrete Class for Dynamic Memory Management using first fit.
* This is a coalescing memory manager. It allocates (possibly) large
Expand Down Expand Up @@ -75,12 +73,6 @@ public:
*/
[[nodiscard]] bool hasFreeDeviceMemory (std::size_t sz) final;

/**
* \brief Add this Arena to the list of Arenas that are profiled by TinyProfiler.
* \param memory_name The name of this arena in the TinyProfiler output.
*/
void registerForProfiling (const std::string& memory_name) final;

//! The current amount of heap space used by the CArena object.
std::size_t heap_space_used () const noexcept;

Expand Down Expand Up @@ -191,10 +183,6 @@ protected:
std::size_t m_used{0};
//! The amount of memory given out via alloc().
std::size_t m_actually_used{0};
//! If this arena is profiled by TinyProfiler
bool m_do_profiling = false;
//! Data structure used for profiling with TinyProfiler
std::map<std::string, MemStat> m_profiling_stats;


std::mutex carena_mutex;
Expand Down
39 changes: 8 additions & 31 deletions Src/Base/AMReX_CArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,6 @@
#include <AMReX_MFIter.H>
#include <AMReX_ParallelReduce.H>

#ifdef AMREX_TINY_PROFILING
#include <AMReX_TinyProfiler.H>
#else
namespace amrex {
struct MemStat {};
}
#endif

#include <utility>
#include <cstring>
#include <iostream>
Expand All @@ -32,12 +24,6 @@ CArena::~CArena ()
for (auto const& a : m_alloc) {
deallocate_system(a.first, a.second);
}

#ifdef AMREX_TINY_PROFILING
if (m_do_profiling) {
TinyProfiler::DeregisterArena(m_profiling_stats);
}
#endif
}

void*
Expand All @@ -53,8 +39,8 @@ CArena::alloc_protected (std::size_t nbytes)
{
MemStat* stat = nullptr;
#ifdef AMREX_TINY_PROFILING
if (m_do_profiling) {
stat = TinyProfiler::memory_alloc(nbytes, m_profiling_stats);
if (m_profiler.m_do_profiling) {
stat = TinyProfiler::memory_alloc(nbytes, m_profiler.m_profiling_stats);
}
#endif

Expand Down Expand Up @@ -173,10 +159,10 @@ CArena::alloc_in_place (void* pt, std::size_t szmin, std::size_t szmax)
free_node.size(left_size);
}
#ifdef AMREX_TINY_PROFILING
if (m_do_profiling) {
if (m_profiler.m_do_profiling) {
TinyProfiler::memory_free(busy_it->size(), busy_it->mem_stat());
auto* stat = TinyProfiler::memory_alloc(new_size,
m_profiling_stats);
m_profiler.m_profiling_stats);
const_cast<Node&>(*busy_it).mem_stat(stat);
}
#endif
Expand All @@ -186,10 +172,10 @@ CArena::alloc_in_place (void* pt, std::size_t szmin, std::size_t szmax)
} else if (total_size >= szmin) {
m_freelist.erase(next_it);
#ifdef AMREX_TINY_PROFILING
if (m_do_profiling) {
if (m_profiler.m_do_profiling) {
TinyProfiler::memory_free(busy_it->size(), busy_it->mem_stat());
auto* stat = TinyProfiler::memory_alloc(total_size,
m_profiling_stats);
m_profiler.m_profiling_stats);
const_cast<Node&>(*busy_it).mem_stat(stat);
}
#endif
Expand Down Expand Up @@ -255,9 +241,9 @@ CArena::shrink_in_place (void* pt, std::size_t new_size)
m_actually_used -= leftover_size;

#ifdef AMREX_TINY_PROFILING
if (m_do_profiling) {
if (m_profiler.m_do_profiling) {
TinyProfiler::memory_free(old_size, busy_it->mem_stat());
auto* stat = TinyProfiler::memory_alloc(new_size, m_profiling_stats);
auto* stat = TinyProfiler::memory_alloc(new_size, m_profiler.m_profiling_stats);
const_cast<Node&>(*busy_it).mem_stat(stat);
}
#endif
Expand Down Expand Up @@ -431,15 +417,6 @@ CArena::hasFreeDeviceMemory (std::size_t sz)
}
}

// NOTE(review): this function appears to be removed by the change shown on
// this page (the Arena base class takes over registration). Confirm against
// the merged file.
//
// Marks this CArena as profiled and registers its statistics with TinyProfiler
// under the name memory_name. The body is compiled only when
// AMREX_TINY_PROFILING is defined.
void
CArena::registerForProfiling ([[maybe_unused]] const std::string& memory_name)
{
#ifdef AMREX_TINY_PROFILING
// NOTE(review): the flag is set regardless of what RegisterArena does.
m_do_profiling = true;
TinyProfiler::RegisterArena(memory_name, m_profiling_stats);
#endif
}

std::size_t
CArena::heap_space_used () const noexcept
{
Expand Down
2 changes: 2 additions & 0 deletions Src/Base/AMReX_PArena.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ PArena::alloc (std::size_t nbytes)
AMREX_HIP_SAFE_CALL(hipMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream()));,
AMREX_CUDA_SAFE_CALL(cudaMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream()));
)
m_profiler.profile_alloc(p, nbytes);
return p;
} else
#endif
Expand Down Expand Up @@ -93,6 +94,7 @@ PArena::free (void* p)

#if defined (AMREX_GPU_STREAM_ALLOC_SUPPORT)
if (Gpu::Device::memoryPoolsSupported()) {
m_profiler.profile_free(p);
AMREX_HIP_OR_CUDA(
AMREX_HIP_SAFE_CALL(hipFreeAsync(p, Gpu::gpuStream()));,
AMREX_CUDA_SAFE_CALL(cudaFreeAsync(p, Gpu::gpuStream()));
Expand Down
2 changes: 1 addition & 1 deletion Src/Base/AMReX_TinyProfiler.H
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public:
static void MemoryInitialize () noexcept;
static void MemoryFinalize (bool bFlushing = false) noexcept;

static void RegisterArena (const std::string& memory_name,
static bool RegisterArena (const std::string& memory_name,
std::map<std::string, MemStat>& memstats) noexcept;

static void DeregisterArena (std::map<std::string, MemStat>& memstats) noexcept;
Expand Down
5 changes: 3 additions & 2 deletions Src/Base/AMReX_TinyProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,14 +490,15 @@ TinyProfiler::MemoryFinalize (bool bFlushing) noexcept
if(os) { os->precision(oldprec); }
}

// NOTE(review): this span looks like a diff hunk with removed and added lines
// interleaved (`void` / `return;` next to `bool` / `return false;`). Confirm
// against the merged file.
//
// Adds memstats and memory_name to the profiler's lists of arena statistics
// and names. Returns false without registering anything when memprof_enabled
// is false, true otherwise.
void
bool
TinyProfiler::RegisterArena (const std::string& memory_name,
std::map<std::string, MemStat>& memstats) noexcept
{
if (!memprof_enabled) { return; }
if (!memprof_enabled) { return false; }

// Only the address of memstats is stored.
// NOTE(review): the caller's map presumably has to stay alive until
// DeregisterArena is called. Confirm.
all_memstats.push_back(&memstats);
all_memnames.push_back(memory_name);
return true;
}

void
Expand Down
Loading