Skip to content

Commit

Permalink
[fix](memory) Fix Jemalloc Cache Memory Tracker (#37464)
Browse files Browse the repository at this point in the history
## Proposed changes

Doris uses Jemalloc as its default allocator. The Jemalloc cache consists of
two parts:
- Thread Cache: caches a specified number of pages in the thread cache.
- Dirty Pages: memory pages that can be reused in all arenas.

1. Metadata should not be counted as cache; counting it delays memory GC,
which can lead to BE OOM.
2. Fix the Jemalloc dirty page memory size. The previous code used dirty page
number * page size (4K on x86), which is much smaller than the actual
memory. The fix is to use the sum of the dirty page memory of all size
classes of extents.
  • Loading branch information
xinyiZzz authored and dataroaring committed Jul 17, 2024
1 parent bf76a9e commit ea3fec2
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 10 deletions.
1 change: 0 additions & 1 deletion be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1078,7 +1078,6 @@ DEFINE_mInt32(schema_cache_sweep_time_sec, "100");
DEFINE_mInt32(segment_cache_capacity, "-1");
DEFINE_mInt32(estimated_num_columns_per_segment, "200");
DEFINE_mInt32(estimated_mem_per_column_reader, "1024");
// The value is calculate by storage_page_cache_limit * index_page_cache_percentage
DEFINE_mInt32(segment_cache_memory_percentage, "2");

// enable feature binlog, default false
Expand Down
12 changes: 10 additions & 2 deletions be/src/runtime/memory/mem_tracker_limiter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,13 +261,21 @@ void MemTrackerLimiter::make_process_snapshots(std::vector<MemTracker::Snapshot>
}

snapshot.type = "overview";
snapshot.label = "tc/jemalloc cache";
snapshot.label = "tc/jemalloc_cache";
snapshot.limit = -1;
snapshot.cur_consumption = MemInfo::allocator_cache_mem();
snapshot.peak_consumption = -1;
(*snapshots).emplace_back(snapshot);
all_tracker_mem_sum += MemInfo::allocator_cache_mem();

snapshot.type = "overview";
snapshot.label = "tc/jemalloc_metadata";
snapshot.limit = -1;
snapshot.cur_consumption = MemInfo::allocator_metadata_mem();
snapshot.peak_consumption = -1;
(*snapshots).emplace_back(snapshot);
all_tracker_mem_sum += MemInfo::allocator_metadata_mem();

snapshot.type = "overview";
snapshot.label = "sum of all trackers"; // is virtual memory
snapshot.limit = -1;
Expand All @@ -287,7 +295,7 @@ void MemTrackerLimiter::make_process_snapshots(std::vector<MemTracker::Snapshot>
(*snapshots).emplace_back(snapshot);

snapshot.type = "overview";
snapshot.label = "reserved memory";
snapshot.label = "reserve_memory";
snapshot.limit = -1;
snapshot.cur_consumption = GlobalMemoryArbitrator::process_reserved_memory();
snapshot.peak_consumption = -1;
Expand Down
27 changes: 20 additions & 7 deletions be/src/util/mem_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ std::atomic<int64_t> MemInfo::_s_mem_limit = std::numeric_limits<int64_t>::max()
std::atomic<int64_t> MemInfo::_s_soft_mem_limit = std::numeric_limits<int64_t>::max();

std::atomic<int64_t> MemInfo::_s_allocator_cache_mem = 0;
std::atomic<int64_t> MemInfo::_s_allocator_metadata_mem = 0;
std::atomic<int64_t> MemInfo::_s_je_dirty_pages_mem = std::numeric_limits<int64_t>::min();
std::atomic<int64_t> MemInfo::_s_je_dirty_pages_mem_limit = std::numeric_limits<int64_t>::max();
std::atomic<int64_t> MemInfo::_s_virtual_memory_used = 0;
Expand All @@ -75,21 +76,33 @@ std::atomic<bool> MemInfo::je_purge_dirty_pages_notify {false};
void MemInfo::refresh_allocator_mem() {
#if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER)
#elif defined(USE_JEMALLOC)
// jemalloc mallctl refer to : https://jemalloc.net/jemalloc.3.html
// https://www.bookstack.cn/read/aliyun-rds-core/4a0cdf677f62feb3.md
// Check the Doris BE web page `http://ip:webserver_port/memz` to get the Jemalloc Profile.

// 'epoch' is a special mallctl -- it updates the statistics. Without it, all
// the following calls will return stale values. It increments and returns
// the current epoch number, which might be useful to log as a sanity check.
uint64_t epoch = 0;
size_t sz = sizeof(epoch);
jemallctl("epoch", &epoch, &sz, &epoch, sz);

// https://jemalloc.net/jemalloc.3.html
// https://www.bookstack.cn/read/aliyun-rds-core/4a0cdf677f62feb3.md
_s_allocator_cache_mem.store(get_je_all_arena_metrics("tcache_bytes") +
get_je_metrics("stats.metadata") +
get_je_all_arena_metrics("pdirty") * get_page_size(),
// Number of extents of the given type in this arena in the bucket corresponding to page size index.
// Large size class starts at 16384, the extents have three sizes before 16384: 4096, 8192, and 12288, so + 3
int64_t dirty_pages_bytes = 0;
for (unsigned i = 0; i < get_je_unsigned_metrics("arenas.nlextents") + 3; i++) {
dirty_pages_bytes += get_je_all_arena_extents_metrics(i, "dirty_bytes");
}
_s_je_dirty_pages_mem.store(dirty_pages_bytes, std::memory_order_relaxed);

// Doris uses Jemalloc as default Allocator, Jemalloc Cache consists of two parts:
// - Thread Cache, cache a specified number of Pages in Thread Cache.
// - Dirty Page, memory Page that can be reused in all Arenas.
_s_allocator_cache_mem.store(get_je_all_arena_metrics("tcache_bytes") + dirty_pages_bytes,
std::memory_order_relaxed);
_s_je_dirty_pages_mem.store(get_je_all_arena_metrics("pdirty") * get_page_size(),
std::memory_order_relaxed);
// Total number of bytes dedicated to metadata, which comprise base allocations used
// for bootstrap-sensitive allocator metadata structures.
_s_allocator_metadata_mem.store(get_je_metrics("stats.metadata"), std::memory_order_relaxed);
_s_virtual_memory_used.store(get_je_metrics("stats.mapped"), std::memory_order_relaxed);
#else
_s_allocator_cache_mem.store(get_tc_metrics("tcmalloc.pageheap_free_bytes") +
Expand Down
24 changes: 24 additions & 0 deletions be/src/util/mem_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,33 @@ class MemInfo {
return 0;
}

// Reads the jemalloc statistic called `name` as an `unsigned` value.
//
// Returns the value written by jemallctl when the call reports success (0).
// Returns 0 when the call fails, or when the build does not define
// USE_JEMALLOC.
static inline unsigned get_je_unsigned_metrics(const std::string& name) {
#ifdef USE_JEMALLOC
    unsigned metric = 0;
    size_t metric_len = sizeof(metric);
    // Read-only query: no new value is passed in (nullptr / 0).
    if (jemallctl(name.c_str(), &metric, &metric_len, nullptr, 0) != 0) {
        return 0;
    }
    return metric;
#else
    return 0;
#endif
}

// Looks up the statistic `name` under the "stats.arenas.<MALLCTL_ARENAS_ALL>."
// prefix and returns whatever get_je_metrics yields for that key.
//
// Returns 0 when the build does not define USE_JEMALLOC.
static inline int64_t get_je_all_arena_metrics(const std::string& name) {
#ifdef USE_JEMALLOC
    const std::string all_arena_key =
            fmt::format("stats.arenas.{}.{}", MALLCTL_ARENAS_ALL, name);
    return get_je_metrics(all_arena_key);
#else
    return 0;
#endif
}

// Looks up the per-extent statistic `extent_type` for the bucket
// `page_size_index`, using the key
// "stats.arenas.<MALLCTL_ARENAS_ALL>.extents.<page_size_index>.<extent_type>",
// and returns whatever get_je_metrics yields for that key.
//
// Returns 0 when the build does not define USE_JEMALLOC.
static inline int64_t get_je_all_arena_extents_metrics(int64_t page_size_index,
                                                       const std::string& extent_type) {
#ifdef USE_JEMALLOC
    const std::string extents_key = fmt::format("stats.arenas.{}.extents.{}.{}",
                                                MALLCTL_ARENAS_ALL, page_size_index, extent_type);
    return get_je_metrics(extents_key);
#else
    return 0;
#endif
}

static inline void je_purge_all_arena_dirty_pages() {
#ifdef USE_JEMALLOC
// https://github.com/jemalloc/jemalloc/issues/2470
Expand Down Expand Up @@ -147,6 +167,9 @@ class MemInfo {
// Returns the current value of _s_allocator_cache_mem, read with relaxed
// memory ordering and converted to size_t.
static inline size_t allocator_cache_mem() {
    const int64_t cache_mem = _s_allocator_cache_mem.load(std::memory_order_relaxed);
    return static_cast<size_t>(cache_mem);
}
// Returns the current value of _s_allocator_metadata_mem, read with relaxed
// memory ordering and converted to size_t.
static inline size_t allocator_metadata_mem() {
    const int64_t metadata_mem = _s_allocator_metadata_mem.load(std::memory_order_relaxed);
    return static_cast<size_t>(metadata_mem);
}
// Returns the current value of _s_je_dirty_pages_mem, read with relaxed
// memory ordering.
static inline int64_t je_dirty_pages_mem() {
    const int64_t dirty_pages_mem = _s_je_dirty_pages_mem.load(std::memory_order_relaxed);
    return dirty_pages_mem;
}
Expand Down Expand Up @@ -187,6 +210,7 @@ class MemInfo {
static std::atomic<int64_t> _s_soft_mem_limit;

static std::atomic<int64_t> _s_allocator_cache_mem;
static std::atomic<int64_t> _s_allocator_metadata_mem;
static std::atomic<int64_t> _s_je_dirty_pages_mem;
static std::atomic<int64_t> _s_je_dirty_pages_mem_limit;
static std::atomic<int64_t> _s_virtual_memory_used;
Expand Down

0 comments on commit ea3fec2

Please sign in to comment.