From 69440459a772e503108014f5070b15a87af8c6a6 Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Thu, 21 Nov 2024 02:53:41 +0200 Subject: [PATCH] chore: allow slow and precise memory measurement of an object Specifically fixes "MEMORY USAGE" for lists. Signed-off-by: Roman Gershman --- helio | 2 +- src/core/compact_object.cc | 22 ++++++++++++++-------- src/core/compact_object.h | 8 ++++---- src/core/qlist.cc | 13 ++++++++++--- src/core/qlist.h | 2 +- src/server/dfly_main.cc | 4 ++++ src/server/dragonfly_test.cc | 15 +++++++++++++++ src/server/memory_cmd.cc | 3 ++- 8 files changed, 51 insertions(+), 18 deletions(-) diff --git a/helio b/helio index e6f69f118a1a..944be564ecd4 160000 --- a/helio +++ b/helio @@ -1 +1 @@ -Subproject commit e6f69f118a1af4d677d0ef8a2da3f0b50fcc7cef +Subproject commit 944be564ecd44865a9a057c09b5d4bbf7f6db772 diff --git a/src/core/compact_object.cc b/src/core/compact_object.cc index cb884c425a86..846550c2c092 100644 --- a/src/core/compact_object.cc +++ b/src/core/compact_object.cc @@ -46,9 +46,15 @@ constexpr XXH64_hash_t kHashSeed = 24061983; constexpr size_t kAlignSize = 8u; // Approximation since does not account for listpacks. -size_t QlMAllocSize(quicklist* ql) { - size_t res = ql->len * sizeof(quicklistNode) + znallocx(sizeof(quicklist)); - return res + ql->count * 16; // we account for each member 16 bytes. +size_t QlMAllocSize(quicklist* ql, bool slow) { + size_t node_size = ql->len * sizeof(quicklistNode) + znallocx(sizeof(quicklist)); + if (slow) { + for (quicklistNode* node = ql->head; node; node = node->next) { + node_size += zmalloc_usable_size(node->entry); + } + return node_size; + } + return node_size + ql->count * 16; // we account for each member 16 bytes. } inline void FreeObjSet(unsigned encoding, void* ptr, MemoryResource* mr) { @@ -291,7 +297,7 @@ static_assert(sizeof(CompactObj) == 18); namespace detail { -size_t RobjWrapper::MallocUsed() const { +size_t RobjWrapper::MallocUsed(bool slow) const { if (!inner_obj_) return 0; @@ -301,8 +307,8 @@ size_t RobjWrapper::MallocUsed() const { return InnerObjMallocUsed(); case OBJ_LIST: if (encoding_ == OBJ_ENCODING_QUICKLIST) - return QlMAllocSize((quicklist*)inner_obj_); - return ((QList*)inner_obj_)->MallocUsed(); + return QlMAllocSize((quicklist*)inner_obj_, slow); + return ((QList*)inner_obj_)->MallocUsed(slow); case OBJ_SET: return MallocUsedSet(encoding_, inner_obj_); case OBJ_HASH: @@ -1132,12 +1138,12 @@ void CompactObj::Free() { memset(u_.inline_str, 0, kInlineLen); } -size_t CompactObj::MallocUsed() const { +size_t CompactObj::MallocUsed(bool slow) const { if (!HasAllocated()) return 0; if (taglen_ == ROBJ_TAG) { - return u_.r_obj.MallocUsed(); + return u_.r_obj.MallocUsed(slow); } if (taglen_ == JSON_TAG) { diff --git a/src/core/compact_object.h b/src/core/compact_object.h index 9ea0fefe817e..910b5ffc1627 100644 --- a/src/core/compact_object.h +++ b/src/core/compact_object.h @@ -37,7 +37,7 @@ class RobjWrapper { RobjWrapper() : sz_(0), type_(0), encoding_(0) { } - size_t MallocUsed() const; + size_t MallocUsed(bool slow) const; uint64_t HashCode() const; bool Equal(const RobjWrapper& ow) const; @@ -359,9 +359,9 @@ class CompactObj { // Postcondition: The object is an in-memory string. void Materialize(std::string_view str, bool is_raw); - // In case this object a single blob, returns number of bytes allocated on heap - // for that blob. Otherwise returns 0. - size_t MallocUsed() const; + // Returns the approximation of memory used by the object. + // If slow is true, may use more expensive methods to calculate the precise size. + size_t MallocUsed(bool slow = false) const; // Resets the object to empty state (string). void Reset(); diff --git a/src/core/qlist.cc b/src/core/qlist.cc index 01fa2e93513a..b88568f6f5bd 100644 --- a/src/core/qlist.cc +++ b/src/core/qlist.cc @@ -431,10 +431,17 @@ bool QList::Replace(long index, std::string_view elem) { return false; } -size_t QList::MallocUsed() const { +size_t QList::MallocUsed(bool slow) const { // Approximation since does not account for listpacks. - size_t res = len_ * sizeof(quicklistNode) + znallocx(sizeof(quicklist)); - return res + count_ * 16; // we account for each member 16 bytes. + size_t node_size = len_ * sizeof(quicklistNode) + znallocx(sizeof(quicklist)); + if (slow) { + for (quicklistNode* node = head_; node; node = node->next) { + node_size += zmalloc_usable_size(node->entry); + } + return node_size; + } + + return node_size + count_ * 16; // we account for each member 16 bytes. } void QList::Iterate(IterateFunc cb, long start, long end) const { diff --git a/src/core/qlist.h b/src/core/qlist.h index 19f0058e0110..d69ea9f56106 100644 --- a/src/core/qlist.h +++ b/src/core/qlist.h @@ -103,7 +103,7 @@ class QList { // Returns true if item was replaced, false if index is out of range. bool Replace(long index, std::string_view elem); - size_t MallocUsed() const; + size_t MallocUsed(bool slow) const; void Iterate(IterateFunc cb, long start, long end) const; diff --git a/src/server/dfly_main.cc b/src/server/dfly_main.cc index 5caa7cdc1bf9..513243948e01 100644 --- a/src/server/dfly_main.cc +++ b/src/server/dfly_main.cc @@ -799,5 +799,9 @@ Usage: dragonfly [FLAGS] unlink(pidfile_path.c_str()); } + // Returns memory to OS. + // This is a workaround for a bug in mi_malloc that may cause a crash on exit. + mi_collect(true); + return res; } diff --git a/src/server/dragonfly_test.cc b/src/server/dragonfly_test.cc index 0a1b131b419f..d2c97af93897 100644 --- a/src/server/dragonfly_test.cc +++ b/src/server/dragonfly_test.cc @@ -768,6 +768,21 @@ TEST_F(DflyEngineTest, EvalBug2664) { EXPECT_THAT(resp, DoubleArg(42.9)); } +TEST_F(DflyEngineTest, MemoryUsage) { + for (unsigned i = 0; i < 1000; ++i) { + Run({"rpush", "l1", StrCat("val", i)}); + } + + for (unsigned i = 0; i < 1000; ++i) { + Run({"rpush", "l2", StrCat(string('a', 200), i)}); + } + auto resp = Run({"memory", "usage", "l1"}); + EXPECT_GT(*resp.GetInt(), 8000); + + resp = Run({"memory", "usage", "l2"}); + EXPECT_GT(*resp.GetInt(), 100000); +} + // TODO: to test transactions with a single shard since then all transactions become local. // To consider having a parameter in dragonfly engine controlling number of shards // unconditionally from number of cpus. TO TEST BLPOP under multi for single/multi argument case. diff --git a/src/server/memory_cmd.cc b/src/server/memory_cmd.cc index be8d168061ca..00d1cb88d372 100644 --- a/src/server/memory_cmd.cc +++ b/src/server/memory_cmd.cc @@ -83,7 +83,8 @@ std::string MallocStatsCb(bool backing, unsigned tid) { } size_t MemoryUsage(PrimeIterator it) { - return it->first.MallocUsed() + it->second.MallocUsed(); + size_t key_size = it->first.MallocUsed(); + return key_size + it->second.MallocUsed(true); } } // namespace