From 2fb6bff2d91fde1b64f26923edb937d2eb46d916 Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Wed, 14 Sep 2022 05:49:47 +0800 Subject: [PATCH] src: make NearHeapLimitCallback() more robust Instead of removing the callback before generating the heap snapshot and then adding it back after the heap snapshot is generated, just remove it once the heap snapshot limit is reached. Otherwise, if the worker callback kicks in and sets the heap limit to a higher value during the heap snapshot generation, the current_heap_limit in the heap snapshot callback becomes invalid, and we might return a heap limit lower than the current one, resulting in OOM. In addition, add more logs and checks in Worker::NearHeapLimit() to help us catch problems. PR-URL: https://github.com/nodejs/node/pull/44581 Refs: https://github.com/nodejs/reliability/issues/372 Reviewed-By: theanarkh Reviewed-By: Rich Trott --- src/env-inl.h | 4 ++++ src/env.cc | 36 ++++++++++++++++++++---------------- src/env.h | 3 ++- src/node_worker.cc | 14 ++++++++++++-- 4 files changed, 38 insertions(+), 19 deletions(-) diff --git a/src/env-inl.h b/src/env-inl.h index cb0eb2151edf00..37bb36b38b238e 100644 --- a/src/env-inl.h +++ b/src/env-inl.h @@ -900,6 +900,10 @@ inline void Environment::set_heap_snapshot_near_heap_limit(uint32_t limit) { heap_snapshot_near_heap_limit_ = limit; } +inline bool Environment::is_in_heapsnapshot_heap_limit_callback() const { + return is_in_heapsnapshot_heap_limit_callback_; +} + inline void Environment::AddHeapSnapshotNearHeapLimitCallback() { DCHECK(!heapsnapshot_near_heap_limit_callback_added_); heapsnapshot_near_heap_limit_callback_added_ = true; diff --git a/src/env.cc b/src/env.cc index 0ddced56d587bd..4c9b0a25e56da9 100644 --- a/src/env.cc +++ b/src/env.cc @@ -1962,7 +1962,7 @@ size_t Environment::NearHeapLimitCallback(void* data, "Invoked NearHeapLimitCallback, processing=%d, " "current_limit=%" PRIu64 ", " "initial_limit=%" PRIu64 "\n", - env->is_processing_heap_limit_callback_, + 
env->is_in_heapsnapshot_heap_limit_callback_, static_cast(current_heap_limit), static_cast(initial_heap_limit)); @@ -2014,8 +2014,8 @@ size_t Environment::NearHeapLimitCallback(void* data, // new limit, so in a heap with unbounded growth the isolate // may eventually crash with this new limit - effectively raising // the heap limit to the new one. - if (env->is_processing_heap_limit_callback_) { - size_t new_limit = current_heap_limit + max_young_gen_size; + size_t new_limit = current_heap_limit + max_young_gen_size; + if (env->is_in_heapsnapshot_heap_limit_callback_) { Debug(env, DebugCategory::DIAGNOSTICS, "Not generating snapshots in nested callback. " @@ -2031,14 +2031,14 @@ size_t Environment::NearHeapLimitCallback(void* data, Debug(env, DebugCategory::DIAGNOSTICS, "Not generating snapshots because it's too risky.\n"); - env->RemoveHeapSnapshotNearHeapLimitCallback(initial_heap_limit); + env->RemoveHeapSnapshotNearHeapLimitCallback(0); // The new limit must be higher than current_heap_limit or V8 might // crash. - return current_heap_limit + 1; + return new_limit; } // Take the snapshot synchronously. - env->is_processing_heap_limit_callback_ = true; + env->is_in_heapsnapshot_heap_limit_callback_ = true; std::string dir = env->options()->diagnostic_dir; if (dir.empty()) { @@ -2049,17 +2049,21 @@ size_t Environment::NearHeapLimitCallback(void* data, Debug(env, DebugCategory::DIAGNOSTICS, "Start generating %s...\n", *name); - // Remove the callback first in case it's triggered when generating - // the snapshot. - env->RemoveHeapSnapshotNearHeapLimitCallback(initial_heap_limit); - heap::WriteSnapshot(env->isolate(), filename.c_str()); env->heap_limit_snapshot_taken_ += 1; - // Don't take more snapshots than the number specified by - // --heapsnapshot-near-heap-limit. 
- if (env->heap_limit_snapshot_taken_ < env->heap_snapshot_near_heap_limit_) { - env->AddHeapSnapshotNearHeapLimitCallback(); + Debug(env, + DebugCategory::DIAGNOSTICS, + "%" PRIu32 "/%" PRIu32 " snapshots taken.\n", + env->heap_limit_snapshot_taken_, + env->heap_snapshot_near_heap_limit_); + + // Don't take more snapshots than the limit specified. + if (env->heap_limit_snapshot_taken_ == env->heap_snapshot_near_heap_limit_) { + Debug(env, + DebugCategory::DIAGNOSTICS, + "Removing the near heap limit callback"); + env->RemoveHeapSnapshotNearHeapLimitCallback(0); } FPrintF(stderr, "Wrote snapshot to %s\n", filename.c_str()); @@ -2067,11 +2071,11 @@ size_t Environment::NearHeapLimitCallback(void* data, // 95% of the initial limit. env->isolate()->AutomaticallyRestoreInitialHeapLimit(0.95); - env->is_processing_heap_limit_callback_ = false; + env->is_in_heapsnapshot_heap_limit_callback_ = false; // The new limit must be higher than current_heap_limit or V8 might // crash. - return current_heap_limit + 1; + return new_limit; } inline size_t Environment::SelfSize() const { diff --git a/src/env.h b/src/env.h index 21a095194c4061..e6d54bc7273c11 100644 --- a/src/env.h +++ b/src/env.h @@ -1460,6 +1460,7 @@ class Environment : public MemoryRetainer { void ForEachBindingData(T&& iterator); inline void set_heap_snapshot_near_heap_limit(uint32_t limit); + inline bool is_in_heapsnapshot_heap_limit_callback() const; inline void AddHeapSnapshotNearHeapLimitCallback(); @@ -1521,7 +1522,7 @@ class Environment : public MemoryRetainer { std::vector argv_; std::string exec_path_; - bool is_processing_heap_limit_callback_ = false; + bool is_in_heapsnapshot_heap_limit_callback_ = false; uint32_t heap_limit_snapshot_taken_ = 0; uint32_t heap_snapshot_near_heap_limit_ = 0; bool heapsnapshot_near_heap_limit_callback_added_ = false; diff --git a/src/node_worker.cc b/src/node_worker.cc index 86493374af13b4..8b9c02fec97c45 100644 --- a/src/node_worker.cc +++ b/src/node_worker.cc @@ -253,11 
+253,21 @@ class WorkerThreadData { size_t Worker::NearHeapLimit(void* data, size_t current_heap_limit, size_t initial_heap_limit) { Worker* worker = static_cast(data); - worker->Exit(1, "ERR_WORKER_OUT_OF_MEMORY", "JS heap out of memory"); // Give the current GC some extra leeway to let it finish rather than // crash hard. We are not going to perform further allocations anyway. constexpr size_t kExtraHeapAllowance = 16 * 1024 * 1024; - return current_heap_limit + kExtraHeapAllowance; + size_t new_limit = current_heap_limit + kExtraHeapAllowance; + Environment* env = worker->env(); + if (env != nullptr) { + DCHECK(!env->is_in_heapsnapshot_heap_limit_callback()); + Debug(env, + DebugCategory::DIAGNOSTICS, + "Throwing ERR_WORKER_OUT_OF_MEMORY, " + "new_limit=%" PRIu64 "\n", + static_cast(new_limit)); + } + worker->Exit(1, "ERR_WORKER_OUT_OF_MEMORY", "JS heap out of memory"); + return new_limit; } void Worker::Run() {