Skip to content

Commit

Permalink
backport to 1.17: cluster: destroy on main thread (#14954) (#15197)
Browse files Browse the repository at this point in the history
* Dispatcher: keeps a stack of tracked objects. (#14573)

Dispatcher will now keep a stack of tracked objects; on crash it'll "unwind" and have those objects dump their state. Moreover, it'll invoke fatal actions with the tracked objects. This allows us to dump more information during crash.

See related PR: #14509

Will follow up with another PR dumping information at the codec/parser level.

Signed-off-by: Kevin Baichoo <kbaichoo@google.com>
Signed-off-by: Christoph Pakulski <christoph@tetrate.io>

* cluster: destroy on main thread (#14954)

Signed-off-by: Yuchen Dai <silentdai@gmail.com>
Signed-off-by: Christoph Pakulski <christoph@tetrate.io>

* Updated release notes.

Signed-off-by: Christoph Pakulski <christoph@tetrate.io>

Co-authored-by: Kevin Baichoo <kbaichoo@google.com>
Co-authored-by: Yuchen Dai <silentdai@gmail.com>
  • Loading branch information
3 people authored Mar 3, 2021
1 parent 8f83d63 commit b961323
Show file tree
Hide file tree
Showing 40 changed files with 707 additions and 119 deletions.
1 change: 1 addition & 0 deletions docs/root/version_history/current.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Removed Config or Runtime

New Features
------------
* dispatcher: supports a stack of `Envoy::ScopeTrackedObject` instead of a single tracked object. This will allow Envoy to dump more debug information on crash.

Deprecated
----------
6 changes: 6 additions & 0 deletions include/envoy/event/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@ envoy_cc_library(
hdrs = ["deferred_deletable.h"],
)

envoy_cc_library(
name = "dispatcher_thread_deletable",
hdrs = ["dispatcher_thread_deletable.h"],
)

envoy_cc_library(
name = "dispatcher_interface",
hdrs = ["dispatcher.h"],
deps = [
":deferred_deletable",
":dispatcher_thread_deletable",
":file_event_interface",
":schedulable_cb_interface",
":signal_interface",
Expand Down
29 changes: 22 additions & 7 deletions include/envoy/event/dispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include "envoy/common/scope_tracker.h"
#include "envoy/common/time.h"
#include "envoy/event/dispatcher_thread_deletable.h"
#include "envoy/event/file_event.h"
#include "envoy/event/schedulable_cb.h"
#include "envoy/event/signal.h"
Expand Down Expand Up @@ -86,15 +87,18 @@ class DispatcherBase {
virtual Event::SchedulableCallbackPtr createSchedulableCallback(std::function<void()> cb) PURE;

/**
* Sets a tracked object, which is currently operating in this Dispatcher.
* This should be cleared with another call to setTrackedObject() when the object is done doing
* work. Calling setTrackedObject(nullptr) results in no object being tracked.
* Appends a tracked object to the current stack of tracked objects operating
* in the dispatcher.
*
* This is optimized for performance, to avoid allocation where we do scoped object tracking.
*
* @return The previously tracked object or nullptr if there was none.
* It's recommended to use ScopeTrackerScopeState to manage the object's tracking. If directly
* invoking, there needs to be a subsequent call to popTrackedObject().
*/
virtual const ScopeTrackedObject* setTrackedObject(const ScopeTrackedObject* object) PURE;
virtual void pushTrackedObject(const ScopeTrackedObject* object) PURE;

/**
* Removes the top of the stack of tracked object and asserts that it was expected.
*/
virtual void popTrackedObject(const ScopeTrackedObject* expected_object) PURE;

/**
* Validates that an operation is thread-safe with respect to this dispatcher; i.e. that the
Expand Down Expand Up @@ -242,6 +246,12 @@ class Dispatcher : public DispatcherBase {
*/
virtual void post(PostCb callback) PURE;

/**
* Post the deletable to this dispatcher. The deletable objects are guaranteed to be destroyed on
* the dispatcher's thread before dispatcher destroy. This is safe cross thread.
*/
virtual void deleteInDispatcherThread(DispatcherThreadDeletableConstPtr deletable) PURE;

/**
* Runs the event loop. This will not return until exit() is called either from within a callback
* or from a different thread.
Expand Down Expand Up @@ -269,6 +279,11 @@ class Dispatcher : public DispatcherBase {
* Updates approximate monotonic time to current value.
*/
virtual void updateApproximateMonotonicTime() PURE;

/**
* Shutdown the dispatcher by clear dispatcher thread deletable.
*/
virtual void shutdown() PURE;
};

using DispatcherPtr = std::unique_ptr<Dispatcher>;
Expand Down
21 changes: 21 additions & 0 deletions include/envoy/event/dispatcher_thread_deletable.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#pragma once

#include <memory>

namespace Envoy {
namespace Event {

/**
* If an object derives from this class, it can be passed to the destination dispatcher who
* guarantees to delete it in that dispatcher thread. The common use case is to ensure config
* related objects are deleted in the main thread.
*/
class DispatcherThreadDeletable {
public:
virtual ~DispatcherThreadDeletable() = default;
};

using DispatcherThreadDeletableConstPtr = std::unique_ptr<const DispatcherThreadDeletable>;

} // namespace Event
} // namespace Envoy
8 changes: 4 additions & 4 deletions include/envoy/server/fatal_action_config.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <memory>
#include <vector>

#include "envoy/common/pure.h"
#include "envoy/config/bootstrap/v3/bootstrap.pb.h"
Expand All @@ -17,11 +18,10 @@ class FatalAction {
public:
virtual ~FatalAction() = default;
/**
* Callback function to run when we are crashing.
* @param current_object the object we were working on when we started
* crashing.
* Callback function to run when Envoy is crashing.
* @param tracked_objects a span of objects Envoy was working on when Envoy started crashing.
*/
virtual void run(const ScopeTrackedObject* current_object) PURE;
virtual void run(absl::Span<const ScopeTrackedObject* const> tracked_objects) PURE;

/**
* @return whether the action is async-signal-safe.
Expand Down
25 changes: 18 additions & 7 deletions source/common/common/scope_tracker.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,35 @@
#include "envoy/common/scope_tracker.h"
#include "envoy/event/dispatcher.h"

#include "common/common/assert.h"

namespace Envoy {

// A small class for tracking the scope of the object which is currently having
// A small class for managing the scope of a tracked object which is currently having
// work done in this thread.
//
// When created, it sets the tracked object in the dispatcher, and when destroyed it points the
// dispatcher at the previously tracked object.
// When created, it appends the tracked object to the dispatcher's stack of tracked objects, and
// when destroyed it pops the dispatcher's stack of tracked object, which should be the object it
// registered.
class ScopeTrackerScopeState {
public:
ScopeTrackerScopeState(const ScopeTrackedObject* object, Event::Dispatcher& dispatcher)
: dispatcher_(dispatcher) {
latched_object_ = dispatcher_.setTrackedObject(object);
: registered_object_(object), dispatcher_(dispatcher) {
dispatcher_.pushTrackedObject(registered_object_);
}

~ScopeTrackerScopeState() {
// If ScopeTrackerScopeState is always used for managing tracked objects,
// then the object popped off should be the object we registered.
dispatcher_.popTrackedObject(registered_object_);
}

~ScopeTrackerScopeState() { dispatcher_.setTrackedObject(latched_object_); }
// Make this object stack-only, it doesn't make sense for it
// to be on the heap since it's tracking a stack of active operations.
void* operator new(std::size_t) = delete;

private:
const ScopeTrackedObject* latched_object_;
const ScopeTrackedObject* registered_object_;
Event::Dispatcher& dispatcher_;
};

Expand Down
3 changes: 3 additions & 0 deletions source/common/event/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@ envoy_cc_library(
"file_event_impl.h",
"schedulable_cb_impl.h",
],
external_deps = [
"abseil_inlined_vector",
],
deps = [
":libevent_lib",
":libevent_scheduler_lib",
Expand Down
101 changes: 98 additions & 3 deletions source/common/event/dispatcher_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
#include <vector>

#include "envoy/api/api.h"
#include "envoy/common/scope_tracker.h"
#include "envoy/network/listen_socket.h"
#include "envoy/network/listener.h"

#include "common/buffer/buffer_impl.h"
#include "common/common/assert.h"
#include "common/common/lock_guard.h"
#include "common/common/thread.h"
#include "common/event/file_event_impl.h"
Expand Down Expand Up @@ -44,6 +46,8 @@ DispatcherImpl::DispatcherImpl(const std::string& name, Api::Api& api,
buffer_factory_(factory != nullptr ? factory
: std::make_shared<Buffer::WatermarkBufferFactory>()),
scheduler_(time_system.createScheduler(base_scheduler_, base_scheduler_)),
thread_local_delete_cb_(
base_scheduler_.createSchedulableCallback([this]() -> void { runThreadLocalDelete(); })),
deferred_delete_cb_(base_scheduler_.createSchedulableCallback(
[this]() -> void { clearDeferredDeleteList(); })),
post_cb_(base_scheduler_.createSchedulableCallback([this]() -> void { runPostCallbacks(); })),
Expand All @@ -55,7 +59,12 @@ DispatcherImpl::DispatcherImpl(const std::string& name, Api::Api& api,
std::bind(&DispatcherImpl::updateApproximateMonotonicTime, this));
}

DispatcherImpl::~DispatcherImpl() { FatalErrorHandler::removeFatalErrorHandler(*this); }
DispatcherImpl::~DispatcherImpl() {
ENVOY_LOG(debug, "destroying dispatcher {}", name_);
FatalErrorHandler::removeFatalErrorHandler(*this);
// TODO(lambdai): Resolve https://github.com/envoyproxy/envoy/issues/15072 and enable
// ASSERT(deletable_in_dispatcher_thread_.empty())
}

void DispatcherImpl::registerWatchdog(const Server::WatchDogSharedPtr& watchdog,
std::chrono::milliseconds min_touch_interval) {
Expand Down Expand Up @@ -236,9 +245,23 @@ void DispatcherImpl::post(std::function<void()> callback) {
}
}

void DispatcherImpl::deleteInDispatcherThread(DispatcherThreadDeletableConstPtr deletable) {
bool need_schedule;
{
Thread::LockGuard lock(thread_local_deletable_lock_);
need_schedule = deletables_in_dispatcher_thread_.empty();
deletables_in_dispatcher_thread_.emplace_back(std::move(deletable));
// TODO(lambdai): Enable below after https://github.com/envoyproxy/envoy/issues/15072
// ASSERT(!shutdown_called_, "inserted after shutdown");
}

if (need_schedule) {
thread_local_delete_cb_->scheduleCallbackCurrentIteration();
}
}

void DispatcherImpl::run(RunType type) {
run_tid_ = api_.threadFactory().currentThreadId();

// Flush all post callbacks before we run the event loop. We do this because there are post
// callbacks that have to get run before the initial event loop starts running. libevent does
// not guarantee that events are run in any particular order. So even if we post() and call
Expand All @@ -251,12 +274,56 @@ MonotonicTime DispatcherImpl::approximateMonotonicTime() const {
return approximate_monotonic_time_;
}

void DispatcherImpl::shutdown() {
// TODO(lambdai): Resolve https://github.com/envoyproxy/envoy/issues/15072 and loop delete below
// below 3 lists until all lists are empty. The 3 lists are list of deferred delete objects, post
// callbacks and dispatcher thread deletable objects.
ASSERT(isThreadSafe());
auto deferred_deletables_size = current_to_delete_->size();
std::list<std::function<void()>>::size_type post_callbacks_size;
{
Thread::LockGuard lock(post_lock_);
post_callbacks_size = post_callbacks_.size();
}

std::list<DispatcherThreadDeletableConstPtr> local_deletables;
{
Thread::LockGuard lock(thread_local_deletable_lock_);
local_deletables = std::move(deletables_in_dispatcher_thread_);
}
auto thread_local_deletables_size = local_deletables.size();
while (!local_deletables.empty()) {
local_deletables.pop_front();
}
ASSERT(!shutdown_called_);
shutdown_called_ = true;
ENVOY_LOG(
trace,
"{} destroyed {} thread local objects. Peek {} deferred deletables, {} post callbacks. ",
__FUNCTION__, deferred_deletables_size, post_callbacks_size, thread_local_deletables_size);
}

void DispatcherImpl::updateApproximateMonotonicTime() { updateApproximateMonotonicTimeInternal(); }

void DispatcherImpl::updateApproximateMonotonicTimeInternal() {
approximate_monotonic_time_ = api_.timeSource().monotonicTime();
}

void DispatcherImpl::runThreadLocalDelete() {
std::list<DispatcherThreadDeletableConstPtr> to_be_delete;
{
Thread::LockGuard lock(thread_local_deletable_lock_);
to_be_delete = std::move(deletables_in_dispatcher_thread_);
ASSERT(deletables_in_dispatcher_thread_.empty());
}
while (!to_be_delete.empty()) {
// Touch the watchdog before deleting the objects to avoid spurious watchdog miss events when
// executing complicated destruction.
touchWatchdog();
// Delete in FIFO order.
to_be_delete.pop_front();
}
}
void DispatcherImpl::runPostCallbacks() {
// Clear the deferred delete list before running post callbacks to reduce non-determinism in
// callback processing, and more easily detect if a scheduled post callback refers to one of the
Expand Down Expand Up @@ -287,6 +354,16 @@ void DispatcherImpl::runPostCallbacks() {
}
}

void DispatcherImpl::onFatalError(std::ostream& os) const {
// Dump the state of the tracked objects in the dispatcher if thread safe. This generally
// results in dumping the active state only for the thread which caused the fatal error.
if (isThreadSafe()) {
for (auto iter = tracked_object_stack_.rbegin(); iter != tracked_object_stack_.rend(); ++iter) {
(*iter)->dumpState(os);
}
}
}

void DispatcherImpl::runFatalActionsOnTrackedObject(
const FatalAction::FatalActionPtrList& actions) const {
// Only run if this is the dispatcher of the current thread and
Expand All @@ -296,7 +373,7 @@ void DispatcherImpl::runFatalActionsOnTrackedObject(
}

for (const auto& action : actions) {
action->run(current_object_);
action->run(tracked_object_stack_);
}
}

Expand All @@ -306,5 +383,23 @@ void DispatcherImpl::touchWatchdog() {
}
}

void DispatcherImpl::pushTrackedObject(const ScopeTrackedObject* object) {
ASSERT(isThreadSafe());
ASSERT(object != nullptr);
tracked_object_stack_.push_back(object);
ASSERT(tracked_object_stack_.size() <= ExpectedMaxTrackedObjectStackDepth);
}

void DispatcherImpl::popTrackedObject(const ScopeTrackedObject* expected_object) {
ASSERT(isThreadSafe());
ASSERT(expected_object != nullptr);
RELEASE_ASSERT(!tracked_object_stack_.empty(), "Tracked Object Stack is empty, nothing to pop!");

const ScopeTrackedObject* top = tracked_object_stack_.back();
tracked_object_stack_.pop_back();
ASSERT(top == expected_object,
"Popped the top of the tracked object stack, but it wasn't the expected object!");
}

} // namespace Event
} // namespace Envoy
Loading

0 comments on commit b961323

Please sign in to comment.