Skip to content

Commit

Permalink
Moved logic to add abort_action to kill/multikill if enabled into the…
Browse files Browse the repository at this point in the history
… guarddog.

Signed-off-by: Kevin Baichoo <kbaichoo@google.com>
  • Loading branch information
KBaichoo committed Oct 14, 2020
1 parent ab416ac commit 8a27924
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 104 deletions.
2 changes: 1 addition & 1 deletion source/server/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ envoy_cc_library(
"@envoy_api//envoy/config/bootstrap/v3:pkg_cc_proto",
"@envoy_api//envoy/config/metrics/v3:pkg_cc_proto",
"@envoy_api//envoy/config/trace/v3:pkg_cc_proto",
"@envoy_api//envoy/watchdog/v3alpha:pkg_cc_proto",
],
)

Expand Down Expand Up @@ -130,6 +129,7 @@ envoy_cc_library(
"//source/common/stats:symbol_table_lib",
"//source/common/watchdog:abort_action_config",
"@envoy_api//envoy/config/bootstrap/v3:pkg_cc_proto",
"@envoy_api//envoy/watchdog/v3alpha:pkg_cc_proto",
],
)

Expand Down
21 changes: 1 addition & 20 deletions source/server/configuration_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#include "envoy/server/instance.h"
#include "envoy/server/tracer_config.h"
#include "envoy/ssl/context_manager.h"
#include "envoy/watchdog/v3alpha/abort_action.pb.h"

#include "common/common/assert.h"
#include "common/common/utility.h"
Expand Down Expand Up @@ -172,25 +171,7 @@ WatchdogImpl::WatchdogImpl(const envoy::config::bootstrap::v3::Watchdog& watchdo
multikill_timeout_ =
std::chrono::milliseconds(PROTOBUF_GET_MS_OR_DEFAULT(watchdog, multikill_timeout, 0));
multikill_threshold_ = PROTOBUF_PERCENT_TO_DOUBLE_OR_DEFAULT(watchdog, multikill_threshold, 0.0);
auto actions = watchdog.actions();

// Add abort_action if killing is enabled.
envoy::watchdog::v3alpha::AbortActionConfig abort_config;

if (kill_timeout > 0) {
envoy::config::bootstrap::v3::Watchdog::WatchdogAction* abort_action_config = actions.Add();
abort_action_config->set_event(envoy::config::bootstrap::v3::Watchdog::WatchdogAction::KILL);
abort_action_config->mutable_config()->mutable_typed_config()->PackFrom(abort_config);
}

if (multikill_timeout_.count() > 0) {
envoy::config::bootstrap::v3::Watchdog::WatchdogAction* abort_action_config = actions.Add();
abort_action_config->set_event(
envoy::config::bootstrap::v3::Watchdog::WatchdogAction::MULTIKILL);
abort_action_config->mutable_config()->mutable_typed_config()->PackFrom(abort_config);
}

actions_ = actions;
actions_ = watchdog.actions();
}

InitialImpl::InitialImpl(const envoy::config::bootstrap::v3::Bootstrap& bootstrap) {
Expand Down
26 changes: 18 additions & 8 deletions source/server/guarddog_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "envoy/server/guarddog.h"
#include "envoy/server/guarddog_config.h"
#include "envoy/stats/scope.h"
#include "envoy/watchdog/v3alpha/abort_action.pb.h"

#include "common/common/assert.h"
#include "common/common/fmt.h"
Expand Down Expand Up @@ -64,7 +65,23 @@ GuardDogImpl::GuardDogImpl(Stats::Scope& stats_scope, const Server::Configuratio
Configuration::GuardDogActionFactoryContext context = {api, *dispatcher_, stats_scope,
name};

const auto& actions = config.actions();
auto actions = config.actions();

// Add default abort_action if kill and/or multi-kill is enabled.
envoy::watchdog::v3alpha::AbortActionConfig abort_config;

if (config.killTimeout().count() > 0) {
WatchDogAction* abort_action_config = actions.Add();
abort_action_config->set_event(WatchDogAction::KILL);
abort_action_config->mutable_config()->mutable_typed_config()->PackFrom(abort_config);
}

if (config.multiKillTimeout().count() > 0) {
WatchDogAction* abort_action_config = actions.Add();
abort_action_config->set_event(WatchDogAction::MULTIKILL);
abort_action_config->mutable_config()->mutable_typed_config()->PackFrom(abort_config);
}

for (const auto& action : actions) {
// Get factory and add the created cb
auto& factory = Config::Utility::getAndCheckFactory<Configuration::GuardDogActionFactory>(
Expand Down Expand Up @@ -133,19 +150,12 @@ void GuardDogImpl::step() {
}
if (killEnabled() && delta > kill_timeout_) {
invokeGuardDogActions(WatchDogAction::KILL, {{tid, last_checkin}}, now);

PANIC(fmt::format("GuardDog: one thread ({}) stuck for more than watchdog_kill_timeout",
watched_dog->dog_->threadId().debugString()));
}
if (multikillEnabled() && delta > multi_kill_timeout_) {
multi_kill_threads.emplace_back(tid, last_checkin);

if (multi_kill_threads.size() >= required_for_multi_kill) {
invokeGuardDogActions(WatchDogAction::MULTIKILL, multi_kill_threads, now);

PANIC(fmt::format("GuardDog: At least {} threads ({},...) stuck for more than "
"watchdog_multikill_timeout",
multi_kill_threads.size(), tid.debugString()));
}
}
}
Expand Down
42 changes: 0 additions & 42 deletions test/server/configuration_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -797,48 +797,6 @@ TEST_F(ConfigurationImplTest, DoesNotSkewIfKillTimeoutDisabled) {
EXPECT_EQ(config.workerWatchdogConfig().killTimeout(), std::chrono::milliseconds(0));
}

TEST_F(ConfigurationImplTest, ShouldAddAbortActionIfKillingIsEnabled) {
envoy::config::bootstrap::v3::Bootstrap bootstrap;
MainImpl config;
const std::string kill_json = R"EOF(
{
"watchdog": {
"kill_timeout": "1.0s",
"multikill_timeout" : "1.0s"
},
})EOF";

TestUtility::loadFromJson(kill_json, bootstrap);
config.initialize(bootstrap, server_, cluster_manager_factory_);

// We should have the abort action added to both KILL and MULTIKILL events.
EXPECT_EQ(config.workerWatchdogConfig().actions().size(), 2);
EXPECT_EQ(config.mainThreadWatchdogConfig().actions().size(), 2);

for (const auto& action : config.mainThreadWatchdogConfig().actions()) {
EXPECT_EQ(action.config().typed_config().type_url(),
"type.googleapis.com/envoy.watchdog.v3alpha.AbortActionConfig");
}

for (const auto& action : config.workerWatchdogConfig().actions()) {
EXPECT_EQ(action.config().typed_config().type_url(),
"type.googleapis.com/envoy.watchdog.v3alpha.AbortActionConfig");
}
}

TEST_F(ConfigurationImplTest, ShouldNotAddAbortActionIfKillingIsDisabled) {
envoy::config::bootstrap::v3::Bootstrap bootstrap;
MainImpl config;
const std::string killing_disabled_json = R"EOF( { "watchdog": { "miss_timeout": "1.0s" }})EOF";

TestUtility::loadFromJson(killing_disabled_json, bootstrap);
config.initialize(bootstrap, server_, cluster_manager_factory_);

// We should have the abort action added
EXPECT_EQ(config.workerWatchdogConfig().actions().size(), 0);
EXPECT_EQ(config.mainThreadWatchdogConfig().actions().size(), 0);
}

TEST_F(ConfigurationImplTest, ShouldErrorIfBothWatchdogsAndWatchdogSet) {
const std::string json = R"EOF( { "watchdogs": {}, "watchdog": {}})EOF";

Expand Down
19 changes: 0 additions & 19 deletions test/server/server_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1311,25 +1311,6 @@ TEST_P(ServerInstanceImplTest, DisabledExtension) {
ASSERT_TRUE(disabled_filter_found);
}

// Validates that server uses the abort action exit.
TEST_P(ServerInstanceImplTest, UsesAbortActionOnWatchdogKill) {
options_.concurrency_ = 2;

EXPECT_DEATH(
{
startTestServer("test/server/test_data/server/watchdogs_use_abort_action.yaml", false);
absl::Notification main_thread_slept;
server_->dispatcher().post([&] {
absl::SleepFor(absl::Seconds(5));
main_thread_slept.Notify();
});

// Wait for the sleep to run, we should die by the watchdog by then.
main_thread_slept.WaitForNotification();
},
"(ABRT|Caught Abort)");
}

} // namespace
} // namespace Server
} // namespace Envoy

This file was deleted.

7 changes: 0 additions & 7 deletions test/server/test_data/server/watchdogs_use_abort_action.yaml

This file was deleted.

0 comments on commit 8a27924

Please sign in to comment.