From 523b829c1d4184fca78f0900df87f123df5b607a Mon Sep 17 00:00:00 2001 From: JaySon Date: Fri, 15 Dec 2023 19:31:50 +0800 Subject: [PATCH] Disagg: Fix infinite retries on owner election (#8534) close pingcap/tiflash#8519 --- contrib/poco | 2 +- dbms/src/Common/FailPoint.cpp | 1 + dbms/src/TiDB/Etcd/Client.cpp | 9 ++- dbms/src/TiDB/OwnerManager.cpp | 18 +++-- dbms/src/TiDB/tests/gtest_owner_manager.cpp | 76 +++++++++++++++++++++ 5 files changed, 99 insertions(+), 7 deletions(-) diff --git a/contrib/poco b/contrib/poco index 170872b2a15..17085230cf6 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 170872b2a15ea042d99f8168a01aec896eef0840 +Subproject commit 17085230cf6f0b02e0b86193e22468eaae4da13a diff --git a/dbms/src/Common/FailPoint.cpp b/dbms/src/Common/FailPoint.cpp index ea3195b25a5..1e6d4184a29 100644 --- a/dbms/src/Common/FailPoint.cpp +++ b/dbms/src/Common/FailPoint.cpp @@ -65,6 +65,7 @@ namespace DB M(force_ps_wal_compact) \ M(pause_before_full_gc_prepare) \ M(force_owner_mgr_state) \ + M(force_owner_mgr_campaign_failed) \ M(exception_during_spill) \ M(force_fail_to_create_etcd_session) \ M(force_remote_read_for_batch_cop_once) \ diff --git a/dbms/src/TiDB/Etcd/Client.cpp b/dbms/src/TiDB/Etcd/Client.cpp index d891b3a770a..321c54d8d3c 100644 --- a/dbms/src/TiDB/Etcd/Client.cpp +++ b/dbms/src/TiDB/Etcd/Client.cpp @@ -388,7 +388,14 @@ bool Session::keepAliveOne() // the lease is not valid anymore if (resp.ttl() <= 0) { - LOG_DEBUG(log, "keep alive fail, ttl={}", resp.ttl()); + auto status = writer->Finish(); + LOG_INFO( + log, + "keep alive fail, ttl={}, code={} msg={}", + resp.ttl(), + magic_enum::enum_name(status.error_code()), + status.error_message()); + finished = true; return false; } lease_deadline = next_timepoint + std::chrono::seconds(resp.ttl()); diff --git a/dbms/src/TiDB/OwnerManager.cpp b/dbms/src/TiDB/OwnerManager.cpp index 8719dc23aa7..5d46e1eacb7 100644 --- a/dbms/src/TiDB/OwnerManager.cpp +++ b/dbms/src/TiDB/OwnerManager.cpp @@ -51,6 +51,7 @@ namespace DB namespace FailPoints { extern const char force_owner_mgr_state[]; +extern const char force_owner_mgr_campaign_failed[]; extern const char force_fail_to_create_etcd_session[]; } // namespace FailPoints @@ -227,10 +228,10 @@ std::pair EtcdOwnerManager::runNextCampaign(Etcd::Sessio void EtcdOwnerManager::tryChangeState(State coming_state) { std::unique_lock lk(mtx_camaign); - // The campaign is stopping, it will cause - // - leader key deleted in watch thread - // - etcd lease expired in keepalive task - // Do not overwrite `CancelByStop` because the next campaign is expected to be stopped. + // When the campaign is stopping, it will cause + // - leader key deleted in watch thread => try set state to `CancelByKeyDeleted` + // - etcd lease expired in keepalive task => try set state to `CancelByLeaseInvalid` + // Do not let those actually overwrite `CancelByStop` because the next campaign is expected to be stopped. if (state != State::CancelByStop) { // ok to set the state @@ -258,6 +259,10 @@ void EtcdOwnerManager::camaignLoop(Etcd::SessionPtr session) campaing_ctx = std::make_unique(); } auto && [new_leader, status] = client->campaign(campaing_ctx.get(), campaign_name, id, lease_id); + fiu_do_on(FailPoints::force_owner_mgr_campaign_failed, { + status = grpc::Status(grpc::StatusCode::UNKNOWN, " etcdserver: requested lease not found"); + LOG_WARNING(log, "force_owner_mgr_campaign_failed enabled, return failed grpc::Status"); + }); if (!status.ok()) { // if error, continue next campaign @@ -268,6 +273,9 @@ void EtcdOwnerManager::camaignLoop(Etcd::SessionPtr session) lease_id, status.error_code(), status.error_message()); + // The error is possible cause by lease invalid, create a new etcd + // session next round. + tryChangeState(State::CancelByLeaseInvalid); static constexpr std::chrono::milliseconds CampaignRetryInterval(200); std::this_thread::sleep_for(CampaignRetryInterval); continue; @@ -372,7 +380,7 @@ void EtcdOwnerManager::watchOwner(const String & owner_key, grpc::ClientContext } } // loop until key deleted or failed auto s = rw->Finish(); - LOG_DEBUG(log, "watch finish, code={} msg={}", s.error_code(), s.error_message()); + LOG_INFO(log, "watch finish, code={} msg={}", s.error_code(), s.error_message()); } catch (...) { diff --git a/dbms/src/TiDB/tests/gtest_owner_manager.cpp b/dbms/src/TiDB/tests/gtest_owner_manager.cpp index 95e0e034dbc..52d1bd67a0a 100644 --- a/dbms/src/TiDB/tests/gtest_owner_manager.cpp +++ b/dbms/src/TiDB/tests/gtest_owner_manager.cpp @@ -43,6 +43,7 @@ extern String getPrefix(String key); namespace FailPoints { extern const char force_owner_mgr_state[]; +extern const char force_owner_mgr_campaign_failed[]; extern const char force_fail_to_create_etcd_session[]; } // namespace FailPoints } // namespace DB @@ -371,6 +372,81 @@ try } CATCH +TEST_F(OwnerManagerTest, KeyDeletedWithSessionInvalid) +try +{ + auto etcd_endpoint = Poco::Environment::get("ETCD_ENDPOINT", ""); + if (etcd_endpoint.empty()) + { + const auto * t = ::testing::UnitTest::GetInstance()->current_test_info(); + LOG_INFO( + log, + "{}.{} is skipped because env ETCD_ENDPOINT not set. " + "Run it with an etcd cluster using `ETCD_ENDPOINT=127.0.0.1:2379 ./dbms/gtests_dbms ...`", + t->test_case_name(), + t->name()); + return; + } + + using namespace std::chrono_literals; + + auto ctx = TiFlashTestEnv::getContext(); + pingcap::ClusterConfig config; + pingcap::pd::ClientPtr pd_client = std::make_shared(Strings{etcd_endpoint}, config); + auto etcd_client = DB::Etcd::Client::create(pd_client, config); + const String id = "owner_0"; + auto owner0 + = std::static_pointer_cast(OwnerManager::createS3GCOwner(*ctx, id, etcd_client, test_ttl)); + auto owner_info = owner0->getOwnerID(); + EXPECT_EQ(owner_info.status, OwnerType::NoLeader) << magic_enum::enum_name(owner_info.status); + + std::mutex mtx; + std::condition_variable cv; + bool become_owner = false; + + owner0->setBeOwnerHook([&] { + { + std::unique_lock lk(mtx); + become_owner = true; + } + cv.notify_one(); + }); + + owner0->campaignOwner(); + { + std::unique_lock lk(mtx); + cv.wait(lk, [&] { return become_owner; }); + } + + ASSERT_TRUE(owner0->isOwner()); + { + auto owner_id = owner0->getOwnerID(); + EXPECT_EQ(owner_id.status, OwnerType::IsOwner); + EXPECT_EQ(owner_id.owner_id, id); + } + auto lease_0 = getLeaderLease(owner0.get()); + + LOG_INFO(log, "test wait for n*ttl"); + FailPointHelper::enableFailPoint(FailPoints::force_owner_mgr_state, EtcdOwnerManager::State::CancelByKeyDeleted); + FailPointHelper::enableFailPoint(FailPoints::force_owner_mgr_campaign_failed); + SCOPE_EXIT({ FailPointHelper::disableFailPoint(FailPoints::force_owner_mgr_state); }); + SCOPE_EXIT({ FailPointHelper::disableFailPoint(FailPoints::force_owner_mgr_campaign_failed); }); + changeState(owner0.get(), EtcdOwnerManager::State::CancelByKeyDeleted); + + std::this_thread::sleep_for(std::chrono::seconds(test_pause)); + ASSERT_TRUE(owner0->isOwner()); + { + auto owner_id = owner0->getOwnerID(); + EXPECT_EQ(owner_id.status, OwnerType::IsOwner); + EXPECT_EQ(owner_id.owner_id, id); + } + auto lease_1 = getLeaderLease(owner0.get()); // should be a new lease + EXPECT_NE(lease_0, lease_1) << "should use a new etcd lease for elec"; + + LOG_INFO(log, "test wait for n*ttl passed"); +} +CATCH + TEST_F(OwnerManagerTest, CreateEtcdSessionFail) try {