From 51d6ff0760141b2d7bd5f56a50528788dfd1948b Mon Sep 17 00:00:00 2001 From: Shulin Jia Date: Mon, 19 Aug 2024 16:20:39 -0700 Subject: [PATCH] Use previously calculated healthcheck when endpoints move priority levels repeatedly. This is done by changing how mutable_cross_priority_host_map_ tracks hosts. mutable_cross_priority_host_map_ is used to track if a host already exists in the previous configuration, and its healthcheck should not be recalculated. This worked only some of the time because it would remove and then add all hosts that have changed in a priority, to a map of string (ip addr:port) to Host instance. However this did not account for when an endpoint had two Host representatives in different priorities, as is in the case when an endpoint changes priorities and there is an edge case where a host can be removed from mutable_cross_priority_host_map_ before the "should we skip activate healthchecks" logic triggers. This PR fixes that by only removing an endpoint from mutable_cross_priority_host_map_ if that removal is executed from the lowest priority. This fix does assume that memberships in priorities are always calculated starting from the lowest number to the highest. Signed-off-by: Shulin Jia --- changelogs/current.yaml | 3 + source/common/upstream/upstream_impl.cc | 16 ++++- source/common/upstream/upstream_impl.h | 3 +- test/extensions/clusters/eds/eds_test.cc | 74 ++++++++++++++++++++++++ 4 files changed, 92 insertions(+), 4 deletions(-) diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 102c5f4a908b3..e9bcfa24a0cb3 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -89,6 +89,9 @@ minor_behavior_changes: bug_fixes: # *Changes expected to improve the state of the world and are unlikely to have negative effects* +- area: upstream + change: | + Use previously calculated healthcheck when endpoints move priority levels repeatedly. - area: dispatcher change: | Update approximate now after polling instead of before polling. This is only used by QUIC. diff --git a/source/common/upstream/upstream_impl.cc b/source/common/upstream/upstream_impl.cc index cece139a64ee6..b75c0898e6220 100644 --- a/source/common/upstream/upstream_impl.cc +++ b/source/common/upstream/upstream_impl.cc @@ -956,7 +956,7 @@ void MainPrioritySetImpl::updateHosts(uint32_t priority, UpdateHostsParams&& upd HostMapConstSharedPtr cross_priority_host_map) { ASSERT(cross_priority_host_map == nullptr, "External cross-priority host map is meaningless to MainPrioritySetImpl"); - updateCrossPriorityHostMap(hosts_added, hosts_removed); + updateCrossPriorityHostMap(priority, hosts_added, hosts_removed); PrioritySetImpl::updateHosts(priority, std::move(update_hosts_params), locality_weights, hosts_added, hosts_removed, seed, weighted_priority_health, @@ -972,7 +972,8 @@ HostMapConstSharedPtr MainPrioritySetImpl::crossPriorityHostMap() const { return const_cross_priority_host_map_; } -void MainPrioritySetImpl::updateCrossPriorityHostMap(const HostVector& hosts_added, +void MainPrioritySetImpl::updateCrossPriorityHostMap(uint32_t priority, + const HostVector& hosts_added, const HostVector& hosts_removed) { if (hosts_added.empty() && hosts_removed.empty()) { // No new hosts have been added and no old hosts have been removed. @@ -987,7 +988,16 @@ void MainPrioritySetImpl::updateCrossPriorityHostMap(const HostVector& hosts_add } for (const auto& host : hosts_removed) { - mutable_cross_priority_host_map_->erase(addressToString(host->address())); + const auto host_address = addressToString(host->address()); + const auto existing_host = mutable_cross_priority_host_map_->find(host_address); + if (existing_host != mutable_cross_priority_host_map_->end()) { + // Only delete from the current priority to protect from situations where + // the add operation was already executed and has already moved the metadata of the host + // from a higher priority value to a lower priority value. + if (existing_host->second->priority() == priority) { + mutable_cross_priority_host_map_->erase(host_address); + } + } } for (const auto& host : hosts_added) { diff --git a/source/common/upstream/upstream_impl.h b/source/common/upstream/upstream_impl.h index e7573e4f50fda..4ce4791a01c5f 100644 --- a/source/common/upstream/upstream_impl.h +++ b/source/common/upstream/upstream_impl.h @@ -794,7 +794,8 @@ class MainPrioritySetImpl : public PrioritySetImpl, public Logger::Loggable(); + EXPECT_CALL(*health_checker, start()); + EXPECT_CALL(*health_checker, addHostCheckCompleteCb(_)).Times(2); + cluster_->setHealthChecker(health_checker); + + auto add_endpoint = [&cluster_load_assignment](int port, int priority) { + auto* endpoints = cluster_load_assignment.add_endpoints(); + endpoints->set_priority(priority); + + auto* socket_address = endpoints->add_lb_endpoints() + ->mutable_endpoint() + ->mutable_address() + ->mutable_socket_address(); + socket_address->set_address("1.2.3.4"); + socket_address->set_port_value(port); + }; + + add_endpoint(80, 0); + add_endpoint(81, 0); + + doOnConfigUpdateVerifyNoThrow(cluster_load_assignment); + + { + auto& hosts = cluster_->prioritySet().hostSetsPerPriority()[0]->hosts(); + EXPECT_EQ(hosts.size(), 2); + + // Mark the hosts as healthy + for (auto& host : hosts) { + EXPECT_TRUE(host->healthFlagGet(Host::HealthFlag::FAILED_ACTIVE_HC)); + host->healthFlagClear(Host::HealthFlag::FAILED_ACTIVE_HC); + host->healthFlagClear(Host::HealthFlag::PENDING_ACTIVE_HC); + } + } + + std::vector priority_levels = {1, 2, 3, 2, 1}; + for (uint32_t priority : priority_levels) { + cluster_load_assignment.clear_endpoints(); + add_endpoint(80, priority); + add_endpoint(81, priority); + + doOnConfigUpdateVerifyNoThrow(cluster_load_assignment); + + { + for (uint32_t i = 0; i < cluster_->prioritySet().hostSetsPerPriority().size(); i++) { + auto& hosts = cluster_->prioritySet().hostSetsPerPriority()[i]->hosts(); + if (i == priority) { + // Priorities equal to this one should have the endpoints with port 80 and 81 + EXPECT_EQ(hosts.size(), 2); + } else { + // Priorities not equal to this one should now be empty. + EXPECT_EQ(hosts.size(), 0); + } + } + } + + { + auto& hosts = cluster_->prioritySet().hostSetsPerPriority()[priority]->hosts(); + + // The endpoints were healthy, so moving them around should preserve that. + EXPECT_FALSE(hosts[0]->healthFlagGet(Host::HealthFlag::FAILED_ACTIVE_HC)); + EXPECT_FALSE(hosts[0]->healthFlagGet(Host::HealthFlag::PENDING_DYNAMIC_REMOVAL)); + EXPECT_FALSE(hosts[1]->healthFlagGet(Host::HealthFlag::FAILED_ACTIVE_HC)); + EXPECT_FALSE(hosts[1]->healthFlagGet(Host::HealthFlag::PENDING_DYNAMIC_REMOVAL)); + } + } +} + // Verifies that if an endpoint is moved between priorities, the health check value // of the host is preserved TEST_F(EdsTest, EndpointMoved) {