Skip to content

Commit

Permalink
getaddrinfo: retrying on eai_again (envoyproxy#33130)
Browse files Browse the repository at this point in the history
Risk Level: low
Testing: new tests
Docs Changes: n/a
Release Notes: inline
[Optional Runtime guard:] yes

Signed-off-by: Alyssa Wilk <alyssar@chromium.org>
  • Loading branch information
alyssawilk authored Mar 27, 2024
1 parent 03a782a commit 7f6df24
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 1 deletion.
4 changes: 4 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ minor_behavior_changes:
- area: dns
change: |
Allowing <envoy_v3_api_field_extensions.common.dynamic_forward_proxy.v3.DnsCacheConfig.dns_min_refresh_rate>` to go as low as 1s.
- area: dns
change: |
Fixing a bug in the getaddrinfo resolver where it did not reresolve on EAI_AGAIN. This behavioral change can be temporarily
reverted by setting ``envoy.reloadable_features.dns_reresolve_on_eai_again`` to false.
- area: upstream
change: |
Upstream now excludes hosts set to ``DRAINING`` state via EDS from load balancing and panic routing
Expand Down
1 change: 1 addition & 0 deletions source/common/runtime/runtime_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ RUNTIME_GUARD(envoy_reloadable_features_detect_and_raise_rst_tcp_connection);
RUNTIME_GUARD(envoy_reloadable_features_dfp_mixed_scheme);
RUNTIME_GUARD(envoy_reloadable_features_disallow_quic_client_udp_mmsg);
RUNTIME_GUARD(envoy_reloadable_features_dns_cache_set_first_resolve_complete);
RUNTIME_GUARD(envoy_reloadable_features_dns_reresolve_on_eai_again);
RUNTIME_GUARD(envoy_reloadable_features_edf_lb_host_scheduler_init_fix);
RUNTIME_GUARD(envoy_reloadable_features_edf_lb_locality_scheduler_init_fix);
RUNTIME_GUARD(envoy_reloadable_features_enable_compression_bomb_protection);
Expand Down
10 changes: 10 additions & 0 deletions source/extensions/network/dns_resolver/getaddrinfo/getaddrinfo.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggable<Logge

while (true) {
PendingQuerySharedPtr next_query;
const bool reresolve =
Runtime::runtimeFeatureEnabled("envoy.reloadable_features.dns_reresolve_on_eai_again");
{
absl::MutexLock guard(&mutex_);
auto condition = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) {
Expand All @@ -157,6 +159,9 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggable<Logge

next_query = std::move(pending_queries_.front());
pending_queries_.pop_front();
if (reresolve && next_query->cancelled_) {
continue;
}
}

ENVOY_LOG(debug, "popped pending query [{}]", next_query->dns_name_);
Expand All @@ -178,6 +183,11 @@ class GetAddrInfoDnsResolver : public DnsResolver, public Logger::Loggable<Logge
auto addrinfo_wrapper = AddrInfoWrapper(addrinfo_result_do_not_use);
if (rc.return_value_ == 0) {
response = processResponse(*next_query, addrinfo_wrapper.get());
} else if (reresolve && rc.return_value_ == EAI_AGAIN) {
ENVOY_LOG(debug, "retrying query [{}]", next_query->dns_name_);
absl::MutexLock guard(&mutex_);
pending_queries_.push_back(next_query);
continue;
} else {
// TODO(mattklein123): Handle some errors differently such as `EAI_NODATA`.
ENVOY_LOG(debug, "getaddrinfo failed with rc={} errno={}", gai_strerror(rc.return_value_),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ TEST_F(GetAddrInfoDnsImplTest, Failure) {
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls(&os_sys_calls_);

EXPECT_CALL(os_sys_calls_, getaddrinfo(_, _, _, _))
.WillOnce(Return(Api::SysCallIntResult{EAI_AGAIN, 0}));
.WillOnce(Return(Api::SysCallIntResult{EAI_FAIL, 0}));
resolver_->resolve(
"localhost", DnsLookupFamily::All,
[this](DnsResolver::ResolutionStatus status, std::list<DnsResponse>&& response) {
Expand All @@ -153,6 +153,46 @@ TEST_F(GetAddrInfoDnsImplTest, Failure) {
dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit);
}

TEST_F(GetAddrInfoDnsImplTest, TryAgainAndSuccess) {
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls(&os_sys_calls_);

// 2 calls - one EAGAIN, one success.
EXPECT_CALL(os_sys_calls_, getaddrinfo(_, _, _, _))
.Times(2)
.WillOnce(Return(Api::SysCallIntResult{EAI_AGAIN, 0}))
.WillOnce(Return(Api::SysCallIntResult{0, 0}));
resolver_->resolve(
"localhost", DnsLookupFamily::All,
[this](DnsResolver::ResolutionStatus status, std::list<DnsResponse>&& response) {
EXPECT_EQ(status, DnsResolver::ResolutionStatus::Success);
EXPECT_TRUE(response.empty());

dispatcher_->exit();
});

dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit);
}

TEST_F(GetAddrInfoDnsImplTest, TryAgainThenCancel) {
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls(&os_sys_calls_);

std::atomic<ActiveDnsQuery*> query = nullptr;

EXPECT_CALL(os_sys_calls_, getaddrinfo(_, _, _, _))
.Times(testing::AnyNumber())
.WillOnce(Invoke([&](const char*, const char*, const addrinfo*, addrinfo**) {
query.load()->cancel(ActiveDnsQuery::CancelReason::QueryAbandoned);
dispatcher_->exit();
return Api::SysCallIntResult{EAI_AGAIN, 0};
}));
query =
resolver_->resolve("localhost", DnsLookupFamily::All,
[](DnsResolver::ResolutionStatus, std::list<DnsResponse>&&) { FAIL(); });

dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit);
resolver_.reset();
}

TEST_F(GetAddrInfoDnsImplTest, All) {
// See https://github.com/envoyproxy/envoy/issues/28504.
DISABLE_UNDER_WINDOWS;
Expand Down

0 comments on commit 7f6df24

Please sign in to comment.