From e2998e1332cddb12f3d35db1e6f7d9459b643e86 Mon Sep 17 00:00:00 2001 From: Ali Beyad Date: Mon, 6 May 2024 09:02:53 -0400 Subject: [PATCH] getaddrinfo: Treat EAI_NONAME and EAI_NODATA as non-failures (#33934) When the DNS resolution status is Failure, the DNS cache treats it as a retryable error, using a retry policy to enable a timer for retries. With EAI_NONAME and EAI_NODATA, we do not expect these to be transient failures, so the getaddrinfo now treats it as a succesful query with empty results, so we don't trigger the failure refresh/retry timer. NOTE: The c-ares resolver also treats NODATA and NONAME results as successful queries. Signed-off-by: Ali Beyad * todo Signed-off-by: Ali Beyad --------- Signed-off-by: Ali Beyad --- changelogs/current.yaml | 6 ++++ envoy/network/dns.h | 3 ++ source/common/runtime/runtime_features.cc | 1 + .../dns_resolver/getaddrinfo/getaddrinfo.cc | 12 ++++++- .../getaddrinfo/getaddrinfo_test.cc | 34 +++++++++++++++++++ tools/spelling/spelling_dictionary.txt | 2 ++ 6 files changed, 57 insertions(+), 1 deletion(-) diff --git a/changelogs/current.yaml b/changelogs/current.yaml index d5bb493b2931..304eebdc438a 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -30,6 +30,12 @@ minor_behavior_changes: - area: statistics change: | Hot restart statistics like hot_restart_epoch are only set when hot restart is enabled. +- area: dns + change: | + Changes the behavior of the getaddrinfo DNS resolver so that it treats EAI_NODATA and EAI_NONAME + as successful queries with empty results, instead of as DNS failures. This change brings the + getaddrinfo behavior in-line with the c-ares resolver behavior. This behavior can be reverted by + setting the runtime guard ``envoy.reloadable_features.dns_nodata_noname_is_success`` to false. bug_fixes: # *Changes expected to improve the state of the world and are unlikely to have negative effects* diff --git a/envoy/network/dns.h b/envoy/network/dns.h index d3e7504b02a9..3d06542685fa 100644 --- a/envoy/network/dns.h +++ b/envoy/network/dns.h @@ -86,6 +86,9 @@ class DnsResolver { /** * Final status for a DNS resolution. + * TODO(abeyad): Rename `Success` to `Completed` or something similar. DNS resolution can return + * result statuses like NODATA and NONAME, which indicate successful completion of the query but + * no results, and `Completed` is a more accurate way of reflecting that. */ enum class ResolutionStatus { Success, Failure }; diff --git a/source/common/runtime/runtime_features.cc b/source/common/runtime/runtime_features.cc index 7ca0e3ea3ce4..36067076890e 100644 --- a/source/common/runtime/runtime_features.cc +++ b/source/common/runtime/runtime_features.cc @@ -37,6 +37,7 @@ RUNTIME_GUARD(envoy_reloadable_features_defer_processing_backedup_streams); RUNTIME_GUARD(envoy_reloadable_features_dfp_mixed_scheme); RUNTIME_GUARD(envoy_reloadable_features_disallow_quic_client_udp_mmsg); RUNTIME_GUARD(envoy_reloadable_features_dns_cache_set_first_resolve_complete); +RUNTIME_GUARD(envoy_reloadable_features_dns_nodata_noname_is_success); RUNTIME_GUARD(envoy_reloadable_features_dns_reresolve_on_eai_again); RUNTIME_GUARD(envoy_reloadable_features_edf_lb_host_scheduler_init_fix); RUNTIME_GUARD(envoy_reloadable_features_edf_lb_locality_scheduler_init_fix); diff --git a/source/extensions/network/dns_resolver/getaddrinfo/getaddrinfo.cc b/source/extensions/network/dns_resolver/getaddrinfo/getaddrinfo.cc index 5b4a4ffb4cfc..d055fa728c25 100644 --- a/source/extensions/network/dns_resolver/getaddrinfo/getaddrinfo.cc +++ b/source/extensions/network/dns_resolver/getaddrinfo/getaddrinfo.cc @@ -114,6 +114,8 @@ void GetAddrInfoDnsResolver::resolveThreadRoutine() { PendingQuerySharedPtr next_query; const bool reresolve = Runtime::runtimeFeatureEnabled("envoy.reloadable_features.dns_reresolve_on_eai_again"); + const bool treat_nodata_noname_as_success = + Runtime::runtimeFeatureEnabled("envoy.reloadable_features.dns_nodata_noname_is_success"); { absl::MutexLock guard(&mutex_); auto condition = [this]() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { @@ -155,8 +157,16 @@ void GetAddrInfoDnsResolver::resolveThreadRoutine() { absl::MutexLock guard(&mutex_); pending_queries_.push_back(next_query); continue; + } else if (treat_nodata_noname_as_success && + (rc.return_value_ == EAI_NONAME || rc.return_value_ == EAI_NODATA)) { + // Treat NONAME and NODATA as DNS records with no results. + // NODATA and NONAME are typically not transient failures, so we don't expect success if + // the DNS query is retried. + // NOTE: this is also how the c-ares resolver treats NONAME and NODATA: + // https://github.com/envoyproxy/envoy/blob/099d85925b32ce8bf06e241ee433375a0a3d751b/source/extensions/network/dns_resolver/cares/dns_impl.h#L109-L111. + ENVOY_LOG(debug, "getaddrinfo no results rc={}", gai_strerror(rc.return_value_)); + response = std::make_pair(ResolutionStatus::Success, std::list()); } else { - // TODO(mattklein123): Handle some errors differently such as `EAI_NODATA`. ENVOY_LOG(debug, "getaddrinfo failed with rc={} errno={}", gai_strerror(rc.return_value_), errorDetails(rc.errno_)); response = std::make_pair(ResolutionStatus::Failure, std::list()); diff --git a/test/extensions/network/dns_resolver/getaddrinfo/getaddrinfo_test.cc b/test/extensions/network/dns_resolver/getaddrinfo/getaddrinfo_test.cc index c6e6a2f6156a..f6bd65f270bd 100644 --- a/test/extensions/network/dns_resolver/getaddrinfo/getaddrinfo_test.cc +++ b/test/extensions/network/dns_resolver/getaddrinfo/getaddrinfo_test.cc @@ -153,6 +153,40 @@ TEST_F(GetAddrInfoDnsImplTest, Failure) { dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit); } +TEST_F(GetAddrInfoDnsImplTest, NoData) { + TestThreadsafeSingletonInjector os_calls(&os_sys_calls_); + + EXPECT_CALL(os_sys_calls_, getaddrinfo(_, _, _, _)) + .WillOnce(Return(Api::SysCallIntResult{EAI_NODATA, 0})); + resolver_->resolve( + "localhost", DnsLookupFamily::All, + [this](DnsResolver::ResolutionStatus status, std::list&& response) { + EXPECT_EQ(status, DnsResolver::ResolutionStatus::Success); + EXPECT_TRUE(response.empty()); + + dispatcher_->exit(); + }); + + dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit); +} + +TEST_F(GetAddrInfoDnsImplTest, NoName) { + TestThreadsafeSingletonInjector os_calls(&os_sys_calls_); + + EXPECT_CALL(os_sys_calls_, getaddrinfo(_, _, _, _)) + .WillOnce(Return(Api::SysCallIntResult{EAI_NONAME, 0})); + resolver_->resolve( + "localhost", DnsLookupFamily::All, + [this](DnsResolver::ResolutionStatus status, std::list&& response) { + EXPECT_EQ(status, DnsResolver::ResolutionStatus::Success); + EXPECT_TRUE(response.empty()); + + dispatcher_->exit(); + }); + + dispatcher_->run(Event::Dispatcher::RunType::RunUntilExit); +} + TEST_F(GetAddrInfoDnsImplTest, TryAgainAndSuccess) { TestThreadsafeSingletonInjector os_calls(&os_sys_calls_); diff --git a/tools/spelling/spelling_dictionary.txt b/tools/spelling/spelling_dictionary.txt index 2cbd354446ca..38acc5051614 100644 --- a/tools/spelling/spelling_dictionary.txt +++ b/tools/spelling/spelling_dictionary.txt @@ -301,9 +301,11 @@ NGHTTP NLOHMANN NOAUTH NOCHECKRESP +NODATA NODELAY NOLINT NOLINTNEXTLINE +NONAME NONBLOCK NONCES NOSORT