diff --git a/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst b/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst index 874b9d9f28fa..b6e5389e7da3 100644 --- a/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst +++ b/docs/root/configuration/upstream/cluster_manager/cluster_stats.rst @@ -94,6 +94,7 @@ Every cluster has a statistics tree rooted at *cluster..* with the followi update_attempt, Counter, Total attempted cluster membership updates by service discovery update_success, Counter, Total successful cluster membership updates by service discovery update_failure, Counter, Total failed cluster membership updates by service discovery + update_duration, Histogram, Amount of time spent updating configs update_empty, Counter, Total cluster membership updates ending with empty cluster load assignment and continuing with previous config update_no_rebuild, Counter, Total successful cluster membership updates that didn't result in any cluster load balancing structure rebuilds version, Gauge, Hash of the contents from the last successful API fetch diff --git a/include/envoy/config/subscription.h b/include/envoy/config/subscription.h index c8b0ae54c470..05e1967f3e9d 100644 --- a/include/envoy/config/subscription.h +++ b/include/envoy/config/subscription.h @@ -201,7 +201,7 @@ using SubscriptionPtr = std::unique_ptr; /** * Per subscription stats. @see stats_macros.h */ -#define ALL_SUBSCRIPTION_STATS(COUNTER, GAUGE, TEXT_READOUT) \ +#define ALL_SUBSCRIPTION_STATS(COUNTER, GAUGE, TEXT_READOUT, HISTOGRAM) \ COUNTER(init_fetch_timeout) \ COUNTER(update_attempt) \ COUNTER(update_failure) \ @@ -209,6 +209,7 @@ using SubscriptionPtr = std::unique_ptr; COUNTER(update_success) \ GAUGE(update_time, NeverImport) \ GAUGE(version, NeverImport) \ + HISTOGRAM(update_duration, Milliseconds) \ TEXT_READOUT(version_text) /** @@ -216,7 +217,7 @@ using SubscriptionPtr = std::unique_ptr; */ struct SubscriptionStats { ALL_SUBSCRIPTION_STATS(GENERATE_COUNTER_STRUCT, GENERATE_GAUGE_STRUCT, - GENERATE_TEXT_READOUT_STRUCT) + GENERATE_TEXT_READOUT_STRUCT, GENERATE_HISTOGRAM_STRUCT) }; } // namespace Config diff --git a/source/common/config/grpc_subscription_impl.cc b/source/common/config/grpc_subscription_impl.cc index 263e1ca92b2d..62a6c5387651 100644 --- a/source/common/config/grpc_subscription_impl.cc +++ b/source/common/config/grpc_subscription_impl.cc @@ -1,5 +1,7 @@ #include "common/config/grpc_subscription_impl.h" +#include + #include "common/common/assert.h" #include "common/common/logger.h" #include "common/common/utility.h" @@ -12,6 +14,8 @@ namespace Envoy { namespace Config { +constexpr std::chrono::milliseconds UpdateDurationLogThreshold = std::chrono::milliseconds(50); + GrpcSubscriptionImpl::GrpcSubscriptionImpl(GrpcMuxSharedPtr grpc_mux, SubscriptionCallbacks& callbacks, OpaqueResourceDecoder& resource_decoder, @@ -69,14 +73,23 @@ void GrpcSubscriptionImpl::onConfigUpdate(const std::vector( + dispatcher_.timeSource().monotonicTime() - start); stats_.update_success_.inc(); stats_.update_attempt_.inc(); stats_.update_time_.set(DateUtil::nowToMilliseconds(dispatcher_.timeSource())); stats_.version_.set(HashUtil::xxHash64(version_info)); stats_.version_text_.set(version_info); + stats_.update_duration_.recordValue(update_duration.count()); ENVOY_LOG(debug, "gRPC config for {} accepted with {} resources with version {}", type_url_, resources.size(), version_info); + + if (update_duration > UpdateDurationLogThreshold) { + ENVOY_LOG(debug, "gRPC config update took {} ms! Resources names: {}", update_duration.count(), + absl::StrJoin(resources, ",", ResourceNameFormatter())); + } } void GrpcSubscriptionImpl::onConfigUpdate( @@ -85,11 +98,15 @@ void GrpcSubscriptionImpl::onConfigUpdate( const std::string& system_version_info) { disableInitFetchTimeoutTimer(); stats_.update_attempt_.inc(); + auto start = dispatcher_.timeSource().monotonicTime(); callbacks_.onConfigUpdate(added_resources, removed_resources, system_version_info); + std::chrono::milliseconds update_duration = std::chrono::duration_cast( + dispatcher_.timeSource().monotonicTime() - start); stats_.update_success_.inc(); stats_.update_time_.set(DateUtil::nowToMilliseconds(dispatcher_.timeSource())); stats_.version_.set(HashUtil::xxHash64(system_version_info)); stats_.version_text_.set(system_version_info); + stats_.update_duration_.recordValue(update_duration.count()); } void GrpcSubscriptionImpl::onConfigUpdateFailed(ConfigUpdateFailureReason reason, diff --git a/source/common/config/grpc_subscription_impl.h b/source/common/config/grpc_subscription_impl.h index b595e191d914..b1892d4ac046 100644 --- a/source/common/config/grpc_subscription_impl.h +++ b/source/common/config/grpc_subscription_impl.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include "envoy/config/grpc_mux.h" @@ -59,6 +60,12 @@ class GrpcSubscriptionImpl : public Subscription, Event::TimerPtr init_fetch_timeout_timer_; const bool is_aggregated_; const bool use_namespace_matching_; + + struct ResourceNameFormatter { + void operator()(std::string* out, const Config::DecodedResourceRef& resource) { + out->append(resource.get().name()); + } + }; }; using GrpcSubscriptionImplPtr = std::unique_ptr; diff --git a/source/common/config/utility.h b/source/common/config/utility.h index 828dd82ba308..d4d0e5c891ee 100644 --- a/source/common/config/utility.h +++ b/source/common/config/utility.h @@ -14,6 +14,7 @@ #include "envoy/server/filter_config.h" #include "envoy/stats/histogram.h" #include "envoy/stats/scope.h" +#include "envoy/stats/stats_macros.h" #include "envoy/stats/stats_matcher.h" #include "envoy/stats/tag_producer.h" #include "envoy/upstream/cluster_manager.h" @@ -238,8 +239,8 @@ class Utility { * @return SubscriptionStats for scope. */ static SubscriptionStats generateStats(Stats::Scope& scope) { - return { - ALL_SUBSCRIPTION_STATS(POOL_COUNTER(scope), POOL_GAUGE(scope), POOL_TEXT_READOUT(scope))}; + return {ALL_SUBSCRIPTION_STATS(POOL_COUNTER(scope), POOL_GAUGE(scope), POOL_TEXT_READOUT(scope), + POOL_HISTOGRAM(scope))}; } /**