From e97933bcb9f41a44cdb21f1c8812d5c22e357d36 Mon Sep 17 00:00:00 2001 From: pokom Date: Wed, 5 Jun 2024 14:18:43 -0400 Subject: [PATCH 1/5] fix(server): Add read and write timeouts There are a few documented scenarios where `kube-state-metrics` will lock up(#995, #1028). I believe a much simpler solution to ensure `kube-state-metrics` doesn't lock up and require a restart to server `/metrics` requests is to add default read and write timeouts and to allow them to be configurable. At Grafana, we've experienced a few scenarios where `kube-state-metrics` running in larger clusters falls behind and starts getting scraped multiple times. When this occurs, `kube-state-metrics` becomes completely unresponsive and requires a reboot. This is somewhat easily reproduceable(I'll provide a script in an issue) and causes other critical workloads(KEDA, VPA) to fail in weird ways. Adds two flags: - `server-read-timeout` - `server-write-timeout` Updates the metrics http server to set the `ReadTimeout` and `WriteTimeout` to the configured values. --- docs/developer/cli-arguments.md | 2 ++ pkg/app/server.go | 3 ++- pkg/options/options.go | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/developer/cli-arguments.md b/docs/developer/cli-arguments.md index 1918a7e4e8..82a2a1f3d7 100644 --- a/docs/developer/cli-arguments.md +++ b/docs/developer/cli-arguments.md @@ -67,6 +67,8 @@ Flags: --pod-namespace string Name of the namespace of the pod specified by --pod. When set, it is expected that --pod and --pod-namespace are both set. Most likely this should be passed via the downward API. This is used for auto-detecting sharding. If set, this has preference over statically configured sharding. This is experimental, it may be removed without notice. --port int Port to expose metrics on. (default 8080) --resources string Comma-separated list of Resources to be enabled. Defaults to "certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments" + --server-read-timeout duration The maximum duration for reading the entire request, including the body. (default 30s) + --server-write-timeout duration The maximum duration before timing out writes of the response. (default 1m0s) --shard int32 The instances shard nominal (zero indexed) within the total number of shards. (default 0) --skip_headers If true, avoid header prefixes in the log messages --skip_log_headers If true, avoid headers when opening log files (no effect when -logtostderr=true) diff --git a/pkg/app/server.go b/pkg/app/server.go index aa5b2c916c..c079498460 100644 --- a/pkg/app/server.go +++ b/pkg/app/server.go @@ -326,6 +326,8 @@ func RunKubeStateMetrics(ctx context.Context, opts *options.Options) error { metricsServer := http.Server{ Handler: metricsMux, ReadHeaderTimeout: 5 * time.Second, + ReadTimeout: opts.ServerReadTimeout, + WriteTimeout: opts.ServerWriteTimeout, } metricsFlags := web.FlagConfig{ WebListenAddresses: &[]string{metricsServerListenAddress}, @@ -401,7 +403,6 @@ func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prome mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace)) mux.Handle(metricsPath, promhttp.InstrumentHandlerDuration(durationObserver, m)) - // Add healthzPath mux.HandleFunc(healthzPath, func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) diff --git a/pkg/options/options.go b/pkg/options/options.go index 09adf8939a..f8c707510f 100644 --- a/pkg/options/options.go +++ b/pkg/options/options.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "strings" + "time" "github.com/prometheus/common/version" "github.com/spf13/cobra" @@ -55,6 +56,8 @@ type Options struct { TelemetryPort int `yaml:"telemetry_port"` TotalShards int `yaml:"total_shards"` UseAPIServerCache bool `yaml:"use_api_server_cache"` + ServerReadTimeout time.Duration `yaml:"server_read_timeout"` + ServerWriteTimeout time.Duration `yaml:"server_write_timeout"` Config string @@ -146,6 +149,8 @@ func (o *Options) AddFlags(cmd *cobra.Command) { o.cmd.Flags().Var(&o.Namespaces, "namespaces", fmt.Sprintf("Comma-separated list of namespaces to be enabled. Defaults to %q", &DefaultNamespaces)) o.cmd.Flags().Var(&o.NamespacesDenylist, "namespaces-denylist", "Comma-separated list of namespaces not to be enabled. If namespaces and namespaces-denylist are both set, only namespaces that are excluded in namespaces-denylist will be used.") o.cmd.Flags().Var(&o.Resources, "resources", fmt.Sprintf("Comma-separated list of Resources to be enabled. Defaults to %q", &DefaultResources)) + o.cmd.Flags().DurationVar(&o.ServerReadTimeout, "server-read-timeout", 30*time.Second, "The maximum duration for reading the entire request, including the body.") + o.cmd.Flags().DurationVar(&o.ServerWriteTimeout, "server-write-timeout", 60*time.Second, "The maximum duration before timing out writes of the response.") } // Parse parses the flag definitions from the argument list. From b4f032ecfdf60953ec193754696152d19a1d8887 Mon Sep 17 00:00:00 2001 From: pokom Date: Thu, 6 Jun 2024 13:11:00 -0400 Subject: [PATCH 2/5] Add additional flags for IdleTimeouts --- docs/developer/cli-arguments.md | 2 ++ pkg/app/server.go | 3 ++- pkg/options/options.go | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/developer/cli-arguments.md b/docs/developer/cli-arguments.md index 82a2a1f3d7..817cb8b375 100644 --- a/docs/developer/cli-arguments.md +++ b/docs/developer/cli-arguments.md @@ -67,6 +67,8 @@ Flags: --pod-namespace string Name of the namespace of the pod specified by --pod. When set, it is expected that --pod and --pod-namespace are both set. Most likely this should be passed via the downward API. This is used for auto-detecting sharding. If set, this has preference over statically configured sharding. This is experimental, it may be removed without notice. --port int Port to expose metrics on. (default 8080) --resources string Comma-separated list of Resources to be enabled. Defaults to "certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments" + --server-idle-timeout duration The maximum amount of time to wait for the next request when keep-alives are enabled. (default 5m0s) + --server-read-header-timeout duration The maximum duration for reading the header of requests. (default 5s) --server-read-timeout duration The maximum duration for reading the entire request, including the body. (default 30s) --server-write-timeout duration The maximum duration before timing out writes of the response. (default 1m0s) --shard int32 The instances shard nominal (zero indexed) within the total number of shards. (default 0) diff --git a/pkg/app/server.go b/pkg/app/server.go index c079498460..3efd97b354 100644 --- a/pkg/app/server.go +++ b/pkg/app/server.go @@ -325,9 +325,10 @@ func RunKubeStateMetrics(ctx context.Context, opts *options.Options) error { metricsServerListenAddress := net.JoinHostPort(opts.Host, strconv.Itoa(opts.Port)) metricsServer := http.Server{ Handler: metricsMux, - ReadHeaderTimeout: 5 * time.Second, + ReadHeaderTimeout: opts.ServerReadHeaderTimeout, ReadTimeout: opts.ServerReadTimeout, WriteTimeout: opts.ServerWriteTimeout, + IdleTimeout: opts.ServerIdleTimeout, } metricsFlags := web.FlagConfig{ WebListenAddresses: &[]string{metricsServerListenAddress}, diff --git a/pkg/options/options.go b/pkg/options/options.go index f8c707510f..5d279d6f52 100644 --- a/pkg/options/options.go +++ b/pkg/options/options.go @@ -58,6 +58,8 @@ type Options struct { UseAPIServerCache bool `yaml:"use_api_server_cache"` ServerReadTimeout time.Duration `yaml:"server_read_timeout"` ServerWriteTimeout time.Duration `yaml:"server_write_timeout"` + ServerIdleTimeout time.Duration `yaml:"server_idle_timeout"` + ServerReadHeaderTimeout time.Duration `yaml:"server_read_header_timeout"` Config string @@ -149,8 +151,11 @@ func (o *Options) AddFlags(cmd *cobra.Command) { o.cmd.Flags().Var(&o.Namespaces, "namespaces", fmt.Sprintf("Comma-separated list of namespaces to be enabled. Defaults to %q", &DefaultNamespaces)) o.cmd.Flags().Var(&o.NamespacesDenylist, "namespaces-denylist", "Comma-separated list of namespaces not to be enabled. If namespaces and namespaces-denylist are both set, only namespaces that are excluded in namespaces-denylist will be used.") o.cmd.Flags().Var(&o.Resources, "resources", fmt.Sprintf("Comma-separated list of Resources to be enabled. Defaults to %q", &DefaultResources)) + o.cmd.Flags().DurationVar(&o.ServerReadTimeout, "server-read-timeout", 30*time.Second, "The maximum duration for reading the entire request, including the body.") o.cmd.Flags().DurationVar(&o.ServerWriteTimeout, "server-write-timeout", 60*time.Second, "The maximum duration before timing out writes of the response.") + o.cmd.Flags().DurationVar(&o.ServerIdleTimeout, "server-idle-timeout", 5*time.Minute, "The maximum amount of time to wait for the next request when keep-alives are enabled.") + o.cmd.Flags().DurationVar(&o.ServerReadHeaderTimeout, "server-read-header-timeout", 5*time.Second, "The maximum duration for reading the header of requests.") } // Parse parses the flag definitions from the argument list. From 28dbd26540485c8d8f7a6009193e69a7596b25f0 Mon Sep 17 00:00:00 2001 From: pokom Date: Fri, 7 Jun 2024 13:14:57 -0400 Subject: [PATCH 3/5] Create variables for default values of new flags --- pkg/options/options.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pkg/options/options.go b/pkg/options/options.go index 5d279d6f52..f537e077c7 100644 --- a/pkg/options/options.go +++ b/pkg/options/options.go @@ -28,6 +28,16 @@ import ( "k8s.io/klog/v2" ) +var ( + // Align with the default scrape interval from Prometheus: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config + defaultServerReadTimeout = 60 * time.Second + defaultServerWriteTimeout = 60 * time.Second + // ServerIdleTimeout is set to 5 minutes to match the default idle timeout of Prometheus scrape clients + // https://github.com/prometheus/common/blob/318309999517402ad522877ac7e55fa650a11114/config/http_config.go#L55 + defaultServerIdleTimeout = 5 * time.Minute + defaultServerReadHeaderTimeout = 5 * time.Second +) + // Options are the configurable parameters for kube-state-metrics. type Options struct { AnnotationsAllowList LabelsAllowList `yaml:"annotations_allow_list"` @@ -152,10 +162,10 @@ func (o *Options) AddFlags(cmd *cobra.Command) { o.cmd.Flags().Var(&o.NamespacesDenylist, "namespaces-denylist", "Comma-separated list of namespaces not to be enabled. If namespaces and namespaces-denylist are both set, only namespaces that are excluded in namespaces-denylist will be used.") o.cmd.Flags().Var(&o.Resources, "resources", fmt.Sprintf("Comma-separated list of Resources to be enabled. Defaults to %q", &DefaultResources)) - o.cmd.Flags().DurationVar(&o.ServerReadTimeout, "server-read-timeout", 30*time.Second, "The maximum duration for reading the entire request, including the body.") - o.cmd.Flags().DurationVar(&o.ServerWriteTimeout, "server-write-timeout", 60*time.Second, "The maximum duration before timing out writes of the response.") - o.cmd.Flags().DurationVar(&o.ServerIdleTimeout, "server-idle-timeout", 5*time.Minute, "The maximum amount of time to wait for the next request when keep-alives are enabled.") - o.cmd.Flags().DurationVar(&o.ServerReadHeaderTimeout, "server-read-header-timeout", 5*time.Second, "The maximum duration for reading the header of requests.") + o.cmd.Flags().DurationVar(&o.ServerReadTimeout, "server-read-timeout", defaultServerReadTimeout, "The maximum duration for reading the entire request, including the body. Align with the scrape interval or timeout of scraping clients. ") + o.cmd.Flags().DurationVar(&o.ServerWriteTimeout, "server-write-timeout", defaultServerWriteTimeout, "The maximum duration before timing out writes of the response. Align with the scrape interval or timeout of scraping clients..") + o.cmd.Flags().DurationVar(&o.ServerIdleTimeout, "server-idle-timeout", defaultServerIdleTimeout, "The maximum amount of time to wait for the next request when keep-alives are enabled. Align with the idletimeout of your scrape clients.") + o.cmd.Flags().DurationVar(&o.ServerReadHeaderTimeout, "server-read-header-timeout", defaultServerReadHeaderTimeout, "The maximum duration for reading the header of requests.") } // Parse parses the flag definitions from the argument list. From ee3913967e6ccffbb482ad477fdd17972d48509e Mon Sep 17 00:00:00 2001 From: Mark Date: Fri, 7 Jun 2024 14:13:44 -0400 Subject: [PATCH 4/5] Update docs/developer/cli-arguments.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Manuel RĂ¼ger --- docs/developer/cli-arguments.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/developer/cli-arguments.md b/docs/developer/cli-arguments.md index 817cb8b375..aca2697e64 100644 --- a/docs/developer/cli-arguments.md +++ b/docs/developer/cli-arguments.md @@ -69,7 +69,7 @@ Flags: --resources string Comma-separated list of Resources to be enabled. Defaults to "certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments" --server-idle-timeout duration The maximum amount of time to wait for the next request when keep-alives are enabled. (default 5m0s) --server-read-header-timeout duration The maximum duration for reading the header of requests. (default 5s) - --server-read-timeout duration The maximum duration for reading the entire request, including the body. (default 30s) + --server-read-timeout duration The maximum duration for reading the entire request, including the body. (default 1m0s) --server-write-timeout duration The maximum duration before timing out writes of the response. (default 1m0s) --shard int32 The instances shard nominal (zero indexed) within the total number of shards. (default 0) --skip_headers If true, avoid header prefixes in the log messages From cd460fef2901206b455f5b8562e789f736e59902 Mon Sep 17 00:00:00 2001 From: pokom Date: Tue, 11 Jun 2024 06:48:22 -0400 Subject: [PATCH 5/5] Update cli-arguments.md --- docs/developer/cli-arguments.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/developer/cli-arguments.md b/docs/developer/cli-arguments.md index aca2697e64..6648c72278 100644 --- a/docs/developer/cli-arguments.md +++ b/docs/developer/cli-arguments.md @@ -67,10 +67,10 @@ Flags: --pod-namespace string Name of the namespace of the pod specified by --pod. When set, it is expected that --pod and --pod-namespace are both set. Most likely this should be passed via the downward API. This is used for auto-detecting sharding. If set, this has preference over statically configured sharding. This is experimental, it may be removed without notice. --port int Port to expose metrics on. (default 8080) --resources string Comma-separated list of Resources to be enabled. Defaults to "certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments" - --server-idle-timeout duration The maximum amount of time to wait for the next request when keep-alives are enabled. (default 5m0s) + --server-idle-timeout duration The maximum amount of time to wait for the next request when keep-alives are enabled. Align with the idletimeout of your scrape clients. (default 5m0s) --server-read-header-timeout duration The maximum duration for reading the header of requests. (default 5s) - --server-read-timeout duration The maximum duration for reading the entire request, including the body. (default 1m0s) - --server-write-timeout duration The maximum duration before timing out writes of the response. (default 1m0s) + --server-read-timeout duration The maximum duration for reading the entire request, including the body. Align with the scrape interval or timeout of scraping clients. (default 1m0s) + --server-write-timeout duration The maximum duration before timing out writes of the response. Align with the scrape interval or timeout of scraping clients.. (default 1m0s) --shard int32 The instances shard nominal (zero indexed) within the total number of shards. (default 0) --skip_headers If true, avoid header prefixes in the log messages --skip_log_headers If true, avoid headers when opening log files (no effect when -logtostderr=true)