From e97933bcb9f41a44cdb21f1c8812d5c22e357d36 Mon Sep 17 00:00:00 2001
From: pokom <mark.poko@grafana.com>
Date: Wed, 5 Jun 2024 14:18:43 -0400
Subject: [PATCH 1/5] fix(server): Add read and write timeouts

There are a few documented scenarios where `kube-state-metrics` will
lock up(#995, #1028). I believe a much simpler solution to ensure
`kube-state-metrics` doesn't lock up and require a restart to server
`/metrics` requests is to add default read and write timeouts and to
allow them to be configurable. At Grafana, we've experienced a few
scenarios where `kube-state-metrics` running in larger clusters falls
behind and starts getting scraped multiple times. When this occurs,
`kube-state-metrics` becomes completely unresponsive and requires a
reboot. This is somewhat easily reproduceable(I'll provide a script in
an issue) and causes other critical workloads(KEDA, VPA) to fail in
weird ways.

Adds two flags:
- `server-read-timeout`
- `server-write-timeout`

Updates the metrics http server to set the `ReadTimeout` and
`WriteTimeout` to the configured values.
---
 docs/developer/cli-arguments.md | 2 ++
 pkg/app/server.go               | 3 ++-
 pkg/options/options.go          | 5 +++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/docs/developer/cli-arguments.md b/docs/developer/cli-arguments.md
index 1918a7e4e8..82a2a1f3d7 100644
--- a/docs/developer/cli-arguments.md
+++ b/docs/developer/cli-arguments.md
@@ -67,6 +67,8 @@ Flags:
       --pod-namespace string                       Name of the namespace of the pod specified by --pod. When set, it is expected that --pod and --pod-namespace are both set. Most likely this should be passed via the downward API. This is used for auto-detecting sharding. If set, this has preference over statically configured sharding. This is experimental, it may be removed without notice.
       --port int                                   Port to expose metrics on. (default 8080)
       --resources string                           Comma-separated list of Resources to be enabled. Defaults to "certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments"
+      --server-read-timeout duration               The maximum duration for reading the entire request, including the body. (default 30s)
+      --server-write-timeout duration              The maximum duration before timing out writes of the response. (default 1m0s)
       --shard int32                                The instances shard nominal (zero indexed) within the total number of shards. (default 0)
       --skip_headers                               If true, avoid header prefixes in the log messages
       --skip_log_headers                           If true, avoid headers when opening log files (no effect when -logtostderr=true)
diff --git a/pkg/app/server.go b/pkg/app/server.go
index aa5b2c916c..c079498460 100644
--- a/pkg/app/server.go
+++ b/pkg/app/server.go
@@ -326,6 +326,8 @@ func RunKubeStateMetrics(ctx context.Context, opts *options.Options) error {
 	metricsServer := http.Server{
 		Handler:           metricsMux,
 		ReadHeaderTimeout: 5 * time.Second,
+		ReadTimeout:       opts.ServerReadTimeout,
+		WriteTimeout:      opts.ServerWriteTimeout,
 	}
 	metricsFlags := web.FlagConfig{
 		WebListenAddresses: &[]string{metricsServerListenAddress},
@@ -401,7 +403,6 @@ func buildMetricsServer(m *metricshandler.MetricsHandler, durationObserver prome
 	mux.Handle("/debug/pprof/trace", http.HandlerFunc(pprof.Trace))
 
 	mux.Handle(metricsPath, promhttp.InstrumentHandlerDuration(durationObserver, m))
-
 	// Add healthzPath
 	mux.HandleFunc(healthzPath, func(w http.ResponseWriter, _ *http.Request) {
 		w.WriteHeader(http.StatusOK)
diff --git a/pkg/options/options.go b/pkg/options/options.go
index 09adf8939a..f8c707510f 100644
--- a/pkg/options/options.go
+++ b/pkg/options/options.go
@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"os"
 	"strings"
+	"time"
 
 	"github.com/prometheus/common/version"
 	"github.com/spf13/cobra"
@@ -55,6 +56,8 @@ type Options struct {
 	TelemetryPort            int             `yaml:"telemetry_port"`
 	TotalShards              int             `yaml:"total_shards"`
 	UseAPIServerCache        bool            `yaml:"use_api_server_cache"`
+	ServerReadTimeout        time.Duration   `yaml:"server_read_timeout"`
+	ServerWriteTimeout       time.Duration   `yaml:"server_write_timeout"`
 
 	Config string
 
@@ -146,6 +149,8 @@ func (o *Options) AddFlags(cmd *cobra.Command) {
 	o.cmd.Flags().Var(&o.Namespaces, "namespaces", fmt.Sprintf("Comma-separated list of namespaces to be enabled. Defaults to %q", &DefaultNamespaces))
 	o.cmd.Flags().Var(&o.NamespacesDenylist, "namespaces-denylist", "Comma-separated list of namespaces not to be enabled. If namespaces and namespaces-denylist are both set, only namespaces that are excluded in namespaces-denylist will be used.")
 	o.cmd.Flags().Var(&o.Resources, "resources", fmt.Sprintf("Comma-separated list of Resources to be enabled. Defaults to %q", &DefaultResources))
+	o.cmd.Flags().DurationVar(&o.ServerReadTimeout, "server-read-timeout", 30*time.Second, "The maximum duration for reading the entire request, including the body.")
+	o.cmd.Flags().DurationVar(&o.ServerWriteTimeout, "server-write-timeout", 60*time.Second, "The maximum duration before timing out writes of the response.")
 }
 
 // Parse parses the flag definitions from the argument list.

From b4f032ecfdf60953ec193754696152d19a1d8887 Mon Sep 17 00:00:00 2001
From: pokom <mark.poko@grafana.com>
Date: Thu, 6 Jun 2024 13:11:00 -0400
Subject: [PATCH 2/5] Add additional flags for IdleTimeouts

---
 docs/developer/cli-arguments.md | 2 ++
 pkg/app/server.go               | 3 ++-
 pkg/options/options.go          | 5 +++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/docs/developer/cli-arguments.md b/docs/developer/cli-arguments.md
index 82a2a1f3d7..817cb8b375 100644
--- a/docs/developer/cli-arguments.md
+++ b/docs/developer/cli-arguments.md
@@ -67,6 +67,8 @@ Flags:
       --pod-namespace string                       Name of the namespace of the pod specified by --pod. When set, it is expected that --pod and --pod-namespace are both set. Most likely this should be passed via the downward API. This is used for auto-detecting sharding. If set, this has preference over statically configured sharding. This is experimental, it may be removed without notice.
       --port int                                   Port to expose metrics on. (default 8080)
       --resources string                           Comma-separated list of Resources to be enabled. Defaults to "certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments"
+      --server-idle-timeout duration               The maximum amount of time to wait for the next request when keep-alives are enabled. (default 5m0s)
+      --server-read-header-timeout duration        The maximum duration for reading the header of requests. (default 5s)
       --server-read-timeout duration               The maximum duration for reading the entire request, including the body. (default 30s)
       --server-write-timeout duration              The maximum duration before timing out writes of the response. (default 1m0s)
       --shard int32                                The instances shard nominal (zero indexed) within the total number of shards. (default 0)
diff --git a/pkg/app/server.go b/pkg/app/server.go
index c079498460..3efd97b354 100644
--- a/pkg/app/server.go
+++ b/pkg/app/server.go
@@ -325,9 +325,10 @@ func RunKubeStateMetrics(ctx context.Context, opts *options.Options) error {
 	metricsServerListenAddress := net.JoinHostPort(opts.Host, strconv.Itoa(opts.Port))
 	metricsServer := http.Server{
 		Handler:           metricsMux,
-		ReadHeaderTimeout: 5 * time.Second,
+		ReadHeaderTimeout: opts.ServerReadHeaderTimeout,
 		ReadTimeout:       opts.ServerReadTimeout,
 		WriteTimeout:      opts.ServerWriteTimeout,
+		IdleTimeout:       opts.ServerIdleTimeout,
 	}
 	metricsFlags := web.FlagConfig{
 		WebListenAddresses: &[]string{metricsServerListenAddress},
diff --git a/pkg/options/options.go b/pkg/options/options.go
index f8c707510f..5d279d6f52 100644
--- a/pkg/options/options.go
+++ b/pkg/options/options.go
@@ -58,6 +58,8 @@ type Options struct {
 	UseAPIServerCache        bool            `yaml:"use_api_server_cache"`
 	ServerReadTimeout        time.Duration   `yaml:"server_read_timeout"`
 	ServerWriteTimeout       time.Duration   `yaml:"server_write_timeout"`
+	ServerIdleTimeout        time.Duration   `yaml:"server_idle_timeout"`
+	ServerReadHeaderTimeout  time.Duration   `yaml:"server_read_header_timeout"`
 
 	Config string
 
@@ -149,8 +151,11 @@ func (o *Options) AddFlags(cmd *cobra.Command) {
 	o.cmd.Flags().Var(&o.Namespaces, "namespaces", fmt.Sprintf("Comma-separated list of namespaces to be enabled. Defaults to %q", &DefaultNamespaces))
 	o.cmd.Flags().Var(&o.NamespacesDenylist, "namespaces-denylist", "Comma-separated list of namespaces not to be enabled. If namespaces and namespaces-denylist are both set, only namespaces that are excluded in namespaces-denylist will be used.")
 	o.cmd.Flags().Var(&o.Resources, "resources", fmt.Sprintf("Comma-separated list of Resources to be enabled. Defaults to %q", &DefaultResources))
+
 	o.cmd.Flags().DurationVar(&o.ServerReadTimeout, "server-read-timeout", 30*time.Second, "The maximum duration for reading the entire request, including the body.")
 	o.cmd.Flags().DurationVar(&o.ServerWriteTimeout, "server-write-timeout", 60*time.Second, "The maximum duration before timing out writes of the response.")
+	o.cmd.Flags().DurationVar(&o.ServerIdleTimeout, "server-idle-timeout", 5*time.Minute, "The maximum amount of time to wait for the next request when keep-alives are enabled.")
+	o.cmd.Flags().DurationVar(&o.ServerReadHeaderTimeout, "server-read-header-timeout", 5*time.Second, "The maximum duration for reading the header of requests.")
 }
 
 // Parse parses the flag definitions from the argument list.

From 28dbd26540485c8d8f7a6009193e69a7596b25f0 Mon Sep 17 00:00:00 2001
From: pokom <mark.poko@grafana.com>
Date: Fri, 7 Jun 2024 13:14:57 -0400
Subject: [PATCH 3/5] Create variables for default values of new flags

---
 pkg/options/options.go | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/pkg/options/options.go b/pkg/options/options.go
index 5d279d6f52..f537e077c7 100644
--- a/pkg/options/options.go
+++ b/pkg/options/options.go
@@ -28,6 +28,16 @@ import (
 	"k8s.io/klog/v2"
 )
 
+var (
+	// Align with the default scrape interval from Prometheus: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
+	defaultServerReadTimeout  = 60 * time.Second
+	defaultServerWriteTimeout = 60 * time.Second
+	// ServerIdleTimeout is set to 5 minutes to match the default idle timeout of Prometheus scrape clients
+	// https://github.com/prometheus/common/blob/318309999517402ad522877ac7e55fa650a11114/config/http_config.go#L55
+	defaultServerIdleTimeout       = 5 * time.Minute
+	defaultServerReadHeaderTimeout = 5 * time.Second
+)
+
 // Options are the configurable parameters for kube-state-metrics.
 type Options struct {
 	AnnotationsAllowList     LabelsAllowList `yaml:"annotations_allow_list"`
@@ -152,10 +162,10 @@ func (o *Options) AddFlags(cmd *cobra.Command) {
 	o.cmd.Flags().Var(&o.NamespacesDenylist, "namespaces-denylist", "Comma-separated list of namespaces not to be enabled. If namespaces and namespaces-denylist are both set, only namespaces that are excluded in namespaces-denylist will be used.")
 	o.cmd.Flags().Var(&o.Resources, "resources", fmt.Sprintf("Comma-separated list of Resources to be enabled. Defaults to %q", &DefaultResources))
 
-	o.cmd.Flags().DurationVar(&o.ServerReadTimeout, "server-read-timeout", 30*time.Second, "The maximum duration for reading the entire request, including the body.")
-	o.cmd.Flags().DurationVar(&o.ServerWriteTimeout, "server-write-timeout", 60*time.Second, "The maximum duration before timing out writes of the response.")
-	o.cmd.Flags().DurationVar(&o.ServerIdleTimeout, "server-idle-timeout", 5*time.Minute, "The maximum amount of time to wait for the next request when keep-alives are enabled.")
-	o.cmd.Flags().DurationVar(&o.ServerReadHeaderTimeout, "server-read-header-timeout", 5*time.Second, "The maximum duration for reading the header of requests.")
+	o.cmd.Flags().DurationVar(&o.ServerReadTimeout, "server-read-timeout", defaultServerReadTimeout, "The maximum duration for reading the entire request, including the body. Align with the scrape interval or timeout of scraping clients. ")
+	o.cmd.Flags().DurationVar(&o.ServerWriteTimeout, "server-write-timeout", defaultServerWriteTimeout, "The maximum duration before timing out writes of the response. Align with the scrape interval or timeout of scraping clients..")
+	o.cmd.Flags().DurationVar(&o.ServerIdleTimeout, "server-idle-timeout", defaultServerIdleTimeout, "The maximum amount of time to wait for the next request when keep-alives are enabled. Align with the idletimeout of your scrape clients.")
+	o.cmd.Flags().DurationVar(&o.ServerReadHeaderTimeout, "server-read-header-timeout", defaultServerReadHeaderTimeout, "The maximum duration for reading the header of requests.")
 }
 
 // Parse parses the flag definitions from the argument list.

From ee3913967e6ccffbb482ad477fdd17972d48509e Mon Sep 17 00:00:00 2001
From: Mark <markpoko2@gmail.com>
Date: Fri, 7 Jun 2024 14:13:44 -0400
Subject: [PATCH 4/5] Update docs/developer/cli-arguments.md
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Manuel Rüger <manuel@rueg.eu>
---
 docs/developer/cli-arguments.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/developer/cli-arguments.md b/docs/developer/cli-arguments.md
index 817cb8b375..aca2697e64 100644
--- a/docs/developer/cli-arguments.md
+++ b/docs/developer/cli-arguments.md
@@ -69,7 +69,7 @@ Flags:
       --resources string                           Comma-separated list of Resources to be enabled. Defaults to "certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments"
       --server-idle-timeout duration               The maximum amount of time to wait for the next request when keep-alives are enabled. (default 5m0s)
       --server-read-header-timeout duration        The maximum duration for reading the header of requests. (default 5s)
-      --server-read-timeout duration               The maximum duration for reading the entire request, including the body. (default 30s)
+      --server-read-timeout duration               The maximum duration for reading the entire request, including the body. (default 1m0s)
       --server-write-timeout duration              The maximum duration before timing out writes of the response. (default 1m0s)
       --shard int32                                The instances shard nominal (zero indexed) within the total number of shards. (default 0)
       --skip_headers                               If true, avoid header prefixes in the log messages

From cd460fef2901206b455f5b8562e789f736e59902 Mon Sep 17 00:00:00 2001
From: pokom <mark.poko@grafana.com>
Date: Tue, 11 Jun 2024 06:48:22 -0400
Subject: [PATCH 5/5] Update cli-arguments.md

---
 docs/developer/cli-arguments.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/developer/cli-arguments.md b/docs/developer/cli-arguments.md
index aca2697e64..6648c72278 100644
--- a/docs/developer/cli-arguments.md
+++ b/docs/developer/cli-arguments.md
@@ -67,10 +67,10 @@ Flags:
       --pod-namespace string                       Name of the namespace of the pod specified by --pod. When set, it is expected that --pod and --pod-namespace are both set. Most likely this should be passed via the downward API. This is used for auto-detecting sharding. If set, this has preference over statically configured sharding. This is experimental, it may be removed without notice.
       --port int                                   Port to expose metrics on. (default 8080)
       --resources string                           Comma-separated list of Resources to be enabled. Defaults to "certificatesigningrequests,configmaps,cronjobs,daemonsets,deployments,endpoints,horizontalpodautoscalers,ingresses,jobs,leases,limitranges,mutatingwebhookconfigurations,namespaces,networkpolicies,nodes,persistentvolumeclaims,persistentvolumes,poddisruptionbudgets,pods,replicasets,replicationcontrollers,resourcequotas,secrets,services,statefulsets,storageclasses,validatingwebhookconfigurations,volumeattachments"
-      --server-idle-timeout duration               The maximum amount of time to wait for the next request when keep-alives are enabled. (default 5m0s)
+      --server-idle-timeout duration               The maximum amount of time to wait for the next request when keep-alives are enabled. Align with the idletimeout of your scrape clients. (default 5m0s)
       --server-read-header-timeout duration        The maximum duration for reading the header of requests. (default 5s)
-      --server-read-timeout duration               The maximum duration for reading the entire request, including the body. (default 1m0s)
-      --server-write-timeout duration              The maximum duration before timing out writes of the response. (default 1m0s)
+      --server-read-timeout duration               The maximum duration for reading the entire request, including the body. Align with the scrape interval or timeout of scraping clients.  (default 1m0s)
+      --server-write-timeout duration              The maximum duration before timing out writes of the response. Align with the scrape interval or timeout of scraping clients.. (default 1m0s)
       --shard int32                                The instances shard nominal (zero indexed) within the total number of shards. (default 0)
       --skip_headers                               If true, avoid header prefixes in the log messages
       --skip_log_headers                           If true, avoid headers when opening log files (no effect when -logtostderr=true)