From 2e7857831b71c382e87f57983aef1716dbad4158 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Thu, 27 Jun 2024 14:13:11 +0100 Subject: [PATCH] Use controller-runtime metrics server Signed-off-by: Richard Wall --- cmd/app/app.go | 72 ++++++++++++------- cmd/app/options/options.go | 9 ++- deploy/charts/csi-driver/README.md | 9 ++- .../csi-driver/templates/daemonset.yaml | 9 ++- deploy/charts/csi-driver/values.schema.json | 10 ++- deploy/charts/csi-driver/values.yaml | 4 +- go.mod | 5 ++ go.sum | 10 +++ internal/metrics/metrics.go | 37 ---------- test/e2e/suite/cases/metrics.go | 2 +- 10 files changed, 93 insertions(+), 74 deletions(-) delete mode 100644 internal/metrics/metrics.go diff --git a/cmd/app/app.go b/cmd/app/app.go index 0be390d0..cd6d2110 100644 --- a/cmd/app/app.go +++ b/cmd/app/app.go @@ -23,9 +23,8 @@ import ( "crypto/x509" "encoding/pem" "fmt" - "time" + "net/http" - "github.com/cert-manager/cert-manager/pkg/server" "github.com/cert-manager/csi-lib/driver" "github.com/cert-manager/csi-lib/manager" "github.com/cert-manager/csi-lib/manager/util" @@ -34,9 +33,10 @@ import ( "github.com/spf13/cobra" "golang.org/x/sync/errgroup" "k8s.io/utils/clock" + ctrl "sigs.k8s.io/controller-runtime" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "github.com/cert-manager/csi-driver/cmd/app/options" - "github.com/cert-manager/csi-driver/internal/metrics" "github.com/cert-manager/csi-driver/internal/version" csiapi "github.com/cert-manager/csi-driver/pkg/apis/v1alpha1" "github.com/cert-manager/csi-driver/pkg/filestore" @@ -61,8 +61,11 @@ func NewCommand(ctx context.Context) *cobra.Command { }, RunE: func(cmd *cobra.Command, args []string) error { log := opts.Logr.WithName("main") - log.Info("Starting driver", "version", version.VersionInfo()) + // Set the controller-runtime logger so that we get the + // controller-runtime metricsserver logs. + ctrl.SetLogger(log) + log.Info("Starting driver", "version", version.VersionInfo()) store, err := storage.NewFilesystem(opts.Logr.WithName("storage"), opts.DataRoot) if err != nil { return fmt.Errorf("failed to setup filesystem: %w", err) @@ -100,14 +103,7 @@ func NewCommand(ctx context.Context) *cobra.Command { return fmt.Errorf("failed to setup driver: " + err.Error()) } - // Start metrics server - metricsLn, err := server.Listen("tcp", opts.MetricsListenAddress) - if err != nil { - return fmt.Errorf("failed to listen on prometheus address %s: %v", opts.MetricsListenAddress, err) - } - metricsServer := metrics.NewServer(metricsLn) - - g, _ := errgroup.WithContext(ctx) + g, gCTX := errgroup.WithContext(ctx) g.Go(func() error { <-ctx.Done() log.Info("shutting down driver", "context", ctx.Err()) @@ -123,21 +119,43 @@ func NewCommand(ctx context.Context) *cobra.Command { return nil }) - g.Go(func() error { - <-ctx.Done() - // allow a timeout for graceful shutdown - shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - // nolint: contextcheck - return metricsServer.Shutdown(shutdownCtx) - }) - - g.Go(func() error { - log.V(3).Info("starting metrics server", "address", metricsLn.Addr()) - return metricsServer.Serve(metricsLn) - }) - + // Start a metrics server if the --metrics-bind-address is not "0". + // + // By default this will serve all the metrics that are registered by controller-runtime to its global metrics registry. + // * Go Runtime metrics: + // * Process metrics + // + // Advantages of using the controller-runtime metricsserver are: + // * It already exists and is actively maintained. + // * Provides optional features for securing the metrics endpoint by + // TLS and by authentication with a K8S service account token, + // should that be requested by users in the future. + // * Consistency with cert-manager/approver-policy, which also uses + // this library and therefore publishes the same set of + // controller-runtime base metrics. + // Disadvantages: + // * It introduces a dependency on controller-runtime, which often + // introduces breaking changes. + // * It uses a global metrics registry, which has the usual risks + // associated with globals and makes it difficult for us to control + // which metrics are published for csi-driver. + // https://github.com/kubernetes-sigs/controller-runtime/issues/210 + var unusedHttpClient *http.Client + metricsServer, err := metricsserver.NewServer( + metricsserver.Options{ + BindAddress: opts.MetricsBindAddress, + }, + opts.RestConfig, + unusedHttpClient, + ) + if err != nil { + return err + } + if metricsServer != nil { + g.Go(func() error { + return metricsServer.Start(gCTX) + }) + } return g.Wait() }, } diff --git a/cmd/app/options/options.go b/cmd/app/options/options.go index 2867c9ff..43e1eae5 100644 --- a/cmd/app/options/options.go +++ b/cmd/app/options/options.go @@ -32,8 +32,6 @@ import ( _ "k8s.io/client-go/plugin/pkg/client/auth" ) -const defaultPrometheusMetricsServerAddress = "0.0.0.0:9402" - // Options are the main options for the driver. Populated via processing // command line flags. type Options struct { @@ -73,7 +71,7 @@ type Options struct { CMClient cmclient.Interface // The host and port that the metrics endpoint should listen on. - MetricsListenAddress string + MetricsBindAddress string } func New() *Options { @@ -157,6 +155,7 @@ func (o *Options) addAppFlags(fs *pflag.FlagSet) { fs.BoolVar(&o.UseTokenRequest, "use-token-request", false, "Use the empty audience token request for creating CertificateRequests. Requires the token request to be defined on the CSIDriver manifest.") - fs.StringVar(&o.MetricsListenAddress, "metrics-listen-address", defaultPrometheusMetricsServerAddress, - "The host and port that the metrics endpoint should listen on.") + fs.StringVar(&o.MetricsBindAddress, "metrics-bind-address", "0", + "The host and port that the metrics endpoint should listen on (for example ::9402). "+ + "If 0, the metrics server will be disabled. (default).") } diff --git a/deploy/charts/csi-driver/README.md b/deploy/charts/csi-driver/README.md index 6883d5ad..5b7d2927 100644 --- a/deploy/charts/csi-driver/README.md +++ b/deploy/charts/csi-driver/README.md @@ -13,7 +13,14 @@ > ``` Enable the metrics server on csi-driver pods. -If false, then the other metrics fields below will be ignored. +If false, the metrics server will be disabled and the other metrics fields below will be ignored. +#### **metrics.port** ~ `number` +> Default value: +> ```yaml +> 9402 +> ``` + +The TCP port on which the metrics server will listen. #### **metrics.podmonitor.enabled** ~ `bool` > Default value: > ```yaml diff --git a/deploy/charts/csi-driver/templates/daemonset.yaml b/deploy/charts/csi-driver/templates/daemonset.yaml index 6cbe9faa..d6ef09c4 100644 --- a/deploy/charts/csi-driver/templates/daemonset.yaml +++ b/deploy/charts/csi-driver/templates/daemonset.yaml @@ -84,6 +84,11 @@ spec: - --endpoint=$(CSI_ENDPOINT) - --data-root=csi-data-dir - --use-token-request={{ .Values.app.driver.useTokenRequest }} +{{- if .Values.metrics.enabled }} + - --metrics-bind-address=:{{ .Values.metrics.port }} +{{- else }} + - --metrics-bind-address=0 +{{- end }} env: - name: NODE_ID valueFrom: @@ -103,8 +108,10 @@ spec: ports: - containerPort: {{.Values.app.livenessProbe.port}} name: healthz - - containerPort: 9402 +{{- if .Values.metrics.enabled }} + - containerPort: {{ .Values.metrics.port }} name: http-metrics +{{- end }} livenessProbe: httpGet: path: /healthz diff --git a/deploy/charts/csi-driver/values.schema.json b/deploy/charts/csi-driver/values.schema.json index 0e050a71..c7d75b4d 100644 --- a/deploy/charts/csi-driver/values.schema.json +++ b/deploy/charts/csi-driver/values.schema.json @@ -248,13 +248,16 @@ }, "podmonitor": { "$ref": "#/$defs/helm-values.metrics.podmonitor" + }, + "port": { + "$ref": "#/$defs/helm-values.metrics.port" } }, "type": "object" }, "helm-values.metrics.enabled": { "default": true, - "description": "Enable the metrics server on csi-driver pods.\nIf false, then the other metrics fields below will be ignored.", + "description": "Enable the metrics server on csi-driver pods.\nIf false, the metrics server will be disabled and the other metrics fields below will be ignored.", "type": "boolean" }, "helm-values.metrics.podmonitor": { @@ -334,6 +337,11 @@ "description": "The timeout before a metrics scrape fails.", "type": "string" }, + "helm-values.metrics.port": { + "default": 9402, + "description": "The TCP port on which the metrics server will listen.", + "type": "number" + }, "helm-values.nodeDriverRegistrarImage": { "additionalProperties": false, "properties": { diff --git a/deploy/charts/csi-driver/values.yaml b/deploy/charts/csi-driver/values.yaml index 45f9421c..0a2796f5 100644 --- a/deploy/charts/csi-driver/values.yaml +++ b/deploy/charts/csi-driver/values.yaml @@ -1,7 +1,9 @@ metrics: # Enable the metrics server on csi-driver pods. - # If false, then the other metrics fields below will be ignored. + # If false, the metrics server will be disabled and the other metrics fields below will be ignored. enabled: true + # The TCP port on which the metrics server will listen. + port: 9402 podmonitor: # Create a PodMonitor to add csi-driver to Prometheus. enabled: false diff --git a/go.mod b/go.mod index 7b7ec48c..3e21621c 100644 --- a/go.mod +++ b/go.mod @@ -35,6 +35,8 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.12.0 // indirect github.com/evanphx/json-patch v5.9.0+incompatible // indirect + github.com/evanphx/json-patch/v5 v5.9.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-asn1-ber/asn1-ber v1.5.6 // indirect github.com/go-errors/errors v1.4.2 // indirect github.com/go-ldap/ldap/v3 v3.4.8 // indirect @@ -44,6 +46,7 @@ require ( github.com/go-openapi/swag v0.23.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/btree v1.1.2 // indirect github.com/google/gnostic-models v0.6.9-0.20230804172637-c7be7c783f49 // indirect @@ -80,6 +83,7 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/crypto v0.23.0 // indirect + golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect golang.org/x/net v0.25.0 // indirect golang.org/x/oauth2 v0.20.0 // indirect golang.org/x/sys v0.20.0 // indirect @@ -87,6 +91,7 @@ require ( golang.org/x/text v0.15.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.21.0 // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291 // indirect google.golang.org/grpc v1.64.0 // indirect google.golang.org/protobuf v1.34.1 // indirect diff --git a/go.sum b/go.sum index 0c77640f..18fe54de 100644 --- a/go.sum +++ b/go.sum @@ -38,6 +38,10 @@ github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.m github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls= github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= +github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/go-asn1-ber/asn1-ber v1.5.5/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= github.com/go-asn1-ber/asn1-ber v1.5.6 h1:CYsqysemXfEaQbyrLJmdsCRuufHoLa3P/gGWGl5TDrM= github.com/go-asn1-ber/asn1-ber v1.5.6/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= @@ -60,6 +64,8 @@ github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -217,6 +223,8 @@ golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOM golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= @@ -305,6 +313,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go deleted file mode 100644 index 0ee35224..00000000 --- a/internal/metrics/metrics.go +++ /dev/null @@ -1,37 +0,0 @@ -package metrics - -import ( - "net" - "net/http" - "time" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/collectors" - "github.com/prometheus/client_golang/prometheus/promhttp" -) - -const ( - prometheusMetricsServerReadTimeout = 8 * time.Second - prometheusMetricsServerWriteTimeout = 8 * time.Second - prometheusMetricsServerMaxHeaderBytes = 1 << 20 // 1 MiB -) - -func NewServer(ln net.Listener) *http.Server { - registry := prometheus.NewRegistry() - registry.MustRegister( - collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), - collectors.NewGoCollector(), - ) - - mux := http.NewServeMux() - mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{})) - - server := &http.Server{ - Addr: ln.Addr().String(), - ReadTimeout: prometheusMetricsServerReadTimeout, - WriteTimeout: prometheusMetricsServerWriteTimeout, - MaxHeaderBytes: prometheusMetricsServerMaxHeaderBytes, - Handler: mux, - } - return server -} diff --git a/test/e2e/suite/cases/metrics.go b/test/e2e/suite/cases/metrics.go index c83752f0..4a2b290b 100644 --- a/test/e2e/suite/cases/metrics.go +++ b/test/e2e/suite/cases/metrics.go @@ -17,7 +17,7 @@ const ( var _ = framework.CasesDescribe("Metrics", func() { f := framework.NewDefaultFramework("metrics") - It("Should serve Go and process metrics on port 9402", func() { + FIt("Should serve Go and process metrics on port 9402", func() { By("Discovering the name of the csi-driver Pod") pods, err := f.KubeClientSet.CoreV1().Pods(certManagerNamespace).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app.kubernetes.io/instance=csi-driver",