From 9ac6b382dbd6967b6bc7bba9034998ee8ab0a624 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Tue, 25 Jun 2024 13:50:03 +0100 Subject: [PATCH 1/8] E2E test for go_collector and process_collector metrics Signed-off-by: Richard Wall --- make/test-e2e.mk | 1 + test/e2e/suite/cases/metrics.go | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 test/e2e/suite/cases/metrics.go diff --git a/make/test-e2e.mk b/make/test-e2e.mk index 516eee18..00657c3e 100644 --- a/make/test-e2e.mk +++ b/make/test-e2e.mk @@ -46,6 +46,7 @@ endif test-e2e-deps: INSTALL_OPTIONS := test-e2e-deps: INSTALL_OPTIONS += --set image.repository=$(oci_manager_image_name_development) +# test-e2e-deps: INSTALL_OPTIONS += --set metrics.enabled=true test-e2e-deps: e2e-setup-cert-manager test-e2e-deps: install diff --git a/test/e2e/suite/cases/metrics.go b/test/e2e/suite/cases/metrics.go new file mode 100644 index 00000000..c83752f0 --- /dev/null +++ b/test/e2e/suite/cases/metrics.go @@ -0,0 +1,41 @@ +package cases + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/cert-manager/csi-driver/test/e2e/framework" +) + +const ( + certManagerNamespace = "cert-manager" +) + +var _ = framework.CasesDescribe("Metrics", func() { + f := framework.NewDefaultFramework("metrics") + + It("Should serve Go and process metrics on port 9402", func() { + By("Discovering the name of the csi-driver Pod") + pods, err := f.KubeClientSet.CoreV1().Pods(certManagerNamespace).List(context.TODO(), metav1.ListOptions{ + LabelSelector: "app.kubernetes.io/instance=csi-driver", + }) + Expect(err).NotTo(HaveOccurred()) + Expect(pods.Items).To(HaveLen(1)) + p := pods.Items[0] + By("Connecting to Pod on default metrics port 9402 and sending a GET request to the /metrics endpoint") + respBytes, err := f.KubeClientSet. + CoreV1(). + Pods(p.Namespace). + ProxyGet("http", p.Name, "9402", "/metrics", map[string]string{}). + DoRaw(context.TODO()) + Expect(err).NotTo(HaveOccurred()) + resp := string(respBytes) + Expect(resp).To(ContainSubstring("# HELP go_threads Number of OS threads created."), + "go_collector metrics should be available") + Expect(resp).To(ContainSubstring("# HELP process_open_fds Number of open file descriptors."), + "process_collector metrics should be available") + }) +}) From 1f6d79fed1f6c077443b08e000390e694b4a4069 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Tue, 25 Jun 2024 18:04:28 +0100 Subject: [PATCH 2/8] Add Go and Process metrics Signed-off-by: Richard Wall --- cmd/app/app.go | 45 +++++++++++++++++++++++++++++++------ cmd/app/options/options.go | 7 ++++++ go.mod | 4 ++-- internal/metrics/metrics.go | 37 ++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 9 deletions(-) create mode 100644 internal/metrics/metrics.go diff --git a/cmd/app/app.go b/cmd/app/app.go index f5b2b0b2..0be390d0 100644 --- a/cmd/app/app.go +++ b/cmd/app/app.go @@ -23,16 +23,20 @@ import ( "crypto/x509" "encoding/pem" "fmt" + "time" + "github.com/cert-manager/cert-manager/pkg/server" "github.com/cert-manager/csi-lib/driver" "github.com/cert-manager/csi-lib/manager" "github.com/cert-manager/csi-lib/manager/util" "github.com/cert-manager/csi-lib/metadata" "github.com/cert-manager/csi-lib/storage" "github.com/spf13/cobra" + "golang.org/x/sync/errgroup" "k8s.io/utils/clock" "github.com/cert-manager/csi-driver/cmd/app/options" + "github.com/cert-manager/csi-driver/internal/metrics" "github.com/cert-manager/csi-driver/internal/version" csiapi "github.com/cert-manager/csi-driver/pkg/apis/v1alpha1" "github.com/cert-manager/csi-driver/pkg/filestore" @@ -96,18 +100,45 @@ func NewCommand(ctx context.Context) *cobra.Command { return fmt.Errorf("failed to setup driver: " + err.Error()) } - go func() { + // Start metrics server + metricsLn, err := server.Listen("tcp", opts.MetricsListenAddress) + if err != nil { + return fmt.Errorf("failed to listen on prometheus address %s: %v", opts.MetricsListenAddress, err) + } + metricsServer := metrics.NewServer(metricsLn) + + g, _ := errgroup.WithContext(ctx) + g.Go(func() error { <-ctx.Done() log.Info("shutting down driver", "context", ctx.Err()) d.Stop() - }() + return nil + }) - log.Info("running driver") - if err := d.Run(); err != nil { - return fmt.Errorf("failed running driver: " + err.Error()) - } + g.Go(func() error { + log.Info("running driver") + if err := d.Run(); err != nil { + return fmt.Errorf("failed running driver: " + err.Error()) + } + return nil + }) + + g.Go(func() error { + <-ctx.Done() + // allow a timeout for graceful shutdown + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + // nolint: contextcheck + return metricsServer.Shutdown(shutdownCtx) + }) + + g.Go(func() error { + log.V(3).Info("starting metrics server", "address", metricsLn.Addr()) + return metricsServer.Serve(metricsLn) + }) - return nil + return g.Wait() }, } diff --git a/cmd/app/options/options.go b/cmd/app/options/options.go index c93d4883..2867c9ff 100644 --- a/cmd/app/options/options.go +++ b/cmd/app/options/options.go @@ -32,6 +32,8 @@ import ( _ "k8s.io/client-go/plugin/pkg/client/auth" ) +const defaultPrometheusMetricsServerAddress = "0.0.0.0:9402" + // Options are the main options for the driver. Populated via processing // command line flags. type Options struct { @@ -69,6 +71,9 @@ type Options struct { // CMClient is a rest client for interacting with cert-manager resources. CMClient cmclient.Interface + + // The host and port that the metrics endpoint should listen on. + MetricsListenAddress string } func New() *Options { @@ -152,4 +157,6 @@ func (o *Options) addAppFlags(fs *pflag.FlagSet) { fs.BoolVar(&o.UseTokenRequest, "use-token-request", false, "Use the empty audience token request for creating CertificateRequests. Requires the token request to be defined on the CSIDriver manifest.") + fs.StringVar(&o.MetricsListenAddress, "metrics-listen-address", defaultPrometheusMetricsServerAddress, + "The host and port that the metrics endpoint should listen on.") } diff --git a/go.mod b/go.mod index 84718b9a..1b8c714b 100644 --- a/go.mod +++ b/go.mod @@ -8,9 +8,11 @@ require ( github.com/go-logr/logr v1.4.2 github.com/onsi/ginkgo/v2 v2.19.0 github.com/onsi/gomega v1.33.1 + github.com/prometheus/client_golang v1.18.0 github.com/spf13/cobra v1.8.1 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.9.0 + golang.org/x/sync v0.7.0 k8s.io/api v0.30.2 k8s.io/apimachinery v0.30.2 k8s.io/cli-runtime v0.30.2 @@ -70,7 +72,6 @@ require ( github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect - github.com/prometheus/client_golang v1.18.0 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.46.0 // indirect github.com/prometheus/procfs v0.15.0 // indirect @@ -81,7 +82,6 @@ require ( golang.org/x/crypto v0.24.0 // indirect golang.org/x/net v0.26.0 // indirect golang.org/x/oauth2 v0.20.0 // indirect - golang.org/x/sync v0.7.0 // indirect golang.org/x/sys v0.21.0 // indirect golang.org/x/term v0.21.0 // indirect golang.org/x/text v0.16.0 // indirect diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go new file mode 100644 index 00000000..0ee35224 --- /dev/null +++ b/internal/metrics/metrics.go @@ -0,0 +1,37 @@ +package metrics + +import ( + "net" + "net/http" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +const ( + prometheusMetricsServerReadTimeout = 8 * time.Second + prometheusMetricsServerWriteTimeout = 8 * time.Second + prometheusMetricsServerMaxHeaderBytes = 1 << 20 // 1 MiB +) + +func NewServer(ln net.Listener) *http.Server { + registry := prometheus.NewRegistry() + registry.MustRegister( + collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), + collectors.NewGoCollector(), + ) + + mux := http.NewServeMux() + mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{})) + + server := &http.Server{ + Addr: ln.Addr().String(), + ReadTimeout: prometheusMetricsServerReadTimeout, + WriteTimeout: prometheusMetricsServerWriteTimeout, + MaxHeaderBytes: prometheusMetricsServerMaxHeaderBytes, + Handler: mux, + } + return server +} From 273dd1eb38fbaeadf92933450d5502e8e49ec42e Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Thu, 27 Jun 2024 11:19:31 +0100 Subject: [PATCH 3/8] Update the Helm chart with metrics settings Signed-off-by: Richard Wall --- deploy/charts/csi-driver/README.md | 82 ++++++++++++++++ .../csi-driver/templates/daemonset.yaml | 2 + .../csi-driver/templates/podmonitor.yaml | 41 ++++++++ deploy/charts/csi-driver/values.schema.json | 97 +++++++++++++++++++ deploy/charts/csi-driver/values.yaml | 55 ++++++++++- 5 files changed, 273 insertions(+), 4 deletions(-) create mode 100644 deploy/charts/csi-driver/templates/podmonitor.yaml diff --git a/deploy/charts/csi-driver/README.md b/deploy/charts/csi-driver/README.md index d3b5432a..6883d5ad 100644 --- a/deploy/charts/csi-driver/README.md +++ b/deploy/charts/csi-driver/README.md @@ -6,6 +6,88 @@ +#### **metrics.enabled** ~ `bool` +> Default value: +> ```yaml +> true +> ``` + +Enable the metrics server on csi-driver pods. +If false, then the other metrics fields below will be ignored. +#### **metrics.podmonitor.enabled** ~ `bool` +> Default value: +> ```yaml +> false +> ``` + +Create a PodMonitor to add csi-driver to Prometheus. +#### **metrics.podmonitor.namespace** ~ `string` + +The namespace that the pod monitor should live in, defaults to the cert-manager-csi-driver namespace. + +#### **metrics.podmonitor.prometheusInstance** ~ `string` +> Default value: +> ```yaml +> default +> ``` + +Specifies the `prometheus` label on the created PodMonitor. This is used when different Prometheus instances have label selectors matching different PodMonitors. +#### **metrics.podmonitor.interval** ~ `string` +> Default value: +> ```yaml +> 60s +> ``` + +The interval to scrape metrics. +#### **metrics.podmonitor.scrapeTimeout** ~ `string` +> Default value: +> ```yaml +> 30s +> ``` + +The timeout before a metrics scrape fails. +#### **metrics.podmonitor.labels** ~ `object` +> Default value: +> ```yaml +> {} +> ``` + +Additional labels to add to the PodMonitor. +#### **metrics.podmonitor.annotations** ~ `object` +> Default value: +> ```yaml +> {} +> ``` + +Additional annotations to add to the PodMonitor. +#### **metrics.podmonitor.honorLabels** ~ `bool` +> Default value: +> ```yaml +> false +> ``` + +Keep labels from scraped data, overriding server-side labels. +#### **metrics.podmonitor.endpointAdditionalProperties** ~ `object` +> Default value: +> ```yaml +> {} +> ``` + +EndpointAdditionalProperties allows setting additional properties on the endpoint such as relabelings, metricRelabelings etc. + +For example: + +```yaml +endpointAdditionalProperties: + relabelings: + - action: replace + sourceLabels: + - __meta_kubernetes_pod_node_name + targetLabel: instance +``` + + + #### **image.registry** ~ `string` Target image registry. This value is prepended to the target image repository, if set. diff --git a/deploy/charts/csi-driver/templates/daemonset.yaml b/deploy/charts/csi-driver/templates/daemonset.yaml index f1ea8bce..6cbe9faa 100644 --- a/deploy/charts/csi-driver/templates/daemonset.yaml +++ b/deploy/charts/csi-driver/templates/daemonset.yaml @@ -103,6 +103,8 @@ spec: ports: - containerPort: {{.Values.app.livenessProbe.port}} name: healthz + - containerPort: 9402 + name: http-metrics livenessProbe: httpGet: path: /healthz diff --git a/deploy/charts/csi-driver/templates/podmonitor.yaml b/deploy/charts/csi-driver/templates/podmonitor.yaml new file mode 100644 index 00000000..800bb60c --- /dev/null +++ b/deploy/charts/csi-driver/templates/podmonitor.yaml @@ -0,0 +1,41 @@ +{{- if and .Values.metrics.enabled .Values.metrics.podmonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: {{ include "cert-manager-csi-driver.name" . }} +{{- if .Values.metrics.podmonitor.namespace }} + namespace: {{ .Values.metrics.podmonitor.namespace }} +{{- else }} + namespace: {{ .Release.Namespace | quote }} +{{- end }} + labels: + {{- include "cert-manager-csi-driver.labels" . | nindent 4 }} + prometheus: {{ .Values.metrics.podmonitor.prometheusInstance }} + {{- with .Values.metrics.podmonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{- with .Values.metrics.podmonitor.annotations }} + annotations: + {{- toYaml . | nindent 4 }} +{{- end }} +spec: + jobLabel: {{ include "cert-manager-csi-driver.name" . }} + selector: + matchLabels: + app.kubernetes.io/name: {{ include "cert-manager-csi-driver.name" . }} + app.kubernetes.io/instance: {{ .Release.Name }} +{{- if .Values.metrics.podmonitor.namespace }} + namespaceSelector: + matchNames: + - {{ .Release.Namespace | quote }} +{{- end }} + podMetricsEndpoints: + - port: http-metrics + path: /metrics + interval: {{ .Values.metrics.podmonitor.interval }} + scrapeTimeout: {{ .Values.metrics.podmonitor.scrapeTimeout }} + honorLabels: {{ .Values.metrics.podmonitor.honorLabels }} + {{- with .Values.metrics.podmonitor.endpointAdditionalProperties }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/deploy/charts/csi-driver/values.schema.json b/deploy/charts/csi-driver/values.schema.json index 62540708..0e050a71 100644 --- a/deploy/charts/csi-driver/values.schema.json +++ b/deploy/charts/csi-driver/values.schema.json @@ -27,6 +27,9 @@ "livenessProbeImage": { "$ref": "#/$defs/helm-values.livenessProbeImage" }, + "metrics": { + "$ref": "#/$defs/helm-values.metrics" + }, "nodeDriverRegistrarImage": { "$ref": "#/$defs/helm-values.nodeDriverRegistrarImage" }, @@ -237,6 +240,100 @@ "description": "Override the image tag to deploy by setting this variable. If no value is set, the chart's appVersion is used.", "type": "string" }, + "helm-values.metrics": { + "additionalProperties": false, + "properties": { + "enabled": { + "$ref": "#/$defs/helm-values.metrics.enabled" + }, + "podmonitor": { + "$ref": "#/$defs/helm-values.metrics.podmonitor" + } + }, + "type": "object" + }, + "helm-values.metrics.enabled": { + "default": true, + "description": "Enable the metrics server on csi-driver pods.\nIf false, then the other metrics fields below will be ignored.", + "type": "boolean" + }, + "helm-values.metrics.podmonitor": { + "additionalProperties": false, + "properties": { + "annotations": { + "$ref": "#/$defs/helm-values.metrics.podmonitor.annotations" + }, + "enabled": { + "$ref": "#/$defs/helm-values.metrics.podmonitor.enabled" + }, + "endpointAdditionalProperties": { + "$ref": "#/$defs/helm-values.metrics.podmonitor.endpointAdditionalProperties" + }, + "honorLabels": { + "$ref": "#/$defs/helm-values.metrics.podmonitor.honorLabels" + }, + "interval": { + "$ref": "#/$defs/helm-values.metrics.podmonitor.interval" + }, + "labels": { + "$ref": "#/$defs/helm-values.metrics.podmonitor.labels" + }, + "namespace": { + "$ref": "#/$defs/helm-values.metrics.podmonitor.namespace" + }, + "prometheusInstance": { + "$ref": "#/$defs/helm-values.metrics.podmonitor.prometheusInstance" + }, + "scrapeTimeout": { + "$ref": "#/$defs/helm-values.metrics.podmonitor.scrapeTimeout" + } + }, + "type": "object" + }, + "helm-values.metrics.podmonitor.annotations": { + "default": {}, + "description": "Additional annotations to add to the PodMonitor.", + "type": "object" + }, + "helm-values.metrics.podmonitor.enabled": { + "default": false, + "description": "Create a PodMonitor to add csi-driver to Prometheus.", + "type": "boolean" + }, + "helm-values.metrics.podmonitor.endpointAdditionalProperties": { + "default": {}, + "description": "EndpointAdditionalProperties allows setting additional properties on the endpoint such as relabelings, metricRelabelings etc.\n\nFor example:\nendpointAdditionalProperties:\n relabelings:\n - action: replace\n sourceLabels:\n - __meta_kubernetes_pod_node_name\n targetLabel: instance", + "type": "object" + }, + "helm-values.metrics.podmonitor.honorLabels": { + "default": false, + "description": "Keep labels from scraped data, overriding server-side labels.", + "type": "boolean" + }, + "helm-values.metrics.podmonitor.interval": { + "default": "60s", + "description": "The interval to scrape metrics.", + "type": "string" + }, + "helm-values.metrics.podmonitor.labels": { + "default": {}, + "description": "Additional labels to add to the PodMonitor.", + "type": "object" + }, + "helm-values.metrics.podmonitor.namespace": { + "description": "The namespace that the pod monitor should live in, defaults to the cert-manager-csi-driver namespace.", + "type": "string" + }, + "helm-values.metrics.podmonitor.prometheusInstance": { + "default": "default", + "description": "Specifies the `prometheus` label on the created PodMonitor. This is used when different Prometheus instances have label selectors matching different PodMonitors.", + "type": "string" + }, + "helm-values.metrics.podmonitor.scrapeTimeout": { + "default": "30s", + "description": "The timeout before a metrics scrape fails.", + "type": "string" + }, "helm-values.nodeDriverRegistrarImage": { "additionalProperties": false, "properties": { diff --git a/deploy/charts/csi-driver/values.yaml b/deploy/charts/csi-driver/values.yaml index 54e772c5..45f9421c 100644 --- a/deploy/charts/csi-driver/values.yaml +++ b/deploy/charts/csi-driver/values.yaml @@ -1,3 +1,50 @@ +metrics: + # Enable the metrics server on csi-driver pods. + # If false, then the other metrics fields below will be ignored. + enabled: true + podmonitor: + # Create a PodMonitor to add csi-driver to Prometheus. + enabled: false + + # The namespace that the pod monitor should live in, defaults + # to the cert-manager-csi-driver namespace. + # +docs:property + # namespace: cert-manager + + # Specifies the `prometheus` label on the created PodMonitor. This is + # used when different Prometheus instances have label selectors matching + # different PodMonitors. + prometheusInstance: default + + # The interval to scrape metrics. + interval: 60s + + # The timeout before a metrics scrape fails. + scrapeTimeout: 30s + + # Additional labels to add to the PodMonitor. + labels: {} + + # Additional annotations to add to the PodMonitor. + annotations: {} + + # Keep labels from scraped data, overriding server-side labels. + honorLabels: false + + # EndpointAdditionalProperties allows setting additional properties on the + # endpoint such as relabelings, metricRelabelings etc. + # + # For example: + # endpointAdditionalProperties: + # relabelings: + # - action: replace + # sourceLabels: + # - __meta_kubernetes_pod_node_name + # targetLabel: instance + # + # +docs:property + endpointAdditionalProperties: {} + image: # Target image registry. This value is prepended to the target image repository, if set. # For example: @@ -130,7 +177,7 @@ nodeSelector: kubernetes.io/os: linux # Kubernetes affinity: constraints for pod assignment. -# +# # For example: # affinity: # nodeAffinity: @@ -154,15 +201,15 @@ tolerations: [] priorityClassName: "" openshift: - securityContextConstraint: + securityContextConstraint: # Include RBAC to allow the DaemonSet to "use" the specified # SecurityContextConstraints. # # This value can either be a boolean true or false, or the string "detect". - # If set to "detect" then the securityContextConstraint is automatically + # If set to "detect" then the securityContextConstraint is automatically # enabled for openshift installs. # # +docs:type=boolean,string,null enabled: detect # Name of the SecurityContextConstraints to create RBAC for. - name: privileged \ No newline at end of file + name: privileged From 541320561cec65afb9d427fb2c9f76cab8d4fd3c Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Thu, 27 Jun 2024 14:13:11 +0100 Subject: [PATCH 4/8] Use controller-runtime metrics server Signed-off-by: Richard Wall --- cmd/app/app.go | 80 ++++++++++++------- cmd/app/options/options.go | 9 +-- deploy/charts/csi-driver/README.md | 9 ++- .../csi-driver/templates/daemonset.yaml | 9 ++- deploy/charts/csi-driver/values.schema.json | 10 ++- deploy/charts/csi-driver/values.yaml | 4 +- go.mod | 7 +- go.sum | 10 +++ internal/metrics/metrics.go | 37 --------- test/e2e/suite/cases/metrics.go | 2 +- 10 files changed, 102 insertions(+), 75 deletions(-) delete mode 100644 internal/metrics/metrics.go diff --git a/cmd/app/app.go b/cmd/app/app.go index 0be390d0..7e483a2e 100644 --- a/cmd/app/app.go +++ b/cmd/app/app.go @@ -23,9 +23,8 @@ import ( "crypto/x509" "encoding/pem" "fmt" - "time" + "net/http" - "github.com/cert-manager/cert-manager/pkg/server" "github.com/cert-manager/csi-lib/driver" "github.com/cert-manager/csi-lib/manager" "github.com/cert-manager/csi-lib/manager/util" @@ -34,9 +33,10 @@ import ( "github.com/spf13/cobra" "golang.org/x/sync/errgroup" "k8s.io/utils/clock" + ctrl "sigs.k8s.io/controller-runtime" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "github.com/cert-manager/csi-driver/cmd/app/options" - "github.com/cert-manager/csi-driver/internal/metrics" "github.com/cert-manager/csi-driver/internal/version" csiapi "github.com/cert-manager/csi-driver/pkg/apis/v1alpha1" "github.com/cert-manager/csi-driver/pkg/filestore" @@ -61,8 +61,11 @@ func NewCommand(ctx context.Context) *cobra.Command { }, RunE: func(cmd *cobra.Command, args []string) error { log := opts.Logr.WithName("main") - log.Info("Starting driver", "version", version.VersionInfo()) + // Set the controller-runtime logger so that we get the + // controller-runtime metricsserver logs. + ctrl.SetLogger(log) + log.Info("Starting driver", "version", version.VersionInfo()) store, err := storage.NewFilesystem(opts.Logr.WithName("storage"), opts.DataRoot) if err != nil { return fmt.Errorf("failed to setup filesystem: %w", err) @@ -100,14 +103,7 @@ func NewCommand(ctx context.Context) *cobra.Command { return fmt.Errorf("failed to setup driver: " + err.Error()) } - // Start metrics server - metricsLn, err := server.Listen("tcp", opts.MetricsListenAddress) - if err != nil { - return fmt.Errorf("failed to listen on prometheus address %s: %v", opts.MetricsListenAddress, err) - } - metricsServer := metrics.NewServer(metricsLn) - - g, _ := errgroup.WithContext(ctx) + g, gCTX := errgroup.WithContext(ctx) g.Go(func() error { <-ctx.Done() log.Info("shutting down driver", "context", ctx.Err()) @@ -123,21 +119,51 @@ func NewCommand(ctx context.Context) *cobra.Command { return nil }) - g.Go(func() error { - <-ctx.Done() - // allow a timeout for graceful shutdown - shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - // nolint: contextcheck - return metricsServer.Shutdown(shutdownCtx) - }) - - g.Go(func() error { - log.V(3).Info("starting metrics server", "address", metricsLn.Addr()) - return metricsServer.Serve(metricsLn) - }) - + // Start a metrics server if the --metrics-bind-address is not "0". + // + // By default this will serve all the metrics that are registered by + // controller-runtime to its global metrics registry. Including: + // * Go Runtime metrics + // * Process metrics + // * Various controller-runtime controller metrics + // (not updated by csi-driver because it doesn't use controller-runtime) + // * Leader election metrics + // (not updated by csi-driver because it doesn't use leader-election) + // + // The full list is here: + // https://github.com/kubernetes-sigs/controller-runtime/blob/700befecdffa803d19830a6a43adc5779ed01e26/pkg/internal/controller/metrics/metrics.go#L73-L86 + // + // The advantages of using the controller-runtime metricsserver are: + // * It already exists and is actively maintained. + // * Provides optional features for securing the metrics endpoint by + // TLS and by authentication with a K8S service account token, + // should that be requested by users in the future. + // * Consistency with cert-manager/approver-policy, which also uses + // this library and therefore publishes the same set of + // controller-runtime base metrics. + // Disadvantages: + // * It introduces a dependency on controller-runtime, which often + // introduces breaking changes. + // * It uses a global metrics registry, which has the usual risks + // associated with globals and makes it difficult for us to control + // which metrics are published for csi-driver. + // https://github.com/kubernetes-sigs/controller-runtime/issues/210 + var unusedHttpClient *http.Client + metricsServer, err := metricsserver.NewServer( + metricsserver.Options{ + BindAddress: opts.MetricsBindAddress, + }, + opts.RestConfig, + unusedHttpClient, + ) + if err != nil { + return err + } + if metricsServer != nil { + g.Go(func() error { + return metricsServer.Start(gCTX) + }) + } return g.Wait() }, } diff --git a/cmd/app/options/options.go b/cmd/app/options/options.go index 2867c9ff..43e1eae5 100644 --- a/cmd/app/options/options.go +++ b/cmd/app/options/options.go @@ -32,8 +32,6 @@ import ( _ "k8s.io/client-go/plugin/pkg/client/auth" ) -const defaultPrometheusMetricsServerAddress = "0.0.0.0:9402" - // Options are the main options for the driver. Populated via processing // command line flags. type Options struct { @@ -73,7 +71,7 @@ type Options struct { CMClient cmclient.Interface // The host and port that the metrics endpoint should listen on. - MetricsListenAddress string + MetricsBindAddress string } func New() *Options { @@ -157,6 +155,7 @@ func (o *Options) addAppFlags(fs *pflag.FlagSet) { fs.BoolVar(&o.UseTokenRequest, "use-token-request", false, "Use the empty audience token request for creating CertificateRequests. Requires the token request to be defined on the CSIDriver manifest.") - fs.StringVar(&o.MetricsListenAddress, "metrics-listen-address", defaultPrometheusMetricsServerAddress, - "The host and port that the metrics endpoint should listen on.") + fs.StringVar(&o.MetricsBindAddress, "metrics-bind-address", "0", + "The host and port that the metrics endpoint should listen on (for example ::9402). "+ + "If 0, the metrics server will be disabled. (default).") } diff --git a/deploy/charts/csi-driver/README.md b/deploy/charts/csi-driver/README.md index 6883d5ad..5b7d2927 100644 --- a/deploy/charts/csi-driver/README.md +++ b/deploy/charts/csi-driver/README.md @@ -13,7 +13,14 @@ > ``` Enable the metrics server on csi-driver pods. -If false, then the other metrics fields below will be ignored. +If false, the metrics server will be disabled and the other metrics fields below will be ignored. +#### **metrics.port** ~ `number` +> Default value: +> ```yaml +> 9402 +> ``` + +The TCP port on which the metrics server will listen. #### **metrics.podmonitor.enabled** ~ `bool` > Default value: > ```yaml diff --git a/deploy/charts/csi-driver/templates/daemonset.yaml b/deploy/charts/csi-driver/templates/daemonset.yaml index 6cbe9faa..d6ef09c4 100644 --- a/deploy/charts/csi-driver/templates/daemonset.yaml +++ b/deploy/charts/csi-driver/templates/daemonset.yaml @@ -84,6 +84,11 @@ spec: - --endpoint=$(CSI_ENDPOINT) - --data-root=csi-data-dir - --use-token-request={{ .Values.app.driver.useTokenRequest }} +{{- if .Values.metrics.enabled }} + - --metrics-bind-address=:{{ .Values.metrics.port }} +{{- else }} + - --metrics-bind-address=0 +{{- end }} env: - name: NODE_ID valueFrom: @@ -103,8 +108,10 @@ spec: ports: - containerPort: {{.Values.app.livenessProbe.port}} name: healthz - - containerPort: 9402 +{{- if .Values.metrics.enabled }} + - containerPort: {{ .Values.metrics.port }} name: http-metrics +{{- end }} livenessProbe: httpGet: path: /healthz diff --git a/deploy/charts/csi-driver/values.schema.json b/deploy/charts/csi-driver/values.schema.json index 0e050a71..c7d75b4d 100644 --- a/deploy/charts/csi-driver/values.schema.json +++ b/deploy/charts/csi-driver/values.schema.json @@ -248,13 +248,16 @@ }, "podmonitor": { "$ref": "#/$defs/helm-values.metrics.podmonitor" + }, + "port": { + "$ref": "#/$defs/helm-values.metrics.port" } }, "type": "object" }, "helm-values.metrics.enabled": { "default": true, - "description": "Enable the metrics server on csi-driver pods.\nIf false, then the other metrics fields below will be ignored.", + "description": "Enable the metrics server on csi-driver pods.\nIf false, the metrics server will be disabled and the other metrics fields below will be ignored.", "type": "boolean" }, "helm-values.metrics.podmonitor": { @@ -334,6 +337,11 @@ "description": "The timeout before a metrics scrape fails.", "type": "string" }, + "helm-values.metrics.port": { + "default": 9402, + "description": "The TCP port on which the metrics server will listen.", + "type": "number" + }, "helm-values.nodeDriverRegistrarImage": { "additionalProperties": false, "properties": { diff --git a/deploy/charts/csi-driver/values.yaml b/deploy/charts/csi-driver/values.yaml index 45f9421c..0a2796f5 100644 --- a/deploy/charts/csi-driver/values.yaml +++ b/deploy/charts/csi-driver/values.yaml @@ -1,7 +1,9 @@ metrics: # Enable the metrics server on csi-driver pods. - # If false, then the other metrics fields below will be ignored. + # If false, the metrics server will be disabled and the other metrics fields below will be ignored. enabled: true + # The TCP port on which the metrics server will listen. + port: 9402 podmonitor: # Create a PodMonitor to add csi-driver to Prometheus. enabled: false diff --git a/go.mod b/go.mod index 1b8c714b..f3591d9c 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,6 @@ require ( github.com/go-logr/logr v1.4.2 github.com/onsi/ginkgo/v2 v2.19.0 github.com/onsi/gomega v1.33.1 - github.com/prometheus/client_golang v1.18.0 github.com/spf13/cobra v1.8.1 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.9.0 @@ -35,6 +34,8 @@ require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.12.0 // indirect github.com/evanphx/json-patch v5.9.0+incompatible // indirect + github.com/evanphx/json-patch/v5 v5.9.0 // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-asn1-ber/asn1-ber v1.5.6 // indirect github.com/go-errors/errors v1.4.2 // indirect github.com/go-ldap/ldap/v3 v3.4.8 // indirect @@ -44,6 +45,7 @@ require ( github.com/go-openapi/swag v0.23.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/btree v1.1.2 // indirect github.com/google/gnostic-models v0.6.9-0.20230804172637-c7be7c783f49 // indirect @@ -72,6 +74,7 @@ require ( github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_golang v1.18.0 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.46.0 // indirect github.com/prometheus/procfs v0.15.0 // indirect @@ -80,6 +83,7 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/crypto v0.24.0 // indirect + golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect golang.org/x/net v0.26.0 // indirect golang.org/x/oauth2 v0.20.0 // indirect golang.org/x/sys v0.21.0 // indirect @@ -87,6 +91,7 @@ require ( golang.org/x/text v0.16.0 // indirect golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect + gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291 // indirect google.golang.org/grpc v1.64.0 // indirect google.golang.org/protobuf v1.34.1 // indirect diff --git a/go.sum b/go.sum index a5514003..51f04362 100644 --- a/go.sum +++ b/go.sum @@ -38,6 +38,10 @@ github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.m github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/evanphx/json-patch v5.9.0+incompatible h1:fBXyNpNMuTTDdquAq/uisOr2lShz4oaXpDTX2bLe7ls= github.com/evanphx/json-patch v5.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= +github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= +github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/go-asn1-ber/asn1-ber v1.5.5/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= github.com/go-asn1-ber/asn1-ber v1.5.6 h1:CYsqysemXfEaQbyrLJmdsCRuufHoLa3P/gGWGl5TDrM= github.com/go-asn1-ber/asn1-ber v1.5.6/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= @@ -60,6 +64,8 @@ github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -217,6 +223,8 @@ golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOM golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI= golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 h1:vr/HnozRka3pE4EsMEg1lgkXJkTFJCVUX+S/ZT6wYzM= +golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= @@ -305,6 +313,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= +gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go deleted file mode 100644 index 0ee35224..00000000 --- a/internal/metrics/metrics.go +++ /dev/null @@ -1,37 +0,0 @@ -package metrics - -import ( - "net" - "net/http" - "time" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/collectors" - "github.com/prometheus/client_golang/prometheus/promhttp" -) - -const ( - prometheusMetricsServerReadTimeout = 8 * time.Second - prometheusMetricsServerWriteTimeout = 8 * time.Second - prometheusMetricsServerMaxHeaderBytes = 1 << 20 // 1 MiB -) - -func NewServer(ln net.Listener) *http.Server { - registry := prometheus.NewRegistry() - registry.MustRegister( - collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}), - collectors.NewGoCollector(), - ) - - mux := http.NewServeMux() - mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{})) - - server := &http.Server{ - Addr: ln.Addr().String(), - ReadTimeout: prometheusMetricsServerReadTimeout, - WriteTimeout: prometheusMetricsServerWriteTimeout, - MaxHeaderBytes: prometheusMetricsServerMaxHeaderBytes, - Handler: mux, - } - return server -} diff --git a/test/e2e/suite/cases/metrics.go b/test/e2e/suite/cases/metrics.go index c83752f0..4a2b290b 100644 --- a/test/e2e/suite/cases/metrics.go +++ b/test/e2e/suite/cases/metrics.go @@ -17,7 +17,7 @@ const ( var _ = framework.CasesDescribe("Metrics", func() { f := framework.NewDefaultFramework("metrics") - It("Should serve Go and process metrics on port 9402", func() { + FIt("Should serve Go and process metrics on port 9402", func() { By("Discovering the name of the csi-driver Pod") pods, err := f.KubeClientSet.CoreV1().Pods(certManagerNamespace).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app.kubernetes.io/instance=csi-driver", From 5b7d752bd1b9f6dafdd4f18245313f502db4caf1 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Fri, 28 Jun 2024 06:37:14 +0100 Subject: [PATCH 5/8] Enable podmonitor Signed-off-by: Richard Wall --- make/test-e2e.mk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/make/test-e2e.mk b/make/test-e2e.mk index 00657c3e..c6d4dce2 100644 --- a/make/test-e2e.mk +++ b/make/test-e2e.mk @@ -34,6 +34,7 @@ e2e-setup-cert-manager: | kind-cluster $(NEEDS_HELM) $(NEEDS_KUBECTL) --set startupapicheck.image.repository=$(quay.io/jetstack/cert-manager-startupapicheck.REPO) \ --set startupapicheck.image.tag=$(quay.io/jetstack/cert-manager-startupapicheck.TAG) \ --set startupapicheck.image.pullPolicy=Never \ + --set prometheus.podmonitor.enabled=true \ cert-manager cert-manager >/dev/null # The "install" target can be run on its own with any currently active cluster, @@ -46,7 +47,7 @@ endif test-e2e-deps: INSTALL_OPTIONS := test-e2e-deps: INSTALL_OPTIONS += --set image.repository=$(oci_manager_image_name_development) -# test-e2e-deps: INSTALL_OPTIONS += --set metrics.enabled=true +test-e2e-deps: INSTALL_OPTIONS += --set metrics.podmonitor.enabled=true test-e2e-deps: e2e-setup-cert-manager test-e2e-deps: install From ab3a39d7588e2e0058cefe8c389a5556b3d354b4 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Fri, 28 Jun 2024 10:11:45 +0100 Subject: [PATCH 6/8] Self review changes Signed-off-by: Richard Wall --- cmd/app/options/options.go | 8 +++++--- deploy/charts/csi-driver/README.md | 2 +- deploy/charts/csi-driver/values.schema.json | 2 +- deploy/charts/csi-driver/values.yaml | 3 ++- make/test-e2e.mk | 2 -- test/e2e/suite/cases/metrics.go | 18 +++++++++++++++++- 6 files changed, 26 insertions(+), 9 deletions(-) diff --git a/cmd/app/options/options.go b/cmd/app/options/options.go index 43e1eae5..c96ca8ca 100644 --- a/cmd/app/options/options.go +++ b/cmd/app/options/options.go @@ -70,7 +70,9 @@ type Options struct { // CMClient is a rest client for interacting with cert-manager resources. CMClient cmclient.Interface - // The host and port that the metrics endpoint should listen on. + // MetricsBindAddress is the TCP address for exposing HTTP Prometheus metrics + // which will be served on the HTTP path '/metrics'. The value "0" will + // disable exposing metrics. MetricsBindAddress string } @@ -156,6 +158,6 @@ func (o *Options) addAppFlags(fs *pflag.FlagSet) { fs.BoolVar(&o.UseTokenRequest, "use-token-request", false, "Use the empty audience token request for creating CertificateRequests. Requires the token request to be defined on the CSIDriver manifest.") fs.StringVar(&o.MetricsBindAddress, "metrics-bind-address", "0", - "The host and port that the metrics endpoint should listen on (for example ::9402). "+ - "If 0, the metrics server will be disabled. (default).") + "TCP address for exposing HTTP Prometheus metrics which will be served on the HTTP path '/metrics'. "+ + `The value "0" will disable exposing metrics.`) } diff --git a/deploy/charts/csi-driver/README.md b/deploy/charts/csi-driver/README.md index 5b7d2927..eab401d8 100644 --- a/deploy/charts/csi-driver/README.md +++ b/deploy/charts/csi-driver/README.md @@ -27,7 +27,7 @@ The TCP port on which the metrics server will listen. > false > ``` -Create a PodMonitor to add csi-driver to Prometheus. +Create a PodMonitor to add csi-driver to Prometheus if you are using Prometheus Operator. See https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.PodMonitor #### **metrics.podmonitor.namespace** ~ `string` The namespace that the pod monitor should live in, defaults to the cert-manager-csi-driver namespace. diff --git a/deploy/charts/csi-driver/values.schema.json b/deploy/charts/csi-driver/values.schema.json index c7d75b4d..13b90212 100644 --- a/deploy/charts/csi-driver/values.schema.json +++ b/deploy/charts/csi-driver/values.schema.json @@ -300,7 +300,7 @@ }, "helm-values.metrics.podmonitor.enabled": { "default": false, - "description": "Create a PodMonitor to add csi-driver to Prometheus.", + "description": "Create a PodMonitor to add csi-driver to Prometheus if you are using Prometheus Operator. See https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.PodMonitor", "type": "boolean" }, "helm-values.metrics.podmonitor.endpointAdditionalProperties": { diff --git a/deploy/charts/csi-driver/values.yaml b/deploy/charts/csi-driver/values.yaml index 0a2796f5..9783f21a 100644 --- a/deploy/charts/csi-driver/values.yaml +++ b/deploy/charts/csi-driver/values.yaml @@ -5,7 +5,8 @@ metrics: # The TCP port on which the metrics server will listen. port: 9402 podmonitor: - # Create a PodMonitor to add csi-driver to Prometheus. + # Create a PodMonitor to add csi-driver to Prometheus if you are using Prometheus Operator. + # See https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.PodMonitor enabled: false # The namespace that the pod monitor should live in, defaults diff --git a/make/test-e2e.mk b/make/test-e2e.mk index c6d4dce2..516eee18 100644 --- a/make/test-e2e.mk +++ b/make/test-e2e.mk @@ -34,7 +34,6 @@ e2e-setup-cert-manager: | kind-cluster $(NEEDS_HELM) $(NEEDS_KUBECTL) --set startupapicheck.image.repository=$(quay.io/jetstack/cert-manager-startupapicheck.REPO) \ --set startupapicheck.image.tag=$(quay.io/jetstack/cert-manager-startupapicheck.TAG) \ --set startupapicheck.image.pullPolicy=Never \ - --set prometheus.podmonitor.enabled=true \ cert-manager cert-manager >/dev/null # The "install" target can be run on its own with any currently active cluster, @@ -47,7 +46,6 @@ endif test-e2e-deps: INSTALL_OPTIONS := test-e2e-deps: INSTALL_OPTIONS += --set image.repository=$(oci_manager_image_name_development) -test-e2e-deps: INSTALL_OPTIONS += --set metrics.podmonitor.enabled=true test-e2e-deps: e2e-setup-cert-manager test-e2e-deps: install diff --git a/test/e2e/suite/cases/metrics.go b/test/e2e/suite/cases/metrics.go index 4a2b290b..d7a27592 100644 --- a/test/e2e/suite/cases/metrics.go +++ b/test/e2e/suite/cases/metrics.go @@ -1,3 +1,19 @@ +/* +Copyright 2021 The cert-manager Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package cases import ( @@ -17,7 +33,7 @@ const ( var _ = framework.CasesDescribe("Metrics", func() { f := framework.NewDefaultFramework("metrics") - FIt("Should serve Go and process metrics on port 9402", func() { + It("Should serve Go and process metrics on port 9402", func() { By("Discovering the name of the csi-driver Pod") pods, err := f.KubeClientSet.CoreV1().Pods(certManagerNamespace).List(context.TODO(), metav1.ListOptions{ LabelSelector: "app.kubernetes.io/instance=csi-driver", From 3f82ded7485b5ef27e1206bf8f5c4a2901193833 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Fri, 28 Jun 2024 11:23:53 +0100 Subject: [PATCH 7/8] Fix import order Signed-off-by: Richard Wall --- test/e2e/suite/cases/metrics.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/e2e/suite/cases/metrics.go b/test/e2e/suite/cases/metrics.go index d7a27592..d5d14cf2 100644 --- a/test/e2e/suite/cases/metrics.go +++ b/test/e2e/suite/cases/metrics.go @@ -19,11 +19,12 @@ package cases import ( "context" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/cert-manager/csi-driver/test/e2e/framework" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" ) const ( From 7e0684ca64f85f3ff635f35b91d0d88db95a3ed8 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Fri, 28 Jun 2024 11:37:28 +0100 Subject: [PATCH 8/8] Simplify tests Signed-off-by: Richard Wall --- test/e2e/suite/cases/metrics.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/e2e/suite/cases/metrics.go b/test/e2e/suite/cases/metrics.go index d5d14cf2..28ab33d6 100644 --- a/test/e2e/suite/cases/metrics.go +++ b/test/e2e/suite/cases/metrics.go @@ -18,6 +18,7 @@ package cases import ( "context" + "strings" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -49,10 +50,12 @@ var _ = framework.CasesDescribe("Metrics", func() { ProxyGet("http", p.Name, "9402", "/metrics", map[string]string{}). DoRaw(context.TODO()) Expect(err).NotTo(HaveOccurred()) - resp := string(respBytes) - Expect(resp).To(ContainSubstring("# HELP go_threads Number of OS threads created."), + resp := strings.Split(string(respBytes), "\n") + Expect(resp).To(ContainElement(`# TYPE go_threads gauge`), "go_collector metrics should be available") - Expect(resp).To(ContainSubstring("# HELP process_open_fds Number of open file descriptors."), + Expect(resp).To(ContainElement(`# TYPE process_open_fds gauge`), "process_collector metrics should be available") + Expect(resp).To(ContainElement(`# TYPE rest_client_requests_total counter`), + "controller-runtime metrics should be available") }) })