Skip to content

Commit

Permalink
Merge pull request #271 from wallrj/prometheus-metrics
Browse files Browse the repository at this point in the history
[VC-34401] Add Prometheus metrics endpoint
  • Loading branch information
cert-manager-prow[bot] committed Jun 28, 2024
2 parents 4e343e1 + 7e0684c commit 81e52c2
Show file tree
Hide file tree
Showing 10 changed files with 448 additions and 13 deletions.
73 changes: 65 additions & 8 deletions cmd/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,18 @@ import (
"crypto/x509"
"encoding/pem"
"fmt"
"net/http"

"github.com/cert-manager/csi-lib/driver"
"github.com/cert-manager/csi-lib/manager"
"github.com/cert-manager/csi-lib/manager/util"
"github.com/cert-manager/csi-lib/metadata"
"github.com/cert-manager/csi-lib/storage"
"github.com/spf13/cobra"
"golang.org/x/sync/errgroup"
"k8s.io/utils/clock"
ctrl "sigs.k8s.io/controller-runtime"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

"github.com/cert-manager/csi-driver/cmd/app/options"
"github.com/cert-manager/csi-driver/internal/version"
Expand All @@ -57,8 +61,11 @@ func NewCommand(ctx context.Context) *cobra.Command {
},
RunE: func(cmd *cobra.Command, args []string) error {
log := opts.Logr.WithName("main")
log.Info("Starting driver", "version", version.VersionInfo())
// Set the controller-runtime logger so that we get the
// controller-runtime metricsserver logs.
ctrl.SetLogger(log)

log.Info("Starting driver", "version", version.VersionInfo())
store, err := storage.NewFilesystem(opts.Logr.WithName("storage"), opts.DataRoot)
if err != nil {
return fmt.Errorf("failed to setup filesystem: %w", err)
Expand Down Expand Up @@ -96,18 +103,68 @@ func NewCommand(ctx context.Context) *cobra.Command {
return fmt.Errorf("failed to setup driver: " + err.Error())
}

go func() {
g, gCTX := errgroup.WithContext(ctx)
g.Go(func() error {
<-ctx.Done()
log.Info("shutting down driver", "context", ctx.Err())
d.Stop()
}()
return nil
})

log.Info("running driver")
if err := d.Run(); err != nil {
return fmt.Errorf("failed running driver: " + err.Error())
}
g.Go(func() error {
log.Info("running driver")
if err := d.Run(); err != nil {
return fmt.Errorf("failed running driver: " + err.Error())
}
return nil
})

return nil
// Start a metrics server if the --metrics-bind-address is not "0".
//
// By default this will serve all the metrics that are registered by
// controller-runtime to its global metrics registry. Including:
// * Go Runtime metrics
// * Process metrics
// * Various controller-runtime controller metrics
// (not updated by csi-driver because it doesn't use controller-runtime)
// * Leader election metrics
// (not updated by csi-driver because it doesn't use leader-election)
//
// The full list is here:
// https://github.com/kubernetes-sigs/controller-runtime/blob/700befecdffa803d19830a6a43adc5779ed01e26/pkg/internal/controller/metrics/metrics.go#L73-L86
//
// The advantages of using the controller-runtime metricsserver are:
// * It already exists and is actively maintained.
// * Provides optional features for securing the metrics endpoint by
// TLS and by authentication with a K8S service account token,
// should that be requested by users in the future.
// * Consistency with cert-manager/approver-policy, which also uses
// this library and therefore publishes the same set of
// controller-runtime base metrics.
// Disadvantages:
// * It introduces a dependency on controller-runtime, which often
// introduces breaking changes.
// * It uses a global metrics registry, which has the usual risks
// associated with globals and makes it difficult for us to control
// which metrics are published for csi-driver.
// https://github.com/kubernetes-sigs/controller-runtime/issues/210
var unusedHttpClient *http.Client
metricsServer, err := metricsserver.NewServer(
metricsserver.Options{
BindAddress: opts.MetricsBindAddress,
},
opts.RestConfig,
unusedHttpClient,
)
if err != nil {
return err
}
if metricsServer != nil {
g.Go(func() error {
return metricsServer.Start(gCTX)
})
}
return g.Wait()
},
}

Expand Down
8 changes: 8 additions & 0 deletions cmd/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ type Options struct {

// CMClient is a rest client for interacting with cert-manager resources.
CMClient cmclient.Interface

// MetricsBindAddress is the TCP address for exposing HTTP Prometheus metrics
// which will be served on the HTTP path '/metrics'. The value "0" will
// disable exposing metrics.
MetricsBindAddress string
}

func New() *Options {
Expand Down Expand Up @@ -152,4 +157,7 @@ func (o *Options) addAppFlags(fs *pflag.FlagSet) {

fs.BoolVar(&o.UseTokenRequest, "use-token-request", false,
"Use the empty audience token request for creating CertificateRequests. Requires the token request to be defined on the CSIDriver manifest.")
fs.StringVar(&o.MetricsBindAddress, "metrics-bind-address", "0",
"TCP address for exposing HTTP Prometheus metrics which will be served on the HTTP path '/metrics'. "+
`The value "0" will disable exposing metrics.`)
}
89 changes: 89 additions & 0 deletions deploy/charts/csi-driver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,95 @@

<!-- AUTO-GENERATED -->

#### **metrics.enabled** ~ `bool`
> Default value:
> ```yaml
> true
> ```
Enable the metrics server on csi-driver pods.
If false, the metrics server will be disabled and the other metrics fields below will be ignored.
#### **metrics.port** ~ `number`
> Default value:
> ```yaml
> 9402
> ```
The TCP port on which the metrics server will listen.
#### **metrics.podmonitor.enabled** ~ `bool`
> Default value:
> ```yaml
> false
> ```
Create a PodMonitor to add csi-driver to Prometheus if you are using Prometheus Operator. See https://prometheus-operator.dev/docs/operator/api/#monitoring.coreos.com/v1.PodMonitor
#### **metrics.podmonitor.namespace** ~ `string`
The namespace that the pod monitor should live in, defaults to the cert-manager-csi-driver namespace.
#### **metrics.podmonitor.prometheusInstance** ~ `string`
> Default value:
> ```yaml
> default
> ```
Specifies the `prometheus` label on the created PodMonitor. This is used when different Prometheus instances have label selectors matching different PodMonitors.
#### **metrics.podmonitor.interval** ~ `string`
> Default value:
> ```yaml
> 60s
> ```
The interval to scrape metrics.
#### **metrics.podmonitor.scrapeTimeout** ~ `string`
> Default value:
> ```yaml
> 30s
> ```
The timeout before a metrics scrape fails.
#### **metrics.podmonitor.labels** ~ `object`
> Default value:
> ```yaml
> {}
> ```
Additional labels to add to the PodMonitor.
#### **metrics.podmonitor.annotations** ~ `object`
> Default value:
> ```yaml
> {}
> ```
Additional annotations to add to the PodMonitor.
#### **metrics.podmonitor.honorLabels** ~ `bool`
> Default value:
> ```yaml
> false
> ```
Keep labels from scraped data, overriding server-side labels.
#### **metrics.podmonitor.endpointAdditionalProperties** ~ `object`
> Default value:
> ```yaml
> {}
> ```
EndpointAdditionalProperties allows setting additional properties on the endpoint such as relabelings, metricRelabelings etc.
For example:
```yaml
endpointAdditionalProperties:
relabelings:
- action: replace
sourceLabels:
- __meta_kubernetes_pod_node_name
targetLabel: instance
```
#### **image.registry** ~ `string`
Target image registry. This value is prepended to the target image repository, if set.
Expand Down
9 changes: 9 additions & 0 deletions deploy/charts/csi-driver/templates/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ spec:
- --endpoint=$(CSI_ENDPOINT)
- --data-root=csi-data-dir
- --use-token-request={{ .Values.app.driver.useTokenRequest }}
{{- if .Values.metrics.enabled }}
- --metrics-bind-address=:{{ .Values.metrics.port }}
{{- else }}
- --metrics-bind-address=0
{{- end }}
env:
- name: NODE_ID
valueFrom:
Expand All @@ -103,6 +108,10 @@ spec:
ports:
- containerPort: {{.Values.app.livenessProbe.port}}
name: healthz
{{- if .Values.metrics.enabled }}
- containerPort: {{ .Values.metrics.port }}
name: http-metrics
{{- end }}
livenessProbe:
httpGet:
path: /healthz
Expand Down
41 changes: 41 additions & 0 deletions deploy/charts/csi-driver/templates/podmonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{{- if and .Values.metrics.enabled .Values.metrics.podmonitor.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: {{ include "cert-manager-csi-driver.name" . }}
{{- if .Values.metrics.podmonitor.namespace }}
namespace: {{ .Values.metrics.podmonitor.namespace }}
{{- else }}
namespace: {{ .Release.Namespace | quote }}
{{- end }}
labels:
{{- include "cert-manager-csi-driver.labels" . | nindent 4 }}
prometheus: {{ .Values.metrics.podmonitor.prometheusInstance }}
{{- with .Values.metrics.podmonitor.labels }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- with .Values.metrics.podmonitor.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
jobLabel: {{ include "cert-manager-csi-driver.name" . }}
selector:
matchLabels:
app.kubernetes.io/name: {{ include "cert-manager-csi-driver.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- if .Values.metrics.podmonitor.namespace }}
namespaceSelector:
matchNames:
- {{ .Release.Namespace | quote }}
{{- end }}
podMetricsEndpoints:
- port: http-metrics
path: /metrics
interval: {{ .Values.metrics.podmonitor.interval }}
scrapeTimeout: {{ .Values.metrics.podmonitor.scrapeTimeout }}
honorLabels: {{ .Values.metrics.podmonitor.honorLabels }}
{{- with .Values.metrics.podmonitor.endpointAdditionalProperties }}
{{- toYaml . | nindent 4 }}
{{- end }}
{{- end }}
Loading

0 comments on commit 81e52c2

Please sign in to comment.