From 327a33e329e823cbffed87338dbd20e870780bc9 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Sat, 29 Jan 2022 16:45:29 -0800 Subject: [PATCH] fix: register controller workqueue metrics correctly Signed-off-by: Ben Ye --- controller/appcontroller.go | 3 - controller/metrics/metrics.go | 1 + controller/metrics/workqueue.go | 101 ++++++++++++++++++++++++++++++++ go.mod | 3 +- go.sum | 1 - 5 files changed, 103 insertions(+), 6 deletions(-) create mode 100644 controller/metrics/workqueue.go diff --git a/controller/appcontroller.go b/controller/appcontroller.go index 3677195fef772..6dfa478133019 100644 --- a/controller/appcontroller.go +++ b/controller/appcontroller.go @@ -36,9 +36,6 @@ import ( "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" - // make sure to register workqueue prometheus metrics - _ "k8s.io/component-base/metrics/prometheus/workqueue" - statecache "github.com/argoproj/argo-cd/v2/controller/cache" "github.com/argoproj/argo-cd/v2/controller/metrics" "github.com/argoproj/argo-cd/v2/pkg/apis/application" diff --git a/controller/metrics/metrics.go b/controller/metrics/metrics.go index b1d2c8ab8eb49..dbb4bfd8dcace 100644 --- a/controller/metrics/metrics.go +++ b/controller/metrics/metrics.go @@ -159,6 +159,7 @@ func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFil mux := http.NewServeMux() registry := NewAppRegistry(appLister, appFilter, appLabels) + registry.MustRegister(depth, adds, latency, workDuration, unfinished, longestRunningProcessor, retries) mux.Handle(MetricsPath, promhttp.HandlerFor(prometheus.Gatherers{ // contains app controller specific metrics registry, diff --git a/controller/metrics/workqueue.go b/controller/metrics/workqueue.go new file mode 100644 index 0000000000000..2ef10685ee47d --- /dev/null +++ b/controller/metrics/workqueue.go @@ -0,0 +1,101 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "k8s.io/client-go/util/workqueue" +) + +const ( + WorkQueueSubsystem = "workqueue" + DepthKey = "depth" + AddsKey = "adds_total" + QueueLatencyKey = "queue_duration_seconds" + WorkDurationKey = "work_duration_seconds" + UnfinishedWorkKey = "unfinished_work_seconds" + LongestRunningProcessorKey = "longest_running_processor_seconds" + RetriesKey = "retries_total" +) + +var ( + depth = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: WorkQueueSubsystem, + Name: DepthKey, + Help: "Current depth of workqueue", + }, []string{"name"}) + + adds = prometheus.NewCounterVec(prometheus.CounterOpts{ + Subsystem: WorkQueueSubsystem, + Name: AddsKey, + Help: "Total number of adds handled by workqueue", + }, []string{"name"}) + + latency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Subsystem: WorkQueueSubsystem, + Name: QueueLatencyKey, + Help: "How long in seconds an item stays in workqueue before being requested", + Buckets: []float64{1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 5, 10, 15, 30, 60, 120, 180}, + }, []string{"name"}) + + workDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Subsystem: WorkQueueSubsystem, + Name: WorkDurationKey, + Help: "How long in seconds processing an item from workqueue takes.", + Buckets: []float64{1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 5, 10, 15, 30, 60, 120, 180}, + }, []string{"name"}) + + unfinished = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: WorkQueueSubsystem, + Name: UnfinishedWorkKey, + Help: "How many seconds of work has been done that " + + "is in progress and hasn't been observed by work_duration. Large " + + "values indicate stuck threads. One can deduce the number of stuck " + + "threads by observing the rate at which this increases.", + }, []string{"name"}) + + longestRunningProcessor = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Subsystem: WorkQueueSubsystem, + Name: LongestRunningProcessorKey, + Help: "How many seconds has the longest running " + + "processor for workqueue been running.", + }, []string{"name"}) + + retries = prometheus.NewCounterVec(prometheus.CounterOpts{ + Subsystem: WorkQueueSubsystem, + Name: RetriesKey, + Help: "Total number of retries handled by workqueue", + }, []string{"name"}) +) + +func init() { + workqueue.SetProvider(workqueueMetricsProvider{}) +} + +type workqueueMetricsProvider struct{} + +func (workqueueMetricsProvider) NewDepthMetric(name string) workqueue.GaugeMetric { + return depth.WithLabelValues(name) +} + +func (workqueueMetricsProvider) NewAddsMetric(name string) workqueue.CounterMetric { + return adds.WithLabelValues(name) +} + +func (workqueueMetricsProvider) NewLatencyMetric(name string) workqueue.HistogramMetric { + return latency.WithLabelValues(name) +} + +func (workqueueMetricsProvider) NewWorkDurationMetric(name string) workqueue.HistogramMetric { + return workDuration.WithLabelValues(name) +} + +func (workqueueMetricsProvider) NewUnfinishedWorkSecondsMetric(name string) workqueue.SettableGaugeMetric { + return unfinished.WithLabelValues(name) +} + +func (workqueueMetricsProvider) NewLongestRunningProcessorSecondsMetric(name string) workqueue.SettableGaugeMetric { + return longestRunningProcessor.WithLabelValues(name) +} + +func (workqueueMetricsProvider) NewRetriesMetric(name string) workqueue.CounterMetric { + return retries.WithLabelValues(name) +} diff --git a/go.mod b/go.mod index 580ae9febed17..8b09d87409841 100644 --- a/go.mod +++ b/go.mod @@ -81,7 +81,6 @@ require ( k8s.io/apimachinery v0.23.1 k8s.io/client-go v0.23.1 k8s.io/code-generator v0.23.1 - k8s.io/component-base v0.23.1 k8s.io/klog/v2 v2.30.0 k8s.io/kube-openapi v0.0.0-20211115234752-e816edb12b65 k8s.io/kubectl v0.23.1 @@ -112,7 +111,6 @@ require ( github.com/antonmedv/expr v1.8.9 // indirect github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/blang/semver v3.5.1+incompatible // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect @@ -208,6 +206,7 @@ require ( gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect k8s.io/apiserver v0.23.1 // indirect k8s.io/cli-runtime v0.23.1 // indirect + k8s.io/component-base v0.23.1 // indirect k8s.io/component-helpers v0.23.1 // indirect k8s.io/gengo v0.0.0-20210813121822-485abfe95c7c // indirect k8s.io/kube-aggregator v0.23.1 // indirect diff --git a/go.sum b/go.sum index 54cbb0f37fb48..b2a7ad7a2910c 100644 --- a/go.sum +++ b/go.sum @@ -160,7 +160,6 @@ github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edY github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84= github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= -github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ= github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= github.com/bombsimon/logrusr/v2 v2.0.1 h1:1VgxVNQMCvjirZIYaT9JYn6sAVGVEcNtRE0y4mvaOAM=