diff --git a/pkg/metrics/client_go_adapter.go b/pkg/metrics/client_go_adapter.go index 3b2c316280..dd9e50b3fd 100644 --- a/pkg/metrics/client_go_adapter.go +++ b/pkg/metrics/client_go_adapter.go @@ -20,23 +20,9 @@ import ( "net/url" "time" - "k8s.io/apimachinery/pkg/util/runtime" - "github.com/prometheus/client_golang/prometheus" reflectormetrics "k8s.io/client-go/tools/cache" clientmetrics "k8s.io/client-go/tools/metrics" - workqueuemetrics "k8s.io/client-go/util/workqueue" -) - -const ( - workQueueSubsystem = "workqueue" - depthKey = "depth" - addsKey = "adds_total" - queueLatencyKey = "queue_duration_seconds" - workDurationKey = "work_duration_seconds" - unfinishedWorkKey = "unfinished_work_seconds" - longestRunningProcessorKey = "longest_running_processor_seconds" - retriesKey = "retries_total" ) // this file contains setup logic to initialize the myriad of places @@ -117,62 +103,11 @@ var ( Name: "last_resource_version", Help: "Last resource version seen for the reflectors", }, []string{"name"}) - - // workqueue metrics - - depth = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Subsystem: workQueueSubsystem, - Name: depthKey, - Help: "Current depth of workqueue", - }, []string{"name"}) - - adds = prometheus.NewCounterVec(prometheus.CounterOpts{ - Subsystem: workQueueSubsystem, - Name: addsKey, - Help: "Total number of adds handled by workqueue", - }, []string{"name"}) - - latency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Subsystem: workQueueSubsystem, - Name: queueLatencyKey, - Help: "How long in seconds an item stays in workqueue before being requested.", - Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), - }, []string{"name"}) - - workDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Subsystem: workQueueSubsystem, - Name: workDurationKey, - Help: "How long in seconds processing an item from workqueue takes.", - Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), - }, []string{"name"}) - - unfinishedWork = 
prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Subsystem: workQueueSubsystem, - Name: unfinishedWorkKey, - Help: "How many seconds of work has done that " + - "is in progress and hasn't been observed by work_duration. Large " + - "values indicate stuck threads. One can deduce the number of stuck " + - "threads by observing the rate at which this increases.", - }, []string{"name"}) - - longestRunning = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Subsystem: workQueueSubsystem, - Name: longestRunningProcessorKey, - Help: "How many seconds has the longest running " + - "processor for workqueue been running.", - }, []string{"name"}) - - retries = prometheus.NewCounterVec(prometheus.CounterOpts{ - Subsystem: workQueueSubsystem, - Name: retriesKey, - Help: "Total number of retries handled by workqueue", - }, []string{"name"}) ) func init() { registerClientMetrics() registerReflectorMetrics() - registerWorkqueueMetrics() } // registerClientMetrics sets up the client latency metrics from client-go @@ -199,19 +134,6 @@ func registerReflectorMetrics() { reflectormetrics.SetReflectorMetricsProvider(reflectorMetricsProvider{}) } -// registerWorkQueueMetrics sets up workqueue (other reconcile) metrics -func registerWorkqueueMetrics() { - Registry.MustRegister(depth) - Registry.MustRegister(adds) - Registry.MustRegister(latency) - Registry.MustRegister(workDuration) - Registry.MustRegister(retries) - Registry.MustRegister(longestRunning) - Registry.MustRegister(unfinishedWork) - - workqueuemetrics.SetProvider(workqueueMetricsProvider{}) -} - // this section contains adapters, implementations, and other sundry organic, artisinally // hand-crafted syntax trees required to convince client-go that it actually wants to let // someone use its metrics. 
@@ -273,114 +195,3 @@ func (reflectorMetricsProvider) NewItemsInWatchMetric(name string) reflectormetr func (reflectorMetricsProvider) NewLastResourceVersionMetric(name string) reflectormetrics.GaugeMetric { return lastResourceVersion.WithLabelValues(name) } - -// Workqueue metrics (method #3 for client-go metrics), -// copied (more-or-less directly) from k8s.io/kubernetes setup code -// (which isn't anywhere in an easily-importable place). -// TODO(directxman12): stop "cheating" and calling histograms summaries when we pull in the latest deps - -type workqueueMetricsProvider struct{} - -func (workqueueMetricsProvider) NewDepthMetric(name string) workqueuemetrics.GaugeMetric { - return depth.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewAddsMetric(name string) workqueuemetrics.CounterMetric { - return adds.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewLatencyMetric(name string) workqueuemetrics.HistogramMetric { - return latency.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewWorkDurationMetric(name string) workqueuemetrics.HistogramMetric { - return workDuration.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewUnfinishedWorkSecondsMetric(name string) workqueuemetrics.SettableGaugeMetric { - return unfinishedWork.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewLongestRunningProcessorSecondsMetric(name string) workqueuemetrics.SettableGaugeMetric { - return longestRunning.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewRetriesMetric(name string) workqueuemetrics.CounterMetric { - return retries.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewDeprecatedDepthMetric(name string) workqueuemetrics.GaugeMetric { - depth := prometheus.NewGauge(prometheus.GaugeOpts{ - Subsystem: name, - Name: "depth", - Help: "Current depth of workqueue: " + name, - }) - runtime.HandleError(Registry.Register(depth)) - return depth -} - -func (workqueueMetricsProvider) 
NewDeprecatedAddsMetric(name string) workqueuemetrics.CounterMetric { - adds := prometheus.NewCounter(prometheus.CounterOpts{ - Subsystem: name, - Name: "adds", - Help: "Total number of adds handled by workqueue: " + name, - }) - runtime.HandleError(Registry.Register(adds)) - return adds -} - -func (workqueueMetricsProvider) NewDeprecatedLatencyMetric(name string) workqueuemetrics.SummaryMetric { - latency := prometheus.NewSummary(prometheus.SummaryOpts{ - Subsystem: name, - Name: "queue_latency", - Help: "How long an item stays in workqueue" + name + " before being requested.", - ConstLabels: prometheus.Labels{"name": name}, - }) - runtime.HandleError(Registry.Register(latency)) - return latency -} - -func (workqueueMetricsProvider) NewDeprecatedWorkDurationMetric(name string) workqueuemetrics.SummaryMetric { - workDuration := prometheus.NewSummary(prometheus.SummaryOpts{ - Subsystem: name, - Name: "work_duration", - Help: "How long processing an item from workqueue" + name + " takes.", - ConstLabels: prometheus.Labels{"name": name}, - }) - runtime.HandleError(Registry.Register(workDuration)) - return workDuration -} - -func (workqueueMetricsProvider) NewDeprecatedUnfinishedWorkSecondsMetric(name string) workqueuemetrics.SettableGaugeMetric { - unfinishedWork := prometheus.NewGauge(prometheus.GaugeOpts{ - Subsystem: name, - Name: "unfinished_work_seconds", - Help: "How many seconds of work " + name + " has done that " + - "is in progress and hasn't been observed by work_duration. Large " + - "values indicate stuck threads. 
One can deduce the number of stuck " + - "threads by observing the rate at which this increases.", - }) - runtime.HandleError(Registry.Register(unfinishedWork)) - return unfinishedWork -} - -func (workqueueMetricsProvider) NewDeprecatedLongestRunningProcessorMicrosecondsMetric(name string) workqueuemetrics.SettableGaugeMetric { - longestRunning := prometheus.NewGauge(prometheus.GaugeOpts{ - Subsystem: name, - Name: "longest_running_processor_microseconds", - Help: "How many microseconds has the longest running " + - "processor for " + name + " been running.", - }) - runtime.HandleError(Registry.Register(longestRunning)) - return longestRunning -} - -func (workqueueMetricsProvider) NewDeprecatedRetriesMetric(name string) workqueuemetrics.CounterMetric { - retries := prometheus.NewCounter(prometheus.CounterOpts{ - Subsystem: name, - Name: "retries", - Help: "Total number of retries handled by workqueue: " + name, - }) - runtime.HandleError(Registry.Register(retries)) - return retries -} diff --git a/pkg/metrics/workqueue.go b/pkg/metrics/workqueue.go new file mode 100644 index 0000000000..6381f0c14a --- /dev/null +++ b/pkg/metrics/workqueue.go @@ -0,0 +1,173 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package metrics
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"k8s.io/client-go/util/workqueue"
+	logf "sigs.k8s.io/controller-runtime/pkg/internal/log"
+)
+
+var log = logf.RuntimeLog.WithName("metrics")
+
+// This file is copied and adapted from k8s.io/kubernetes/pkg/util/workqueue/prometheus
+// which registers metrics to the default prometheus Registry. We require very
+// similar functionality, but must register metrics to a different Registry.
+
+// init installs workqueueMetricsProvider as the metrics provider used by
+// k8s.io/client-go/util/workqueue.
+func init() {
+	workqueue.SetProvider(workqueueMetricsProvider{})
+}
+
+// registerWorkqueueMetric registers c with Registry. A registration failure is
+// logged together with the metric name and queue name; it is not returned.
+func registerWorkqueueMetric(c prometheus.Collector, name, queue string) {
+	if err := Registry.Register(c); err != nil {
+		log.Error(err, "failed to register metric", "name", name, "queue", queue)
+	}
+}
+
+// registerOrReuseWorkqueueMetric registers c with Registry and returns the
+// collector that samples should be recorded on.
+//
+// Every metric in this file is identified by its name plus the constant
+// "name" label, so asking for the same metric of the same queue twice makes
+// Register fail with prometheus.AlreadyRegisteredError. In that case the
+// collector that is already registered is returned: recording on the freshly
+// built one would silently drop the samples, because it is not attached to
+// Registry. Any other registration error is logged and c is returned as-is.
+func registerOrReuseWorkqueueMetric(c prometheus.Collector, name, queue string) prometheus.Collector {
+	err := Registry.Register(c)
+	if err == nil {
+		return c
+	}
+	if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
+		return are.ExistingCollector
+	}
+	log.Error(err, "failed to register metric", "name", name, "queue", queue)
+	return c
+}
+
+// newWorkqueueGauge returns the gauge called name for the given queue,
+// creating and registering it if it is not registered yet.
+func newWorkqueueGauge(name, help, queue string) prometheus.Gauge {
+	m := prometheus.NewGauge(prometheus.GaugeOpts{
+		Name:        name,
+		Help:        help,
+		ConstLabels: prometheus.Labels{"name": queue},
+	})
+	if existing, ok := registerOrReuseWorkqueueMetric(m, name, queue).(prometheus.Gauge); ok {
+		return existing
+	}
+	// The registered collector is not a gauge; fall back to the new one.
+	return m
+}
+
+// newWorkqueueCounter returns the counter called name for the given queue,
+// creating and registering it if it is not registered yet.
+func newWorkqueueCounter(name, help, queue string) prometheus.Counter {
+	m := prometheus.NewCounter(prometheus.CounterOpts{
+		Name:        name,
+		Help:        help,
+		ConstLabels: prometheus.Labels{"name": queue},
+	})
+	if existing, ok := registerOrReuseWorkqueueMetric(m, name, queue).(prometheus.Counter); ok {
+		return existing
+	}
+	// The registered collector is not a counter; fall back to the new one.
+	return m
+}
+
+// newWorkqueueHistogram returns the histogram called name for the given
+// queue, creating and registering it if it is not registered yet. Buckets are
+// prometheus.ExponentialBuckets(10e-9, 10, 10).
+func newWorkqueueHistogram(name, help, queue string) prometheus.Histogram {
+	m := prometheus.NewHistogram(prometheus.HistogramOpts{
+		Name:        name,
+		Help:        help,
+		ConstLabels: prometheus.Labels{"name": queue},
+		Buckets:     prometheus.ExponentialBuckets(10e-9, 10, 10),
+	})
+	if existing, ok := registerOrReuseWorkqueueMetric(m, name, queue).(prometheus.Histogram); ok {
+		return existing
+	}
+	// The registered collector is not a histogram; fall back to the new one.
+	return m
+}
+
+// workqueueMetricsProvider creates the workqueue metrics and registers them
+// with Registry. It is handed to workqueue.SetProvider in init.
+type workqueueMetricsProvider struct{}
+
+// NewDepthMetric returns the workqueue_depth gauge for queue.
+func (workqueueMetricsProvider) NewDepthMetric(queue string) workqueue.GaugeMetric {
+	return newWorkqueueGauge("workqueue_depth", "Current depth of workqueue", queue)
+}
+
+// NewAddsMetric returns the workqueue_adds_total counter for queue.
+func (workqueueMetricsProvider) NewAddsMetric(queue string) workqueue.CounterMetric {
+	return newWorkqueueCounter("workqueue_adds_total", "Total number of adds handled by workqueue", queue)
+}
+
+// NewLatencyMetric returns the workqueue_queue_duration_seconds histogram for
+// queue.
+func (workqueueMetricsProvider) NewLatencyMetric(queue string) workqueue.HistogramMetric {
+	const help = "How long in seconds an item stays in workqueue before being requested."
+	return newWorkqueueHistogram("workqueue_queue_duration_seconds", help, queue)
+}
+
+// NewWorkDurationMetric returns the workqueue_work_duration_seconds histogram
+// for queue.
+func (workqueueMetricsProvider) NewWorkDurationMetric(queue string) workqueue.HistogramMetric {
+	const help = "How long in seconds processing an item from workqueue takes."
+	return newWorkqueueHistogram("workqueue_work_duration_seconds", help, queue)
+}
+
+// NewUnfinishedWorkSecondsMetric returns the workqueue_unfinished_work_seconds
+// gauge for queue.
+func (workqueueMetricsProvider) NewUnfinishedWorkSecondsMetric(queue string) workqueue.SettableGaugeMetric {
+	const help = "How many seconds of work has done that " +
+		"is in progress and hasn't been observed by work_duration. Large " +
+		"values indicate stuck threads. One can deduce the number of stuck " +
+		"threads by observing the rate at which this increases."
+	return newWorkqueueGauge("workqueue_unfinished_work_seconds", help, queue)
+}
+
+// NewLongestRunningProcessorSecondsMetric returns the
+// workqueue_longest_running_processor_seconds gauge for queue.
+func (workqueueMetricsProvider) NewLongestRunningProcessorSecondsMetric(queue string) workqueue.SettableGaugeMetric {
+	const help = "How many seconds has the longest running " +
+		"processor for workqueue been running."
+	return newWorkqueueGauge("workqueue_longest_running_processor_seconds", help, queue)
+}
+
+// NewRetriesMetric returns the workqueue_retries_total counter for queue.
+func (workqueueMetricsProvider) NewRetriesMetric(queue string) workqueue.CounterMetric {
+	return newWorkqueueCounter("workqueue_retries_total", "Total number of retries handled by workqueue", queue)
+}
+
+// TODO(abursavich): Remove the following deprecated metrics when they are
+// removed from k8s.io/client-go/util/workqueue.
+ +func (workqueueMetricsProvider) NewDeprecatedLongestRunningProcessorMicrosecondsMetric(queue string) workqueue.SettableGaugeMetric { + const name = "workqueue_longest_running_processor_microseconds" + m := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: name, + Help: "(Deprecated) How many microseconds has the longest running " + + "processor for workqueue been running.", + ConstLabels: prometheus.Labels{"name": queue}, + }) + registerWorkqueueMetric(m, name, queue) + return m +} + +// NOTE: The following deprecated metrics are noops because they were never +// included in controller-runtime. + +func (workqueueMetricsProvider) NewDeprecatedDepthMetric(queue string) workqueue.GaugeMetric { + return noopMetric{} +} + +func (workqueueMetricsProvider) NewDeprecatedAddsMetric(queue string) workqueue.CounterMetric { + return noopMetric{} +} + +func (workqueueMetricsProvider) NewDeprecatedLatencyMetric(queue string) workqueue.SummaryMetric { + return noopMetric{} +} + +func (workqueueMetricsProvider) NewDeprecatedWorkDurationMetric(queue string) workqueue.SummaryMetric { + return noopMetric{} +} + +func (workqueueMetricsProvider) NewDeprecatedUnfinishedWorkSecondsMetric(queue string) workqueue.SettableGaugeMetric { + return noopMetric{} +} + +func (workqueueMetricsProvider) NewDeprecatedRetriesMetric(queue string) workqueue.CounterMetric { + return noopMetric{} +} + +type noopMetric struct{} + +func (noopMetric) Inc() {} +func (noopMetric) Dec() {} +func (noopMetric) Set(float64) {} +func (noopMetric) Observe(float64) {}