From ce521f8367a3210040d6107848f3389fea81ffa5 Mon Sep 17 00:00:00 2001 From: Andy Bursavich Date: Sun, 19 May 2019 11:49:27 -0700 Subject: [PATCH] metrics: migrate workqueue provider to v1.14 removing deprecated metrics Migration work was already done to move controller-runtime from k8s v1.13 to v1.14; however, this introduced certain issues. Unfortunately, workqueue_queue_duration_seconds and workqueue_work_duration_seconds were included in v0.1.10, but were recorded with microsecond values. Moving to k8s v1.14 corrects those values to seconds. Primarily, this change removes the metrics that were deprecated in k8s v1.14 but were never previously included in v0.1 of controller-runtime. The one metric that was deprecated and is retained is longest_running_processor_microseconds, as it was included in v0.1.10. --- pkg/metrics/client_go_adapter.go | 189 ------------------------------- pkg/metrics/workqueue.go | 173 ++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 189 deletions(-) create mode 100644 pkg/metrics/workqueue.go diff --git a/pkg/metrics/client_go_adapter.go b/pkg/metrics/client_go_adapter.go index 3b2c316280..dd9e50b3fd 100644 --- a/pkg/metrics/client_go_adapter.go +++ b/pkg/metrics/client_go_adapter.go @@ -20,23 +20,9 @@ import ( "net/url" "time" - "k8s.io/apimachinery/pkg/util/runtime" - "github.com/prometheus/client_golang/prometheus" reflectormetrics "k8s.io/client-go/tools/cache" clientmetrics "k8s.io/client-go/tools/metrics" - workqueuemetrics "k8s.io/client-go/util/workqueue" -) - -const ( - workQueueSubsystem = "workqueue" - depthKey = "depth" - addsKey = "adds_total" - queueLatencyKey = "queue_duration_seconds" - workDurationKey = "work_duration_seconds" - unfinishedWorkKey = "unfinished_work_seconds" - longestRunningProcessorKey = "longest_running_processor_seconds" - retriesKey = "retries_total" ) // this file contains setup logic to initialize the myriad of places @@ -117,62 +103,11 @@ var ( Name: "last_resource_version", Help: 
"Last resource version seen for the reflectors", }, []string{"name"}) - - // workqueue metrics - - depth = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Subsystem: workQueueSubsystem, - Name: depthKey, - Help: "Current depth of workqueue", - }, []string{"name"}) - - adds = prometheus.NewCounterVec(prometheus.CounterOpts{ - Subsystem: workQueueSubsystem, - Name: addsKey, - Help: "Total number of adds handled by workqueue", - }, []string{"name"}) - - latency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Subsystem: workQueueSubsystem, - Name: queueLatencyKey, - Help: "How long in seconds an item stays in workqueue before being requested.", - Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), - }, []string{"name"}) - - workDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Subsystem: workQueueSubsystem, - Name: workDurationKey, - Help: "How long in seconds processing an item from workqueue takes.", - Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), - }, []string{"name"}) - - unfinishedWork = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Subsystem: workQueueSubsystem, - Name: unfinishedWorkKey, - Help: "How many seconds of work has done that " + - "is in progress and hasn't been observed by work_duration. Large " + - "values indicate stuck threads. 
One can deduce the number of stuck " + - "threads by observing the rate at which this increases.", - }, []string{"name"}) - - longestRunning = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Subsystem: workQueueSubsystem, - Name: longestRunningProcessorKey, - Help: "How many seconds has the longest running " + - "processor for workqueue been running.", - }, []string{"name"}) - - retries = prometheus.NewCounterVec(prometheus.CounterOpts{ - Subsystem: workQueueSubsystem, - Name: retriesKey, - Help: "Total number of retries handled by workqueue", - }, []string{"name"}) ) func init() { registerClientMetrics() registerReflectorMetrics() - registerWorkqueueMetrics() } // registerClientMetrics sets up the client latency metrics from client-go @@ -199,19 +134,6 @@ func registerReflectorMetrics() { reflectormetrics.SetReflectorMetricsProvider(reflectorMetricsProvider{}) } -// registerWorkQueueMetrics sets up workqueue (other reconcile) metrics -func registerWorkqueueMetrics() { - Registry.MustRegister(depth) - Registry.MustRegister(adds) - Registry.MustRegister(latency) - Registry.MustRegister(workDuration) - Registry.MustRegister(retries) - Registry.MustRegister(longestRunning) - Registry.MustRegister(unfinishedWork) - - workqueuemetrics.SetProvider(workqueueMetricsProvider{}) -} - // this section contains adapters, implementations, and other sundry organic, artisinally // hand-crafted syntax trees required to convince client-go that it actually wants to let // someone use its metrics. @@ -273,114 +195,3 @@ func (reflectorMetricsProvider) NewItemsInWatchMetric(name string) reflectormetr func (reflectorMetricsProvider) NewLastResourceVersionMetric(name string) reflectormetrics.GaugeMetric { return lastResourceVersion.WithLabelValues(name) } - -// Workqueue metrics (method #3 for client-go metrics), -// copied (more-or-less directly) from k8s.io/kubernetes setup code -// (which isn't anywhere in an easily-importable place). 
-// TODO(directxman12): stop "cheating" and calling histograms summaries when we pull in the latest deps - -type workqueueMetricsProvider struct{} - -func (workqueueMetricsProvider) NewDepthMetric(name string) workqueuemetrics.GaugeMetric { - return depth.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewAddsMetric(name string) workqueuemetrics.CounterMetric { - return adds.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewLatencyMetric(name string) workqueuemetrics.HistogramMetric { - return latency.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewWorkDurationMetric(name string) workqueuemetrics.HistogramMetric { - return workDuration.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewUnfinishedWorkSecondsMetric(name string) workqueuemetrics.SettableGaugeMetric { - return unfinishedWork.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewLongestRunningProcessorSecondsMetric(name string) workqueuemetrics.SettableGaugeMetric { - return longestRunning.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewRetriesMetric(name string) workqueuemetrics.CounterMetric { - return retries.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewDeprecatedDepthMetric(name string) workqueuemetrics.GaugeMetric { - depth := prometheus.NewGauge(prometheus.GaugeOpts{ - Subsystem: name, - Name: "depth", - Help: "Current depth of workqueue: " + name, - }) - runtime.HandleError(Registry.Register(depth)) - return depth -} - -func (workqueueMetricsProvider) NewDeprecatedAddsMetric(name string) workqueuemetrics.CounterMetric { - adds := prometheus.NewCounter(prometheus.CounterOpts{ - Subsystem: name, - Name: "adds", - Help: "Total number of adds handled by workqueue: " + name, - }) - runtime.HandleError(Registry.Register(adds)) - return adds -} - -func (workqueueMetricsProvider) NewDeprecatedLatencyMetric(name string) workqueuemetrics.SummaryMetric { - latency := prometheus.NewSummary(prometheus.SummaryOpts{ - 
Subsystem: name, - Name: "queue_latency", - Help: "How long an item stays in workqueue" + name + " before being requested.", - ConstLabels: prometheus.Labels{"name": name}, - }) - runtime.HandleError(Registry.Register(latency)) - return latency -} - -func (workqueueMetricsProvider) NewDeprecatedWorkDurationMetric(name string) workqueuemetrics.SummaryMetric { - workDuration := prometheus.NewSummary(prometheus.SummaryOpts{ - Subsystem: name, - Name: "work_duration", - Help: "How long processing an item from workqueue" + name + " takes.", - ConstLabels: prometheus.Labels{"name": name}, - }) - runtime.HandleError(Registry.Register(workDuration)) - return workDuration -} - -func (workqueueMetricsProvider) NewDeprecatedUnfinishedWorkSecondsMetric(name string) workqueuemetrics.SettableGaugeMetric { - unfinishedWork := prometheus.NewGauge(prometheus.GaugeOpts{ - Subsystem: name, - Name: "unfinished_work_seconds", - Help: "How many seconds of work " + name + " has done that " + - "is in progress and hasn't been observed by work_duration. Large " + - "values indicate stuck threads. 
One can deduce the number of stuck " + - "threads by observing the rate at which this increases.", - }) - runtime.HandleError(Registry.Register(unfinishedWork)) - return unfinishedWork -} - -func (workqueueMetricsProvider) NewDeprecatedLongestRunningProcessorMicrosecondsMetric(name string) workqueuemetrics.SettableGaugeMetric { - longestRunning := prometheus.NewGauge(prometheus.GaugeOpts{ - Subsystem: name, - Name: "longest_running_processor_microseconds", - Help: "How many microseconds has the longest running " + - "processor for " + name + " been running.", - }) - runtime.HandleError(Registry.Register(longestRunning)) - return longestRunning -} - -func (workqueueMetricsProvider) NewDeprecatedRetriesMetric(name string) workqueuemetrics.CounterMetric { - retries := prometheus.NewCounter(prometheus.CounterOpts{ - Subsystem: name, - Name: "retries", - Help: "Total number of retries handled by workqueue: " + name, - }) - runtime.HandleError(Registry.Register(retries)) - return retries -} diff --git a/pkg/metrics/workqueue.go b/pkg/metrics/workqueue.go new file mode 100644 index 0000000000..6381f0c14a --- /dev/null +++ b/pkg/metrics/workqueue.go @@ -0,0 +1,173 @@ +/* +Copyright 2018 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "k8s.io/client-go/util/workqueue" + logf "sigs.k8s.io/controller-runtime/pkg/internal/log" +) + +var log = logf.RuntimeLog.WithName("metrics") + +// This file is copied and adapted from k8s.io/kubernetes/pkg/util/workqueue/prometheus +// which registers metrics to the default prometheus Registry. We require very +// similar functionality, but must register metrics to a different Registry. + +func init() { + workqueue.SetProvider(workqueueMetricsProvider{}) +} + +func registerWorkqueueMetric(c prometheus.Collector, name, queue string) { + if err := Registry.Register(c); err != nil { + log.Error(err, "failed to register metric", "name", name, "queue", queue) + } +} + +type workqueueMetricsProvider struct{} + +func (workqueueMetricsProvider) NewDepthMetric(queue string) workqueue.GaugeMetric { + const name = "workqueue_depth" + m := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: name, + Help: "Current depth of workqueue", + ConstLabels: prometheus.Labels{"name": queue}, + }) + registerWorkqueueMetric(m, name, queue) + return m +} + +func (workqueueMetricsProvider) NewAddsMetric(queue string) workqueue.CounterMetric { + const name = "workqueue_adds_total" + m := prometheus.NewCounter(prometheus.CounterOpts{ + Name: name, + Help: "Total number of adds handled by workqueue", + ConstLabels: prometheus.Labels{"name": queue}, + }) + registerWorkqueueMetric(m, name, queue) + return m +} + +func (workqueueMetricsProvider) NewLatencyMetric(queue string) workqueue.HistogramMetric { + const name = "workqueue_queue_duration_seconds" + m := prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: name, + Help: "How long in seconds an item stays in workqueue before being requested.", + ConstLabels: prometheus.Labels{"name": queue}, + Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), + }) + registerWorkqueueMetric(m, name, queue) + return m +} + +func (workqueueMetricsProvider) 
NewWorkDurationMetric(queue string) workqueue.HistogramMetric { + const name = "workqueue_work_duration_seconds" + m := prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: name, + Help: "How long in seconds processing an item from workqueue takes.", + ConstLabels: prometheus.Labels{"name": queue}, + Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10), + }) + registerWorkqueueMetric(m, name, queue) + return m +} + +func (workqueueMetricsProvider) NewUnfinishedWorkSecondsMetric(queue string) workqueue.SettableGaugeMetric { + const name = "workqueue_unfinished_work_seconds" + m := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: name, + Help: "How many seconds of work has done that " + + "is in progress and hasn't been observed by work_duration. Large " + + "values indicate stuck threads. One can deduce the number of stuck " + + "threads by observing the rate at which this increases.", + ConstLabels: prometheus.Labels{"name": queue}, + }) + registerWorkqueueMetric(m, name, queue) + return m +} + +func (workqueueMetricsProvider) NewLongestRunningProcessorSecondsMetric(queue string) workqueue.SettableGaugeMetric { + const name = "workqueue_longest_running_processor_seconds" + m := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: name, + Help: "How many seconds has the longest running " + + "processor for workqueue been running.", + ConstLabels: prometheus.Labels{"name": queue}, + }) + registerWorkqueueMetric(m, name, queue) + return m +} + +func (workqueueMetricsProvider) NewRetriesMetric(queue string) workqueue.CounterMetric { + const name = "workqueue_retries_total" + m := prometheus.NewCounter(prometheus.CounterOpts{ + Name: name, + Help: "Total number of retries handled by workqueue", + ConstLabels: prometheus.Labels{"name": queue}, + }) + registerWorkqueueMetric(m, name, queue) + return m +} + +// TODO(abursavich): Remove the following deprecated metrics when they are +// removed from k8s.io/client-go/util/workqueue. 
+ +func (workqueueMetricsProvider) NewDeprecatedLongestRunningProcessorMicrosecondsMetric(queue string) workqueue.SettableGaugeMetric { + const name = "workqueue_longest_running_processor_microseconds" + m := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: name, + Help: "(Deprecated) How many microseconds has the longest running " + + "processor for workqueue been running.", + ConstLabels: prometheus.Labels{"name": queue}, + }) + registerWorkqueueMetric(m, name, queue) + return m +} + +// NOTE: The following deprecated metrics are noops because they were never +// included in controller-runtime. + +func (workqueueMetricsProvider) NewDeprecatedDepthMetric(queue string) workqueue.GaugeMetric { + return noopMetric{} +} + +func (workqueueMetricsProvider) NewDeprecatedAddsMetric(queue string) workqueue.CounterMetric { + return noopMetric{} +} + +func (workqueueMetricsProvider) NewDeprecatedLatencyMetric(queue string) workqueue.SummaryMetric { + return noopMetric{} +} + +func (workqueueMetricsProvider) NewDeprecatedWorkDurationMetric(queue string) workqueue.SummaryMetric { + return noopMetric{} +} + +func (workqueueMetricsProvider) NewDeprecatedUnfinishedWorkSecondsMetric(queue string) workqueue.SettableGaugeMetric { + return noopMetric{} +} + +func (workqueueMetricsProvider) NewDeprecatedRetriesMetric(queue string) workqueue.CounterMetric { + return noopMetric{} +} + +type noopMetric struct{} + +func (noopMetric) Inc() {} +func (noopMetric) Dec() {} +func (noopMetric) Set(float64) {} +func (noopMetric) Observe(float64) {}