diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go
index 10db6c245f..9088d00935 100644
--- a/pkg/internal/controller/controller.go
+++ b/pkg/internal/controller/controller.go
@@ -243,6 +243,5 @@ func (c *Controller) InjectFunc(f inject.Func) error {
 
 // updateMetrics updates prometheus metrics within the controller
 func (c *Controller) updateMetrics(reconcileTime time.Duration) {
-	ctrlmetrics.QueueLength.WithLabelValues(c.Name).Set(float64(c.Queue.Len()))
 	ctrlmetrics.ReconcileTime.WithLabelValues(c.Name).Observe(reconcileTime.Seconds())
 }
diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go
index 893f7ee937..587a5a70cf 100644
--- a/pkg/internal/controller/controller_test.go
+++ b/pkg/internal/controller/controller_test.go
@@ -410,16 +410,7 @@ var _ = Describe("controller", func() {
 
 		Context("should update prometheus metrics", func() {
 			It("should requeue a Request if there is an error and continue processing items", func(done Done) {
-				var queueLength, reconcileErrs dto.Metric
-				ctrlmetrics.QueueLength.Reset()
-				Expect(func() error {
-					ctrlmetrics.QueueLength.WithLabelValues(ctrl.Name).Write(&queueLength)
-					if queueLength.GetGauge().GetValue() != 0.0 {
-						return fmt.Errorf("metrics not reset")
-					}
-					return nil
-				}()).Should(Succeed())
-
+				var reconcileErrs dto.Metric
 				ctrlmetrics.ReconcileErrors.Reset()
 				Expect(func() error {
 					ctrlmetrics.ReconcileErrors.WithLabelValues(ctrl.Name).Write(&reconcileErrs)
@@ -441,13 +432,6 @@ var _ = Describe("controller", func() {
 
 				By("Invoking Reconciler which will give an error")
 				Expect(<-reconciled).To(Equal(request))
-				Eventually(func() error {
-					ctrlmetrics.QueueLength.WithLabelValues(ctrl.Name).Write(&queueLength)
-					if queueLength.GetGauge().GetValue() != 1.0 {
-						return fmt.Errorf("metrics not updated")
-					}
-					return nil
-				}, 2.0).Should(Succeed())
 				Eventually(func() error {
 					ctrlmetrics.ReconcileErrors.WithLabelValues(ctrl.Name).Write(&reconcileErrs)
 					if reconcileErrs.GetCounter().GetValue() != 1.0 {
diff --git a/pkg/internal/controller/metrics/metrics.go b/pkg/internal/controller/metrics/metrics.go
index 944ca8e035..d6756d6f66 100644
--- a/pkg/internal/controller/metrics/metrics.go
+++ b/pkg/internal/controller/metrics/metrics.go
@@ -22,13 +22,6 @@ import (
 )
 
 var (
-	// QueueLength is a prometheus metric which counts the current reconcile
-	// queue length per controller
-	QueueLength = prometheus.NewGaugeVec(prometheus.GaugeOpts{
-		Name: "controller_runtime_reconcile_queue_length",
-		Help: "Length of reconcile queue per controller",
-	}, []string{"controller"})
-
 	// ReconcileErrors is a prometheus counter metrics which holds the total
 	// number of errors from the Reconciler
 	ReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
diff --git a/pkg/metrics/client_go_adapter.go b/pkg/metrics/client_go_adapter.go
index 8012ad7280..a27ff8b7db 100644
--- a/pkg/metrics/client_go_adapter.go
+++ b/pkg/metrics/client_go_adapter.go
@@ -104,6 +104,10 @@ var (
 		Name:      "last_resource_version",
 		Help:      "Last resource version seen for the reflectors",
 	}, []string{"name"})
+
+	// workqueue metrics
+
+	workQueueSubsystem = "workqueue"
 )
 
 func init() {
@@ -206,6 +210,8 @@ func (reflectorMetricsProvider) NewLastResourceVersionMetric(name string) reflec
 // Workqueue metrics (method #3 for client-go metrics),
 // copied (more-or-less directly) from k8s.io/kubernetes setup code
 // (which isn't anywhere in an easily-importable place).
+// TODO(directxman12): stop "cheating" and calling histograms summaries when we pull in the latest deps
+// TODO(directxman12): add in unfinished_work and longest_running_processor when we pull in the latest deps
 // NB(directxman12): these are changed to MustRegister from Register.  It's not clear why they weren't
 // MustRegister in the first place, except maybe to not bring down the controller if the metrics fail
 
@@ -215,50 +221,62 @@ type workqueueMetricsProvider struct{}
 
+// NewDepthMetric creates the depth gauge for the named workqueue and registers it with Registry.
 func (workqueueMetricsProvider) NewDepthMetric(name string) workqueuemetrics.GaugeMetric {
 	depth := prometheus.NewGauge(prometheus.GaugeOpts{
-		Subsystem: name,
-		Name:      "depth",
-		Help:      "Current depth of workqueue: " + name,
+		Subsystem:   workQueueSubsystem,
+		Name:        "depth",
+		Help:        "Current depth of workqueue",
+		ConstLabels: prometheus.Labels{"name": name},
 	})
 	Registry.MustRegister(depth)
 	return depth
 }
 
+// NewAddsMetric creates the adds counter for the named workqueue and registers it with Registry.
 func (workqueueMetricsProvider) NewAddsMetric(name string) workqueuemetrics.CounterMetric {
 	adds := prometheus.NewCounter(prometheus.CounterOpts{
-		Subsystem: name,
-		Name:      "adds",
-		Help:      "Total number of adds handled by workqueue: " + name,
+		Subsystem:   workQueueSubsystem,
+		Name:        "adds_total",
+		Help:        "Total number of adds handled by workqueue",
+		ConstLabels: prometheus.Labels{"name": name},
 	})
 	Registry.MustRegister(adds)
 	return adds
 }
 
+// NewLatencyMetric creates the queue latency histogram for the named workqueue and registers it with Registry.
 func (workqueueMetricsProvider) NewLatencyMetric(name string) workqueuemetrics.SummaryMetric {
-	latency := prometheus.NewSummary(prometheus.SummaryOpts{
-		Subsystem: name,
-		Name:      "queue_latency",
-		Help:      "How long an item stays in workqueue" + name + " before being requested.",
+	latency := prometheus.NewHistogram(prometheus.HistogramOpts{
+		Subsystem:   workQueueSubsystem,
+		Name:        "queue_latency_seconds",
+		Help:        "How long in seconds an item stays in workqueue before being requested.",
+		ConstLabels: prometheus.Labels{"name": name},
+		Buckets:     prometheus.ExponentialBuckets(10e-9, 10, 10),
 	})
 	Registry.MustRegister(latency)
 	return latency
 }
 
+// NewWorkDurationMetric creates the work duration histogram for the named workqueue and registers it with Registry.
 func (workqueueMetricsProvider) NewWorkDurationMetric(name string) workqueuemetrics.SummaryMetric {
-	workDuration := prometheus.NewSummary(prometheus.SummaryOpts{
-		Subsystem: name,
-		Name:      "work_duration",
-		Help:      "How long processing an item from workqueue" + name + " takes.",
+	workDuration := prometheus.NewHistogram(prometheus.HistogramOpts{
+		Subsystem:   workQueueSubsystem,
+		Name:        "work_duration_seconds",
+		Help:        "How long in seconds processing an item from workqueue takes.",
+		ConstLabels: prometheus.Labels{"name": name},
+		Buckets:     prometheus.ExponentialBuckets(10e-9, 10, 10),
 	})
 	Registry.MustRegister(workDuration)
 	return workDuration
 }
 
+// NewRetriesMetric creates the retries counter for the named workqueue and registers it with Registry.
 func (workqueueMetricsProvider) NewRetriesMetric(name string) workqueuemetrics.CounterMetric {
 	retries := prometheus.NewCounter(prometheus.CounterOpts{
-		Subsystem: name,
-		Name:      "retries",
-		Help:      "Total number of retries handled by workqueue: " + name,
+		Subsystem:   workQueueSubsystem,
+		Name:        "retries_total",
+		Help:        "Total number of retries handled by workqueue",
+		ConstLabels: prometheus.Labels{"name": name},
 	})
 	Registry.MustRegister(retries)
 	return retries
 }