From b8c479b93703559102290fde8396afc079bb2b16 Mon Sep 17 00:00:00 2001 From: Alexandre Gaudreault Date: Fri, 26 Jan 2024 16:24:49 -0500 Subject: [PATCH 1/7] fix(controller): add missing kubernetes metrics Signed-off-by: Alexandre Gaudreault --- controller/metrics/metrics.go | 7 ++- controller/metrics/workqueue.go | 101 -------------------------------- go.mod | 2 +- 3 files changed, 6 insertions(+), 104 deletions(-) delete mode 100644 controller/metrics/workqueue.go diff --git a/controller/metrics/metrics.go b/controller/metrics/metrics.go index e4ef09552c09d..40c5d01fe0330 100644 --- a/controller/metrics/metrics.go +++ b/controller/metrics/metrics.go @@ -23,6 +23,10 @@ import ( "github.com/argoproj/argo-cd/v2/util/git" "github.com/argoproj/argo-cd/v2/util/healthz" "github.com/argoproj/argo-cd/v2/util/profile" + + "k8s.io/component-base/metrics/legacyregistry" + // make sure to register workqueue prometheus metrics + _ "k8s.io/component-base/metrics/prometheus/workqueue" ) type MetricsServer struct { @@ -160,12 +164,11 @@ func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFil mux := http.NewServeMux() registry := NewAppRegistry(appLister, appFilter, appLabels) - registry.MustRegister(depth, adds, latency, workDuration, unfinished, longestRunningProcessor, retries) mux.Handle(MetricsPath, promhttp.HandlerFor(prometheus.Gatherers{ // contains app controller specific metrics registry, // contains process, golang and controller workqueues metrics - prometheus.DefaultGatherer, + legacyregistry.DefaultGatherer, }, promhttp.HandlerOpts{})) profile.RegisterProfiler(mux) healthz.ServeHealthCheck(mux, healthCheck) diff --git a/controller/metrics/workqueue.go b/controller/metrics/workqueue.go deleted file mode 100644 index 2ef10685ee47d..0000000000000 --- a/controller/metrics/workqueue.go +++ /dev/null @@ -1,101 +0,0 @@ -package metrics - -import ( - "github.com/prometheus/client_golang/prometheus" - "k8s.io/client-go/util/workqueue" -) - -const ( - WorkQueueSubsystem = "workqueue" - DepthKey = "depth" - AddsKey = "adds_total" - QueueLatencyKey = "queue_duration_seconds" - WorkDurationKey = "work_duration_seconds" - UnfinishedWorkKey = "unfinished_work_seconds" - LongestRunningProcessorKey = "longest_running_processor_seconds" - RetriesKey = "retries_total" -) - -var ( - depth = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Subsystem: WorkQueueSubsystem, - Name: DepthKey, - Help: "Current depth of workqueue", - }, []string{"name"}) - - adds = prometheus.NewCounterVec(prometheus.CounterOpts{ - Subsystem: WorkQueueSubsystem, - Name: AddsKey, - Help: "Total number of adds handled by workqueue", - }, []string{"name"}) - - latency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Subsystem: WorkQueueSubsystem, - Name: QueueLatencyKey, - Help: "How long in seconds an item stays in workqueue before being requested", - Buckets: []float64{1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 5, 10, 15, 30, 60, 120, 180}, - }, []string{"name"}) - - workDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ - Subsystem: WorkQueueSubsystem, - Name: WorkDurationKey, - Help: "How long in seconds processing an item from workqueue takes.", - Buckets: []float64{1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 5, 10, 15, 30, 60, 120, 180}, - }, []string{"name"}) - - unfinished = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Subsystem: WorkQueueSubsystem, - Name: UnfinishedWorkKey, - Help: "How many seconds of work has been done that " + - "is in progress and hasn't been observed by work_duration. Large " + - "values indicate stuck threads. One can deduce the number of stuck " + - "threads by observing the rate at which this increases.", - }, []string{"name"}) - - longestRunningProcessor = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Subsystem: WorkQueueSubsystem, - Name: LongestRunningProcessorKey, - Help: "How many seconds has the longest running " + - "processor for workqueue been running.", - }, []string{"name"}) - - retries = prometheus.NewCounterVec(prometheus.CounterOpts{ - Subsystem: WorkQueueSubsystem, - Name: RetriesKey, - Help: "Total number of retries handled by workqueue", - }, []string{"name"}) -) - -func init() { - workqueue.SetProvider(workqueueMetricsProvider{}) -} - -type workqueueMetricsProvider struct{} - -func (workqueueMetricsProvider) NewDepthMetric(name string) workqueue.GaugeMetric { - return depth.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewAddsMetric(name string) workqueue.CounterMetric { - return adds.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewLatencyMetric(name string) workqueue.HistogramMetric { - return latency.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewWorkDurationMetric(name string) workqueue.HistogramMetric { - return workDuration.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewUnfinishedWorkSecondsMetric(name string) workqueue.SettableGaugeMetric { - return unfinished.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewLongestRunningProcessorSecondsMetric(name string) workqueue.SettableGaugeMetric { - return longestRunningProcessor.WithLabelValues(name) -} - -func (workqueueMetricsProvider) NewRetriesMetric(name string) workqueue.CounterMetric { - return retries.WithLabelValues(name) -} diff --git a/go.mod b/go.mod index 07dd99e4beff1..4e94c535e18f6 100644 --- a/go.mod +++ b/go.mod @@ -281,7 +281,7 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect k8s.io/cli-runtime v0.26.11 // indirect - k8s.io/component-base v0.26.11 // indirect + k8s.io/component-base v0.26.11 k8s.io/component-helpers v0.26.11 // indirect k8s.io/gengo v0.0.0-20220902162205-c0856e24416d // indirect k8s.io/kube-aggregator v0.26.4 // indirect From adef3f2104bff987c9a79794924d5aaa735fe85c Mon Sep 17 00:00:00 2001 From: Alexandre Gaudreault Date: Fri, 26 Jan 2024 17:51:59 -0500 Subject: [PATCH 2/7] validate workqueue metrics are present Signed-off-by: Alexandre Gaudreault --- controller/metrics/metrics_test.go | 70 +++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/controller/metrics/metrics_test.go b/controller/metrics/metrics_test.go index 61a99a46492a2..cfbd02014f079 100644 --- a/controller/metrics/metrics_test.go +++ b/controller/metrics/metrics_test.go @@ -2,6 +2,7 @@ package metrics import ( "context" + "fmt" "log" "net/http" "net/http/httptest" @@ -15,6 +16,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" "sigs.k8s.io/yaml" argoappv1 "github.com/argoproj/argo-cd/v2/pkg/apis/application/v1alpha1" @@ -360,7 +362,7 @@ func assertMetricsPrinted(t *testing.T, expectedLines, body string) { if line == "" { continue } - assert.Contains(t, body, line, "expected metrics mismatch") + assert.Contains(t, body, line, fmt.Sprintf("expected metrics mismatch for line: %s", line)) } } @@ -443,3 +445,69 @@ argocd_app_sync_total{dest_server="https://localhost:6443",name="my-app",namespa err = metricsServ.SetExpiration(time.Second) assert.Error(t, err) } + +func TestWorkqueueMetrics(t *testing.T) { + cancel, appLister := newFakeLister() + defer cancel() + metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck, []string{}) + assert.NoError(t, err) + + expectedMetrics := ` +# TYPE workqueue_adds_total counter +workqueue_adds_total{name="test"} + +# TYPE workqueue_depth gauge +workqueue_depth{name="test"} + +# TYPE workqueue_longest_running_processor_seconds gauge +workqueue_longest_running_processor_seconds{name="test"} + +# TYPE workqueue_queue_duration_seconds histogram + +# TYPE workqueue_unfinished_work_seconds gauge +workqueue_unfinished_work_seconds{name="test"} + +# TYPE workqueue_work_duration_seconds histogram +` + workqueue.NewNamed("test") + + req, err := http.NewRequest(http.MethodGet, "/metrics", nil) + assert.NoError(t, err) + rr := httptest.NewRecorder() + metricsServ.Handler.ServeHTTP(rr, req) + assert.Equal(t, rr.Code, http.StatusOK) + body := rr.Body.String() + log.Println(body) + assertMetricsPrinted(t, expectedMetrics, body) +} + +func TestGoMetrics(t *testing.T) { + cancel, appLister := newFakeLister() + defer cancel() + metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck, []string{}) + assert.NoError(t, err) + + expectedMetrics := ` +# TYPE go_gc_duration_seconds summary +go_gc_duration_seconds_sum +go_gc_duration_seconds_count +# TYPE go_goroutines gauge +go_goroutines +# TYPE go_info gauge +go_info +# TYPE go_memstats_alloc_bytes gauge +go_memstats_alloc_bytes +# TYPE go_memstats_sys_bytes gauge +go_memstats_sys_bytes +# TYPE go_threads gauge +go_threads +` + req, err := http.NewRequest(http.MethodGet, "/metrics", nil) + assert.NoError(t, err) + rr := httptest.NewRecorder() + metricsServ.Handler.ServeHTTP(rr, req) + assert.Equal(t, rr.Code, http.StatusOK) + body := rr.Body.String() + log.Println(body) + assertMetricsPrinted(t, expectedMetrics, body) +} From 8f84ad1b0c0b0216e65e99bf4b7d796f3fef5fdd Mon Sep 17 00:00:00 2001 From: Alexandre Gaudreault Date: Fri, 26 Jan 2024 18:34:16 -0500 Subject: [PATCH 3/7] use newer metrics registry Signed-off-by: Alexandre Gaudreault --- controller/metrics/metrics.go | 10 +++++----- go.mod | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/controller/metrics/metrics.go b/controller/metrics/metrics.go index 40c5d01fe0330..ec1db73037211 100644 --- a/controller/metrics/metrics.go +++ b/controller/metrics/metrics.go @@ -24,9 +24,7 @@ import ( "github.com/argoproj/argo-cd/v2/util/healthz" "github.com/argoproj/argo-cd/v2/util/profile" - "k8s.io/component-base/metrics/legacyregistry" - // make sure to register workqueue prometheus metrics - _ "k8s.io/component-base/metrics/prometheus/workqueue" + ctrl_metrics "sigs.k8s.io/controller-runtime/pkg/metrics" ) type MetricsServer struct { @@ -167,8 +165,10 @@ func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFil mux.Handle(MetricsPath, promhttp.HandlerFor(prometheus.Gatherers{ // contains app controller specific metrics registry, - // contains process, golang and controller workqueues metrics - legacyregistry.DefaultGatherer, + // contains process and golang metrics + prometheus.DefaultGatherer, + // contains workqueue metrics + ctrl_metrics.Registry, }, promhttp.HandlerOpts{})) profile.RegisterProfiler(mux) healthz.ServeHealthCheck(mux, healthCheck) diff --git a/go.mod b/go.mod index 4e94c535e18f6..07dd99e4beff1 100644 --- a/go.mod +++ b/go.mod @@ -281,7 +281,7 @@ require ( gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect k8s.io/cli-runtime v0.26.11 // indirect - k8s.io/component-base v0.26.11 + k8s.io/component-base v0.26.11 // indirect k8s.io/component-helpers v0.26.11 // indirect k8s.io/gengo v0.0.0-20220902162205-c0856e24416d // indirect k8s.io/kube-aggregator v0.26.4 // indirect From f55540cce52e55ec666a92d9b0352ecbcba4174d Mon Sep 17 00:00:00 2001 From: Alexandre Gaudreault Date: Wed, 31 Jan 2024 11:48:48 -0500 Subject: [PATCH 4/7] fix duplicated Signed-off-by: Alexandre Gaudreault --- controller/metrics/metrics.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/controller/metrics/metrics.go b/controller/metrics/metrics.go index ec1db73037211..94405b51eac75 100644 --- a/controller/metrics/metrics.go +++ b/controller/metrics/metrics.go @@ -162,12 +162,11 @@ func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFil mux := http.NewServeMux() registry := NewAppRegistry(appLister, appFilter, appLabels) + mux.Handle(MetricsPath, promhttp.HandlerFor(prometheus.Gatherers{ // contains app controller specific metrics registry, - // contains process and golang metrics - prometheus.DefaultGatherer, - // contains workqueue metrics + // contains workqueue metrics, process and golang metrics ctrl_metrics.Registry, }, promhttp.HandlerOpts{})) profile.RegisterProfiler(mux) From 7a24340e2a0be1c905975882855efe7b9ccea263 Mon Sep 17 00:00:00 2001 From: Alexandre Gaudreault Date: Thu, 1 Feb 2024 11:10:29 -0500 Subject: [PATCH 5/7] init runtime controller in test to have correct metrics Signed-off-by: Alexandre Gaudreault --- controller/metrics/metrics_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/controller/metrics/metrics_test.go b/controller/metrics/metrics_test.go index cfbd02014f079..78f047e227c84 100644 --- a/controller/metrics/metrics_test.go +++ b/controller/metrics/metrics_test.go @@ -23,6 +23,8 @@ import ( appclientset "github.com/argoproj/argo-cd/v2/pkg/client/clientset/versioned/fake" appinformer "github.com/argoproj/argo-cd/v2/pkg/client/informers/externalversions" applister "github.com/argoproj/argo-cd/v2/pkg/client/listers/application/v1alpha1" + + "sigs.k8s.io/controller-runtime/pkg/controller" ) const fakeApp = ` @@ -142,6 +144,12 @@ var appFilter = func(obj interface{}) bool { return true } +func init() { + // Create a fake controller so we initialize the internal controller metrics. + // https://github.com/kubernetes-sigs/controller-runtime/blob/4000e996a202917ad7d40f02ed8a2079a9ce25e9/pkg/internal/controller/metrics/metrics.go + controller.New("test-controller", nil, controller.Options{}) +} + func newFakeApp(fakeAppYAML string) *argoappv1.Application { var app argoappv1.Application err := yaml.Unmarshal([]byte(fakeAppYAML), &app) @@ -502,6 +510,7 @@ go_memstats_sys_bytes # TYPE go_threads gauge go_threads ` + req, err := http.NewRequest(http.MethodGet, "/metrics", nil) assert.NoError(t, err) rr := httptest.NewRecorder() From b6935ddfccdabb9a5d5d3ea62c034d7de36f5ee6 Mon Sep 17 00:00:00 2001 From: Alexandre Gaudreault Date: Thu, 8 Feb 2024 18:37:56 -0500 Subject: [PATCH 6/7] fix lint error Signed-off-by: Alexandre Gaudreault --- controller/metrics/metrics_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller/metrics/metrics_test.go b/controller/metrics/metrics_test.go index 78f047e227c84..23628c38347a5 100644 --- a/controller/metrics/metrics_test.go +++ b/controller/metrics/metrics_test.go @@ -147,7 +147,7 @@ var appFilter = func(obj interface{}) bool { func init() { // Create a fake controller so we initialize the internal controller metrics. // https://github.com/kubernetes-sigs/controller-runtime/blob/4000e996a202917ad7d40f02ed8a2079a9ce25e9/pkg/internal/controller/metrics/metrics.go - controller.New("test-controller", nil, controller.Options{}) + _, _ = controller.New("test-controller", nil, controller.Options{}) } func newFakeApp(fakeAppYAML string) *argoappv1.Application { From 3bbb84e330f7e8ba69040a783167d20ddd15b92e Mon Sep 17 00:00:00 2001 From: Alexandre Gaudreault Date: Tue, 13 Feb 2024 11:36:15 -0500 Subject: [PATCH 7/7] update controller-runtime to remove metrics with high cardinality Signed-off-by: Alexandre Gaudreault --- go.mod | 4 ++-- go.sum | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/go.mod b/go.mod index cb024e3183404..2f3bdec276c7c 100644 --- a/go.mod +++ b/go.mod @@ -93,7 +93,7 @@ require ( gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 k8s.io/api v0.26.11 - k8s.io/apiextensions-apiserver v0.26.4 + k8s.io/apiextensions-apiserver v0.26.10 k8s.io/apimachinery v0.26.11 k8s.io/apiserver v0.26.11 k8s.io/client-go v0.26.11 @@ -104,7 +104,7 @@ require ( k8s.io/utils v0.0.0-20230220204549-a5ecb0141aa5 layeh.com/gopher-json v0.0.0-20190114024228-97fed8db8427 oras.land/oras-go/v2 v2.3.0 - sigs.k8s.io/controller-runtime v0.14.6 + sigs.k8s.io/controller-runtime v0.14.7 sigs.k8s.io/structured-merge-diff/v4 v4.4.1 sigs.k8s.io/yaml v1.3.0 ) diff --git a/go.sum b/go.sum index 2d33e5a248cce..495ba3ed9ba29 100644 --- a/go.sum +++ b/go.sum @@ -2706,8 +2706,8 @@ rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8 rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= -sigs.k8s.io/controller-runtime v0.14.6 h1:oxstGVvXGNnMvY7TAESYk+lzr6S3V5VFxQ6d92KcwQA= -sigs.k8s.io/controller-runtime v0.14.6/go.mod h1:WqIdsAY6JBsjfc/CqO0CORmNtoCtE4S6qbPc9s68h+0= +sigs.k8s.io/controller-runtime v0.14.7 h1:Vrnm2vk9ZFlRkXATHz0W0wXcqNl7kPat8q2JyxVy0Q8= +sigs.k8s.io/controller-runtime v0.14.7/go.mod h1:ErTs3SJCOujNUnTz4AS+uh8hp6DHMo1gj6fFndJT1X8= sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=