From 00e6956210ca3acc0afed4bcb4454955f7353a51 Mon Sep 17 00:00:00 2001 From: Vadim Gedz Date: Mon, 8 Apr 2024 13:45:10 +0300 Subject: [PATCH] chore: add in progress tasks metric (#275) --- .../argocd/argo_status_updater.go | 8 ++++++ .../argocd/argo_status_updater_test.go | 20 ++++++++++++++ cmd/argo-watcher/prometheus/metrics.go | 17 ++++++++++++ cmd/argo-watcher/prometheus/metrics_test.go | 27 +++++++++++++++++++ 4 files changed, 72 insertions(+) diff --git a/cmd/argo-watcher/argocd/argo_status_updater.go b/cmd/argo-watcher/argocd/argo_status_updater.go index 8c1fa652..6ea2661b 100644 --- a/cmd/argo-watcher/argocd/argo_status_updater.go +++ b/cmd/argo-watcher/argocd/argo_status_updater.go @@ -73,6 +73,9 @@ func (updater *ArgoStatusUpdater) collectInitialAppStatus(task *models.Task) err } func (updater *ArgoStatusUpdater) WaitForRollout(task models.Task) { + // increment in progress task counter + updater.argo.metrics.AddInProgressTask() + // notify about the deployment start sendWebhookEvent(task, updater.webhookService) @@ -85,6 +88,8 @@ func (updater *ArgoStatusUpdater) WaitForRollout(task models.Task) { updater.argo.metrics.AddFailedDeployment(task.App) // update task status regarding failure updater.handleArgoAPIFailure(task, err) + // decrement in progress task counter + updater.argo.metrics.RemoveInProgressTask() return } @@ -118,6 +123,9 @@ func (updater *ArgoStatusUpdater) WaitForRollout(task models.Task) { task.Status = models.StatusFailedMessage } + // decrement in progress task counter + updater.argo.metrics.RemoveInProgressTask() + // send webhook event about the deployment result sendWebhookEvent(task, updater.webhookService) } diff --git a/cmd/argo-watcher/argocd/argo_status_updater_test.go b/cmd/argo-watcher/argocd/argo_status_updater_test.go index fb0804ff..c9dc6824 100644 --- a/cmd/argo-watcher/argocd/argo_status_updater_test.go +++ b/cmd/argo-watcher/argocd/argo_status_updater_test.go @@ -59,7 +59,9 @@ func TestArgoStatusUpdaterCheck(t *testing.T) { // mock calls apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3) + metricsMock.EXPECT().AddInProgressTask() metricsMock.EXPECT().ResetFailedDeployment(task.App) + metricsMock.EXPECT().RemoveInProgressTask() stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusDeployedMessage, "") // run the rollout @@ -107,7 +109,9 @@ func TestArgoStatusUpdaterCheck(t *testing.T) { // mock calls apiMock.EXPECT().GetApplication(task.App).Return(&unhealthyApp, nil).Times(2) apiMock.EXPECT().GetApplication(task.App).Return(&healthyApp, nil).Times(1) + metricsMock.EXPECT().AddInProgressTask() metricsMock.EXPECT().ResetFailedDeployment(task.App) + metricsMock.EXPECT().RemoveInProgressTask() stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusDeployedMessage, "") // run the rollout @@ -148,7 +152,9 @@ func TestArgoStatusUpdaterCheck(t *testing.T) { // mock calls apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3) + metricsMock.EXPECT().AddInProgressTask() metricsMock.EXPECT().ResetFailedDeployment(task.App) + metricsMock.EXPECT().RemoveInProgressTask() stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusDeployedMessage, "") // run the rollout @@ -189,7 +195,9 @@ func TestArgoStatusUpdaterCheck(t *testing.T) { // mock calls apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3) + metricsMock.EXPECT().AddInProgressTask() metricsMock.EXPECT().AddFailedDeployment(task.App) + metricsMock.EXPECT().RemoveInProgressTask() stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusFailedMessage, "Application deployment failed. Rollout status \"not available\"\n\nList of current images (last app check):\n\ttest-registry/ghcr.io/shini4i/argo-watcher:dev\n\nList of expected images:\n\tghcr.io/shini4i/argo-watcher:dev") @@ -219,7 +227,9 @@ func TestArgoStatusUpdaterCheck(t *testing.T) { // mock calls apiMock.EXPECT().GetApplication(task.App).Return(nil, fmt.Errorf("applications.argoproj.io \"test-app\" not found")) + metricsMock.EXPECT().AddInProgressTask() metricsMock.EXPECT().AddFailedDeployment(task.App) + metricsMock.EXPECT().RemoveInProgressTask() stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusAppNotFoundMessage, "ArgoCD API Error: applications.argoproj.io \"test-app\" not found") // run the rollout @@ -248,7 +258,9 @@ func TestArgoStatusUpdaterCheck(t *testing.T) { // mock calls apiMock.EXPECT().GetApplication(task.App).Return(nil, fmt.Errorf(argoUnavailableErrorMessage)) + metricsMock.EXPECT().AddInProgressTask() metricsMock.EXPECT().AddFailedDeployment(task.App) + metricsMock.EXPECT().RemoveInProgressTask() stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusAborted, "ArgoCD API Error: connect: connection refused") // run the rollout @@ -277,7 +289,9 @@ func TestArgoStatusUpdaterCheck(t *testing.T) { // mock calls apiMock.EXPECT().GetApplication(task.App).Return(nil, fmt.Errorf("unexpected failure")) + metricsMock.EXPECT().AddInProgressTask() metricsMock.EXPECT().AddFailedDeployment(task.App) + metricsMock.EXPECT().RemoveInProgressTask() stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusFailedMessage, "ArgoCD API Error: unexpected failure") // run the rollout @@ -316,7 +330,9 @@ func TestArgoStatusUpdaterCheck(t *testing.T) { // mock calls apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3) + metricsMock.EXPECT().AddInProgressTask() metricsMock.EXPECT().AddFailedDeployment(task.App) + metricsMock.EXPECT().RemoveInProgressTask() stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusFailedMessage, "Application deployment failed. Rollout status \"not available\"\n\nList of current images (last app check):\n\ttest-image:v0.0.1\n\nList of expected images:\n\tghcr.io/shini4i/argo-watcher:dev") @@ -360,7 +376,9 @@ func TestArgoStatusUpdaterCheck(t *testing.T) { // mock calls apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3) + metricsMock.EXPECT().AddInProgressTask() metricsMock.EXPECT().AddFailedDeployment(task.App) + metricsMock.EXPECT().RemoveInProgressTask() stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusFailedMessage, "Application deployment failed. Rollout status \"not synced\"\n\nApp status \"NotWorking\"\nApp message \"Not working test app\"\nResources:\n\t") @@ -402,7 +420,9 @@ func TestArgoStatusUpdaterCheck(t *testing.T) { // mock calls apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3) + metricsMock.EXPECT().AddInProgressTask() metricsMock.EXPECT().AddFailedDeployment(task.App) + metricsMock.EXPECT().RemoveInProgressTask() stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusFailedMessage, "Application deployment failed. Rollout status \"not healthy\"\n\nApp sync status \"Synced\"\nApp health status \"NotHealthy\"\nResources:\n\t") diff --git a/cmd/argo-watcher/prometheus/metrics.go b/cmd/argo-watcher/prometheus/metrics.go index 7fbdb827..0226c79e 100644 --- a/cmd/argo-watcher/prometheus/metrics.go +++ b/cmd/argo-watcher/prometheus/metrics.go @@ -10,12 +10,15 @@ type MetricsInterface interface { ResetFailedDeployment(app string) AddProcessedDeployment() SetArgoUnavailable(unavailable bool) + AddInProgressTask() + RemoveInProgressTask() } type Metrics struct { failedDeployment *prometheus.GaugeVec processedDeployments prometheus.Counter argocdUnavailable prometheus.Gauge + inProgressTasks prometheus.Gauge } func (metrics *Metrics) Init() { @@ -33,6 +36,11 @@ func (metrics *Metrics) Init() { Name: "argocd_unavailable", Help: "Whether ArgoCD is available for argo-watcher.", }) + + metrics.inProgressTasks = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "in_progress_tasks", + Help: "The number of tasks currently in progress.", + }) } func (metrics *Metrics) Register() { @@ -40,6 +48,7 @@ func (metrics *Metrics) Register() { prometheus.MustRegister(metrics.failedDeployment) prometheus.MustRegister(metrics.processedDeployments) prometheus.MustRegister(metrics.argocdUnavailable) + prometheus.MustRegister(metrics.inProgressTasks) } func (metrics *Metrics) AddFailedDeployment(app string) { @@ -54,6 +63,14 @@ func (metrics *Metrics) AddProcessedDeployment() { metrics.processedDeployments.Inc() } +func (metrics *Metrics) AddInProgressTask() { + metrics.inProgressTasks.Inc() +} + +func (metrics *Metrics) RemoveInProgressTask() { + metrics.inProgressTasks.Dec() +} + func (metrics *Metrics) SetArgoUnavailable(unavailable bool) { if unavailable { metrics.argocdUnavailable.Set(1) diff --git a/cmd/argo-watcher/prometheus/metrics_test.go b/cmd/argo-watcher/prometheus/metrics_test.go index 0bc9f032..f93fbdb1 100644 --- a/cmd/argo-watcher/prometheus/metrics_test.go +++ b/cmd/argo-watcher/prometheus/metrics_test.go @@ -110,3 +110,30 @@ func testMetricRegistered(metricName string) bool { return false } + +func TestInProgressTasks(t *testing.T) { + metrics := &Metrics{} + metrics.Init() + + t.Run("AddInProgressTask", func(t *testing.T) { + // Call the method to test + metrics.AddInProgressTask() + + // Get the current value of the metric + metric := testutil.ToFloat64(metrics.inProgressTasks) + + // Check if the metric was incremented + assert.Equal(t, 1.0, metric) + }) + + t.Run("RemoveInProgressTask", func(t *testing.T) { + // Call the method to test + metrics.RemoveInProgressTask() + + // Get the current value of the metric + metric := testutil.ToFloat64(metrics.inProgressTasks) + + // Check if the metric was decremented + assert.Equal(t, 0.0, metric) + }) +}