Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: correctly process degraded applications #225

Merged
merged 19 commits into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 1 addition & 25 deletions .github/workflows/run-tests-and-sonar-scan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,8 @@ on:
- main
pull_request:
types: [opened, synchronize, reopened]
env:
GOLANG_VERSION: '1.21.0'

jobs:
golangci:
name: GolangCI
runs-on: ubuntu-latest
permissions:
contents: read

steps:
- uses: actions/checkout@v3

- uses: actions/setup-go@v4
with:
go-version: ${{ env.GOLANG_VERSION }}
cache: false

- name: Install project dependencies
run: make install-deps mocks docs

- name: golangci-lint
uses: golangci/golangci-lint-action@v3
with:
version: v1.53

test:
name: Test
runs-on: ubuntu-latest
Expand Down Expand Up @@ -59,7 +35,7 @@ jobs:

- uses: actions/setup-go@v4
with:
go-version: ${{ env.GOLANG_VERSION }}
go-version-file: go.mod

- name: Install project dependencies
run: make install-deps mocks docs
Expand Down
25 changes: 0 additions & 25 deletions .golangci.yml

This file was deleted.

1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,3 @@ repos:
- id: go-fmt
- id: go-mod-tidy
- id: go-imports
- id: golangci-lint
52 changes: 44 additions & 8 deletions cmd/argo-watcher/argo_status_updater.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
package main

import (
"bytes"
"errors"
"fmt"
"slices"
"strings"
"sync"
"time"

"github.com/shini4i/argo-watcher/internal/helpers"

"github.com/avast/retry-go/v4"
"github.com/rs/zerolog/log"
"github.com/shini4i/argo-watcher/internal/models"
Expand Down Expand Up @@ -42,6 +46,25 @@
}
}

// collectInitialAppStatus fetches the current state of task.App from the Argo API
// and stores a snapshot of it in task.SavedAppStatus: the rollout status computed
// for the task's images, and a hash of the application's (sorted) image list.
// It returns the error from GetApplication unchanged; in that case task is left untouched.
func (updater *ArgoStatusUpdater) collectInitialAppStatus(task *models.Task) error {
application, err := updater.argo.api.GetApplication(task.App)
if err != nil {
return err
}

Check warning on line 53 in cmd/argo-watcher/argo_status_updater.go

View check run for this annotation

Codecov / codecov/patch

cmd/argo-watcher/argo_status_updater.go#L52-L53

Added lines #L52 - L53 were not covered by tests

status := application.GetRolloutStatus(task.ListImages(), updater.registryProxyUrl)

// sort images to avoid hash mismatch
// (slices.Sort reorders application.Status.Summary.Images in place)
// NOTE(review): the degraded-status check elsewhere in this file hashes the image
// list without sorting it first — confirm both sides hash the same ordering.
slices.Sort(application.Status.Summary.Images)

task.SavedAppStatus = models.SavedAppStatus{
Status: status,
ImagesHash: helpers.GenerateHash(strings.Join(application.Status.Summary.Images, ",")),
}

return nil
}

func (updater *ArgoStatusUpdater) WaitForRollout(task models.Task) {
// wait for application to get into deployed status or timeout
application, err := updater.waitForApplicationDeployment(task)
Expand All @@ -57,7 +80,7 @@

// get application status
status := application.GetRolloutStatus(task.ListImages(), updater.registryProxyUrl)
if application.IsFinalRolloutStatus(status) {
if status == models.ArgoRolloutAppSuccess {
log.Info().Str("id", task.Id).Msg("App is running on the expected version.")
// deployment success
updater.argo.metrics.ResetFailedDeployment(task.App)
Expand Down Expand Up @@ -93,6 +116,11 @@
return nil, err
}

// save the initial application status to compare with the final one
if err := updater.collectInitialAppStatus(&task); err != nil {
return nil, err
}

Check warning on line 122 in cmd/argo-watcher/argo_status_updater.go

View check run for this annotation

Codecov / codecov/patch

cmd/argo-watcher/argo_status_updater.go#L121-L122

Added lines #L121 - L122 were not covered by tests

// This mutex is used only to avoid concurrent updates of the same application.
mutex := updater.mutex.Get(task.App)

Expand Down Expand Up @@ -134,6 +162,7 @@
log.Debug().Str("id", task.Id).Msg("Waiting for rollout")
_ = retry.Do(func() error {
application, err = updater.argo.api.GetApplication(task.App)

if err != nil {
// check if ArgoCD didn't have the app
if task.IsAppNotFoundError(err) {
Expand All @@ -144,16 +173,23 @@
log.Debug().Str("id", task.Id).Msgf("Failed fetching application status. Error: %s", err.Error())
return err
}
// print application debug here

status := application.GetRolloutStatus(task.ListImages(), updater.registryProxyUrl)
if !application.IsFinalRolloutStatus(status) {
// print status debug here

switch status {
case models.ArgoRolloutAppDegraded:
log.Debug().Str("id", task.Id).Msgf("Application is degraded")
hash := helpers.GenerateHash(strings.Join(application.Status.Summary.Images, ","))
if !bytes.Equal(task.SavedAppStatus.ImagesHash, hash) {
return retry.Unrecoverable(errors.New("application has degraded"))
}

Check warning on line 185 in cmd/argo-watcher/argo_status_updater.go

View check run for this annotation

Codecov / codecov/patch

cmd/argo-watcher/argo_status_updater.go#L180-L185

Added lines #L180 - L185 were not covered by tests
case models.ArgoRolloutAppSuccess:
log.Debug().Str("id", task.Id).Msgf("Application rollout finished")
return nil
default:
log.Debug().Str("id", task.Id).Msgf("Application status is not final. Status received \"%s\"", status)
return errors.New("force retry")
}
// all good
log.Debug().Str("id", task.Id).Msgf("Application rollout finished")
return nil
return errors.New("force retry")
}, updater.retryOptions...)

// return application and latest error
Expand Down
12 changes: 6 additions & 6 deletions cmd/argo-watcher/argo_status_updater_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func TestArgoStatusUpdaterCheck(t *testing.T) {
application.Status.Health.Status = "Healthy"

// mock calls
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(2)
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3)
metricsMock.EXPECT().ResetFailedDeployment(task.App)
stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusDeployedMessage, "")

Expand Down Expand Up @@ -139,7 +139,7 @@ func TestArgoStatusUpdaterCheck(t *testing.T) {
application.Status.Health.Status = "Healthy"

// mock calls
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(2)
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3)
metricsMock.EXPECT().ResetFailedDeployment(task.App)
stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusDeployedMessage, "")

Expand Down Expand Up @@ -180,7 +180,7 @@ func TestArgoStatusUpdaterCheck(t *testing.T) {
application.Status.Health.Status = "Healthy"

// mock calls
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(2)
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3)
metricsMock.EXPECT().AddFailedDeployment(task.App)
stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusFailedMessage,
"Application deployment failed. Rollout status \"not available\"\n\nList of current images (last app check):\n\ttest-registry/ghcr.io/shini4i/argo-watcher:dev\n\nList of expected images:\n\tghcr.io/shini4i/argo-watcher:dev")
Expand Down Expand Up @@ -307,7 +307,7 @@ func TestArgoStatusUpdaterCheck(t *testing.T) {
application.Status.Summary.Images = []string{"test-image:v0.0.1"}

// mock calls
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(2)
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3)
metricsMock.EXPECT().AddFailedDeployment(task.App)
stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusFailedMessage,
"Application deployment failed. Rollout status \"not available\"\n\nList of current images (last app check):\n\ttest-image:v0.0.1\n\nList of expected images:\n\tghcr.io/shini4i/argo-watcher:dev")
Expand Down Expand Up @@ -351,7 +351,7 @@ func TestArgoStatusUpdaterCheck(t *testing.T) {
application.Status.OperationState.Message = "Not working test app"

// mock calls
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(2)
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3)
metricsMock.EXPECT().AddFailedDeployment(task.App)
stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusFailedMessage,
"Application deployment failed. Rollout status \"not synced\"\n\nApp status \"NotWorking\"\nApp message \"Not working test app\"\nResources:\n\t")
Expand Down Expand Up @@ -393,7 +393,7 @@ func TestArgoStatusUpdaterCheck(t *testing.T) {
application.Status.Health.Status = "NotHealthy"

// mock calls
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(2)
apiMock.EXPECT().GetApplication(task.App).Return(&application, nil).Times(3)
metricsMock.EXPECT().AddFailedDeployment(task.App)
stateMock.EXPECT().SetTaskStatus(task.Id, models.StatusFailedMessage,
"Application deployment failed. Rollout status \"not healthy\"\n\nApp sync status \"Synced\"\nApp health status \"NotHealthy\"\nResources:\n\t")
Expand Down
58 changes: 27 additions & 31 deletions cmd/argo-watcher/config/config.go
Original file line number Diff line number Diff line change
@@ -1,41 +1,38 @@
package config

import (
"errors"
"net/url"

"github.com/shini4i/argo-watcher/internal/helpers"

envConfig "github.com/caarlos0/env/v9"
envConfig "github.com/caarlos0/env/v10"
"github.com/go-playground/validator/v10"
)

const (
LogFormatText = "text"
)

// ServerConfig holds the argo-watcher server settings. Each field names its source
// environment variable in the `env` tag (with `required` / `envDefault` where set);
// the `json` tag controls serialization, and fields tagged `json:"-"` are left out of JSON output.
type ServerConfig struct {
ArgoUrl url.URL `env:"ARGO_URL,required" json:"argo_cd_url"`
// ArgoUrlAlias is used to replace the ArgoUrl in the UI. This is useful when the ArgoUrl is an internal URL
ArgoUrlAlias string `env:"ARGO_URL_ALIAS" json:"argo_cd_url_alias,omitempty"`
ArgoToken string `env:"ARGO_TOKEN,required" json:"-"`
ArgoApiTimeout int64 `env:"ARGO_API_TIMEOUT" envDefault:"60" json:"argo_api_timeout"`
DeploymentTimeout uint `env:"DEPLOYMENT_TIMEOUT" envDefault:"900" json:"deployment_timeout"`
ArgoRefreshApp bool `env:"ARGO_REFRESH_APP" envDefault:"true" json:"argo_refresh_app"`
RegistryProxyUrl string `env:"DOCKER_IMAGES_PROXY" json:"registry_proxy_url,omitempty"`
StateType string `env:"STATE_TYPE,required" json:"state_type"`
StaticFilePath string `env:"STATIC_FILES_PATH" envDefault:"static" json:"-"`
SkipTlsVerify bool `env:"SKIP_TLS_VERIFY" envDefault:"false" json:"skip_tls_verify"`
LogLevel string `env:"LOG_LEVEL" envDefault:"info" json:"log_level"`
LogFormat string `env:"LOG_FORMAT" envDefault:"json" json:"-"`
Host string `env:"HOST" envDefault:"0.0.0.0" json:"-"`
Port string `env:"PORT" envDefault:"8080" json:"-"`
DbHost string `env:"DB_HOST" json:"db_host,omitempty"`
DbPort int `env:"DB_PORT" json:"db_port,omitempty"`
DbName string `env:"DB_NAME" json:"db_name,omitempty"`
DbUser string `env:"DB_USER" json:"db_user,omitempty"`
DbPassword string `env:"DB_PASSWORD" json:"-"`
DbMigrationsPath string `env:"DB_MIGRATIONS_PATH" envDefault:"db/migrations" json:"-"`
DeployToken string `env:"ARGO_WATCHER_DEPLOY_TOKEN" json:"-"`
ArgoUrl url.URL `env:"ARGO_URL,required" json:"argo_cd_url"`
ArgoUrlAlias string `env:"ARGO_URL_ALIAS" json:"argo_cd_url_alias,omitempty"` // Used to generate App URL. Can be omitted if ArgoUrl is reachable from outside.
ArgoToken string `env:"ARGO_TOKEN,required" json:"-"`
ArgoApiTimeout int64 `env:"ARGO_API_TIMEOUT" envDefault:"60" json:"argo_api_timeout"`
DeploymentTimeout uint `env:"DEPLOYMENT_TIMEOUT" envDefault:"900" json:"deployment_timeout"`
ArgoRefreshApp bool `env:"ARGO_REFRESH_APP" envDefault:"true" json:"argo_refresh_app"`
RegistryProxyUrl string `env:"DOCKER_IMAGES_PROXY" json:"registry_proxy_url,omitempty"`
StateType string `env:"STATE_TYPE,required" validate:"oneof=postgres in-memory" json:"state_type"`
StaticFilePath string `env:"STATIC_FILES_PATH" envDefault:"static" json:"-"`
SkipTlsVerify bool `env:"SKIP_TLS_VERIFY" envDefault:"false" json:"skip_tls_verify"`
LogLevel string `env:"LOG_LEVEL" envDefault:"info" json:"log_level"`
LogFormat string `env:"LOG_FORMAT" envDefault:"json" json:"-"`
Host string `env:"HOST" envDefault:"0.0.0.0" json:"-"`
Port string `env:"PORT" envDefault:"8080" json:"-"`
DbHost string `env:"DB_HOST" json:"db_host,omitempty"`
DbPort int `env:"DB_PORT" json:"db_port,omitempty"`
DbName string `env:"DB_NAME" json:"db_name,omitempty"`
DbUser string `env:"DB_USER" json:"db_user,omitempty"`
DbPassword string `env:"DB_PASSWORD" json:"-"`
DbMigrationsPath string `env:"DB_MIGRATIONS_PATH" envDefault:"db/migrations" json:"-"`
DeployToken string `env:"ARGO_WATCHER_DEPLOY_TOKEN" json:"-"`
}

// NewServerConfig parses the server configuration from environment variables using the envconfig package.
Expand All @@ -53,10 +50,9 @@ func NewServerConfig() (*ServerConfig, error) {
return nil, err
}

// custom checks
allowedTypes := []string{"postgres", "in-memory"}
if config.StateType == "" || !helpers.Contains(allowedTypes, config.StateType) {
return nil, errors.New("variable STATE_TYPE must be one of [\"postgres\", \"in-memory\"]")
validate := validator.New()
if err := validate.Struct(&config); err != nil {
return nil, err
}

// return config
Expand All @@ -68,5 +64,5 @@ func NewServerConfig() (*ServerConfig, error) {
// The calculated value is incremented by 1 to account for the initial attempt.
// It returns the number of retry attempts as an unsigned integer.
func (config *ServerConfig) GetRetryAttempts() uint {
// Unsigned integer division: one attempt per full 15 units of DeploymentTimeout, plus one.
// NOTE(review): both return statements below evaluate to the same value (division
// binds tighter than addition); if both are kept, the second is unreachable.
return (config.DeploymentTimeout / 15) + 1
return config.DeploymentTimeout/15 + 1
}
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ services:
STATE_TYPE: postgres
ARGO_URL: http://mock:8081
ARGO_TOKEN: example
ARGO_TIMEOUT: 120
DEPLOYMENT_TIMEOUT: 120
DB_HOST: postgres
DB_PORT: 5432
DB_USER: watcher
Expand Down
Loading