Skip to content

Commit

Permalink
Merge branch 'main' into fix_model_migration
Browse files — browse the repository at this point in the history
  • Loading branch information
bozerkins authored Aug 15, 2023
2 parents 43050be + 58fac9d commit 27abee8
Show file tree
Hide file tree
Showing 11 changed files with 393 additions and 253 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/run-tests-and-sonar-scan.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ jobs:
ARGO_URL: http://localhost:8081
run: make test

- name: SonarCloud Scan
uses: sonarsource/sonarcloud-github-action@v1.9.1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: coverage.out
7 changes: 4 additions & 3 deletions .goreleaser.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,14 @@ docker_signs:
checksum:
name_template: 'checksums.txt'

snapshot:
name_template: "{{ incpatch .Version }}-next"

changelog:
use:
github-native

release:
prerelease: auto
draft: false

footer: |
## Docker Images
- `ghcr.io/shini4i/{{ .ProjectName }}:{{ .Version }}`
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Improve visibility of deployments managed by Argo CD Image Updater
![GitHub Actions](https://img.shields.io/github/actions/workflow/status/shini4i/argo-watcher/run-tests-and-sonar-scan.yml?branch=main)
![GitHub go.mod Go version](https://img.shields.io/github/go-mod/go-version/shini4i/argo-watcher)
![GitHub release (latest by date)](https://img.shields.io/github/v/release/shini4i/argo-watcher)
[![Coverage](https://sonarcloud.io/api/project_badges/measure?project=shini4i_argo-watcher&metric=coverage)](https://sonarcloud.io/summary/new_code?id=shini4i_argo-watcher)
[![codecov](https://codecov.io/gh/shini4i/argo-watcher/graph/badge.svg?token=9JI19X0BIN)](https://codecov.io/gh/shini4i/argo-watcher)
[![Go Report Card](https://goreportcard.com/badge/github.com/shini4i/argo-watcher)](https://goreportcard.com/report/github.com/shini4i/argo-watcher)
![GitHub](https://img.shields.io/github/license/shini4i/argo-watcher)

Expand Down
3 changes: 1 addition & 2 deletions cmd/argo-watcher/argo.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ import (
)

var (
argoSyncRetryDelay = 15 * time.Second
errorArgoPlannedRetry = fmt.Errorf("planned retry")
argoSyncRetryDelay = 15 * time.Second
)

const (
Expand Down
289 changes: 78 additions & 211 deletions cmd/argo-watcher/argo_status_updater.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,254 +8,121 @@ import (

"github.com/avast/retry-go/v4"
"github.com/rs/zerolog/log"
"github.com/shini4i/argo-watcher/internal/helpers"
"github.com/shini4i/argo-watcher/internal/models"
)

const defaultErrorMessage string = "could not retrieve details"
const failedToUpdateTaskStatusTemplate string = "Failed to change task status: %s"

// ArgoStatusUpdater bundles the Argo handle with the retry settings that are
// used when polling ArgoCD for an application's rollout status.
// NOTE(review): this listing comes from a rendered diff; retryAttempts/retryDelay
// and retryOptions may belong to different revisions of the struct — confirm
// against the actual file before relying on all fields coexisting.
type ArgoStatusUpdater struct {
// argo is the Argo value supplied to Init; its api, state and metrics members
// are what the updater calls into.
argo Argo
// retryAttempts is the attempt count supplied to Init.
retryAttempts uint
// retryDelay is the delay between attempts supplied to Init.
retryDelay time.Duration
// registryProxyUrl is forwarded to the image comparison / rollout status checks.
registryProxyUrl string
// retryOptions is the retry-go option list assembled in Init and passed to retry.Do.
retryOptions []retry.Option
}

// Init stores the supplied Argo handle, retry settings and registry proxy URL
// on the updater, and builds the retry-go option list used for status polling.
//
// The option list is: fixed delay type, retryAttempts attempts, retryDelay
// between attempts, and LastErrorOnly enabled.
func (updater *ArgoStatusUpdater) Init(argo Argo, retryAttempts uint, retryDelay time.Duration, registryProxyUrl string) {
updater.argo = argo
updater.retryAttempts = retryAttempts
updater.retryDelay = retryDelay
updater.registryProxyUrl = registryProxyUrl
// Build the options once here so each retry.Do call can reuse the same slice.
updater.retryOptions = []retry.Option{
retry.DelayType(retry.FixedDelay),
retry.Attempts(retryAttempts),
retry.Delay(retryDelay),
retry.LastErrorOnly(true),
}
}

func (updater *ArgoStatusUpdater) WaitForRollout(task models.Task) {
// continuously check for application status change
status, err := updater.checkWithRetry(task)

// application synced successfully
if status == ArgoAppSuccess {
updater.handleDeploymentSuccess(task)
return
}

// we had some unexpected error with ArgoCD API
if status == ArgoAppFailed {
// wait for application to get into deployed status or timeout
application, err := updater.waitForApplicationDeployment(task)

// handle application failure
if err != nil {
// deployment failed
updater.argo.metrics.AddFailedDeployment(task.App)
// update task status regarding failure
updater.handleArgoAPIFailure(task, err)
return
}

// fetch application details
app, err := updater.argo.api.GetApplication(task.App)

// handle application sync failure
switch status {
// not all images were deployed to the application
case ArgoAppNotAvailable:
// show list of missing images
var message string
// define details
if err != nil {
message = defaultErrorMessage
} else {
message = fmt.Sprintf(
"List of current images (last app check):\n"+
"\t%s\n\n"+
"List of expected images:\n"+
"\t%s",
strings.Join(app.Status.Summary.Images, "\n\t"),
strings.Join(task.ListImages(), "\n\t"),
)
// get application status
status := application.GetRolloutStatus(task.ListImages(), updater.registryProxyUrl)
if application.IsFinalRolloutStatus(status) {
log.Info().Str("id", task.Id).Msg("App is running on the excepted version.")
// deployment success
updater.argo.metrics.ResetFailedDeployment(task.App)
// update task status
errStatusChange := updater.argo.state.SetTaskStatus(task.Id, models.StatusDeployedMessage, "")
if errStatusChange != nil {
log.Error().Str("id", task.Id).Msgf(failedToUpdateTaskStatusTemplate, errStatusChange)
}
// handle error
updater.handleAppNotAvailable(task, errors.New(message))
// application sync status wasn't valid
case ArgoAppNotSynced:
// display sync status and last sync message
var message string
// define details
if err != nil {
message = defaultErrorMessage
} else {
message = fmt.Sprintf(
"App status \"%s\"\n"+
"App message \"%s\"\n"+
"Resources:\n"+
"\t%s",
app.Status.OperationState.Phase,
app.Status.OperationState.Message,
strings.Join(app.ListSyncResultResources(), "\n\t"),
)
}
// handle error
updater.handleAppOutOfSync(task, errors.New(message))
// application is not in a healthy status
case ArgoAppNotHealthy:
// display current health of pods
var message string
// define details
if err != nil {
message = defaultErrorMessage
} else {
message = fmt.Sprintf(
"App sync status \"%s\"\n"+
"App health status \"%s\"\n"+
"Resources:\n"+
"\t%s",
app.Status.Sync.Status,
app.Status.Health.Status,
strings.Join(app.ListUnhealthyResources(), "\n\t"),
)
} else {
log.Info().Str("id", task.Id).Msg("App deployment failed.")
// deployment failed
updater.argo.metrics.AddFailedDeployment(task.App)
// generate failure reason
reason := fmt.Sprintf(
"Application deployment failed. Rollout status \"%s\"\n\n%s",
status,
application.GetRolloutMessage(status, task.ListImages()),
)
// update task status
errStatusChange := updater.argo.state.SetTaskStatus(task.Id, models.StatusFailedMessage, reason)
if errStatusChange != nil {
log.Error().Str("id", task.Id).Msgf(failedToUpdateTaskStatusTemplate, errStatusChange)
}
// handle error
updater.handleAppNotHealthy(task, errors.New(message))
// handle unexpected status
default:
updater.handleDeploymentUnexpectedStatus(task, fmt.Errorf("received unexpected status \"%d\"", status))
}
}

func (updater *ArgoStatusUpdater) checkWithRetry(task models.Task) (int, error) {
var lastStatus int

err := retry.Do(
func() error {
app, err := updater.argo.api.GetApplication(task.App)

if err != nil {
log.Warn().Str("app", task.App).Msg(err.Error())
lastStatus = ArgoAppFailed
return err
}

for _, image := range task.Images {
expected := fmt.Sprintf("%s:%s", image.Image, image.Tag)
if !helpers.ImagesContains(app.Status.Summary.Images, expected, updater.registryProxyUrl) {
log.Debug().Str("app", task.App).Str("id", task.Id).Msgf("%s is not available yet", expected)
lastStatus = ArgoAppNotAvailable
return errorArgoPlannedRetry
} else {
log.Debug().Str("app", task.App).Str("id", task.Id).Msgf("Expected image is in the app summary")
}
}
func (updater *ArgoStatusUpdater) waitForApplicationDeployment(task models.Task) (*models.Application, error) {
var application *models.Application
var err error

if app.Status.Sync.Status != "Synced" {
log.Debug().Str("id", task.Id).Msgf("%s is not synced yet", task.App)
lastStatus = ArgoAppNotSynced
return errorArgoPlannedRetry
}

if app.Status.Health.Status != "Healthy" {
log.Debug().Str("id", task.Id).Msgf("%s is not healthy yet", task.App)
lastStatus = ArgoAppNotHealthy
return errorArgoPlannedRetry
// wait for application to get into deployed status or timeout
log.Debug().Str("id", task.Id).Msg("Waiting for rollout")
_ = retry.Do(func() error {
application, err = updater.argo.api.GetApplication(task.App)
if err != nil {
// check if ArgoCD didn't have the app
if task.IsAppNotFoundError(err) {
// no need to retry in such cases
return retry.Unrecoverable(err)
}
// print application api failure here
log.Debug().Str("id", task.Id).Msgf("Failed fetching application status. Error: %s", err.Error())
return err
}
// print application debug here
status := application.GetRolloutStatus(task.ListImages(), updater.registryProxyUrl)
if !application.IsFinalRolloutStatus(status) {
// print status debug here
log.Debug().Str("id", task.Id).Msgf("Application status is not final. Status received \"%s\"", status)
return errors.New("force retry")
}
// all good
log.Debug().Str("id", task.Id).Msgf("Application rollout finished")
return nil
}, updater.retryOptions...)

lastStatus = ArgoAppSuccess
return nil
},
retry.DelayType(retry.FixedDelay),
retry.Delay(updater.retryDelay),
retry.Attempts(updater.retryAttempts),
retry.RetryIf(func(err error) bool {
return errors.Is(err, errorArgoPlannedRetry)
}),
retry.LastErrorOnly(true),
)

return lastStatus, err
// return application and latest error
return application, err
}

func (updater *ArgoStatusUpdater) handleArgoAPIFailure(task models.Task, err error) {
// notify user that app wasn't found
appNotFoundError := fmt.Sprintf("applications.argoproj.io \"%s\" not found", task.App)
if strings.Contains(err.Error(), appNotFoundError) {
updater.handleAppNotFound(task, err)
return
}
// notify user that ArgoCD API isn't available
if strings.Contains(err.Error(), argoUnavailableErrorMessage) {
updater.handleArgoUnavailable(task, err)
return
}

// notify of unexpected error
updater.handleDeploymentFailed(task, err)
}
var apiFailureStatus string = models.StatusFailedMessage

func (updater *ArgoStatusUpdater) handleAppNotFound(task models.Task, err error) {
log.Info().Str("id", task.Id).Msgf("Application %s does not exist.", task.App)
reason := fmt.Sprintf(ArgoAPIErrorTemplate, err.Error())
errStatusChange := updater.argo.state.SetTaskStatus(task.Id, models.StatusAppNotFoundMessage, reason)
if errStatusChange != nil {
log.Error().Str("id", task.Id).Msgf(failedToUpdateTaskStatusTemplate, errStatusChange)
// check if ArgoCD didn't have the app
if task.IsAppNotFoundError(err) {
apiFailureStatus = models.StatusAppNotFoundMessage
}
}

func (updater *ArgoStatusUpdater) handleArgoUnavailable(task models.Task, err error) {
log.Error().Str("id", task.Id).Msg("ArgoCD is not available. Aborting.")
reason := fmt.Sprintf(ArgoAPIErrorTemplate, err.Error())
errStatusChange := updater.argo.state.SetTaskStatus(task.Id, models.StatusAborted, reason)
if errStatusChange != nil {
log.Error().Str("id", task.Id).Msgf(failedToUpdateTaskStatusTemplate, errStatusChange)
// check if ArgoCD was unavailable
if strings.Contains(err.Error(), argoUnavailableErrorMessage) {
apiFailureStatus = models.StatusAborted
}
}

func (updater *ArgoStatusUpdater) handleDeploymentFailed(task models.Task, err error) {
log.Warn().Str("id", task.Id).Msgf("Deployment failed. Aborting with error: %s", err)
updater.argo.metrics.AddFailedDeployment(task.App)
// write debug reason
reason := fmt.Sprintf(ArgoAPIErrorTemplate, err.Error())
errStatusChange := updater.argo.state.SetTaskStatus(task.Id, models.StatusFailedMessage, reason)
if errStatusChange != nil {
log.Error().Str("id", task.Id).Msgf(failedToUpdateTaskStatusTemplate, errStatusChange)
}
}

func (updater *ArgoStatusUpdater) handleDeploymentSuccess(task models.Task) {
log.Info().Str("id", task.Id).Msg("App is running on the excepted version.")
updater.argo.metrics.ResetFailedDeployment(task.App)
errStatusChange := updater.argo.state.SetTaskStatus(task.Id, models.StatusDeployedMessage, "")
if errStatusChange != nil {
log.Error().Str("id", task.Id).Msgf(failedToUpdateTaskStatusTemplate, errStatusChange)
}
}

func (updater *ArgoStatusUpdater) handleAppNotAvailable(task models.Task, err error) {
log.Warn().Str("id", task.Id).Msgf("Deployment failed. Application not available\n%s", err.Error())
updater.argo.metrics.AddFailedDeployment(task.App)
reason := fmt.Sprintf("Application not available\n\n%s", err.Error())
errStatusChange := updater.argo.state.SetTaskStatus(task.Id, models.StatusFailedMessage, reason)
if errStatusChange != nil {
log.Error().Str("id", task.Id).Msgf(failedToUpdateTaskStatusTemplate, errStatusChange)
}
}

func (updater *ArgoStatusUpdater) handleAppNotHealthy(task models.Task, err error) {
log.Warn().Str("id", task.Id).Msgf("Deployment failed. Application not healthy\n%s", err.Error())
updater.argo.metrics.AddFailedDeployment(task.App)
reason := fmt.Sprintf("Application not healthy\n\n%s", err.Error())
errStatusChange := updater.argo.state.SetTaskStatus(task.Id, models.StatusFailedMessage, reason)
if errStatusChange != nil {
log.Error().Str("id", task.Id).Msgf(failedToUpdateTaskStatusTemplate, errStatusChange)
}
}

func (updater *ArgoStatusUpdater) handleAppOutOfSync(task models.Task, err error) {
log.Warn().Str("id", task.Id).Msgf("Deployment failed. Application out of sync\n%s", err.Error())
updater.argo.metrics.AddFailedDeployment(task.App)
reason := fmt.Sprintf("Application out of sync\n\n%s", err.Error())
errStatusChange := updater.argo.state.SetTaskStatus(task.Id, models.StatusFailedMessage, reason)
if errStatusChange != nil {
log.Error().Str("id", task.Id).Msgf(failedToUpdateTaskStatusTemplate, errStatusChange)
}
}
log.Warn().Str("id", task.Id).Msgf("Deployment failed with status \"%s\". Aborting with error: %s", apiFailureStatus, reason)

func (updater *ArgoStatusUpdater) handleDeploymentUnexpectedStatus(task models.Task, err error) {
log.Error().Str("id", task.Id).Msg("Deployment timed out with unexpected status. Aborting.")
log.Error().Str("id", task.Id).Msgf("Deployment error\n%s", err.Error())
updater.argo.metrics.AddFailedDeployment(task.App)
reason := fmt.Sprintf("Deployment timeout\n\n%s", err.Error())
errStatusChange := updater.argo.state.SetTaskStatus(task.Id, models.StatusFailedMessage, reason)
errStatusChange := updater.argo.state.SetTaskStatus(task.Id, apiFailureStatus, reason)
if errStatusChange != nil {
log.Error().Str("id", task.Id).Msgf(failedToUpdateTaskStatusTemplate, errStatusChange)
}
Expand Down
Loading

0 comments on commit 27abee8

Please sign in to comment.