From b1af44a700a5ea536117a5821c5188860fbd4511 Mon Sep 17 00:00:00 2001 From: Lucas Rodriguez Date: Mon, 6 May 2024 15:22:13 -0500 Subject: [PATCH] fix!: do not uninstall helm chart after failed install or upgrade (#2456) ## Description Do not uninstall helm chart after failed install or upgrade ## Related Issue Fixes #2455 ## Checklist before merging - [x] Test, docs, adr added or updated as needed - [x] [Contributor Guide Steps](https://github.com/defenseunicorns/zarf/blob/main/.github/CONTRIBUTING.md#developer-workflow) followed --------- Co-authored-by: Austin Abro <37223396+AustinAbro321@users.noreply.github.com> --- site/src/content/docs/ref/deploy.mdx | 38 ++++++++++++++++++++++++++ src/internal/packager/helm/chart.go | 25 +++++++---------- src/pkg/packager/deploy.go | 2 +- src/test/e2e/25_helm_test.go | 13 +++++---- src/test/e2e/36_custom_retries_test.go | 5 +++- 5 files changed, 61 insertions(+), 22 deletions(-) diff --git a/site/src/content/docs/ref/deploy.mdx b/site/src/content/docs/ref/deploy.mdx index f6b144c9bd..3b92ee1070 100644 --- a/site/src/content/docs/ref/deploy.mdx +++ b/site/src/content/docs/ref/deploy.mdx @@ -120,3 +120,41 @@ $ zarf connect [service name] You can also specify a package locally, or via oci such as `zarf package deploy oci://defenseunicorns/dos-games:1.0.0-$(uname -m) --key=https://zarf.dev/cosign.pub` ::: + +## Installing, Upgrading, and Rolling Back with Helm + +Zarf deploys resources in Kubernetes using [Helm's Go SDK](https://helm.sh/docs/topics/advanced/#go-sdk), and converts manifests into Helm charts for installation. + +If no existing Helm releases match a given chart in the cluster, Zarf executes a `helm install`. + +Should matching releases exist, a `helm upgrade` is performed. + +### Handling CustomResourceDefinitions (CRDs) + + - CRDs are _included_ during `helm install` to support Kubernetes Operator deployments + - CRDs are _excluded_ during `helm upgrade` due to [Helm's lack of support for upgrading CRDs](https://helm.sh/docs/chart_best_practices/custom_resource_definitions/#some-caveats-and-explanations) + +### Waiting for Resource Readiness + +By default, Zarf waits for all resources to deploy successfully during install, upgrade, and rollback operations. + +You can override this behavior during install and upgrade by setting the `noWait: true` key under the `charts` and `manifests` fields. + +### Timeout Settings + +The default timeout for Helm operations in Zarf is 15 minutes. + +Use the `--timeout` flag with `zarf init` and `zarf package deploy` to modify the timeout duration. + +### Retry Policy + +Zarf retries install and upgrade operations up to three times by default if an error occurs. + +Use the `--retries` flag with `zarf init` and `zarf package deploy` to change the number of retry attempts. + +### Rollback Process + +If attempts to upgrade a chart fail, Zarf tries to roll the chart back to its last successful release. During this rollback process: + + - Any resources created during the failed upgrade attempt are deleted (`helm rollback --cleanup-on-fail`) + - Resource updates are forced through delete and recreate if needed (`helm rollback --force`) diff --git a/src/internal/packager/helm/chart.go b/src/internal/packager/helm/chart.go index 72b80d65f9..9252bf40d4 100644 --- a/src/internal/packager/helm/chart.go +++ b/src/internal/packager/helm/chart.go @@ -82,7 +82,7 @@ func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { } if err != nil { - return fmt.Errorf("unable to complete the helm chart install/upgrade: %w", err) + return err } message.Debug(output.Info.Description) @@ -92,7 +92,6 @@ func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { err = helpers.Retry(tryHelm, h.retries, 5*time.Second, message.Warnf) if err != nil { - // Try to rollback any deployed releases releases, _ := histClient.Run(h.chart.ReleaseName) previouslyDeployedVersion := 0 @@ -103,25 +102,21 @@ func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { } } - // On total failure try to rollback (if there was a previously deployed version) or uninstall. - if previouslyDeployedVersion > 0 { - spinner.Updatef("Performing chart rollback") - - err = h.rollbackChart(h.chart.ReleaseName, previouslyDeployedVersion) - if err != nil { - return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts and unable to rollback: %w", h.retries, err) - } + removeMsg := "if you need to remove the failed chart, use `zarf package remove`" - return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts", h.retries) + // No prior releases means this was an initial install. + if previouslyDeployedVersion == 0 { + return nil, "", fmt.Errorf("unable to install chart after %d attempts: %s", h.retries, removeMsg) } - spinner.Updatef("Performing chart uninstall") - _, err = h.uninstallChart(h.chart.ReleaseName) + // Attempt to rollback on a failed upgrade. + spinner.Updatef("Performing chart rollback") + err = h.rollbackChart(h.chart.ReleaseName, previouslyDeployedVersion) if err != nil { - return nil, "", fmt.Errorf("unable to install chart after %d attempts and unable to uninstall: %w", h.retries, err) + return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts and unable to rollback: %s", h.retries, removeMsg) } - return nil, "", fmt.Errorf("unable to install chart after %d attempts", h.retries) + return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts: %s", h.retries, removeMsg) } // return any collected connect strings for zarf connect. diff --git a/src/pkg/packager/deploy.go b/src/pkg/packager/deploy.go index ef4495be0a..181a52cb42 100644 --- a/src/pkg/packager/deploy.go +++ b/src/pkg/packager/deploy.go @@ -340,7 +340,7 @@ func (p *Packager) deployComponent(component types.ZarfComponent, noImgChecksum if hasCharts || hasManifests { if charts, err = p.installChartAndManifests(componentPath, component); err != nil { - return charts, fmt.Errorf("unable to install helm chart(s): %w", err) + return charts, err } } diff --git a/src/test/e2e/25_helm_test.go b/src/test/e2e/25_helm_test.go index 1926785c9a..e09d6cf47f 100644 --- a/src/test/e2e/25_helm_test.go +++ b/src/test/e2e/25_helm_test.go @@ -130,16 +130,19 @@ func testHelmUninstallRollback(t *testing.T) { // This package contains SBOMable things but was created with --skip-sbom require.Contains(t, string(stdErr), "This package does NOT contain an SBOM.") - // Ensure that this does not leave behind a dos-games chart + // Ensure this leaves behind a dos-games chart. + // We do not want to uninstall charts that had failed installs/upgrades + // to prevent unintentional deletion and/or data loss in production environments. + // https://github.com/defenseunicorns/zarf/issues/2455 helmOut, err := exec.Command("helm", "list", "-n", "dos-games").Output() require.NoError(t, err) - require.NotContains(t, string(helmOut), "zarf-f53a99d4a4dd9a3575bedf59cd42d48d751ae866") + require.Contains(t, string(helmOut), "zarf-f53a99d4a4dd9a3575bedf59cd42d48d751ae866") // Deploy the good package. stdOut, stdErr, err = e2e.Zarf("package", "deploy", goodPath, "--confirm") require.NoError(t, err, stdOut, stdErr) - // Ensure that this does create a dos-games chart + // Ensure this upgrades/fixes the dos-games chart. helmOut, err = exec.Command("helm", "list", "-n", "dos-games").Output() require.NoError(t, err) require.Contains(t, string(helmOut), "zarf-f53a99d4a4dd9a3575bedf59cd42d48d751ae866") @@ -151,7 +154,7 @@ func testHelmUninstallRollback(t *testing.T) { // Ensure that we rollback properly helmOut, err = exec.Command("helm", "history", "-n", "dos-games", "zarf-f53a99d4a4dd9a3575bedf59cd42d48d751ae866", "--max", "1").Output() require.NoError(t, err) - require.Contains(t, string(helmOut), "Rollback to 1") + require.Contains(t, string(helmOut), "Rollback to 4") // Deploy the evil package (again to ensure we check full history) stdOut, stdErr, err = e2e.Zarf("package", "deploy", evilPath, "--timeout", "10s", "--confirm") @@ -160,7 +163,7 @@ func testHelmUninstallRollback(t *testing.T) { // Ensure that we rollback properly helmOut, err = exec.Command("helm", "history", "-n", "dos-games", "zarf-f53a99d4a4dd9a3575bedf59cd42d48d751ae866", "--max", "1").Output() require.NoError(t, err) - require.Contains(t, string(helmOut), "Rollback to 5") + require.Contains(t, string(helmOut), "Rollback to 8") // Remove the package. stdOut, stdErr, err = e2e.Zarf("package", "remove", "dos-games", "--confirm") diff --git a/src/test/e2e/36_custom_retries_test.go b/src/test/e2e/36_custom_retries_test.go index a0f33b94ac..cbe5428a25 100644 --- a/src/test/e2e/36_custom_retries_test.go +++ b/src/test/e2e/36_custom_retries_test.go @@ -28,5 +28,8 @@ func TestRetries(t *testing.T) { stdOut, stdErr, err = e2e.Zarf("package", "deploy", path.Join(tmpDir, pkgName), "--retries", "2", "--timeout", "3s", "--tmpdir", tmpDir, "--confirm") require.Error(t, err, stdOut, stdErr) require.Contains(t, stdErr, "Retrying in 5s") - require.Contains(t, stdErr, "unable to install chart after 2 attempts") + require.Contains(t, e2e.StripMessageFormatting(stdErr), "unable to install chart after 2 attempts") + + _, _, err = e2e.Zarf("package", "remove", "dos-games", "--confirm") + require.NoError(t, err) }