From 98f1562b268fc6a08b5bf4055fbfbd10956ddcb0 Mon Sep 17 00:00:00 2001 From: Lucas Rodriguez Date: Tue, 23 Apr 2024 15:06:30 -0500 Subject: [PATCH 1/9] Do not uninstall helm chart after failed install or upgrade --- src/internal/packager/helm/chart.go | 6 ------ src/test/e2e/25_helm_test.go | 13 ++++++++----- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/internal/packager/helm/chart.go b/src/internal/packager/helm/chart.go index 72b80d65f9..e92f50c159 100644 --- a/src/internal/packager/helm/chart.go +++ b/src/internal/packager/helm/chart.go @@ -115,12 +115,6 @@ func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts", h.retries) } - spinner.Updatef("Performing chart uninstall") - _, err = h.uninstallChart(h.chart.ReleaseName) - if err != nil { - return nil, "", fmt.Errorf("unable to install chart after %d attempts and unable to uninstall: %w", h.retries, err) - } - return nil, "", fmt.Errorf("unable to install chart after %d attempts", h.retries) } diff --git a/src/test/e2e/25_helm_test.go b/src/test/e2e/25_helm_test.go index 1926785c9a..e09d6cf47f 100644 --- a/src/test/e2e/25_helm_test.go +++ b/src/test/e2e/25_helm_test.go @@ -130,16 +130,19 @@ func testHelmUninstallRollback(t *testing.T) { // This package contains SBOMable things but was created with --skip-sbom require.Contains(t, string(stdErr), "This package does NOT contain an SBOM.") - // Ensure that this does not leave behind a dos-games chart + // Ensure this leaves behind a dos-games chart. + // We do not want to uninstall charts that had failed installs/upgrades + // to prevent unintentional deletion and/or data loss in production environments. + // https://github.com/defenseunicorns/zarf/issues/2455 helmOut, err := exec.Command("helm", "list", "-n", "dos-games").Output() require.NoError(t, err) - require.NotContains(t, string(helmOut), "zarf-f53a99d4a4dd9a3575bedf59cd42d48d751ae866") + require.Contains(t, string(helmOut), "zarf-f53a99d4a4dd9a3575bedf59cd42d48d751ae866") // Deploy the good package. stdOut, stdErr, err = e2e.Zarf("package", "deploy", goodPath, "--confirm") require.NoError(t, err, stdOut, stdErr) - // Ensure that this does create a dos-games chart + // Ensure this upgrades/fixes the dos-games chart. helmOut, err = exec.Command("helm", "list", "-n", "dos-games").Output() require.NoError(t, err) require.Contains(t, string(helmOut), "zarf-f53a99d4a4dd9a3575bedf59cd42d48d751ae866") @@ -151,7 +154,7 @@ func testHelmUninstallRollback(t *testing.T) { // Ensure that we rollback properly helmOut, err = exec.Command("helm", "history", "-n", "dos-games", "zarf-f53a99d4a4dd9a3575bedf59cd42d48d751ae866", "--max", "1").Output() require.NoError(t, err) - require.Contains(t, string(helmOut), "Rollback to 1") + require.Contains(t, string(helmOut), "Rollback to 4") // Deploy the evil package (again to ensure we check full history) stdOut, stdErr, err = e2e.Zarf("package", "deploy", evilPath, "--timeout", "10s", "--confirm") @@ -160,7 +163,7 @@ func testHelmUninstallRollback(t *testing.T) { // Ensure that we rollback properly helmOut, err = exec.Command("helm", "history", "-n", "dos-games", "zarf-f53a99d4a4dd9a3575bedf59cd42d48d751ae866", "--max", "1").Output() require.NoError(t, err) - require.Contains(t, string(helmOut), "Rollback to 5") + require.Contains(t, string(helmOut), "Rollback to 8") // Remove the package. stdOut, stdErr, err = e2e.Zarf("package", "remove", "dos-games", "--confirm") From 6ab57d0bd36102561deea5a82276d1e2340c22c1 Mon Sep 17 00:00:00 2001 From: Lucas Rodriguez Date: Wed, 24 Apr 2024 17:33:40 -0500 Subject: [PATCH 2/9] Add msg about using pkg remove and cleanup err handling --- src/internal/packager/helm/chart.go | 23 ++++++++++++----------- src/pkg/packager/deploy.go | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/internal/packager/helm/chart.go b/src/internal/packager/helm/chart.go index e92f50c159..3a9413fa22 100644 --- a/src/internal/packager/helm/chart.go +++ b/src/internal/packager/helm/chart.go @@ -82,7 +82,7 @@ func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { } if err != nil { - return fmt.Errorf("unable to complete the helm chart install/upgrade: %w", err) + return err } message.Debug(output.Info.Description) @@ -92,7 +92,6 @@ func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { err = helpers.Retry(tryHelm, h.retries, 5*time.Second, message.Warnf) if err != nil { - // Try to rollback any deployed releases releases, _ := histClient.Run(h.chart.ReleaseName) previouslyDeployedVersion := 0 @@ -103,19 +102,21 @@ func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { } } - // On total failure try to rollback (if there was a previously deployed version) or uninstall. - if previouslyDeployedVersion > 0 { - spinner.Updatef("Performing chart rollback") + removeMsg := "if you need to remove the failed chart, use `zarf package remove`" - err = h.rollbackChart(h.chart.ReleaseName, previouslyDeployedVersion) - if err != nil { - return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts and unable to rollback: %w", h.retries, err) - } + // No prior releases means this was an initial install. + if previouslyDeployedVersion == 0 { + return nil, "", fmt.Errorf("unable to install chart after %d attempts: %w: %s", h.retries, err, removeMsg) + } - return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts", h.retries) + // Attempt to rollback on a failed upgrade. + spinner.Updatef("Performing chart rollback") + err = h.rollbackChart(h.chart.ReleaseName, previouslyDeployedVersion) + if err != nil { + return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts and unable to rollback: %w: %s", h.retries, err, removeMsg) } - return nil, "", fmt.Errorf("unable to install chart after %d attempts", h.retries) + return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts: %w: %s", h.retries, err, removeMsg) } // return any collected connect strings for zarf connect. diff --git a/src/pkg/packager/deploy.go b/src/pkg/packager/deploy.go index ef4495be0a..181a52cb42 100644 --- a/src/pkg/packager/deploy.go +++ b/src/pkg/packager/deploy.go @@ -340,7 +340,7 @@ func (p *Packager) deployComponent(component types.ZarfComponent, noImgChecksum if hasCharts || hasManifests { if charts, err = p.installChartAndManifests(componentPath, component); err != nil { - return charts, fmt.Errorf("unable to install helm chart(s): %w", err) + return charts, err } } From 46c5832f75a4b1c5db30369874746d1bd76b4d79 Mon Sep 17 00:00:00 2001 From: Lucas Rodriguez Date: Wed, 24 Apr 2024 18:10:48 -0500 Subject: [PATCH 3/9] Remove redundant error return and fix retries test --- src/internal/packager/helm/chart.go | 2 -- src/test/e2e/36_custom_retries_test.go | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/internal/packager/helm/chart.go b/src/internal/packager/helm/chart.go index 3a9413fa22..425dd50bcf 100644 --- a/src/internal/packager/helm/chart.go +++ b/src/internal/packager/helm/chart.go @@ -115,8 +115,6 @@ func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { if err != nil { return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts and unable to rollback: %w: %s", h.retries, err, removeMsg) } - - return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts: %w: %s", h.retries, err, removeMsg) } // return any collected connect strings for zarf connect. diff --git a/src/test/e2e/36_custom_retries_test.go b/src/test/e2e/36_custom_retries_test.go index e66a6ff435..70fc6ed9ae 100644 --- a/src/test/e2e/36_custom_retries_test.go +++ b/src/test/e2e/36_custom_retries_test.go @@ -29,5 +29,5 @@ func TestRetries(t *testing.T) { require.Error(t, err, stdOut, stdErr) require.Contains(t, stdErr, "Retrying (1/2) in 5s:") require.Contains(t, stdErr, "Retrying (2/2) in 10s:") - require.Contains(t, stdErr, "unable to install chart after 2 attempts") + require.Contains(t, e2e.StripMessageFormatting(stdErr), "unable to install chart after 2 attempts") } From 3fa858e3d35aff490e69ae9e35291d06f44ff136 Mon Sep 17 00:00:00 2001 From: Lucas Rodriguez Date: Wed, 24 Apr 2024 18:30:49 -0500 Subject: [PATCH 4/9] just kidding the other way was better --- src/internal/packager/helm/chart.go | 6 ++++-- src/test/e2e/36_custom_retries_test.go | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/internal/packager/helm/chart.go b/src/internal/packager/helm/chart.go index 425dd50bcf..9252bf40d4 100644 --- a/src/internal/packager/helm/chart.go +++ b/src/internal/packager/helm/chart.go @@ -106,15 +106,17 @@ func (h *Helm) InstallOrUpgradeChart() (types.ConnectStrings, string, error) { // No prior releases means this was an initial install. if previouslyDeployedVersion == 0 { - return nil, "", fmt.Errorf("unable to install chart after %d attempts: %w: %s", h.retries, err, removeMsg) + return nil, "", fmt.Errorf("unable to install chart after %d attempts: %s", h.retries, removeMsg) } // Attempt to rollback on a failed upgrade. spinner.Updatef("Performing chart rollback") err = h.rollbackChart(h.chart.ReleaseName, previouslyDeployedVersion) if err != nil { - return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts and unable to rollback: %w: %s", h.retries, err, removeMsg) + return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts and unable to rollback: %s", h.retries, removeMsg) } + + return nil, "", fmt.Errorf("unable to upgrade chart after %d attempts: %s", h.retries, removeMsg) } // return any collected connect strings for zarf connect. diff --git a/src/test/e2e/36_custom_retries_test.go b/src/test/e2e/36_custom_retries_test.go index 70fc6ed9ae..e33b752e20 100644 --- a/src/test/e2e/36_custom_retries_test.go +++ b/src/test/e2e/36_custom_retries_test.go @@ -30,4 +30,7 @@ func TestRetries(t *testing.T) { require.Contains(t, stdErr, "Retrying (1/2) in 5s:") require.Contains(t, stdErr, "Retrying (2/2) in 10s:") require.Contains(t, e2e.StripMessageFormatting(stdErr), "unable to install chart after 2 attempts") + + _, _, err = e2e.Zarf("package", "remove", "dos-games", "--confirm") + require.NoError(t, err) } From 8dd2536da2abfb4eb8e5b61afd959d0a157097fa Mon Sep 17 00:00:00 2001 From: Lucas Rodriguez Date: Thu, 25 Apr 2024 20:10:33 -0500 Subject: [PATCH 5/9] Add section to docs about helm usage and behavior --- site/src/content/docs/ref/deploy.mdx | 40 ++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/site/src/content/docs/ref/deploy.mdx b/site/src/content/docs/ref/deploy.mdx index f6b144c9bd..e2ead5ffb3 100644 --- a/site/src/content/docs/ref/deploy.mdx +++ b/site/src/content/docs/ref/deploy.mdx @@ -120,3 +120,43 @@ $ zarf connect [service name] You can also specify a package locally, or via oci such as `zarf package deploy oci://defenseunicorns/dos-games:1.0.0-$(uname -m) --key=https://zarf.dev/cosign.pub` ::: + +## Installing, Upgrading, and Rolling Back with Helm + +Zarf deploys resources in Kubernetes using [Helm's Go SDK](https://helm.sh/docs/topics/advanced/#go-sdk), and converts manifests into Helm charts for installation. + +If no existing Helm releases match a given chart in the cluster, Zarf executes a `helm install`. + +Should matching releases exist, a `helm upgrade` is performed. + +### Handling CustomResourceDefinitions (CRDs) + + - CRDs are _included_ during `helm install` to support Kubernetes Operator deployments + - CRDs are _excluded_ during `helm upgrade` due to [Helm's lack of support for upgrading CRDs](https://helm.sh/docs/chart_best_practices/custom_resource_definitions/#some-caveats-and-explanations) + +### Waiting for Resource Readiness + +By default, Zarf waits for all resources to deploy successfully during install, upgrade, and rollback operations. + +You can override this behavior during install and upgrade by setting the `noWait: true` key under the `charts` and `manifests` fields. + +### Timeout Settings + +The default timeout for Helm operations in Zarf is 15 minutes. + +Use the `--timeout` flag with `zarf init` and `zarf package deploy` to modify the timeout duration. + +### Retry Policy + +Zarf retries install and upgrade operations up to three times by default if an error occurs. + +Use the `--retries` flag with `zarf init` and `zarf package deploy` to change the number of retry attempts. + +### Rollback Process + +If attempts to install or upgrade a chart fail, Zarf tries to roll the chart back to its last successful release. + +Key features of Zarf's `helm rollback` include: + + - Deleting newly created resources if a rollback fails (`helm rollback --cleanup-on-fail`) + - Forcing updates by deleting and recreating resources as necessary (`helm rollback --force`) From 3a8e2d75cb36c012d7b81ef7240a28b6cca6f24e Mon Sep 17 00:00:00 2001 From: Lucas Rodriguez Date: Thu, 25 Apr 2024 20:13:25 -0500 Subject: [PATCH 6/9] Fix rollback docs --- site/src/content/docs/ref/deploy.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/src/content/docs/ref/deploy.mdx b/site/src/content/docs/ref/deploy.mdx index e2ead5ffb3..e8d7babd11 100644 --- a/site/src/content/docs/ref/deploy.mdx +++ b/site/src/content/docs/ref/deploy.mdx @@ -154,7 +154,7 @@ Use the `--retries` flag with `zarf init` and `zarf package deploy` to change th ### Rollback Process -If attempts to install or upgrade a chart fail, Zarf tries to roll the chart back to its last successful release. +If attempts to upgrade a chart fail, Zarf tries to roll the chart back to its last successful release. Key features of Zarf's `helm rollback` include: From 33c937ada539f3344d6e552d5c78cd7259886c7b Mon Sep 17 00:00:00 2001 From: Lucas Rodriguez Date: Thu, 25 Apr 2024 20:46:28 -0500 Subject: [PATCH 7/9] Fix retries test --- src/test/e2e/36_custom_retries_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/test/e2e/36_custom_retries_test.go b/src/test/e2e/36_custom_retries_test.go index e33b752e20..cc9a6b031c 100644 --- a/src/test/e2e/36_custom_retries_test.go +++ b/src/test/e2e/36_custom_retries_test.go @@ -27,8 +27,7 @@ func TestRetries(t *testing.T) { stdOut, stdErr, err = e2e.Zarf("package", "deploy", path.Join(tmpDir, pkgName), "--retries", "2", "--timeout", "3s", "--tmpdir", tmpDir, "--confirm") require.Error(t, err, stdOut, stdErr) - require.Contains(t, stdErr, "Retrying (1/2) in 5s:") - require.Contains(t, stdErr, "Retrying (2/2) in 10s:") + require.Contains(t, stdErr, "Retrying in 5") require.Contains(t, e2e.StripMessageFormatting(stdErr), "unable to install chart after 2 attempts") _, _, err = e2e.Zarf("package", "remove", "dos-games", "--confirm") From 9c33724e0301510f18676858bbe5ab9b31028e28 Mon Sep 17 00:00:00 2001 From: Lucas Rodriguez Date: Thu, 25 Apr 2024 20:48:11 -0500 Subject: [PATCH 8/9] Keep the s for sigh --- src/test/e2e/36_custom_retries_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/e2e/36_custom_retries_test.go b/src/test/e2e/36_custom_retries_test.go index cc9a6b031c..cbe5428a25 100644 --- a/src/test/e2e/36_custom_retries_test.go +++ b/src/test/e2e/36_custom_retries_test.go @@ -27,7 +27,7 @@ func TestRetries(t *testing.T) { stdOut, stdErr, err = e2e.Zarf("package", "deploy", path.Join(tmpDir, pkgName), "--retries", "2", "--timeout", "3s", "--tmpdir", tmpDir, "--confirm") require.Error(t, err, stdOut, stdErr) - require.Contains(t, stdErr, "Retrying in 5") + require.Contains(t, stdErr, "Retrying in 5s") require.Contains(t, e2e.StripMessageFormatting(stdErr), "unable to install chart after 2 attempts") _, _, err = e2e.Zarf("package", "remove", "dos-games", "--confirm") From bb754626d1815db76e38dd4fb4ec75d8e0341264 Mon Sep 17 00:00:00 2001 From: Lucas Rodriguez Date: Mon, 29 Apr 2024 13:36:18 -0500 Subject: [PATCH 9/9] Update site/src/content/docs/ref/deploy.mdx Co-authored-by: Austin Abro <37223396+AustinAbro321@users.noreply.github.com> --- site/src/content/docs/ref/deploy.mdx | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/site/src/content/docs/ref/deploy.mdx b/site/src/content/docs/ref/deploy.mdx index e8d7babd11..3b92ee1070 100644 --- a/site/src/content/docs/ref/deploy.mdx +++ b/site/src/content/docs/ref/deploy.mdx @@ -154,9 +154,7 @@ Use the `--retries` flag with `zarf init` and `zarf package deploy` to change th ### Rollback Process -If attempts to upgrade a chart fail, Zarf tries to roll the chart back to its last successful release. +If attempts to upgrade a chart fail, Zarf tries to roll the chart back to its last successful release. During this rollback process: -Key features of Zarf's `helm rollback` include: - - - Deleting newly created resources if a rollback fails (`helm rollback --cleanup-on-fail`) - - Forcing updates by deleting and recreating resources as necessary (`helm rollback --force`) + - Any resources created during the failed upgrade attempt are deleted (`helm rollback --cleanup-on-fail`) + - Resource updates are forced through delete and recreate if needed (`helm rollback --force`)