Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add retry to helm installation and knative manifests apply #523

Merged
merged 1 commit into from
Feb 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# Changelog

## Unreleased
## v0.28.0

- Add a retry when deploying knative manifests
- Add arm64 artifacts
[#518](https://github.com/Kong/kubernetes-testing-framework/pull/518)
- Add a retry when deploying knative manifests and during kong chart installation
[#520](https://github.com/Kong/kubernetes-testing-framework/pull/520)
[#523](https://github.com/Kong/kubernetes-testing-framework/pull/523)

## v0.27.0

Expand Down
84 changes: 84 additions & 0 deletions internal/retry/command.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package retry

import (
"bytes"
"context"
"fmt"
"os/exec"
"time"

"github.com/avast/retry-go/v4"
"github.com/sirupsen/logrus"
)

// Retry policy shared by every command executed through this package.
const (
	// retryWait is the pause inserted between two consecutive attempts; it is
	// used together with a fixed (non-growing) delay type.
	retryWait = time.Second * 3
	// retryCount is the value handed to retry.Attempts for each command.
	retryCount = 10
)

// Doer is an operation that can be executed under a context.
//
// The implementation returned by Command runs an external process and
// re-runs it according to the package retry policy when it fails.
type Doer interface {
	// Do executes the operation and returns the error describing its
	// failure, or nil when it succeeded.
	Do(ctx context.Context) error

	// DoWithErrorHandling executes the operation and hands the outcome of
	// each attempt to handler, whose return value becomes the result of
	// that attempt.
	DoWithErrorHandling(ctx context.Context, handler ErrorFunc) error
}

// ErrorFunc inspects the outcome of a single command attempt.
//
// It receives the error returned by running the command followed by the
// buffers holding the captured standard output and standard error, in that
// order. The error it returns is used as the result of the attempt:
// returning nil marks the attempt as successful.
type ErrorFunc func(err error, stdout *bytes.Buffer, stderr *bytes.Buffer) error

// commandDoer describes an external command line; it backs the Doer returned
// by Command.
type commandDoer struct {
	// cmd is the name or path of the executable to start.
	cmd string
	// args are the arguments passed to the executable, excluding the
	// executable name itself.
	args []string
}

// Command returns a Doer that runs the executable cmd with the given args.
//
// Nothing is started here; the process is launched when one of the Doer
// methods is called. The args slice is stored as passed, without copying.
func Command(cmd string, args ...string) Doer {
	doer := commandDoer{cmd: cmd, args: args}
	return doer
}

// Do runs the command under the package retry policy.
//
// Each attempt launches a fresh process with its own output buffers. A failed
// attempt yields an error that wraps the one returned by Run and embeds the
// command, its arguments and everything the process wrote to stdout and
// stderr. A successful attempt yields nil.
func (c commandDoer) Do(ctx context.Context) error {
	attempt := func() error {
		cmd, stdout, stderr := c.createCmd(ctx)
		if runErr := cmd.Run(); runErr != nil {
			return fmt.Errorf("command %q with args [%v] failed STDOUT=(%s) STDERR=(%s): %w",
				c.cmd, c.args, stdout.String(), stderr.String(), runErr,
			)
		}
		return nil
	}

	return retry.Do(attempt, c.createOpts(ctx)...)
}

// DoWithErrorHandling runs the command under the package retry policy and
// lets errorFunc decide how each failed attempt is reported.
//
// Every attempt launches a fresh process with fresh stdout/stderr buffers.
// When the process fails, errorFunc receives the error returned by Run
// together with the captured output, and its return value becomes the result
// of the attempt: nil accepts the failure as success, a non-nil error is
// reported to retry.Do as a failed attempt.
//
// errorFunc is invoked only for failed attempts. Invoking it with a nil error
// made handlers that wrap their input (for example with fmt.Errorf and %w)
// turn a successful run into a failure, which re-ran a command that had
// already succeeded. A nil errorFunc leaves the error from Run unchanged.
func (c commandDoer) DoWithErrorHandling(ctx context.Context, errorFunc ErrorFunc) error {
	attempt := func() error {
		cmd, stdout, stderr := c.createCmd(ctx)
		err := cmd.Run()
		if err == nil {
			// Success: there is nothing for the handler to translate.
			return nil
		}
		if errorFunc == nil {
			// No handler supplied: surface the raw failure instead of panicking.
			return err
		}
		return errorFunc(err, stdout, stderr)
	}

	return retry.Do(attempt, c.createOpts(ctx)...)
}

// createCmd builds a new, not yet started process for the configured command
// line, bound to ctx, and wires its standard output and standard error to two
// freshly allocated buffers.
//
// It returns the command followed by the stdout buffer and the stderr buffer.
func (c commandDoer) createCmd(ctx context.Context) (*exec.Cmd, *bytes.Buffer, *bytes.Buffer) {
	var outBuf, errBuf bytes.Buffer

	command := exec.CommandContext(ctx, c.cmd, c.args...) //nolint:gosec
	command.Stdout, command.Stderr = &outBuf, &errBuf

	return command, &outBuf, &errBuf
}

// createOpts assembles the retry options applied to every command: the
// caller's context, a fixed delay of retryWait between attempts, retryCount
// attempts, and a hook that logs each failed attempt at error level together
// with the command and its arguments.
func (c commandDoer) createOpts(ctx context.Context) []retry.Option {
	// logFailure reports the error of the attempt that is about to be retried.
	logFailure := func(_ uint, err error) {
		entry := logrus.WithError(err).WithField("args", c.args)
		entry.Errorf("failed running %s", c.cmd)
	}

	opts := []retry.Option{
		retry.Context(ctx),
		retry.Delay(retryWait),
		retry.Attempts(retryCount),
		retry.DelayType(retry.FixedDelay),
		retry.OnRetry(logFailure),
	}
	return opts
}
59 changes: 9 additions & 50 deletions pkg/clusters/addons/knative/knative.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@ import (
"strings"
"time"

"github.com/avast/retry-go/v4"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"

"github.com/kong/kubernetes-testing-framework/internal/retry"
"github.com/kong/kubernetes-testing-framework/pkg/clusters"
"github.com/kong/kubernetes-testing-framework/pkg/utils/github"
)
Expand Down Expand Up @@ -111,58 +110,18 @@ func deployKnative(ctx context.Context, cluster clusters.Cluster, version string
}
defer os.Remove(kubeconfig.Name())

const (
retryCount = 10
retryWait = 3 * time.Second
)

// Sometimes accessing knative CRDs URL fails. Just in case this happens, retry.
err = retry.Do(func() error {
// apply the CRDs: we wait here as this avoids any subsecond timing issues
cmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "apply", "-f", fmt.Sprintf(knativeCRDs, version)) //nolint:gosec
stdout, stderr := new(bytes.Buffer), new(bytes.Buffer)
cmd.Stdout = stdout
cmd.Stderr = stderr
if err := cmd.Run(); err != nil {
return fmt.Errorf("knative CRD deployment failed STDOUT=(%s) STDERR=(%s): %w", stdout.String(), stderr.String(), err)
}
return nil
},
retry.Context(ctx),
retry.Attempts(retryCount),
retry.DelayType(retry.FixedDelay),
retry.Delay(retryWait),
retry.OnRetry(func(_ uint, err error) {
logrus.WithError(err).Error("failed applying knative CRDs")
}),
)
err = retry.
Command("kubectl", "--kubeconfig", kubeconfig.Name(), "apply", "-f", fmt.Sprintf(knativeCRDs, version)).
Do(ctx)
if err != nil {
return err
return fmt.Errorf("knative CRD deployment failed: %w", err)
}

// Sometimes accessing knative deployment URL fails. Just in case this happens, retry.
err = retry.Do(func() error {
// apply the core deployments, but don't wait because we're going to patch them
// the CRDs applied earlier may not become available immediately, so retry this several times if it fails
cmd := exec.CommandContext(ctx, "kubectl", "--kubeconfig", kubeconfig.Name(), "apply", "-f", fmt.Sprintf(knativeCore, version)) //nolint:gosec
stdout, stderr := new(bytes.Buffer), new(bytes.Buffer)
cmd.Stdout = stdout
cmd.Stderr = stderr
if err := cmd.Run(); err != nil {
return fmt.Errorf("knative core deployment failed STDOUT=(%s) STDERR=(%s): %w", stdout.String(), stderr.String(), err)
}
return nil
},
retry.Context(ctx),
retry.Attempts(retryCount),
retry.DelayType(retry.FixedDelay),
retry.Delay(retryWait),
retry.OnRetry(func(_ uint, err error) {
logrus.WithError(err).Error("failed applying knative core deployment")
}),
)
err = retry.
Command("kubectl", "--kubeconfig", kubeconfig.Name(), "apply", "-f", fmt.Sprintf(knativeCore, version)).
Do(ctx)
if err != nil {
return err
return fmt.Errorf("knative core deployment failed: %w", err)
}

// the deployment manifests for knative include some CPU and Memory limits which
Expand Down
40 changes: 16 additions & 24 deletions pkg/clusters/addons/kong/addon.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"k8s.io/kubectl/pkg/cmd/create"
"k8s.io/kubectl/pkg/scheme"

"github.com/kong/kubernetes-testing-framework/internal/retry"
"github.com/kong/kubernetes-testing-framework/internal/utils"
"github.com/kong/kubernetes-testing-framework/pkg/clusters"
"github.com/kong/kubernetes-testing-framework/pkg/clusters/addons/metallb"
Expand Down Expand Up @@ -197,21 +198,15 @@ func (a *Addon) Deploy(ctx context.Context, cluster clusters.Cluster) error {
defer os.Remove(kubeconfig.Name())

// ensure the repo exists
stderr := new(bytes.Buffer)
cmd := exec.CommandContext(ctx, "helm", "--kubeconfig", kubeconfig.Name(), "repo", "add", "--force-update", "kong", KongHelmRepoURL) //nolint:gosec
cmd.Stdout = io.Discard
cmd.Stderr = stderr
if err := cmd.Run(); err != nil {
return fmt.Errorf("%s: %w", stderr.String(), err)
err = retry.Command("helm", "--kubeconfig", kubeconfig.Name(), "repo", "add", "--force-update", "kong", KongHelmRepoURL).Do(ctx)
if err != nil {
return err
}

// ensure all repos are up to date
stderr = new(bytes.Buffer)
cmd = exec.CommandContext(ctx, "helm", "--kubeconfig", kubeconfig.Name(), "repo", "update") //nolint:gosec
cmd.Stdout = io.Discard
cmd.Stderr = stderr
if err := cmd.Run(); err != nil {
return fmt.Errorf("%s: %w", stderr.String(), err)
err = retry.Command("helm", "--kubeconfig", kubeconfig.Name(), "repo", "update").Do(ctx)
if err != nil {
return err
}

// create a namespace ahead of deployment so things like license secrets and other configurations
Expand Down Expand Up @@ -344,18 +339,15 @@ func (a *Addon) Deploy(ctx context.Context, cluster clusters.Cluster) error {
args = append(args, exposePortsDefault()...)
a.logger.Debugf("helm install arguments: %+v", args)

// run the helm install command
stderr = new(bytes.Buffer)
cmd = exec.CommandContext(ctx, "helm", args...)
cmd.Stdout = io.Discard
cmd.Stderr = stderr
if err := cmd.Run(); err != nil {
if !strings.Contains(stderr.String(), "cannot re-use") { // ignore if addon is already deployed
return fmt.Errorf("%s: %w", stderr.String(), err)
}
}

return nil
// Sometimes running helm install fails. Just in case this happens, retry.
return retry.
Command("helm", args...).
DoWithErrorHandling(ctx, func(err error, _, stderr *bytes.Buffer) error {
if !strings.Contains(stderr.String(), "cannot re-use") {
return fmt.Errorf("%s: %w", stderr, err)
}
return nil
})
}

func (a *Addon) Delete(ctx context.Context, cluster clusters.Cluster) error {
Expand Down
1 change: 0 additions & 1 deletion pkg/clusters/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/dynamic"
"sigs.k8s.io/controller-runtime/pkg/client"

gatewayv1alpha2 "sigs.k8s.io/gateway-api/apis/v1alpha2"
gatewayv1beta1 "sigs.k8s.io/gateway-api/apis/v1beta1"
)
Expand Down
1 change: 1 addition & 0 deletions test/integration/kongaddon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func TestKongAddonWithCustomImage(t *testing.T) {
}

for _, tc := range tests {
tc := tc
t.Run(tc.name(), func(t *testing.T) {
t.Parallel()
testKongAddonWithCustomImage(t, tc)
Expand Down