Skip to content

Commit

Permalink
add backup finalizer patch retries
Browse files Browse the repository at this point in the history
Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com>
  • Loading branch information
kaovilai committed May 31, 2024
1 parent ba7cd07 commit e0f5b03
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 16 deletions.
22 changes: 12 additions & 10 deletions pkg/client/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ var twoMinBackoff = wait.Backoff{
Steps: 8,
Duration: 10 * time.Millisecond,
Jitter: 0.1,
Cap: 2 * time.Minute,
Cap: 2 * time.Minute,
}

// override backoff for testing
Expand All @@ -55,7 +55,7 @@ func TestOverrideBackoff() {
Steps: twoMinBackoff.Steps,
Duration: 1,
Jitter: 0.0,
Cap: 2 * time.Minute,
Cap: 2 * time.Minute,

Check warning on line 58 in pkg/client/retry.go

View check run for this annotation

Codecov / codecov/patch

pkg/client/retry.go#L52-L58

Added lines #L52 - L58 were not covered by tests
}
}

Expand All @@ -65,7 +65,7 @@ func TestResetBackoff() {
Steps: 8,
Duration: 10 * time.Millisecond,
Jitter: 0.1,
Cap: 2 * time.Minute,
Cap: 2 * time.Minute,

Check warning on line 68 in pkg/client/retry.go

View check run for this annotation

Codecov / codecov/patch

pkg/client/retry.go#L62-L68

Added lines #L62 - L68 were not covered by tests
}
}

Expand All @@ -75,10 +75,12 @@ func GetBackoffSteps() int {

// RetriesPhasePatchFunc accepts a patch function param, retrying when the provided retriable function returns true.
// We want retry to last up to 2 minutes: https://github.com/vmware-tanzu/velero/issues/7207#:~:text=A%20two%2Dminute%20retry%20is%20reasonable%20when%20there%20is%20API%20outage%20due%20to%20cert%20rotation.
func RetriesPhasePatchFunc(fn func() error, retriable func(error) bool, ) error {
return retry.OnError(twoMinBackoff, func(err error) bool {
return retriable(err)
}, func() error {
return fn()
})
}
func RetriesPhasePatchFunc(fn func() error, retriable func(error) bool) error {
return retry.OnError(twoMinBackoff, func(err error) bool { return retriable(err) }, fn)

Check warning on line 79 in pkg/client/retry.go

View check run for this annotation

Codecov / codecov/patch

pkg/client/retry.go#L78-L79

Added lines #L78 - L79 were not covered by tests
}

// RetriesPhasePatchFunc accepts a patch function param, retrying when the provided retriable function returns true.
// We want retry to last up to 2 minutes: https://github.com/vmware-tanzu/velero/issues/7207#:~:text=A%20two%2Dminute%20retry%20is%20reasonable%20when%20there%20is%20API%20outage%20due%20to%20cert%20rotation.
func RetriesPhasePatchFuncOnErrors(fn func() error) error {
return RetriesPhasePatchFunc(fn, func(err error) bool { return err != nil })

Check warning on line 85 in pkg/client/retry.go

View check run for this annotation

Codecov / codecov/patch

pkg/client/retry.go#L84-L85

Added lines #L84 - L85 were not covered by tests
}
6 changes: 5 additions & 1 deletion pkg/controller/backup_finalizer_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (

velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
pkgbackup "github.com/vmware-tanzu/velero/pkg/backup"
"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/itemoperation"
"github.com/vmware-tanzu/velero/pkg/kuberesource"
"github.com/vmware-tanzu/velero/pkg/metrics"
Expand Down Expand Up @@ -119,7 +120,10 @@ func (r *backupFinalizerReconciler) Reconcile(ctx context.Context, req ctrl.Requ
r.backupTracker.Delete(backup.Namespace, backup.Name)
}
// Always attempt to Patch the backup object and status after each reconciliation.
if err := r.client.Patch(ctx, backup, kbclient.MergeFrom(original)); err != nil {
//
// if this patch fails, there may not be another opportunity to update the backup object without external update event.
// so we retry
if err := client.RetriesPhasePatchFuncOnErrors(func() error { return r.client.Patch(ctx, backup, kbclient.MergeFrom(original)) }); err != nil {
log.WithError(err).Error("Error updating backup")
return
}
Expand Down
4 changes: 1 addition & 3 deletions pkg/controller/restore_finalizer_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
crclient "sigs.k8s.io/controller-runtime/pkg/client"

testingclock "k8s.io/utils/clock/testing"

"github.com/vmware-tanzu/velero/internal/hook"
"github.com/vmware-tanzu/velero/internal/volume"
velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
Expand Down Expand Up @@ -617,7 +615,7 @@ func Test_restoreFinalizerReconciler_finishProcessing(t *testing.T) {
r := &restoreFinalizerReconciler{
Client: client,
metrics: metrics.NewServerMetrics(),
clock: testingclock.NewFakeClock(time.Now()),
clock: testclocks.NewFakeClock(time.Now()),
}
restore := builder.ForRestore(velerov1api.DefaultNamespace, "restoreName").Result()
if err := r.finishProcessing(velerov1api.RestorePhaseInProgress, restore, restore); (err != nil) != tt.wantErr {
Expand Down
5 changes: 3 additions & 2 deletions pkg/util/kube/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ package kube
import (
"context"

veleroPkgClient "github.com/vmware-tanzu/velero/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client"

veleroPkgClient "github.com/vmware-tanzu/velero/pkg/client"
)

func PatchResource(original, updated client.Object, kbClient client.Client) error {
Expand All @@ -32,7 +33,7 @@ func PatchResource(original, updated client.Object, kbClient client.Client) erro
// PatchResourceWithRetries patches the original resource with the updated resource, retrying when the provided retriable function returns true.
// We want retry to last up to 2 minutes: https://github.com/vmware-tanzu/velero/issues/7207#:~:text=A%20two%2Dminute%20retry%20is%20reasonable%20when%20there%20is%20API%20outage%20due%20to%20cert%20rotation.
func PatchResourceWithRetries(original, updated client.Object, kbClient client.Client, retriable func(error) bool) error {
return veleroPkgClient.RetriesPhasePatchFunc(func () error {return PatchResource(original, updated, kbClient)}, retriable)
return veleroPkgClient.RetriesPhasePatchFunc(func() error { return PatchResource(original, updated, kbClient) }, retriable)

Check warning on line 36 in pkg/util/kube/client.go

View check run for this annotation

Codecov / codecov/patch

pkg/util/kube/client.go#L35-L36

Added lines #L35 - L36 were not covered by tests
}

// PatchResourceWithRetriesOnErrors patches the original resource with the updated resource, retrying when the operation returns an error.
Expand Down

0 comments on commit e0f5b03

Please sign in to comment.