From 8ebc18abac1676064abc718875e65c5a8f21c53a Mon Sep 17 00:00:00 2001 From: kennytm Date: Mon, 25 May 2020 18:19:57 +0800 Subject: [PATCH 1/3] restore: Make all download error as retryable (#298) --- pkg/restore/backoff.go | 2 +- pkg/restore/backoff_test.go | 62 +++++++++++++++++++++++++++++-------- pkg/restore/import.go | 11 +++++-- pkg/utils/retry.go | 15 ++++++--- 4 files changed, 68 insertions(+), 22 deletions(-) diff --git a/pkg/restore/backoff.go b/pkg/restore/backoff.go index a84014c11..aa5010ab4 100644 --- a/pkg/restore/backoff.go +++ b/pkg/restore/backoff.go @@ -60,7 +60,7 @@ func newDownloadSSTBackoffer() utils.Backoffer { func (bo *importerBackoffer) NextBackoff(err error) time.Duration { switch errors.Cause(err) { - case errGrpc, errEpochNotMatch, errIngestFailed: + case errGrpc, errEpochNotMatch, errDownloadFailed, errIngestFailed: bo.delayTime = 2 * bo.delayTime bo.attempt-- case errRangeIsEmpty, errRewriteRuleNotFound: diff --git a/pkg/restore/backoff_test.go b/pkg/restore/backoff_test.go index a07c0839b..91e6114cd 100644 --- a/pkg/restore/backoff_test.go +++ b/pkg/restore/backoff_test.go @@ -8,6 +8,7 @@ import ( . "github.com/pingcap/check" "github.com/pingcap/tidb/util/testleak" + "go.uber.org/multierr" "github.com/pingcap/br/pkg/mock" "github.com/pingcap/br/pkg/utils" @@ -29,7 +30,26 @@ func (s *testBackofferSuite) TearDownSuite(c *C) { testleak.AfterTest(c)() } -func (s *testBackofferSuite) TestImporterBackoffer(c *C) { +func (s *testBackofferSuite) TestBackoffWithSuccess(c *C) { + var counter int + backoffer := restore.NewBackoffer(10, time.Nanosecond, time.Nanosecond) + err := utils.WithRetry(context.Background(), func() error { + defer func() { counter++ }() + switch counter { + case 0: + return errGRPC + case 1: + return errEpochNotMatch + case 2: + return nil + } + return nil + }, backoffer) + c.Assert(counter, Equals, 3) + c.Assert(err, IsNil) +} + +func (s *testBackofferSuite) TestBackoffWithFatalError(c *C) { var counter int err := utils.WithRetry(context.Background(), func() error { defer func() { counter++ }() @@ -39,23 +59,39 @@ func (s *testBackofferSuite) TestImporterBackoffer(c *C) { case 1: return errEpochNotMatch case 2: + return errDownloadFailed + case 3: return errRangeIsEmpty } return nil - }, newImportSSTBackoffer()) - c.Assert(counter, Equals, 3) - c.Assert(err, Equals, errRangeIsEmpty) - - counter = 0 - backoffer := importerBackoffer{ - attempt: 10, - delayTime: time.Nanosecond, - maxDelayTime: time.Nanosecond, - } - err = utils.WithRetry(context.Background(), func() error { + }, backoffer) + c.Assert(counter, Equals, 4) + c.Assert(multierr.Errors(err), DeepEquals, []error{ + errGRPC, + errEpochNotMatch, + errDownloadFailed, + errRangeIsEmpty, + }) +} + +func (s *testBackofferSuite) TestBackoffWithRetryableError(c *C) { + var counter int + backoffer := restore.NewBackoffer(10, time.Nanosecond, time.Nanosecond) + err := utils.WithRetry(context.Background(), func() error { defer func() { counter++ }() return errEpochNotMatch }, &backoffer) c.Assert(counter, Equals, 10) - c.Assert(err, Equals, errEpochNotMatch) + c.Assert(multierr.Errors(err), DeepEquals, []error{ + errEpochNotMatch, + errEpochNotMatch, + errEpochNotMatch, + errEpochNotMatch, + errEpochNotMatch, + errEpochNotMatch, + errEpochNotMatch, + errEpochNotMatch, + errEpochNotMatch, + errEpochNotMatch, + }) } diff --git a/pkg/restore/import.go b/pkg/restore/import.go index 58a168f06..6d87f0df1 100644 --- a/pkg/restore/import.go +++ b/pkg/restore/import.go @@ -16,6 +16,7 @@ import ( "github.com/pingcap/kvproto/pkg/kvrpcpb" "github.com/pingcap/log" "github.com/pingcap/pd/v3/pkg/codec" + "go.uber.org/multierr" "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/credentials" @@ -228,6 +229,7 @@ func (importer *FileImporter) Import( log.Debug("scan regions", zap.Stringer("file", file), zap.Int("count", len(regionInfos))) // Try to download and ingest the file in every region + regionLoop: for _, regionInfo := range regionInfos { info := regionInfo // Try to download file. @@ -242,9 +244,12 @@ func (importer *FileImporter) Import( return e }, newDownloadSSTBackoffer()) if errDownload != nil { - if errDownload == errRewriteRuleNotFound || errDownload == errRangeIsEmpty { - // Skip this region - continue + for _, e := range multierr.Errors(errDownload) { + switch e { + case errRewriteRuleNotFound, errRangeIsEmpty: + // Skip this region + continue regionLoop + } } log.Error("download file failed", zap.Stringer("file", file), diff --git a/pkg/utils/retry.go b/pkg/utils/retry.go index 1dbbcdad2..8b6461c76 100644 --- a/pkg/utils/retry.go +++ b/pkg/utils/retry.go @@ -5,6 +5,8 @@ package utils import ( "context" "time" + + "go.uber.org/multierr" ) // RetryableFunc presents a retryable opreation @@ -18,25 +20,28 @@ type Backoffer interface { Attempt() int } -// WithRetry retrys a given operation with a backoff policy +// WithRetry retries a given operation with a backoff policy. +// +// Returns nil if `retryableFunc` succeeded at least once. Otherwise, returns a +// multierr containing all errors encountered. func WithRetry( ctx context.Context, retryableFunc RetryableFunc, backoffer Backoffer, ) error { - var lastErr error + var allErrors error for backoffer.Attempt() > 0 { err := retryableFunc() if err != nil { - lastErr = err + allErrors = multierr.Append(allErrors, err) select { case <-ctx.Done(): - return lastErr + return allErrors case <-time.After(backoffer.NextBackoff(err)): } } else { return nil } } - return lastErr + return allErrors } From 57352bb3239428e7462d36ee04dea2b88b6f9163 Mon Sep 17 00:00:00 2001 From: kennytm Date: Wed, 27 May 2020 16:42:24 +0800 Subject: [PATCH 2/3] restore: fix build break --- pkg/restore/backoff_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/restore/backoff_test.go b/pkg/restore/backoff_test.go index 91e6114cd..ff3d42715 100644 --- a/pkg/restore/backoff_test.go +++ b/pkg/restore/backoff_test.go @@ -32,7 +32,7 @@ func (s *testBackofferSuite) TearDownSuite(c *C) { func (s *testBackofferSuite) TestBackoffWithSuccess(c *C) { var counter int - backoffer := restore.NewBackoffer(10, time.Nanosecond, time.Nanosecond) + backoffer := &newImportSSTBackoffer{attemp: 10, delayTime: time.Nanosecond, maxDelayTime: time.Nanosecond} err := utils.WithRetry(context.Background(), func() error { defer func() { counter++ }() switch counter { @@ -51,6 +51,7 @@ func (s *testBackofferSuite) TestBackoffWithSuccess(c *C) { func (s *testBackofferSuite) TestBackoffWithFatalError(c *C) { var counter int + backoffer := &newImportSSTBackoffer{attemp: 10, delayTime: time.Nanosecond, maxDelayTime: time.Nanosecond} err := utils.WithRetry(context.Background(), func() error { defer func() { counter++ }() switch counter { @@ -76,7 +77,7 @@ func (s *testBackofferSuite) TestBackoffWithFatalError(c *C) { func (s *testBackofferSuite) TestBackoffWithRetryableError(c *C) { var counter int - backoffer := restore.NewBackoffer(10, time.Nanosecond, time.Nanosecond) + backoffer := &newImportSSTBackoffer{attemp: 10, delayTime: time.Nanosecond, maxDelayTime: time.Nanosecond} err := utils.WithRetry(context.Background(), func() error { defer func() { counter++ }() return errEpochNotMatch From cc3cae703d645c099490e7ad5e49f234426a6a3d Mon Sep 17 00:00:00 2001 From: kennytm Date: Thu, 28 May 2020 00:31:19 +0800 Subject: [PATCH 3/3] restore: fix build break --- pkg/restore/backoff_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/restore/backoff_test.go b/pkg/restore/backoff_test.go index ff3d42715..379324028 100644 --- a/pkg/restore/backoff_test.go +++ b/pkg/restore/backoff_test.go @@ -32,12 +32,12 @@ func (s *testBackofferSuite) TearDownSuite(c *C) { func (s *testBackofferSuite) TestBackoffWithSuccess(c *C) { var counter int - backoffer := &newImportSSTBackoffer{attemp: 10, delayTime: time.Nanosecond, maxDelayTime: time.Nanosecond} + backoffer := &importerBackoffer{attempt: 10, delayTime: time.Nanosecond, maxDelayTime: time.Nanosecond} err := utils.WithRetry(context.Background(), func() error { defer func() { counter++ }() switch counter { case 0: - return errGRPC + return errGrpc case 1: return errEpochNotMatch case 2: @@ -51,7 +51,7 @@ func (s *testBackofferSuite) TestBackoffWithSuccess(c *C) { func (s *testBackofferSuite) TestBackoffWithFatalError(c *C) { var counter int - backoffer := &newImportSSTBackoffer{attemp: 10, delayTime: time.Nanosecond, maxDelayTime: time.Nanosecond} + backoffer := &importerBackoffer{attempt: 10, delayTime: time.Nanosecond, maxDelayTime: time.Nanosecond} err := utils.WithRetry(context.Background(), func() error { defer func() { counter++ }() switch counter { @@ -68,7 +68,7 @@ func (s *testBackofferSuite) TestBackoffWithFatalError(c *C) { }, backoffer) c.Assert(counter, Equals, 4) c.Assert(multierr.Errors(err), DeepEquals, []error{ - errGRPC, + errGrpc, errEpochNotMatch, errDownloadFailed, errRangeIsEmpty, @@ -77,11 +77,11 @@ func (s *testBackofferSuite) TestBackoffWithFatalError(c *C) { func (s *testBackofferSuite) TestBackoffWithRetryableError(c *C) { var counter int - backoffer := &newImportSSTBackoffer{attemp: 10, delayTime: time.Nanosecond, maxDelayTime: time.Nanosecond} + backoffer := &importerBackoffer{attempt: 10, delayTime: time.Nanosecond, maxDelayTime: time.Nanosecond} err := utils.WithRetry(context.Background(), func() error { defer func() { counter++ }() return errEpochNotMatch - }, &backoffer) + }, backoffer) c.Assert(counter, Equals, 10) c.Assert(multierr.Errors(err), DeepEquals, []error{ errEpochNotMatch,