Skip to content

Commit

Permalink
Add ability to set LongPollGracePeriod for debug
Browse files Browse the repository at this point in the history
  • Loading branch information
Quinn-With-Two-Ns committed Feb 9, 2023
1 parent d2c9ab2 commit 1b4a3cd
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 2 deletions.
15 changes: 13 additions & 2 deletions internal/internal_worker_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,14 @@ const (
retryPollOperationMaxInterval = 10 * time.Second
retryPollResourceExhaustedInitialInterval = time.Second
retryPollResourceExhaustedMaxInterval = 10 * time.Second
retryLongPollGracePeriod = 2 * time.Minute
// How long the same poll task error can remain suppressed
lastPollTaskErrSuppressTime = 1 * time.Minute
)

var (
pollOperationRetryPolicy = createPollRetryPolicy()
pollResourceExhaustedRetryPolicy = createPollResourceExhaustedRetryPolicy()
retryLongPollGracePeriod = 2 * time.Minute
)

var errStop = errors.New("worker stopping")
Expand Down Expand Up @@ -194,6 +194,17 @@ type (
}
)

// SetRetryLongPollGracePeriod sets the amount of time a long poller retrys on
// fatal errors before it actually fails. For test use only,
// not safe to call with a running worker.
func SetRetryLongPollGracePeriod(period time.Duration) {
retryLongPollGracePeriod = period
}

func getRetryLongPollGracePeriod() time.Duration {
return retryLongPollGracePeriod
}

func createPollRetryPolicy() backoff.RetryPolicy {
policy := backoff.NewExponentialRetryPolicy(retryPollOperationInitialInterval)
policy.SetMaximumInterval(retryPollOperationMaxInterval)
Expand Down Expand Up @@ -335,7 +346,7 @@ func (bw *baseWorker) pollTask() {
if err != nil {
// We retry "non retriable" errors while long polling for a while, because some proxies return
// unexpected values causing unnecessary downtime.
if isNonRetriableError(err) && bw.retrier.GetElapsedTime() > retryLongPollGracePeriod {
if isNonRetriableError(err) && bw.retrier.GetElapsedTime() > getRetryLongPollGracePeriod() {
bw.logger.Error("Worker received non-retriable error. Shutting down.", tagError, err)
if bw.fatalErrCb != nil {
bw.fatalErrCb(err)
Expand Down
3 changes: 3 additions & 0 deletions test/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ import (
"go.temporal.io/sdk/client"
contribtally "go.temporal.io/sdk/contrib/tally"
"go.temporal.io/sdk/interceptor"
"go.temporal.io/sdk/internal"
"go.temporal.io/sdk/internal/common"
"go.temporal.io/sdk/internal/common/metrics"
"go.temporal.io/sdk/internal/interceptortest"
Expand Down Expand Up @@ -2276,6 +2277,8 @@ func (ts *IntegrationTestSuite) TestWorkerFatalErrorOnStart() {
}

func (ts *IntegrationTestSuite) testWorkerFatalError(useWorkerRun bool) {
// Allow the worker to fail faster so the test does not take 2 minutes.
internal.SetRetryLongPollGracePeriod(5 * time.Second)
// Make a new client that will fail a poll with a namespace not found
c, err := client.Dial(client.Options{
HostPort: ts.config.ServiceAddr,
Expand Down

0 comments on commit 1b4a3cd

Please sign in to comment.