From df7b3e3eeb2c149d6a4d05cb2f82c1d7abed7d85 Mon Sep 17 00:00:00 2001
From: Tim Gross <tgross@hashicorp.com>
Date: Tue, 15 Aug 2023 14:38:18 -0400
Subject: [PATCH] fix multiple overflow errors in exponential backoff (#18200)

We use capped exponential backoff in several places in the code when handling
failures. The code we've copy-and-pasted all over has a check to see if the
backoff is greater than the limit, but this check happens after the bitshift and
we always increment the number of attempts. This causes an overflow with a
fairly small number of failures (ex. at one place I tested it occurs after only
24 iterations), resulting in a negative backoff which then never recovers. The
backoff becomes a tight loop consuming resources and/or DoS'ing a Nomad RPC
handler or an external API such as Vault. Note this doesn't occur in places
where we cap the number of iterations so the loop breaks (usually to return an
error), so long as the number of iterations is reasonable.

Introduce a helper with a check on the cap before the bitshift to avoid overflow in all
places this can occur.

Fixes: #18199
Co-authored-by: stswidwinski <stan.swidwinski@gmail.com>
---
 .changelog/18200.txt                          |  3 +
 client/allocrunner/taskrunner/stats_hook.go   | 13 ++--
 client/allocrunner/taskrunner/vault_hook.go   | 13 ++--
 client/devicemanager/instance.go              | 11 ++-
 .../pluginmanager/drivermanager/instance.go   | 16 ++---
 command/agent/consul/version_checker.go       | 14 ++--
 drivers/docker/driver.go                      | 26 +++----
 drivers/docker/stats.go                       | 10 +--
 helper/backoff.go                             | 31 ++++++++
 helper/backoff_test.go                        | 72 +++++++++++++++++++
 nomad/state/state_store.go                    | 11 ++-
 nomad/worker.go                               | 17 ++---
 12 files changed, 162 insertions(+), 75 deletions(-)
 create mode 100644 .changelog/18200.txt
 create mode 100644 helper/backoff.go
 create mode 100644 helper/backoff_test.go

diff --git a/.changelog/18200.txt b/.changelog/18200.txt
new file mode 100644
index 000000000000..378043a657a4
--- /dev/null
+++ b/.changelog/18200.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+core: Fixed a bug where exponential backoff could result in excessive CPU usage
+```
diff --git a/client/allocrunner/taskrunner/stats_hook.go b/client/allocrunner/taskrunner/stats_hook.go
index 1968e99fa4bd..dcd0972873db 100644
--- a/client/allocrunner/taskrunner/stats_hook.go
+++ b/client/allocrunner/taskrunner/stats_hook.go
@@ -11,6 +11,7 @@ import (
 	hclog "github.com/hashicorp/go-hclog"
 	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
 	cstructs "github.com/hashicorp/nomad/client/structs"
+	"github.com/hashicorp/nomad/helper"
 	"github.com/hashicorp/nomad/nomad/structs"
 	bstructs "github.com/hashicorp/nomad/plugins/base/structs"
 )
@@ -127,7 +128,9 @@ MAIN:
 //
 // It logs the errors with appropriate log levels; don't log returned error
 func (h *statsHook) callStatsWithRetry(ctx context.Context, handle interfaces.DriverStats) (<-chan *cstructs.TaskResourceUsage, error) {
-	var retry int
+	var retry uint64
+	var backoff time.Duration
+	limit := time.Second * 5
 
 MAIN:
 	if ctx.Err() != nil {
@@ -162,13 +165,7 @@ MAIN:
 		h.logger.Error("failed to start stats collection for task", "error", err)
 	}
 
-	limit := time.Second * 5
-	backoff := 1 << (2 * uint64(retry)) * time.Second
-	if backoff > limit || retry > 5 {
-		backoff = limit
-	}
-
-	// Increment retry counter
+	backoff = helper.Backoff(time.Second, limit, retry)
 	retry++
 
 	time.Sleep(backoff)
diff --git a/client/allocrunner/taskrunner/vault_hook.go b/client/allocrunner/taskrunner/vault_hook.go
index b979e055c0c6..71322347533f 100644
--- a/client/allocrunner/taskrunner/vault_hook.go
+++ b/client/allocrunner/taskrunner/vault_hook.go
@@ -18,6 +18,7 @@ import (
 	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
 	ti "github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces"
 	"github.com/hashicorp/nomad/client/vaultclient"
+	"github.com/hashicorp/nomad/helper"
 	"github.com/hashicorp/nomad/nomad/structs"
 )
 
@@ -311,7 +312,8 @@ OUTER:
 // deriveVaultToken derives the Vault token using exponential backoffs. It
 // returns the Vault token and whether the manager should exit.
 func (h *vaultHook) deriveVaultToken() (token string, exit bool) {
-	attempts := 0
+	var attempts uint64
+	var backoff time.Duration
 	for {
 		tokens, err := h.client.DeriveToken(h.alloc, []string{h.taskName})
 		if err == nil {
@@ -339,14 +341,11 @@ func (h *vaultHook) deriveVaultToken() (token string, exit bool) {
 		}
 
 		// Handle the retry case
-		backoff := (1 << (2 * uint64(attempts))) * vaultBackoffBaseline
-		if backoff > vaultBackoffLimit {
-			backoff = vaultBackoffLimit
-		}
-		h.logger.Error("failed to derive Vault token", "error", err, "recoverable", true, "backoff", backoff)
-
+		backoff = helper.Backoff(vaultBackoffBaseline, vaultBackoffLimit, attempts)
 		attempts++
 
+		h.logger.Error("failed to derive Vault token", "error", err, "recoverable", true, "backoff", backoff)
+
 		// Wait till retrying
 		select {
 		case <-h.ctx.Done():
diff --git a/client/devicemanager/instance.go b/client/devicemanager/instance.go
index 89553fed787e..deb29d2c3a5e 100644
--- a/client/devicemanager/instance.go
+++ b/client/devicemanager/instance.go
@@ -11,6 +11,7 @@ import (
 
 	log "github.com/hashicorp/go-hclog"
 	multierror "github.com/hashicorp/go-multierror"
+	"github.com/hashicorp/nomad/helper"
 	"github.com/hashicorp/nomad/helper/pluginutils/loader"
 	"github.com/hashicorp/nomad/helper/pluginutils/singleton"
 	"github.com/hashicorp/nomad/nomad/structs"
@@ -450,7 +451,8 @@ func (i *instanceManager) handleFingerprint(f *device.FingerprintResponse) error
 // collectStats is a long lived goroutine for collecting device statistics. It
 // handles errors by backing off exponentially and retrying.
 func (i *instanceManager) collectStats() {
-	attempt := 0
+	var attempt uint64
+	var backoff time.Duration
 
 START:
 	// Get a device plugin
@@ -495,10 +497,7 @@ START:
 			}
 
 			// Retry with an exponential backoff
-			backoff := (1 << (2 * uint64(attempt))) * statsBackoffBaseline
-			if backoff > statsBackoffLimit {
-				backoff = statsBackoffLimit
-			}
+			backoff = helper.Backoff(statsBackoffBaseline, statsBackoffLimit, attempt)
 			attempt++
 
 			i.logger.Error("stats returned an error", "error", err, "retry", backoff)
@@ -511,7 +510,7 @@ START:
 			}
 		}
 
-		// Reset the attempt since we got statistics
+		// Reset the attempts since we got statistics
 		attempt = 0
 
 		// Store the new stats
diff --git a/client/pluginmanager/drivermanager/instance.go b/client/pluginmanager/drivermanager/instance.go
index 8b7a5a85ec02..0ef3fab0405d 100644
--- a/client/pluginmanager/drivermanager/instance.go
+++ b/client/pluginmanager/drivermanager/instance.go
@@ -10,6 +10,7 @@ import (
 	"time"
 
 	log "github.com/hashicorp/go-hclog"
+	"github.com/hashicorp/nomad/helper"
 	"github.com/hashicorp/nomad/helper/pluginutils/loader"
 	"github.com/hashicorp/nomad/helper/pluginutils/singleton"
 	"github.com/hashicorp/nomad/nomad/structs"
@@ -290,7 +291,7 @@ func (i *instanceManager) fingerprint() {
 
 	// backoff and retry used if the RPC is closed by the other end
 	var backoff time.Duration
-	var retry int
+	var retry uint64
 	for {
 		if backoff > 0 {
 			select {
@@ -329,11 +330,7 @@ func (i *instanceManager) fingerprint() {
 				i.handleFingerprintError()
 
 				// Calculate the new backoff
-				backoff = (1 << (2 * uint64(retry))) * driverFPBackoffBaseline
-				if backoff > driverFPBackoffLimit {
-					backoff = driverFPBackoffLimit
-				}
-				// Increment retry counter
+				backoff = helper.Backoff(driverFPBackoffBaseline, driverFPBackoffLimit, retry)
 				retry++
 				continue
 			}
@@ -426,7 +423,7 @@ func (i *instanceManager) handleEvents() {
 	}
 
 	var backoff time.Duration
-	var retry int
+	var retry uint64
 	for {
 		if backoff > 0 {
 			select {
@@ -453,10 +450,7 @@ func (i *instanceManager) handleEvents() {
 				i.logger.Warn("failed to receive task events, retrying", "error", err, "retry", retry)
 
 				// Calculate the new backoff
-				backoff = (1 << (2 * uint64(retry))) * driverFPBackoffBaseline
-				if backoff > driverFPBackoffLimit {
-					backoff = driverFPBackoffLimit
-				}
+				backoff = helper.Backoff(driverFPBackoffBaseline, driverFPBackoffLimit, retry)
 				retry++
 				continue
 			}
diff --git a/command/agent/consul/version_checker.go b/command/agent/consul/version_checker.go
index 2e0fb637ee12..90501843ee32 100644
--- a/command/agent/consul/version_checker.go
+++ b/command/agent/consul/version_checker.go
@@ -23,11 +23,12 @@ func checkConsulTLSSkipVerify(ctx context.Context, logger log.Logger, client Age
 
 	defer close(done)
 
-	i := uint64(0)
-
 	timer, stop := helper.NewSafeTimer(limit)
 	defer stop()
 
+	var attempts uint64
+	var backoff time.Duration
+
 	for {
 		self, err := client.Self()
 		if err == nil {
@@ -40,13 +41,8 @@ func checkConsulTLSSkipVerify(ctx context.Context, logger log.Logger, client Age
 			return
 		}
 
-		backoff := (1 << (2 * i)) * baseline
-		if backoff > limit {
-			backoff = limit
-		} else {
-			i++
-		}
-
+		backoff = helper.Backoff(baseline, limit, attempts)
+		attempts++
 		timer.Reset(backoff)
 
 		select {
diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go
index 49624a53ac06..cb7e6ae2be04 100644
--- a/drivers/docker/driver.go
+++ b/drivers/docker/driver.go
@@ -483,7 +483,9 @@ type createContainerClient interface {
 func (d *Driver) createContainer(client createContainerClient, config docker.CreateContainerOptions,
 	image string) (*docker.Container, error) {
 	// Create a container
-	attempted := 0
+	var attempted uint64
+	var backoff time.Duration
+
 CREATE:
 	container, createErr := client.CreateContainer(config)
 	if createErr == nil {
@@ -533,16 +535,19 @@ CREATE:
 
 		if attempted < 5 {
 			attempted++
-			time.Sleep(nextBackoff(attempted))
+			backoff = helper.Backoff(50*time.Millisecond, time.Minute, attempted)
+			time.Sleep(backoff)
 			goto CREATE
 		}
+
 	} else if strings.Contains(strings.ToLower(createErr.Error()), "no such image") {
 		// There is still a very small chance this is possible even with the
 		// coordinator so retry.
 		return nil, nstructs.NewRecoverableError(createErr, true)
 	} else if isDockerTransientError(createErr) && attempted < 5 {
 		attempted++
-		time.Sleep(nextBackoff(attempted))
+		backoff = helper.Backoff(50*time.Millisecond, time.Minute, attempted)
+		time.Sleep(backoff)
 		goto CREATE
 	}
 
@@ -557,8 +562,9 @@ func (d *Driver) startContainer(c *docker.Container) error {
 		return err
 	}
 
-	// Start a container
-	attempted := 0
+	var attempted uint64
+	var backoff time.Duration
+
 START:
 	startErr := dockerClient.StartContainer(c.ID, c.HostConfig)
 	if startErr == nil || strings.Contains(startErr.Error(), "Container already running") {
@@ -570,7 +576,8 @@ START:
 	if isDockerTransientError(startErr) {
 		if attempted < 5 {
 			attempted++
-			time.Sleep(nextBackoff(attempted))
+			backoff = helper.Backoff(50*time.Millisecond, time.Minute, attempted)
+			time.Sleep(backoff)
 			goto START
 		}
 		return nstructs.NewRecoverableError(startErr, true)
@@ -579,13 +586,6 @@ START:
 	return recoverableErrTimeouts(startErr)
 }
 
-// nextBackoff returns appropriate docker backoff durations after attempted attempts.
-func nextBackoff(attempted int) time.Duration {
-	// attempts in 200ms, 800ms, 3.2s, 12.8s, 51.2s
-	// TODO: add randomization factor and extract to a helper
-	return 1 << (2 * uint64(attempted)) * 50 * time.Millisecond
-}
-
 // createImage creates a docker image either by pulling it from a registry or by
 // loading it from the file system
 func (d *Driver) createImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (string, error) {
diff --git a/drivers/docker/stats.go b/drivers/docker/stats.go
index 23097ee999a5..c3401a04dbe3 100644
--- a/drivers/docker/stats.go
+++ b/drivers/docker/stats.go
@@ -95,8 +95,8 @@ func (h *taskHandle) collectStats(ctx context.Context, destCh *usageSender, inte
 	defer destCh.close()
 
 	// backoff and retry used if the docker stats API returns an error
-	var backoff time.Duration = 0
-	var retry int
+	var backoff time.Duration
+	var retry uint64
 
 	// create an interval timer
 	timer, stop := helper.NewSafeTimer(backoff)
@@ -136,11 +136,7 @@ func (h *taskHandle) collectStats(ctx context.Context, destCh *usageSender, inte
 			h.logger.Debug("error collecting stats from container", "error", err)
 
 			// Calculate the new backoff
-			backoff = (1 << (2 * uint64(retry))) * statsCollectorBackoffBaseline
-			if backoff > statsCollectorBackoffLimit {
-				backoff = statsCollectorBackoffLimit
-			}
-			// Increment retry counter
+			backoff = helper.Backoff(statsCollectorBackoffBaseline, statsCollectorBackoffLimit, retry)
 			retry++
 			continue
 		}
diff --git a/helper/backoff.go b/helper/backoff.go
new file mode 100644
index 000000000000..e25e1f1c2b95
--- /dev/null
+++ b/helper/backoff.go
@@ -0,0 +1,31 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+package helper
+
+import (
+	"time"
+)
+
+func Backoff(backoffBase time.Duration, backoffLimit time.Duration, attempt uint64) time.Duration {
+	const MaxUint = ^uint64(0)
+	const MaxInt = int64(MaxUint >> 1)
+
+	// Ensure lack of non-positive backoffs since these make no sense
+	if backoffBase.Nanoseconds() <= 0 {
+		return max(backoffBase, 0*time.Second)
+	}
+
+	// Ensure that a large attempt will not cause an overflow
+	if attempt > 62 || MaxInt/backoffBase.Nanoseconds() < (1<<attempt) {
+		return backoffLimit
+	}
+
+	// Compute deadline and clamp it to backoffLimit
+	deadline := 1 << attempt * backoffBase
+	if deadline > backoffLimit {
+		deadline = backoffLimit
+	}
+
+	return deadline
+}
diff --git a/helper/backoff_test.go b/helper/backoff_test.go
new file mode 100644
index 000000000000..29af33f7b1ec
--- /dev/null
+++ b/helper/backoff_test.go
@@ -0,0 +1,72 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1
+
+package helper
+
+import (
+	"testing"
+	"time"
+
+	"github.com/shoenig/test/must"
+)
+
+func Test_Backoff(t *testing.T) {
+	const MaxUint = ^uint64(0)
+	const MaxInt = int64(MaxUint >> 1)
+
+	cases := []struct {
+		name           string
+		backoffBase    time.Duration
+		backoffLimit   time.Duration
+		attempt        uint64
+		expectedResult time.Duration
+	}{
+		{
+			name:           "backoff limit clamps for high base",
+			backoffBase:    time.Hour,
+			backoffLimit:   time.Minute,
+			attempt:        1,
+			expectedResult: time.Minute,
+		},
+		{
+			name:           "backoff limit clamps for boundary attempt",
+			backoffBase:    time.Hour,
+			backoffLimit:   time.Minute,
+			attempt:        63,
+			expectedResult: time.Minute,
+		},
+		{
+			name:           "small retry value",
+			backoffBase:    time.Minute,
+			backoffLimit:   time.Hour,
+			attempt:        0,
+			expectedResult: time.Minute,
+		},
+		{
+			name:           "first retry value",
+			backoffBase:    time.Minute,
+			backoffLimit:   time.Hour,
+			attempt:        1,
+			expectedResult: 2 * time.Minute,
+		},
+		{
+			name:           "fifth retry value",
+			backoffBase:    time.Minute,
+			backoffLimit:   time.Hour,
+			attempt:        5,
+			expectedResult: 32 * time.Minute,
+		},
+		{
+			name:           "sixth retry value",
+			backoffBase:    time.Minute,
+			backoffLimit:   time.Hour,
+			attempt:        6,
+			expectedResult: time.Hour,
+		},
+	}
+
+	for _, tc := range cases {
+		result := Backoff(tc.backoffBase, tc.backoffLimit, tc.attempt)
+		must.Eq(t, tc.expectedResult, result)
+	}
+}
diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go
index 43c1dd1a54cd..7dd7c595d793 100644
--- a/nomad/state/state_store.go
+++ b/nomad/state/state_store.go
@@ -17,6 +17,7 @@ import (
 	"github.com/hashicorp/go-memdb"
 	"github.com/hashicorp/go-multierror"
 	"github.com/hashicorp/go-set"
+	"github.com/hashicorp/nomad/helper"
 	"github.com/hashicorp/nomad/helper/pointer"
 	"github.com/hashicorp/nomad/lib/lang"
 	"github.com/hashicorp/nomad/nomad/stream"
@@ -254,8 +255,9 @@ func (s *StateStore) SnapshotMinIndex(ctx context.Context, index uint64) (*State
 
 	const backoffBase = 20 * time.Millisecond
 	const backoffLimit = 1 * time.Second
-	var retries uint
+	var retries uint64
 	var retryTimer *time.Timer
+	var deadline time.Duration
 
 	// XXX: Potential optimization is to set up a watch on the state
 	// store's index table and only unblock via a trigger rather than
@@ -273,16 +275,13 @@ func (s *StateStore) SnapshotMinIndex(ctx context.Context, index uint64) (*State
 		}
 
 		// Exponential back off
-		retries++
 		if retryTimer == nil {
 			// First retry, start at baseline
 			retryTimer = time.NewTimer(backoffBase)
 		} else {
 			// Subsequent retry, reset timer
-			deadline := 1 << (2 * retries) * backoffBase
-			if deadline > backoffLimit {
-				deadline = backoffLimit
-			}
+			deadline = helper.Backoff(backoffBase, backoffLimit, retries)
+			retries++
 			retryTimer.Reset(deadline)
 		}
 
diff --git a/nomad/worker.go b/nomad/worker.go
index 8caf93d70ee9..928b73a9a538 100644
--- a/nomad/worker.go
+++ b/nomad/worker.go
@@ -16,6 +16,7 @@ import (
 	log "github.com/hashicorp/go-hclog"
 	memdb "github.com/hashicorp/go-memdb"
 	"github.com/hashicorp/go-version"
+	"github.com/hashicorp/nomad/helper"
 	"github.com/hashicorp/nomad/helper/uuid"
 	"github.com/hashicorp/nomad/nomad/state"
 	"github.com/hashicorp/nomad/nomad/structs"
@@ -109,8 +110,9 @@ type Worker struct {
 
 	// failures is the count of errors encountered while dequeueing evaluations
 	// and is used to calculate backoff.
-	failures  uint
-	evalToken string
+	failures       uint64
+	failureBackoff time.Duration
+	evalToken      string
 
 	// snapshotIndex is the index of the snapshot in which the scheduler was
 	// first invoked. It is used to mark the SnapshotIndex of evaluations
@@ -133,6 +135,7 @@ func newWorker(ctx context.Context, srv *Server, args SchedulerWorkerPoolArgs) *
 		start:             time.Now(),
 		status:            WorkerStarting,
 		enabledSchedulers: make([]string, len(args.EnabledSchedulers)),
+		failureBackoff:    time.Duration(0),
 	}
 	copy(w.enabledSchedulers, args.EnabledSchedulers)
 
@@ -874,12 +877,10 @@ func (w *Worker) shouldResubmit(err error) bool {
 // backoff if the server or the worker is shutdown.
 func (w *Worker) backoffErr(base, limit time.Duration) bool {
 	w.setWorkloadStatus(WorkloadBackoff)
-	backoff := (1 << (2 * w.failures)) * base
-	if backoff > limit {
-		backoff = limit
-	} else {
-		w.failures++
-	}
+
+	backoff := helper.Backoff(base, limit, w.failures)
+	w.failures++
+
 	select {
 	case <-time.After(backoff):
 		return false