Skip to content

Commit

Permalink
Merge pull request google#266 from alexmwu/retry
Browse files Browse the repository at this point in the history
launcher: Change retry policy to hourly and add jitter to refresh time
  • Loading branch information
alexmwu authored Nov 23, 2022
2 parents 39db2f4 + 066fa98 commit 8c21e8e
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 35 deletions.
84 changes: 49 additions & 35 deletions launcher/container_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"io"
"log"
"math/rand"
"net/url"
"os"
"path"
Expand Down Expand Up @@ -57,34 +58,15 @@ const (
snapshotID = "tee-snapshot"
)

/*
Values for token refresh and retries.
With a 60m token expiration, the refresher goroutine will refresh beginning at .9*60=54m.
Given the following default arguments, the retry sequence will be,
assuming we go over the MaxElapsedTime:
RetryInterval = 30s
RandomizationFactor = 0.5
Multiplier = 1.5
MaxInterval = 180s
MaxElapsedTime = 600s
Request # RetryInterval (seconds) Randomized Interval (seconds)
RetryInterval*[1-RandFactor, 1+RandFactor]
1 30 [15, 45]
2 60 [30, 90]
3 120 [60, 180]
4 180 (MaxInterval) [90, 270]
5 180 (MaxInterval) [90, 270]
reached MaxElapsedTime backoff.Stop
*/
const (
	// Retry timing values: the first retry interval, the cap on any single
	// retry interval, and the cap on the total time spent retrying.
	defaultInitialInterval = 30 * time.Second
	defaultMaxInterval     = 3 * time.Minute
	defaultMaxElapsedTime  = 10 * time.Minute

	// defaultRefreshMultiplier is the fraction of the time remaining until the
	// current token expires at which the refresher goroutine collects a new
	// token. defaultRefreshMultiplier+defaultRefreshJitter must be <1 so that
	// the refresh always happens before the token expires.
	defaultRefreshMultiplier = 0.8

	// defaultRefreshJitter is a random component applied additively to the
	// refresh multiplier. The refresher waits for a fraction of the remaining
	// token lifetime drawn from the range
	// [defaultRefreshMultiplier-defaultRefreshJitter, defaultRefreshMultiplier+defaultRefreshJitter).
	defaultRefreshJitter = 0.1
)

func fetchImpersonatedToken(ctx context.Context, serviceAccount string, audience string, opts ...option.ClientOption) ([]byte, error) {
Expand Down Expand Up @@ -373,7 +355,7 @@ func (r *ContainerRunner) refreshToken(ctx context.Context) (time.Duration, erro
}
r.logger.Println(string(claimsString))

return time.Duration(float64(time.Until(claims.ExpiresAt.Time)) * defaultRefreshMultiplier), nil
return getNextRefreshFromExpiration(time.Until(claims.ExpiresAt.Time), rand.Float64()), nil
}

// ctx must be a cancellable context.
Expand Down Expand Up @@ -427,15 +409,47 @@ func (r *ContainerRunner) fetchAndWriteTokenWithRetry(ctx context.Context,
return nil
}

// defaultRetryPolicy retries with:
// initial interval of 30s, multiplication factor of 1.5
// randomization factor of 0.5, max interval of 3m, and
// max elapsed time of 10m.
// getNextRefreshFromExpiration computes how long the token refresher goroutine
// should wait before its next run.
//
// expiration is the time remaining until the current token expires; callers
// are expected to have validated that it is positive (the token has not yet
// expired). random selects a point inside the jitter window; refreshToken
// passes rand.Float64(), i.e. a value in [0, 1).
//
// The result is expiration scaled by a factor in
// [defaultRefreshMultiplier-defaultRefreshJitter, defaultRefreshMultiplier+defaultRefreshJitter).
func getNextRefreshFromExpiration(expiration time.Duration, random float64) time.Duration {
	remaining := float64(expiration)

	// Half-width of the jitter window, in nanoseconds.
	halfWindow := defaultRefreshJitter * remaining
	// Midpoint of the window, then its lower edge.
	midpoint := defaultRefreshMultiplier * remaining
	lowerEdge := midpoint - halfWindow

	// Slide from the lower edge across the full window (2*halfWindow) by random.
	return time.Duration(lowerEdge + random*2*halfWindow)
}

/*
defaultRetryPolicy returns an exponential backoff retry policy.
Given the following arguments, the retry sequence will be:
RetryInterval = 60 sec
RandomizationFactor = 0.5
Multiplier = 2
MaxInterval = 3600 sec
MaxElapsedTime = 0 (never stops retrying)
Request # RetryInterval (seconds) Randomized Interval (seconds)
RetryInterval*[1-RandFactor, 1+RandFactor]
1 60 [30, 90]
2 120 [60, 180]
3 240 [120, 360]
4 480 [240, 720]
5 960 [480, 1440]
6 1920 [960, 2880]
7 3600 (MaxInterval) [1800, 5400]
8 3600 (MaxInterval) [1800, 5400]
...
*/
func defaultRetryPolicy() *backoff.ExponentialBackOff {
	// Start from the library's defaults and override the fields below.
	expBack := backoff.NewExponentialBackOff()
	// The first retry interval is one minute; each later interval is the
	// previous one times Multiplier, capped at MaxInterval.
	expBack.InitialInterval = time.Minute
	// Each interval is randomized within interval*[1-0.5, 1+0.5].
	expBack.RandomizationFactor = 0.5
	expBack.Multiplier = 2
	expBack.MaxInterval = time.Hour
	// Never stop retrying.
	expBack.MaxElapsedTime = 0
	return expBack
}

Expand Down
16 changes: 16 additions & 0 deletions launcher/container_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,3 +448,19 @@ func TestFetchImpersonatedToken(t *testing.T) {
t.Errorf("fetchImpersonatedToken did not return expected token: got %v, want %v", token, expectedToken)
}
}

// TestGetNextRefresh verifies that, for every sampled random value and every
// positive expiration, getNextRefreshFromExpiration returns a delay strictly
// shorter than the expiration itself.
func TestGetNextRefresh(t *testing.T) {
	// Samples from the half-open interval 0 <= random < 1.
	randomSamples := []float64{0, .1415926, .5, .75, .999999999}
	// Expirations are always >0; zero or a negative value would mean the
	// token has already expired.
	expirations := []time.Duration{1, 10, 100, 1000, 10000, 1000000}

	for _, sample := range randomSamples {
		for _, expiry := range expirations {
			refresh := getNextRefreshFromExpiration(expiry, sample)
			if refresh < expiry {
				continue
			}
			t.Errorf("getNextRefreshFromExpiration(%v, %v) = %v next refresh. expected %v (next refresh) < %v (expiration)",
				expiry, sample, refresh, refresh, expiry)
		}
	}
}

0 comments on commit 8c21e8e

Please sign in to comment.