Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

launcher: Change retry policy to hourly and add jitter to refresh time #266

Merged
merged 2 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 49 additions & 35 deletions launcher/container_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"io"
"log"
"math/rand"
"net/url"
"os"
"path"
Expand Down Expand Up @@ -57,34 +58,15 @@ const (
snapshotID = "tee-snapshot"
)

/*
Values for token refresh and retries.

With a 60m token expiration, the refresher goroutine will refresh beginning at .9*60=54m.

Given the following default arguments, the retry sequence will be,
assuming we go over the MaxElapsedTime:

RetryInterval = 30s
RandomizationFactor = 0.5
Multiplier = 1.5
MaxInterval = 180s
MaxElapsedTime = 600s

Request # RetryInterval (seconds) Randomized Interval (seconds)
RetryInterval*[1-RandFactor, 1+RandFactor]
1 30 [15, 45]
2 60 [30, 90]
3 120 [60, 180]
4 180 (MaxInterval) [90, 270]
5 180 (MaxInterval) [90, 270]
reached MaxElapsedTime backoff.Stop
*/
const (
	// Backoff timing values: first retry interval, cap on a single retry
	// interval, and cap on the total time spent retrying.
	defaultInitialInterval = 30 * time.Second
	defaultMaxInterval     = 3 * time.Minute
	defaultMaxElapsedTime  = 10 * time.Minute

	// defaultRefreshMultiplier is the fraction of the current token's
	// remaining lifetime around which the refresher goroutine collects a new
	// token. It is declared exactly once: a second declaration of the same
	// name in this block does not compile.
	// defaultRefreshMultiplier+defaultRefreshJitter must stay <1 so that the
	// refresh always happens before the token expires.
	defaultRefreshMultiplier = 0.8
	// defaultRefreshJitter is a random component applied additively to the
	// refresh multiplier. The refresher waits for a fraction of the remaining
	// lifetime in the range
	// [defaultRefreshMultiplier-defaultRefreshJitter, defaultRefreshMultiplier+defaultRefreshJitter].
	defaultRefreshJitter = 0.1
)

func fetchImpersonatedToken(ctx context.Context, serviceAccount string, audience string, opts ...option.ClientOption) ([]byte, error) {
Expand Down Expand Up @@ -373,7 +355,7 @@ func (r *ContainerRunner) refreshToken(ctx context.Context) (time.Duration, erro
}
r.logger.Println(string(claimsString))

return time.Duration(float64(time.Until(claims.ExpiresAt.Time)) * defaultRefreshMultiplier), nil
return getNextRefreshFromExpiration(time.Until(claims.ExpiresAt.Time), rand.Float64()), nil
}

// ctx must be a cancellable context.
Expand Down Expand Up @@ -427,15 +409,47 @@ func (r *ContainerRunner) fetchAndWriteTokenWithRetry(ctx context.Context,
return nil
}

// defaultRetryPolicy retries with:
// initial interval of 30s, multiplication factor of 1.5
// randomization factor of 0.5, max interval of 3m, and
// max elapsed time of 10m.
// getNextRefreshFromExpiration computes how long the token refresher goroutine
// should wait before its next run, given the time remaining until the current
// token expires.
//
// The wait is a fraction of expiration that starts at
// defaultRefreshMultiplier-defaultRefreshJitter and grows linearly with random
// across a window 2*defaultRefreshJitter wide; random is expected to satisfy
// 0 <= random < 1. Callers must validate beforehand that expiration is
// positive, i.e. that the token has not already expired.
func getNextRefreshFromExpiration(expiration time.Duration, random float64) time.Duration {
	remaining := float64(expiration)
	jitter := defaultRefreshJitter * remaining
	midpoint := defaultRefreshMultiplier * remaining
	// Shortest possible wait: the midpoint minus the jitter half-width.
	lowerBound := midpoint - jitter
	// random sweeps the whole jitter window above the lower bound.
	return time.Duration(lowerBound + random*2*jitter)
}

/*
defaultRetryPolicy retries as follows:

Given the following arguments, the retry sequence will be:

RetryInterval = 60 sec
RandomizationFactor = 0.5
Multiplier = 2
MaxInterval = 3600 sec
MaxElapsedTime = 0 (never stops retrying)

Request # RetryInterval (seconds) Randomized Interval (seconds)
RetryInterval*[1-RandFactor, 1+RandFactor]
1 60 [30, 90]
2 120 [60, 180]
3 240 [120, 360]
4 480 [240, 720]
5 960 [480, 1440]
6 1920 [960, 2880]
7 3600 (MaxInterval) [1800, 5400]
8 3600 (MaxInterval) [1800, 5400]
...
*/
func defaultRetryPolicy() *backoff.ExponentialBackOff {
	expBack := backoff.NewExponentialBackOff()
	// Each field is set exactly once. Earlier assignments of InitialInterval,
	// MaxInterval and MaxElapsedTime were immediately overwritten and have
	// been dropped; the returned policy is unchanged.

	// Start at one minute and double the interval after every attempt.
	expBack.InitialInterval = time.Minute
	expBack.Multiplier = 2
	// Each wait is drawn from RetryInterval*[1-0.5, 1+0.5].
	expBack.RandomizationFactor = 0.5
	// The un-randomized interval stops growing at one hour.
	expBack.MaxInterval = time.Hour
	// Never stop retrying.
	expBack.MaxElapsedTime = 0
	// NOTE(review): presumably the caller (or backoff.Retry) resets the
	// policy before first use so InitialInterval takes effect — confirm.
	return expBack
}

Expand Down
16 changes: 16 additions & 0 deletions launcher/container_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,3 +448,19 @@ func TestFetchImpersonatedToken(t *testing.T) {
t.Errorf("fetchImpersonatedToken did not return expected token: got %v, want %v", token, expectedToken)
}
}

// TestGetNextRefresh checks that the computed refresh wait always lands
// strictly before the token's expiration, across a spread of random draws and
// expiration lengths.
func TestGetNextRefresh(t *testing.T) {
	// Random draws satisfy 0 <= random < 1.
	randoms := []float64{0, .1415926, .5, .75, .999999999}
	// Expirations are strictly positive: zero or negative would mean the
	// token has already expired.
	expirations := []time.Duration{1, 10, 100, 1000, 10000, 1000000}

	for _, random := range randoms {
		for _, expiration := range expirations {
			got := getNextRefreshFromExpiration(expiration, random)
			if got < expiration {
				continue
			}
			t.Errorf("getNextRefreshFromExpiration(%v, %v) = %v next refresh. expected %v (next refresh) < %v (expiration)",
				expiration, random, got, got, expiration)
		}
	}
}