diff --git a/CHANGELOG.md b/CHANGELOG.md index 7966114fa051..6445fb9a59fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,12 +4,16 @@ FEATURES: * vault: Add initial support for Vault namespaces [[GH-5520](https://github.com/hashicorp/nomad/pull/5520)] * allocations: Add support for restarting allocations in-place [[GH-5502](https://github.com/hashicorp/nomad/pull/5502)] - + IMPROVEMENTS: * core: Add node name to output of `nomad node status` command in verbose mode [[GH-5224](https://github.com/hashicorp/nomad/pull/5224)] * core: Add `-verbose` flag to `nomad status` wrapper command [[GH-5516](https://github.com/hashicorp/nomad/pull/5516)] +BUG FIXES: + + * vault: Fix renewal time to be 1/2 lease duration with jitter [[GH-5479](https://github.com/hashicorp/nomad/issues/5479)] + ## 0.9.0 (April 9, 2019) __BACKWARDS INCOMPATIBILITIES:__ diff --git a/client/vaultclient/vaultclient.go b/client/vaultclient/vaultclient.go index 5bebbd883e03..5e47358d3af2 100644 --- a/client/vaultclient/vaultclient.go +++ b/client/vaultclient/vaultclient.go @@ -194,28 +194,36 @@ func (c *vaultClient) isTracked(id string) bool { return ok } +// isRunning returns true if the client is running. +func (c *vaultClient) isRunning() bool { + c.lock.RLock() + defer c.lock.RUnlock() + return c.running +} + // Starts the renewal loop of vault client func (c *vaultClient) Start() { + c.lock.Lock() + defer c.lock.Unlock() + if !c.config.IsEnabled() || c.running { return } - c.lock.Lock() c.running = true - c.lock.Unlock() go c.run() } // Stops the renewal loop of vault client func (c *vaultClient) Stop() { + c.lock.Lock() + defer c.lock.Unlock() + if !c.config.IsEnabled() || !c.running { return } - c.lock.Lock() - defer c.lock.Unlock() - c.running = false close(c.stopCh) } @@ -235,7 +243,7 @@ func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string) if !c.config.IsEnabled() { return nil, fmt.Errorf("vault client not enabled") } - if !c.running { + if !c.isRunning() { return nil, fmt.Errorf("vault client is not running") } @@ -421,21 +429,9 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { } } - duration := leaseDuration / 2 - switch { - case leaseDuration < 30: - // Don't bother about introducing randomness if the - // leaseDuration is too small. - default: - // Give a breathing space of 20 seconds - min := 10 - max := leaseDuration - min - rand.Seed(time.Now().Unix()) - duration = min + rand.Intn(max-min) - } - // Determine the next renewal time - next := time.Now().Add(time.Duration(duration) * time.Second) + renewalDuration := renewalTime(rand.Intn, leaseDuration) + next := time.Now().Add(renewalDuration) fatal := false if renewalErr != nil && @@ -445,7 +441,7 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error { strings.Contains(renewalErr.Error(), "permission denied")) { fatal = true } else if renewalErr != nil { - c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "duration", duration) + c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "renewal_duration", renewalDuration) c.logger.Error("error during renewal of lease or token failed due to a non-fatal error; retrying", "error", renewalErr, "period", next) } @@ -517,7 +513,7 @@ func (c *vaultClient) run() { } var renewalCh <-chan time.Time - for c.config.IsEnabled() && c.running { + for c.config.IsEnabled() && c.isRunning() { // Fetches the candidate for next renewal renewalReq, renewalTime := c.nextRenewal() if renewalTime.IsZero() { @@ -728,3 +724,31 @@ func (h *vaultDataHeapImp) Pop() interface{} { *h = old[0 : n-1] return entry } + +// randIntn is the function in math/rand needed by renewalTime. A type is used +// to ease deterministic testing. +type randIntn func(int) int + +// renewalTime returns when a token should be renewed given its leaseDuration +// and a randomizer to provide jitter. +// +// Leases < 1m will be not jitter. +func renewalTime(dice randIntn, leaseDuration int) time.Duration { + // Start trying to renew at half the lease duration to allow ample time + // for latency and retries. + renew := leaseDuration / 2 + + // Don't bother about introducing randomness if the + // leaseDuration is too small. + const cutoff = 30 + if renew < cutoff { + return time.Duration(renew) * time.Second + } + + // jitter is the amount +/- to vary the renewal time + const jitter = 10 + min := renew - jitter + renew = min + dice(jitter*2) + + return time.Duration(renew) * time.Second +} diff --git a/client/vaultclient/vaultclient_test.go b/client/vaultclient/vaultclient_test.go index f3f6a879c6d6..c62400a61b39 100644 --- a/client/vaultclient/vaultclient_test.go +++ b/client/vaultclient/vaultclient_test.go @@ -10,6 +10,7 @@ import ( "github.com/hashicorp/nomad/testutil" vaultapi "github.com/hashicorp/vault/api" vaultconsts "github.com/hashicorp/vault/helper/consts" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -76,8 +77,11 @@ func TestVaultClient_TokenRenewals(t *testing.T) { }(errCh) } - if c.heap.Length() != num { - t.Fatalf("bad: heap length: expected: %d, actual: %d", num, c.heap.Length()) + c.lock.Lock() + length := c.heap.Length() + c.lock.Unlock() + if length != num { + t.Fatalf("bad: heap length: expected: %d, actual: %d", num, length) } time.Sleep(time.Duration(testutil.TestMultiplier()) * time.Second) @@ -88,8 +92,11 @@ func TestVaultClient_TokenRenewals(t *testing.T) { } } - if c.heap.Length() != 0 { - t.Fatalf("bad: heap length: expected: 0, actual: %d", c.heap.Length()) + c.lock.Lock() + length = c.heap.Length() + c.lock.Unlock() + if length != 0 { + t.Fatalf("bad: heap length: expected: 0, actual: %d", length) } } @@ -300,3 +307,44 @@ func TestVaultClient_RenewNonexistentLease(t *testing.T) { t.Fatalf("expected \"%s\" or \"%s\" in error message, got \"%v\"", "lease not found", "lease is not renewable", err.Error()) } } + +// TestVaultClient_RenewalTime_Long asserts that for leases over 1m the renewal +// time is jittered. +func TestVaultClient_RenewalTime_Long(t *testing.T) { + t.Parallel() + + // highRoller is a randIntn func that always returns the max value + highRoller := func(n int) int { + return n - 1 + } + + // lowRoller is a randIntn func that always returns the min value (0) + lowRoller := func(int) int { + return 0 + } + + assert.Equal(t, 39*time.Second, renewalTime(highRoller, 60)) + assert.Equal(t, 20*time.Second, renewalTime(lowRoller, 60)) + + assert.Equal(t, 309*time.Second, renewalTime(highRoller, 600)) + assert.Equal(t, 290*time.Second, renewalTime(lowRoller, 600)) + + const days3 = 60 * 60 * 24 * 3 + assert.Equal(t, (days3/2+9)*time.Second, renewalTime(highRoller, days3)) + assert.Equal(t, (days3/2-10)*time.Second, renewalTime(lowRoller, days3)) +} + +// TestVaultClient_RenewalTime_Short asserts that for leases under 1m the renewal +// time is lease/2. +func TestVaultClient_RenewalTime_Short(t *testing.T) { + t.Parallel() + + dice := func(int) int { + require.Fail(t, "dice should not have been called") + panic("unreachable") + } + + assert.Equal(t, 29*time.Second, renewalTime(dice, 58)) + assert.Equal(t, 15*time.Second, renewalTime(dice, 30)) + assert.Equal(t, 1*time.Second, renewalTime(dice, 2)) +}