Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vault: fix renewal time #5479

Merged
merged 3 commits into from
Apr 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@ FEATURES:

* vault: Add initial support for Vault namespaces [[GH-5520](https://github.com/hashicorp/nomad/pull/5520)]
* allocations: Add support for restarting allocations in-place [[GH-5502](https://github.com/hashicorp/nomad/pull/5502)]

IMPROVEMENTS:

* core: Add node name to output of `nomad node status` command in verbose mode [[GH-5224](https://github.com/hashicorp/nomad/pull/5224)]
* core: Add `-verbose` flag to `nomad status` wrapper command [[GH-5516](https://github.com/hashicorp/nomad/pull/5516)]

BUG FIXES:

* vault: Fix renewal time to be 1/2 lease duration with jitter [[GH-5479](https://github.com/hashicorp/nomad/issues/5479)]

## 0.9.0 (April 9, 2019)

__BACKWARDS INCOMPATIBILITIES:__
Expand Down
68 changes: 46 additions & 22 deletions client/vaultclient/vaultclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,28 +194,36 @@ func (c *vaultClient) isTracked(id string) bool {
return ok
}

// isRunning returns true if the client is running.
func (c *vaultClient) isRunning() bool {
c.lock.RLock()
defer c.lock.RUnlock()
return c.running
}

// Starts the renewal loop of vault client
func (c *vaultClient) Start() {
c.lock.Lock()
defer c.lock.Unlock()

if !c.config.IsEnabled() || c.running {
return
}

c.lock.Lock()
c.running = true
c.lock.Unlock()

go c.run()
}

// Stops the renewal loop of vault client
func (c *vaultClient) Stop() {
c.lock.Lock()
defer c.lock.Unlock()

if !c.config.IsEnabled() || !c.running {
return
}

c.lock.Lock()
defer c.lock.Unlock()

c.running = false
close(c.stopCh)
}
Expand All @@ -235,7 +243,7 @@ func (c *vaultClient) DeriveToken(alloc *structs.Allocation, taskNames []string)
if !c.config.IsEnabled() {
return nil, fmt.Errorf("vault client not enabled")
}
if !c.running {
if !c.isRunning() {
return nil, fmt.Errorf("vault client is not running")
}

Expand Down Expand Up @@ -421,21 +429,9 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error {
}
}

duration := leaseDuration / 2
switch {
case leaseDuration < 30:
// Don't bother about introducing randomness if the
// leaseDuration is too small.
default:
// Give a breathing space of 20 seconds
min := 10
max := leaseDuration - min
rand.Seed(time.Now().Unix())
duration = min + rand.Intn(max-min)
}

// Determine the next renewal time
next := time.Now().Add(time.Duration(duration) * time.Second)
renewalDuration := renewalTime(rand.Intn, leaseDuration)
next := time.Now().Add(renewalDuration)

fatal := false
if renewalErr != nil &&
Expand All @@ -445,7 +441,7 @@ func (c *vaultClient) renew(req *vaultClientRenewalRequest) error {
strings.Contains(renewalErr.Error(), "permission denied")) {
fatal = true
} else if renewalErr != nil {
c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "duration", duration)
c.logger.Debug("renewal error details", "req.increment", req.increment, "lease_duration", leaseDuration, "renewal_duration", renewalDuration)
c.logger.Error("error during renewal of lease or token failed due to a non-fatal error; retrying",
"error", renewalErr, "period", next)
}
Expand Down Expand Up @@ -517,7 +513,7 @@ func (c *vaultClient) run() {
}

var renewalCh <-chan time.Time
for c.config.IsEnabled() && c.running {
for c.config.IsEnabled() && c.isRunning() {
// Fetches the candidate for next renewal
renewalReq, renewalTime := c.nextRenewal()
if renewalTime.IsZero() {
Expand Down Expand Up @@ -728,3 +724,31 @@ func (h *vaultDataHeapImp) Pop() interface{} {
*h = old[0 : n-1]
return entry
}

// randIntn is the function in math/rand needed by renewalTime. A type is used
// to ease deterministic testing.
type randIntn func(int) int

// renewalTime returns when a token should be renewed given its leaseDuration
// and a randomizer to provide jitter.
//
// Leases < 1m will be not jitter.
func renewalTime(dice randIntn, leaseDuration int) time.Duration {
// Start trying to renew at half the lease duration to allow ample time
// for latency and retries.
renew := leaseDuration / 2

// Don't bother about introducing randomness if the
// leaseDuration is too small.
const cutoff = 30
if renew < cutoff {
return time.Duration(renew) * time.Second
}

// jitter is the amount +/- to vary the renewal time
const jitter = 10
min := renew - jitter
renew = min + dice(jitter*2)

return time.Duration(renew) * time.Second
}
56 changes: 52 additions & 4 deletions client/vaultclient/vaultclient_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/hashicorp/nomad/testutil"
vaultapi "github.com/hashicorp/vault/api"
vaultconsts "github.com/hashicorp/vault/helper/consts"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -76,8 +77,11 @@ func TestVaultClient_TokenRenewals(t *testing.T) {
}(errCh)
}

if c.heap.Length() != num {
t.Fatalf("bad: heap length: expected: %d, actual: %d", num, c.heap.Length())
c.lock.Lock()
length := c.heap.Length()
c.lock.Unlock()
if length != num {
t.Fatalf("bad: heap length: expected: %d, actual: %d", num, length)
}

time.Sleep(time.Duration(testutil.TestMultiplier()) * time.Second)
Expand All @@ -88,8 +92,11 @@ func TestVaultClient_TokenRenewals(t *testing.T) {
}
}

if c.heap.Length() != 0 {
t.Fatalf("bad: heap length: expected: 0, actual: %d", c.heap.Length())
c.lock.Lock()
length = c.heap.Length()
c.lock.Unlock()
if length != 0 {
t.Fatalf("bad: heap length: expected: 0, actual: %d", length)
}
}

Expand Down Expand Up @@ -300,3 +307,44 @@ func TestVaultClient_RenewNonexistentLease(t *testing.T) {
t.Fatalf("expected \"%s\" or \"%s\" in error message, got \"%v\"", "lease not found", "lease is not renewable", err.Error())
}
}

// TestVaultClient_RenewalTime_Long asserts that for leases over 1m the renewal
// time is jittered.
func TestVaultClient_RenewalTime_Long(t *testing.T) {
t.Parallel()

// highRoller is a randIntn func that always returns the max value
highRoller := func(n int) int {
return n - 1
}

// lowRoller is a randIntn func that always returns the min value (0)
lowRoller := func(int) int {
return 0
}

assert.Equal(t, 39*time.Second, renewalTime(highRoller, 60))
assert.Equal(t, 20*time.Second, renewalTime(lowRoller, 60))

assert.Equal(t, 309*time.Second, renewalTime(highRoller, 600))
assert.Equal(t, 290*time.Second, renewalTime(lowRoller, 600))

const days3 = 60 * 60 * 24 * 3
assert.Equal(t, (days3/2+9)*time.Second, renewalTime(highRoller, days3))
assert.Equal(t, (days3/2-10)*time.Second, renewalTime(lowRoller, days3))
}

// TestVaultClient_RenewalTime_Short asserts that for leases under 1m the renewal
// time is lease/2.
func TestVaultClient_RenewalTime_Short(t *testing.T) {
t.Parallel()

dice := func(int) int {
require.Fail(t, "dice should not have been called")
panic("unreachable")
}

assert.Equal(t, 29*time.Second, renewalTime(dice, 58))
assert.Equal(t, 15*time.Second, renewalTime(dice, 30))
assert.Equal(t, 1*time.Second, renewalTime(dice, 2))
}