Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics: emit stats for vault token next_renewal & last_renewal #5222 #12435

Merged
merged 4 commits into from
Apr 6, 2022
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 47 additions & 2 deletions nomad/vault.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,14 @@ type VaultStats struct {

// TokenExpiry is the recorded expiry time of the current token
TokenExpiry time.Time

// LastRenewalTime is the time at which the token was last renewed;
// TimeFromLastRenewal is the duration elapsed since then.
LastRenewalTime time.Time
TimeFromLastRenewal time.Duration

// NextRenewalTime is the time the token will attempt to renew
NextRenewalTime time.Time
TimeToNextRenewal time.Duration
}

// PurgeVaultAccessorFn is called to remove VaultAccessors from the system. If
Expand Down Expand Up @@ -232,6 +240,9 @@ type vaultClient struct {
// currentExpiration is the time the current token lease expires
currentExpiration time.Time
currentExpirationLock sync.Mutex
lastRenewalTime time.Time
nextRenewalTime time.Time
nextRenewalTimeLock sync.Mutex

tomb *tomb.Tomb
logger log.Logger
Expand Down Expand Up @@ -557,6 +568,9 @@ func (v *vaultClient) renewalLoop() {
if err == nil {
// Attempt to renew the token at half the expiration time
durationUntilRenew := time.Until(currentExpiration) / 2
v.nextRenewalTimeLock.Lock()
v.nextRenewalTime = time.Now().Add(durationUntilRenew)
v.nextRenewalTimeLock.Unlock()

v.logger.Info("successfully renewed token", "next_renewal", durationUntilRenew)
authRenewTimer.Reset(durationUntilRenew)
Expand Down Expand Up @@ -587,6 +601,9 @@ func (v *vaultClient) renewalLoop() {
}

durationUntilRetry := time.Duration(backoff) * time.Second
v.nextRenewalTimeLock.Lock()
v.nextRenewalTime = time.Now().Add(durationUntilRetry)
v.nextRenewalTimeLock.Unlock()
v.logger.Info("backing off renewal", "retry", durationUntilRetry)

authRenewTimer.Reset(durationUntilRetry)
Expand Down Expand Up @@ -1391,15 +1408,27 @@ func (v *vaultClient) Stats() map[string]string {
stat := v.stats()

expireTimeStr := ""

if !stat.TokenExpiry.IsZero() {
expireTimeStr = stat.TokenExpiry.Format(time.RFC3339)
}

lastRenewTimeStr := ""
if !stat.LastRenewalTime.IsZero() {
lastRenewTimeStr = stat.LastRenewalTime.Format(time.RFC3339)
}

nextRenewTimeStr := ""
if !stat.NextRenewalTime.IsZero() {
nextRenewTimeStr = stat.NextRenewalTime.Format(time.RFC3339)
}


return map[string]string{
"tracked_for_revoked": strconv.Itoa(stat.TrackedForRevoke),
"token_ttl": stat.TokenTTL.Round(time.Second).String(),
"token_expire_time": expireTimeStr,
"last_renewal_time": lastRenewTimeStr,
"next_renewal_time": nextRenewTimeStr,
jazzyfresh marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand All @@ -1413,12 +1442,24 @@ func (v *vaultClient) stats() *VaultStats {

v.currentExpirationLock.Lock()
stats.TokenExpiry = v.currentExpiration
stats.LastRenewalTime = v.lastRenewalTime
v.currentExpirationLock.Unlock()

v.nextRenewalTimeLock.Lock()
stats.NextRenewalTime = v.nextRenewalTime
v.nextRenewalTimeLock.Unlock()

if !stats.TokenExpiry.IsZero() {
stats.TokenTTL = time.Until(stats.TokenExpiry)
}

if !stats.LastRenewalTime.IsZero() {
stats.TimeFromLastRenewal = time.Since(stats.LastRenewalTime)
}
if !stats.NextRenewalTime.IsZero() {
stats.TimeToNextRenewal = time.Until(stats.NextRenewalTime)
}

return stats
}

Expand All @@ -1435,6 +1476,8 @@ func (v *vaultClient) EmitStats(period time.Duration, stopCh <-chan struct{}) {
stats := v.stats()
metrics.SetGauge([]string{"nomad", "vault", "distributed_tokens_revoking"}, float32(stats.TrackedForRevoke))
metrics.SetGauge([]string{"nomad", "vault", "token_ttl"}, float32(stats.TokenTTL/time.Millisecond))
metrics.SetGauge([]string{"nomad", "vault", "last_renewal"}, float32(stats.TimeFromLastRenewal/time.Millisecond))
metrics.SetGauge([]string{"nomad", "vault", "next_renewal"}, float32(stats.TimeToNextRenewal/time.Millisecond))
jazzyfresh marked this conversation as resolved.
Show resolved Hide resolved

case <-stopCh:
return
Expand All @@ -1445,7 +1488,9 @@ func (v *vaultClient) EmitStats(period time.Duration, stopCh <-chan struct{}) {
// extendExpiration sets the current auth token expiration record to ttlSeconds seconds from now
func (v *vaultClient) extendExpiration(ttlSeconds int) {
v.currentExpirationLock.Lock()
v.currentExpiration = time.Now().Add(time.Duration(ttlSeconds) * time.Second)
now := time.Now()
v.currentExpiration = now.Add(time.Duration(ttlSeconds) * time.Second)
v.lastRenewalTime = now
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now extendExpiration is only called in parseToken, which runs once when the client starts, so the metric works, but it seems like it would be easy to call it somewhere else and accidentally reset this metric.

Updating this value inside renew (once it succeeds) sounds like a better option, or maybe inside the renewalLoop, in the if err == nil block, and then use the same lock for last and next renewal?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extendExpiration is called in renew too.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, but my point is that it should only be called in renew, otherwise it will reset the metric value. You would also want to reset it whenever renew is called, so this action is tied to the renew process, not necessarily to the expiration extension.

v.currentExpirationLock.Unlock()
}

Expand Down