Skip to content

Commit

Permalink
identity: support jwt expiration and rotation
Browse files Browse the repository at this point in the history
  • Loading branch information
schmichael committed Aug 18, 2023
1 parent 6fca4fa commit 4e3abba
Show file tree
Hide file tree
Showing 7 changed files with 200 additions and 12 deletions.
9 changes: 5 additions & 4 deletions api/tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -1151,8 +1151,9 @@ func (t *TaskCSIPluginConfig) Canonicalize() {
// WorkloadIdentity is the jobspec block which determines if and how a workload
// identity is exposed to tasks.
type WorkloadIdentity struct {
Name string `hcl:"name,optional"`
Audience []string `mapstructure:"aud" hcl:"aud,optional"`
Env bool `hcl:"env,optional"`
File bool `hcl:"file,optional"`
Name string `hcl:"name,optional"`
Audience []string `mapstructure:"aud" hcl:"aud,optional"`
Env bool `hcl:"env,optional"`
File bool `hcl:"file,optional"`
TTL time.Duration `mapstructure:"ttl" hcl:"ttl,optional"`
}
131 changes: 131 additions & 0 deletions client/allocrunner/taskrunner/identity_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ import (
"context"
"fmt"
"path/filepath"
"time"

log "github.com/hashicorp/go-hclog"

"github.com/hashicorp/nomad/client/allocrunner/interfaces"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/users"
"github.com/hashicorp/nomad/nomad/structs"
)
Expand All @@ -34,12 +36,22 @@ type identityHook struct {
tr *TaskRunner
tokenDir string
logger log.Logger

stopCtx context.Context
stop context.CancelFunc
}

func newIdentityHook(tr *TaskRunner, logger log.Logger) *identityHook {
// Create a context for the renew loop. This context will be canceled when
// the task is stopped or agent is shutting down, unlike Prestart's ctx which
// is not intended for use after Prestart returns.
stopCtx, stop := context.WithCancel(context.Background())

h := &identityHook{
tr: tr,
tokenDir: tr.taskDir.SecretsDir,
stopCtx: stopCtx,
stop: stop,
}
h.logger = logger.Named(h.Name())
return h
Expand Down Expand Up @@ -74,6 +86,14 @@ func (h *identityHook) Prestart(ctx context.Context, req *interfaces.TaskPrestar
}
}

// Start token renewal loop
go h.renew(req.Alloc.CreateIndex, signedWIDs)

return nil
}

// Stop cancels the hook's stop context, which makes the token renewal
// goroutine started by Prestart return. The hook arguments are unused and the
// method always reports success.
func (h *identityHook) Stop(_ context.Context, _ *interfaces.TaskStopRequest, _ *interfaces.TaskStopResponse) error {
	// Signal the renew loop (which selects on stopCtx.Done) to exit.
	defer h.stop()
	return nil
}

Expand Down Expand Up @@ -151,3 +171,114 @@ func (h *identityHook) getIdentities(alloc *structs.Allocation, task *structs.Ta

return widMap, nil
}

// renew periodically re-signs every workload identity of the task that has a
// non-zero TTL and hands each fresh JWT to setAltToken. It is started as a
// goroutine by Prestart and returns once stopCtx is canceled (see Stop), or
// immediately if the task has no identities that expire.
//
// createIndex is passed through to SignIdentities unchanged. signedWIDs is the
// set of already signed identities passed in by Prestart, keyed by identity
// name; their expirations determine when the first renewal happens.
func (h *identityHook) renew(createIndex uint64, signedWIDs map[string]*structs.SignedWorkloadIdentity) {
	wids := h.tr.Task().Identities
	if len(wids) == 0 {
		h.logger.Trace("no workload identities to renew")
		return
	}

	var (
		reqs []*structs.WorkloadIdentityRequest

		// minExp is the earliest expiration seen so far. The zero value
		// means "not set yet" and must never be compared with Before, as
		// no real expiration is earlier than the zero time.
		minExp time.Time

		// renewNow is set when at least one expiring identity has no usable
		// signed token, so the first renewal must happen as soon as allowed.
		renewNow bool
	)
	widMap := make(map[string]*structs.WorkloadIdentity, len(wids)) // Identity.Name -> Identity

	for _, wid := range wids {
		if wid.TTL == 0 {
			// No ttl, so no need to renew it
			continue
		}

		widMap[wid.Name] = wid

		reqs = append(reqs, &structs.WorkloadIdentityRequest{
			AllocID:      h.tr.allocID,
			TaskName:     h.tr.taskName,
			IdentityName: wid.Name,
		})

		sid, ok := signedWIDs[wid.Name]
		if !ok || sid.Expiration.IsZero() {
			// Missing a signature or an expiration, treat this case as already
			// expired so we get a token ASAP. Use a flag rather than resetting
			// minExp so the outcome does not depend on iteration order.
			renewNow = true
			continue
		}

		if minExp.IsZero() || sid.Expiration.Before(minExp) {
			minExp = sid.Expiration
		}
	}

	if len(reqs) == 0 {
		h.logger.Trace("no workload identities expire")
		return
	}

	if renewNow {
		// A zero expiry makes ExpiryToRenewTime fall back to roughly minWait.
		minExp = time.Time{}
	}

	const minWait = 10 * time.Second
	wait := helper.ExpiryToRenewTime(minExp, time.Now, minWait)

	timer, timerStop := helper.NewStoppedTimer()
	defer timerStop()

	var retry uint64

	// Re-check the stop context on every iteration; the select below also
	// returns promptly if it is canceled while waiting.
	for h.stopCtx.Err() == nil {
		timer.Reset(wait)
		select {
		case <-timer.C:
			h.logger.Trace("getting new signed identities", "num", len(reqs))
		case <-h.stopCtx.Done():
			return
		}

		// Renew all tokens together since its cheap
		tokens, err := h.tr.widmgr.SignIdentities(createIndex, reqs)
		if err != nil {
			retry++
			wait = helper.Backoff(minWait, time.Hour, retry) + helper.RandomStagger(minWait)
			h.logger.Error("error renewing workload identities", "error", err, "next", wait)
			continue
		}

		if len(tokens) == 0 {
			retry++
			wait = helper.Backoff(minWait, time.Hour, retry) + helper.RandomStagger(minWait)
			h.logger.Error("error renewing workload identities", "error", "no tokens", "next", wait)
			continue
		}

		// Reset next expiration time
		minExp = time.Time{}

		for _, token := range tokens {
			widspec, ok := widMap[token.IdentityName]
			if !ok {
				// Bug: every signed identity received should correspond to a
				// workload identity that was requested.
				h.logger.Warn("bug: unexpected workload identity received", "identity", token.IdentityName)
				continue
			}

			if err := h.setAltToken(widspec, token.JWT); err != nil {
				// Schedule an early retry using retry's backoff logic, but never
				// push the next renewal later than an expiration already recorded.
				retryAt := time.Now().Add(helper.Backoff(minWait, time.Hour, retry+1) + helper.RandomStagger(minWait))
				if minExp.IsZero() || retryAt.Before(minExp) {
					minExp = retryAt
				}
				h.logger.Error("error setting new workload identity", "error", err, "identity", token.IdentityName)
				continue
			}

			// Set next expiration time to the earliest one seen
			if minExp.IsZero() || token.Expiration.Before(minExp) {
				minExp = token.Expiration
			}
		}

		// Success! Set next renewal and reset retries
		wait = helper.ExpiryToRenewTime(minExp, time.Now, minWait)
		retry = 0

		h.logger.Debug("waitng to renew workloading identities", "next", wait)
	}
}
2 changes: 2 additions & 0 deletions command/agent/job_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,7 @@ func ApiTaskToStructsTask(job *structs.Job, group *structs.TaskGroup,
Audience: slices.Clone(id.Audience),
Env: id.Env,
File: id.File,
TTL: id.TTL,
}
}

Expand All @@ -1224,6 +1225,7 @@ func ApiTaskToStructsTask(job *structs.Job, group *structs.TaskGroup,
Audience: slices.Clone(id.Audience),
Env: id.Env,
File: id.File,
TTL: id.TTL,
}

}
Expand Down
15 changes: 15 additions & 0 deletions helper/retry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,18 @@ func TestExpiryToRenewTime_Expired(t *testing.T) {
must.Greater(t, min, renew)
must.Less(t, min*2, renew)
}

// TestExpiryToRenewTime_Zero exercises ExpiryToRenewTime with the zero
// time.Time as the expiration and a fixed clock. The resulting wait is checked
// against the configured minimum and twice that minimum.
func TestExpiryToRenewTime_Zero(t *testing.T) {
	const floor = time.Hour

	// Pin "now" so the computation does not depend on the wall clock.
	fixedNow := time.Date(2023, 2, 1, 0, 0, 0, 0, time.UTC)
	clock := func() time.Time { return fixedNow }

	var zeroExpiry time.Time
	got := ExpiryToRenewTime(zeroExpiry, clock, floor)

	must.Greater(t, floor, got)
	must.Less(t, floor*2, got)
}
1 change: 1 addition & 0 deletions nomad/alloc_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,7 @@ func (a *Alloc) SignIdentities(args *structs.AllocIdentitiesRequest, reply *stru
reply.SignedIdentities = append(reply.SignedIdentities, &structs.SignedWorkloadIdentity{
WorkloadIdentityRequest: *idReq,
JWT: token,
Expiration: claims.Expiry.Time(),
})
break
}
Expand Down
19 changes: 15 additions & 4 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -11193,16 +11193,17 @@ func NewIdentityClaims(job *Job, alloc *Allocation, taskName string, wid *Worklo
}

claims.TaskName = taskName
claims.Audience = wid.Audience
claims.SetSubject(job, alloc.TaskGroup, taskName, wid.Name)
claims.Audience = slices.Clone(wid.Audience)
claims.setSubject(job, alloc.TaskGroup, taskName, wid.Name)
claims.setExp(now, wid)

claims.ID = uuid.Generate()

return claims
}

// SetSubject creates the standard subject claim for workload identities.
func (claims *IdentityClaims) SetSubject(job *Job, group, task, id string) {
// setSubject creates the standard subject claim for workload identities.
func (claims *IdentityClaims) setSubject(job *Job, group, task, id string) {
claims.Subject = strings.Join([]string{
job.Region,
job.Namespace,
Expand All @@ -11213,6 +11214,16 @@ func (claims *IdentityClaims) SetSubject(job *Job, group, task, id string) {
}, ":")
}

// setExp populates the claims' Expiry with now plus the identity's TTL. When
// the TTL is zero the claims are left untouched, meaning they carry no
// expiration.
func (claims *IdentityClaims) setExp(now time.Time, wid *WorkloadIdentity) {
	if ttl := wid.TTL; ttl != 0 {
		expiresAt := now.Add(ttl)
		claims.Expiry = jwt.NewNumericDate(expiresAt)
	}
}

// AllocationDiff is another named type for Allocation (to use the same fields),
// which is used to represent the delta for an Allocation. If you need a method
// defined on the al
Expand Down
35 changes: 31 additions & 4 deletions nomad/structs/workload_id.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package structs

import (
"fmt"
"time"

"github.com/hashicorp/go-multierror"
"golang.org/x/exp/slices"
Expand Down Expand Up @@ -56,6 +57,10 @@ type WorkloadIdentity struct {
// File writes the Workload Identity into the Task's secrets directory
// if set.
File bool

// TTL is used to determine the expiration of the credentials created for
// this identity (eg the JWT "exp" claim).
TTL time.Duration
}

func (wi *WorkloadIdentity) Copy() *WorkloadIdentity {
Expand All @@ -67,6 +72,7 @@ func (wi *WorkloadIdentity) Copy() *WorkloadIdentity {
Audience: slices.Clone(wi.Audience),
Env: wi.Env,
File: wi.File,
TTL: wi.TTL,
}
}

Expand All @@ -91,6 +97,10 @@ func (wi *WorkloadIdentity) Equal(other *WorkloadIdentity) bool {
return false
}

if wi.TTL != other.TTL {
return false
}

return true
}

Expand Down Expand Up @@ -127,6 +137,14 @@ func (wi *WorkloadIdentity) Validate() error {
}
}

if wi.TTL > 0 && (wi.Name == "" || wi.Name == WorkloadIdentityDefaultName) {
mErr.Errors = append(mErr.Errors, fmt.Errorf("ttl for default token not yet supported"))
}

if wi.TTL < 0 {
mErr.Errors = append(mErr.Errors, fmt.Errorf("ttl must be >= 0"))
}

return mErr.ErrorOrNil()
}

Expand All @@ -135,13 +153,21 @@ func (wi *WorkloadIdentity) Warnings() error {
return fmt.Errorf("must not be nil")
}

var mErr multierror.Error

if n := len(wi.Audience); n == 0 {
return fmt.Errorf("identities without an audience are insecure")
mErr.Errors = append(mErr.Errors, fmt.Errorf("identities without an audience are insecure"))
} else if n > 1 {
return fmt.Errorf("while multiple audiences is allowed, it is more secure to use 1 audience per identity")
mErr.Errors = append(mErr.Errors, fmt.Errorf("while multiple audiences is allowed, it is more secure to use 1 audience per identity"))
}

return nil
if wi.Name != "" || wi.Name != WorkloadIdentityDefaultName {
if wi.TTL == 0 {
mErr.Errors = append(mErr.Errors, fmt.Errorf("identities without an expiration are insecure"))
}
}

return mErr.ErrorOrNil()
}

// WorkloadIdentityRequest encapsulates the 3 parameters used to generate a
Expand All @@ -156,7 +182,8 @@ type WorkloadIdentityRequest struct {
// includes the JWT for the requested workload identity.
type SignedWorkloadIdentity struct {
WorkloadIdentityRequest
JWT string
JWT string
Expiration time.Time
}

// WorkloadIdentityRejection is the response to a WorkloadIdentityRequest that
Expand Down

0 comments on commit 4e3abba

Please sign in to comment.