Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

client: split identity_hook across allocrunner and taskrunner #18431

Merged
merged 44 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
0082084
moved identity hook to allocrunner
pkazmierczak Sep 7, 2023
a476edd
splitting the identity hook
pkazmierczak Sep 8, 2023
75c41b3
fix errors with getIdentities
pkazmierczak Sep 11, 2023
734f8ce
fixed identity_hook errors
pkazmierczak Sep 12, 2023
7d3e21e
clean up
pkazmierczak Sep 12, 2023
b959429
renew
pkazmierczak Sep 13, 2023
8a56035
fix allocHookResrouces signatures
pkazmierczak Sep 13, 2023
0809420
communication between allocrunner and taskrunner hooks via channels
pkazmierczak Sep 14, 2023
1209445
error handling fixes
pkazmierczak Sep 14, 2023
c5dc60e
removed obsolete code and comments
pkazmierczak Sep 14, 2023
0bba786
TaskIdentity helper struct
pkazmierczak Sep 14, 2023
b49a6ea
gofmt snafu
pkazmierczak Sep 14, 2023
c485631
widmgr -> signer; identity_hook -> widmgr
schmichael Sep 15, 2023
bce993c
new allocrunner identity_hook
pkazmierczak Sep 15, 2023
cb1aa30
handle stopping
pkazmierczak Sep 15, 2023
76796ac
wip
pkazmierczak Sep 15, 2023
f7a9ec5
pass widmgr around
pkazmierczak Sep 15, 2023
a2ada2d
handle incoming signed identities in the taskrunner hook
pkazmierczak Sep 15, 2023
f34374b
switch to Watch-only call scheme
schmichael Sep 15, 2023
7e6c2df
initialize the new allocrunner hook
pkazmierczak Sep 18, 2023
b17de6d
allocrunner identity_hook unit test
pkazmierczak Sep 18, 2023
d2fa276
remove widmgr from client
pkazmierczak Sep 18, 2023
19903b1
moved things around
pkazmierczak Sep 18, 2023
adc07da
taskrunner hook unit tests
pkazmierczak Sep 19, 2023
3679206
superfluous newIdentityHook call
pkazmierczak Sep 19, 2023
265f6cd
signer should error if there are no identities to sign, and NewIdenti…
pkazmierczak Sep 19, 2023
48079f4
client: handle tasks without alt identities and fix tests
lgfa29 Sep 19, 2023
e8ef776
addressed review comments from @tgross
pkazmierczak Sep 20, 2023
e3dd3d3
applied Luiz's review comments
pkazmierczak Sep 20, 2023
60d0a95
Update client/allocrunner/alloc_runner.go
pkazmierczak Sep 20, 2023
fa36330
Merge branch 'main' into f-consul-wi-consul_hook
pkazmierczak Sep 20, 2023
9592485
Apply suggestions from code review
pkazmierczak Sep 20, 2023
f5aa137
multierror in the RPC
pkazmierczak Sep 20, 2023
e2852ff
Merge branch 'f-consul-wi-consul_hook' of github.com:hashicorp/nomad …
pkazmierczak Sep 20, 2023
9e47778
gofmt
pkazmierczak Sep 20, 2023
ea9635d
addressed more of Luiz's comments
pkazmierczak Sep 20, 2023
7c5bdc5
close watchers on stopCtx.Done()
pkazmierczak Sep 20, 2023
ddce193
fix widspec loop
pkazmierczak Sep 20, 2023
5200e7a
revert m.send select
pkazmierczak Sep 20, 2023
0bdce1d
multiple watchers (map of maps)
pkazmierczak Sep 21, 2023
3de57bd
multiple watchers (map of slices)
pkazmierczak Sep 21, 2023
75aa938
logger name for widmgr
pkazmierczak Sep 21, 2023
eb1aba3
do not set minExp to arbitrary value
pkazmierczak Sep 21, 2023
dad0e65
applied Tim's comments
pkazmierczak Sep 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
248 changes: 248 additions & 0 deletions client/allocrunner/identity_hook.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1

package allocrunner

import (
"context"
"fmt"
"time"

log "github.com/hashicorp/go-hclog"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/nomad/structs"
)

// IdentitySigner is the interface needed to retrieve signed identities for
// workload identities. At runtime it is implemented by *widmgr.WIDMgr.
type IdentitySigner interface {
SignIdentities(minIndex uint64, req []*structs.WorkloadIdentityRequest) ([]*structs.SignedWorkloadIdentity, error)
}

type identityHook struct {
ar *allocRunner
hookResources *cstructs.AllocHookResources
widmgr IdentitySigner
logger log.Logger

// minWait is the minimum amount of time to wait before renewing. Settable to
// ease testing.
minWait time.Duration

stopCtx context.Context
stop context.CancelFunc
}

func newIdentityHook(ar *allocRunner, hookResources *cstructs.AllocHookResources, logger log.Logger) *identityHook {
// Create a context for the renew loop. This context will be canceled when
// the allocation is stopped or agent is shutting down
stopCtx, stop := context.WithCancel(context.Background())

h := &identityHook{
ar: ar,
hookResources: hookResources,
minWait: 10 * time.Second,
stopCtx: stopCtx,
stop: stop,
}
h.logger = logger.Named(h.Name())
return h
}

func (*identityHook) Name() string {
return "identity"
}

func (h *identityHook) Prerun() error {
signedWIDs := map[string]*structs.SignedWorkloadIdentity{}

// we need to know the amount of tasks to create a buffered
// SignedTaskIdentities channel
numberOfTasks := len(h.ar.tasks)

for _, t := range h.ar.tasks {
task := t.Task()
if task == nil {
// hitting this means a bug, but better safe than sorry
continue
}

var err error
signedWIDs, err = h.getIdentities(h.ar.Alloc(), task)
if err != nil {
return fmt.Errorf("error fetching alternate identities: %w", err)
}

// publish task identities inside hookResources, so that taskrunner
// hooks can also use them.
h.hookResources.SignedTaskIdentities = make(chan map[string]*structs.SignedWorkloadIdentity, numberOfTasks)
h.hookResources.SignedTaskIdentities <- signedWIDs
}

// Start token renewal loop
go h.renew(h.ar.alloc.CreateIndex, signedWIDs)

return nil
}

// Stop implements interfaces.TaskStopHook
func (h *identityHook) Stop(context.Context, *interfaces.TaskStopRequest, *interfaces.TaskStopResponse) error {
h.stop()
return nil
}

// Shutdown implements interfaces.ShutdownHook
func (h *identityHook) Shutdown() {
h.stop()
}

// getIdentities calls Alloc.SignIdentities to get all of the identities for
// this workload signed. If there are no identities to be signed then (nil,
// nil) is returned.
func (h *identityHook) getIdentities(alloc *structs.Allocation, task *structs.Task) (map[string]*structs.SignedWorkloadIdentity, error) {

if len(task.Identities) == 0 {
return nil, nil
}

req := make([]*structs.WorkloadIdentityRequest, len(task.Identities))
for i, widspec := range task.Identities {
req[i] = &structs.WorkloadIdentityRequest{
AllocID: alloc.ID,
TaskName: task.Name,
IdentityName: widspec.Name,
}
}

// Get signed workload identities
signedWIDs, err := h.ar.widmgr.SignIdentities(alloc.CreateIndex, req)
if err != nil {
return nil, err
}

// Index initial workload identities by name
widMap := make(map[string]*structs.SignedWorkloadIdentity, len(signedWIDs))
for _, wid := range signedWIDs {
widMap[wid.IdentityName] = wid
}

return widMap, nil
}

// renew fetches new signed workload identity tokens before the existing tokens
// expire.
func (h *identityHook) renew(createIndex uint64, signedWIDs map[string]*structs.SignedWorkloadIdentity) {
for _, t := range h.ar.tasks {
alloc := h.ar.Alloc()
task := t.Task()

wids := task.Identities
if len(wids) == 0 {
h.logger.Trace("no workload identities to renew")
return
}

var reqs []*structs.WorkloadIdentityRequest
renewNow := false
minExp := time.Now().Add(30 * time.Hour) // set high default expiration
widMap := make(map[string]*structs.WorkloadIdentity, len(wids)) // Identity.Name -> Identity

for _, wid := range wids {
if wid.TTL == 0 {
// No ttl, so no need to renew it
continue
}

widMap[wid.Name] = wid

reqs = append(reqs, &structs.WorkloadIdentityRequest{
AllocID: alloc.ID,
TaskName: task.Name,
IdentityName: wid.Name,
})

sid, ok := signedWIDs[wid.Name]
if !ok {
// Missing a signature, treat this case as already expired so we get a
// token ASAP
h.logger.Trace("missing token for identity", "identity", wid.Name)
renewNow = true
continue
}

if sid.Expiration.Before(minExp) {
minExp = sid.Expiration
}
}

if len(reqs) == 0 {
h.logger.Trace("no workload identities expire")
return
}

var wait time.Duration
if !renewNow {
wait = helper.ExpiryToRenewTime(minExp, time.Now, h.minWait)
}

timer, timerStop := helper.NewStoppedTimer()
defer timerStop()

var retry uint64

for {
// we need to handle stopCtx.Err() and manually stop the subscribers
if err := h.stopCtx.Err(); err != nil {
h.hookResources.StopChan <- struct{}{}
return
}

h.logger.Debug("waiting to renew identities", "num", len(reqs), "wait", wait)
timer.Reset(wait)
select {
case <-timer.C:
h.logger.Trace("getting new signed identities", "num", len(reqs))
case <-h.stopCtx.Done():
h.hookResources.StopChan <- struct{}{}
return
}

// Renew all tokens together since its cheap
tokens, err := h.widmgr.SignIdentities(createIndex, reqs)
if err != nil {
retry++
wait = helper.Backoff(h.minWait, time.Hour, retry) + helper.RandomStagger(h.minWait)
h.logger.Error("error renewing workload identities", "error", err, "next", wait)
continue
}

if len(tokens) == 0 {
retry++
wait = helper.Backoff(h.minWait, time.Hour, retry) + helper.RandomStagger(h.minWait)
h.logger.Error("error renewing workload identities", "error", "no tokens", "next", wait)
continue
}

// Reset next expiration time
minExp = time.Time{}

for _, token := range tokens {
// Set next expiration time
if minExp.IsZero() {
minExp = token.Expiration
} else if token.Expiration.Before(minExp) {
minExp = token.Expiration
}
}

// Publish updates for taskrunner consumers
h.hookResources.SignedTaskIdentities <- signedWIDs

// Success! Set next renewal and reset retries
wait = helper.ExpiryToRenewTime(minExp, time.Now, h.minWait)
retry = 0
}
}
}
Loading