template: trigger change_mode for dynamic secrets on restore #9636

Merged
merged 8 commits on Dec 16, 2020
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,8 @@
## 1.0.2 (Unreleased)

BUG FIXES:
* template: Fixed a bug where dynamic secrets did not trigger the template `change_mode` after a client restart. [[GH-9636](https://github.com/hashicorp/nomad/issues/9636)]

## 1.0.1 (Unreleased)

IMPROVEMENTS:
8 changes: 8 additions & 0 deletions client/allocrunner/taskrunner/interfaces/lifecycle.go
@@ -16,4 +16,12 @@ type TaskLifecycle interface {

// Kill a task permanently.
Kill(ctx context.Context, event *structs.TaskEvent) error

// IsRunning returns true if the task runner has a handle to the task
// driver, which is useful for distinguishing restored tasks during
// prestart hooks. But note that the driver handle could go away after you
// check this, so callers should make sure they're handling that case
// safely. Ideally prestart hooks should be idempotent whenever possible
// to handle restored tasks; use this as an escape hatch.
IsRunning() bool
}
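To illustrate the intended use of the new IsRunning method, here is a minimal sketch (not part of this diff) of a prestart hook that skips one-time work for restored tasks; the exampleHook type and its setup method are hypothetical:

package example

import "context"

// TaskLifecycle is the narrow slice of the interface this sketch needs,
// matching the IsRunning method added above.
type TaskLifecycle interface {
	IsRunning() bool
}

// exampleHook is a hypothetical prestart hook, used only for illustration.
type exampleHook struct {
	lifecycle TaskLifecycle
}

// Prestart skips one-time work when the task was restored with a live
// driver handle. Because the handle can appear or disappear after the
// check, setup must remain safe to run more than once.
func (h *exampleHook) Prestart(ctx context.Context) error {
	if h.lifecycle.IsRunning() {
		return nil // restored task: the work was already done
	}
	return h.setup(ctx)
}

// setup stands in for whatever idempotent work a real hook would do.
func (h *exampleHook) setup(ctx context.Context) error { return nil }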
4 changes: 4 additions & 0 deletions client/allocrunner/taskrunner/lifecycle.go
@@ -86,3 +86,7 @@ func (tr *TaskRunner) Kill(ctx context.Context, event *structs.TaskEvent) error

return tr.getKillErr()
}

func (tr *TaskRunner) IsRunning() bool {
return tr.getDriverHandle() != nil
}
192 changes: 104 additions & 88 deletions client/allocrunner/taskrunner/template/template.go
@@ -273,11 +273,22 @@ WAIT:
continue
}

dirty := false
for _, event := range events {
// This template hasn't been rendered
if event.LastWouldRender.IsZero() {
continue WAIT
}
if event.WouldRender && event.DidRender {
dirty = true
}
}

// if there's a driver handle then the task is already running and
// that changes how we want to behave on first render
if dirty && tm.config.Lifecycle.IsRunning() {
handledRenders := make(map[string]time.Time, len(tm.config.Templates))
tm.onTemplateRendered(handledRenders, time.Time{})
}

break WAIT
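For readers skimming the hunk above: the new loop classifies consul-template render events before deciding whether a restored task still needs its change_mode fired. A standalone sketch of that classification, assuming consul-template's manager.RenderEvent type (the helper name is hypothetical and not part of this PR):

package example

import "github.com/hashicorp/consul-template/manager"

// classifyFirstRender mirrors the loop above: allRendered reports whether
// every template has been renderable at least once, and dirty reports
// whether any template actually wrote new content -- the case in which a
// restored task must still have change_mode triggered.
func classifyFirstRender(events map[string]*manager.RenderEvent) (allRendered, dirty bool) {
	allRendered = true
	for _, event := range events {
		if event.LastWouldRender.IsZero() {
			// Never rendered yet; the caller should keep waiting.
			allRendered = false
		}
		if event.WouldRender && event.DidRender {
			// New content hit disk while the task was not watching.
			dirty = true
		}
	}
	return allRendered, dirty
}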
@@ -368,112 +379,117 @@ func (tm *TaskTemplateManager) handleTemplateRerenders(allRenderedTime time.Time
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template failed: %v", err)))
case <-tm.runner.TemplateRenderedCh():
// A template has been rendered, figure out what to do
var handling []string
signals := make(map[string]struct{})
restart := false
var splay time.Duration
tm.onTemplateRendered(handledRenders, allRenderedTime)
}
}
}

events := tm.runner.RenderEvents()
for id, event := range events {
func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time.Time, allRenderedTime time.Time) {
👍 on extracting the function. This commit is best viewed with ?w=1: 815f01c?w=1.
// A template has been rendered, figure out what to do
var handling []string
signals := make(map[string]struct{})
restart := false
var splay time.Duration

// First time through
if allRenderedTime.After(event.LastDidRender) || allRenderedTime.Equal(event.LastDidRender) {
handledRenders[id] = allRenderedTime
continue
}
events := tm.runner.RenderEvents()
for id, event := range events {

// We have already handled this one
if htime := handledRenders[id]; htime.After(event.LastDidRender) || htime.Equal(event.LastDidRender) {
continue
}
// First time through
if allRenderedTime.After(event.LastDidRender) || allRenderedTime.Equal(event.LastDidRender) {
handledRenders[id] = allRenderedTime
continue
}

// Lookup the template and determine what to do
tmpls, ok := tm.lookup[id]
if !ok {
tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template runner returned unknown template id %q", id)))
return
}
// We have already handled this one
if htime := handledRenders[id]; htime.After(event.LastDidRender) || htime.Equal(event.LastDidRender) {
continue
}

// Read environment variables from templates
envMap, err := loadTemplateEnv(tm.config.Templates, tm.config.TaskDir, tm.config.EnvBuilder.Build())
if err != nil {
tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template failed to read environment variables: %v", err)))
return
}
tm.config.EnvBuilder.SetTemplateEnv(envMap)

for _, tmpl := range tmpls {
switch tmpl.ChangeMode {
case structs.TemplateChangeModeSignal:
signals[tmpl.ChangeSignal] = struct{}{}
case structs.TemplateChangeModeRestart:
restart = true
case structs.TemplateChangeModeNoop:
continue
}
// Lookup the template and determine what to do
tmpls, ok := tm.lookup[id]
if !ok {
tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template runner returned unknown template id %q", id)))
return
}

if tmpl.Splay > splay {
splay = tmpl.Splay
}
}
// Read environment variables from templates
envMap, err := loadTemplateEnv(tm.config.Templates, tm.config.TaskDir, tm.config.EnvBuilder.Build())
if err != nil {
tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template failed to read environment variables: %v", err)))
return
}
tm.config.EnvBuilder.SetTemplateEnv(envMap)

for _, tmpl := range tmpls {
switch tmpl.ChangeMode {
case structs.TemplateChangeModeSignal:
signals[tmpl.ChangeSignal] = struct{}{}
case structs.TemplateChangeModeRestart:
restart = true
case structs.TemplateChangeModeNoop:
continue
}

handling = append(handling, id)
if tmpl.Splay > splay {
splay = tmpl.Splay
}
}

if restart || len(signals) != 0 {
if splay != 0 {
ns := splay.Nanoseconds()
offset := rand.Int63n(ns)
t := time.Duration(offset)
handling = append(handling, id)
}

select {
case <-time.After(t):
case <-tm.shutdownCh:
return
}
}
if restart || len(signals) != 0 {
if splay != 0 {
ns := splay.Nanoseconds()
offset := rand.Int63n(ns)
t := time.Duration(offset)

// Update handle time
for _, id := range handling {
handledRenders[id] = events[id].LastDidRender
}
select {
case <-time.After(t):
case <-tm.shutdownCh:
return
}
}

if restart {
tm.config.Lifecycle.Restart(context.Background(),
structs.NewTaskEvent(structs.TaskRestartSignal).
SetDisplayMessage("Template with change_mode restart re-rendered"), false)
} else if len(signals) != 0 {
var mErr multierror.Error
for signal := range signals {
s := tm.signals[signal]
event := structs.NewTaskEvent(structs.TaskSignaling).SetTaskSignal(s).SetDisplayMessage("Template re-rendered")
if err := tm.config.Lifecycle.Signal(event, signal); err != nil {
multierror.Append(&mErr, err)
}
}
// Update handle time
for _, id := range handling {
handledRenders[id] = events[id].LastDidRender
}

if err := mErr.ErrorOrNil(); err != nil {
flat := make([]os.Signal, 0, len(signals))
for signal := range signals {
flat = append(flat, tm.signals[signal])
}
if restart {
tm.config.Lifecycle.Restart(context.Background(),
structs.NewTaskEvent(structs.TaskRestartSignal).
SetDisplayMessage("Template with change_mode restart re-rendered"), false)
} else if len(signals) != 0 {
var mErr multierror.Error
for signal := range signals {
s := tm.signals[signal]
event := structs.NewTaskEvent(structs.TaskSignaling).SetTaskSignal(s).SetDisplayMessage("Template re-rendered")
if err := tm.config.Lifecycle.Signal(event, signal); err != nil {
multierror.Append(&mErr, err)
}
}

tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template failed to send signals %v: %v", flat, err)))
}
if err := mErr.ErrorOrNil(); err != nil {
flat := make([]os.Signal, 0, len(signals))
for signal := range signals {
flat = append(flat, tm.signals[signal])
}

tm.config.Lifecycle.Kill(context.Background(),
structs.NewTaskEvent(structs.TaskKilling).
SetFailsTask().
SetDisplayMessage(fmt.Sprintf("Template failed to send signals %v: %v", flat, err)))
}
}
}

}

// allTemplatesNoop returns whether all the managed templates have change mode noop.
106 changes: 106 additions & 0 deletions client/allocrunner/taskrunner/template/template_test.go
@@ -57,6 +57,9 @@ type MockTaskHooks struct {

Events []*structs.TaskEvent
EmitEventCh chan *structs.TaskEvent

// hasHandle can be set to simulate restoring a task after client restart
hasHandle bool
}

func NewMockTaskHooks() *MockTaskHooks {
@@ -98,6 +101,10 @@ func (m *MockTaskHooks) Kill(ctx context.Context, event *structs.TaskEvent) erro
return nil
}

func (m *MockTaskHooks) IsRunning() bool {
return m.hasHandle
}

func (m *MockTaskHooks) EmitEvent(event *structs.TaskEvent) {
m.Events = append(m.Events, event)
select {
@@ -760,6 +767,105 @@ func TestTaskTemplateManager_Unblock_Multi_Template(t *testing.T) {
}
}

// TestTaskTemplateManager_FirstRender_Restored tests that a task that's been
// restored renders and triggers its change mode if the template has changed
func TestTaskTemplateManager_FirstRender_Restored(t *testing.T) {
t.Parallel()
require := require.New(t)
// Make a template that will render based on a key in Vault
vaultPath := "secret/data/password"
key := "password"
content := "barbaz"
embedded := fmt.Sprintf(`{{with secret "%s"}}{{.Data.data.%s}}{{end}}`, vaultPath, key)
file := "my.tmpl"
template := &structs.Template{
EmbeddedTmpl: embedded,
DestPath: file,
ChangeMode: structs.TemplateChangeModeRestart,
}

harness := newTestHarness(t, []*structs.Template{template}, false, true)
harness.start(t)
defer harness.stop()

// Ensure no unblock
select {
case <-harness.mockHooks.UnblockCh:
require.Fail("Task unblock should not have been called")
case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second):
}

// Write the secret to Vault
logical := harness.vault.Client.Logical()
_, err := logical.Write(vaultPath, map[string]interface{}{"data": map[string]interface{}{key: content}})
require.NoError(err)

// Wait for the unblock
select {
case <-harness.mockHooks.UnblockCh:
case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second):
require.Fail("Task unblock should have been called")
}

// Check the file is there
path := filepath.Join(harness.taskDir, file)
raw, err := ioutil.ReadFile(path)
require.NoError(err, "Failed to read rendered template from %q", path)
require.Equal(content, string(raw), "Unexpected template data; got %s, want %q", raw, content)

// task is now running
harness.mockHooks.hasHandle = true

// simulate a client restart
harness.manager.Stop()
harness.mockHooks.UnblockCh = make(chan struct{}, 1)
harness.start(t)

// Wait for the unblock
select {
case <-harness.mockHooks.UnblockCh:
case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second):
require.Fail("Task unblock should have been called")
}

select {
case <-harness.mockHooks.RestartCh:
require.Fail("should not have restarted", harness.mockHooks)
case <-harness.mockHooks.SignalCh:
require.Fail("should not have restarted", harness.mockHooks)
case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second):
}

// simulate a client restart and TTL expiry
harness.manager.Stop()
content = "bazbar"
_, err = logical.Write(vaultPath, map[string]interface{}{"data": map[string]interface{}{key: content}})
require.NoError(err)
harness.mockHooks.UnblockCh = make(chan struct{}, 1)
harness.start(t)

// Wait for the unblock
select {
case <-harness.mockHooks.UnblockCh:
case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second):
require.Fail("Task unblock should have been called")
}

// Wait for restart
timeout := time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second)
OUTER:
for {
select {
case <-harness.mockHooks.RestartCh:
break OUTER
case <-harness.mockHooks.SignalCh:
require.Fail("Signal with restart policy", harness.mockHooks)
case <-timeout:
require.Fail("Should have received a restart", harness.mockHooks)
}
}
}

func TestTaskTemplateManager_Rerender_Noop(t *testing.T) {
t.Parallel()
// Make a template that will render based on a key in Consul