From e1edb29675a499eb4086005688348ee5dff6b324 Mon Sep 17 00:00:00 2001 From: Jasmine Dahilig Date: Wed, 8 Jul 2020 11:01:35 -0700 Subject: [PATCH 01/10] add poststart hook to task hook coordinator & structs --- api/tasks.go | 1 + client/allocrunner/task_hook_coordinator.go | 40 +++++++++++++++++++-- nomad/structs/structs.go | 2 ++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index b9b79af5470d..8d3920529fbf 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -632,6 +632,7 @@ type DispatchPayloadConfig struct { const ( TaskLifecycleHookPrestart = "prestart" + TaskLifecycleHookPoststart = "poststart" ) type TaskLifecycle struct { diff --git a/client/allocrunner/task_hook_coordinator.go b/client/allocrunner/task_hook_coordinator.go index 8f6d1c10bc6d..2fd94714ef8e 100644 --- a/client/allocrunner/task_hook_coordinator.go +++ b/client/allocrunner/task_hook_coordinator.go @@ -18,23 +18,31 @@ type taskHookCoordinator struct { mainTaskCtx context.Context mainTaskCtxCancel func() + poststartTaskCtx context.Context + poststartTaskCtxCancel func() + prestartSidecar map[string]struct{} prestartEphemeral map[string]struct{} + + mainTasksPending map[string]struct{} } func newTaskHookCoordinator(logger hclog.Logger, tasks []*structs.Task) *taskHookCoordinator { closedCh := make(chan struct{}) close(closedCh) - mainTaskCtx, cancelFn := context.WithCancel(context.Background()) + mainTaskCtx, mainCancelFn := context.WithCancel(context.Background()) + poststartTaskCtx, poststartCancelFn := context.WithCancel(context.Background()) c := &taskHookCoordinator{ logger: logger, closedCh: closedCh, mainTaskCtx: mainTaskCtx, - mainTaskCtxCancel: cancelFn, + mainTaskCtxCancel: mainCancelFn, prestartSidecar: map[string]struct{}{}, prestartEphemeral: map[string]struct{}{}, + poststartTaskCtx: poststartTaskCtx, + poststartTaskCtxCancel: poststartCancelFn, } c.setTasks(tasks) return c @@ -55,7 +63,6 @@ func (c *taskHookCoordinator) setTasks(tasks []*structs.Task) { } else { c.prestartEphemeral[task.Name] = struct{}{} } - default: c.logger.Error("invalid lifecycle hook", "hook", task.Lifecycle.Hook) } @@ -70,11 +77,25 @@ func (c *taskHookCoordinator) hasPrestartTasks() bool { return len(c.prestartSidecar)+len(c.prestartEphemeral) > 0 } +func (c *taskHookCoordinator) hasPendingMainTasks() bool { + return len(c.mainTasksPending) > 0 +} + func (c *taskHookCoordinator) startConditionForTask(task *structs.Task) <-chan struct{} { if task.Lifecycle != nil && task.Lifecycle.Hook == structs.TaskLifecycleHookPrestart { return c.closedCh } + switch task.Lifecycle.Hook { + case structs.TaskLifecycleHookPrestart: + // Prestart tasks start without checking status of other tasks + return c.closedCh + case structs.TaskLifecycleHookPoststart: + return c.poststartTaskCtx.Done() + default: + return c.mainTaskCtx.Done() + } + return c.mainTaskCtx.Done() } @@ -104,10 +125,23 @@ func (c *taskHookCoordinator) taskStateUpdated(states map[string]*structs.TaskSt delete(c.prestartEphemeral, task) } + for task := range c.mainTasksPending { + st := states[task] + if st == nil || st.StartedAt.IsZero() { + continue + } + + delete(c.mainTasksPending, task) + } + // everything well if !c.hasPrestartTasks() { c.mainTaskCtxCancel() } + + if !c.hasPendingMainTasks() { + c.poststartTaskCtxCancel() + } } // hasNonSidecarTasks returns false if all the passed tasks are sidecar tasks diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 4996b3638e22..437af243cd8c 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -4906,6 +4906,7 @@ func (d *DispatchPayloadConfig) Validate() error { const ( TaskLifecycleHookPrestart = "prestart" + TaskLifecycleHookPoststart = "poststart" ) type TaskLifecycleConfig struct { @@ -4929,6 +4930,7 @@ func (d *TaskLifecycleConfig) Validate() error { switch d.Hook { case TaskLifecycleHookPrestart: + case TaskLifecycleHookPoststart: case "": return fmt.Errorf("no lifecycle hook provided") default: From c0640146a4cff269aa952335778d85d78ebe8562 Mon Sep 17 00:00:00 2001 From: Jasmine Dahilig Date: Fri, 10 Jul 2020 09:03:10 -0700 Subject: [PATCH 02/10] fix panic, but poststart is still stalled --- client/allocrunner/task_hook_coordinator.go | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/client/allocrunner/task_hook_coordinator.go b/client/allocrunner/task_hook_coordinator.go index 2fd94714ef8e..42890f78f22f 100644 --- a/client/allocrunner/task_hook_coordinator.go +++ b/client/allocrunner/task_hook_coordinator.go @@ -23,7 +23,6 @@ type taskHookCoordinator struct { prestartSidecar map[string]struct{} prestartEphemeral map[string]struct{} - mainTasksPending map[string]struct{} } @@ -41,6 +40,7 @@ func newTaskHookCoordinator(logger hclog.Logger, tasks []*structs.Task) *taskHoo mainTaskCtxCancel: mainCancelFn, prestartSidecar: map[string]struct{}{}, prestartEphemeral: map[string]struct{}{}, + mainTasksPending: map[string]struct{}{}, poststartTaskCtx: poststartTaskCtx, poststartTaskCtxCancel: poststartCancelFn, } @@ -52,7 +52,7 @@ func (c *taskHookCoordinator) setTasks(tasks []*structs.Task) { for _, task := range tasks { if task.Lifecycle == nil { - // move nothing + c.mainTasksPending[task.Name] = struct{}{} continue } @@ -82,8 +82,8 @@ func (c *taskHookCoordinator) hasPendingMainTasks() bool { } func (c *taskHookCoordinator) startConditionForTask(task *structs.Task) <-chan struct{} { - if task.Lifecycle != nil && task.Lifecycle.Hook == structs.TaskLifecycleHookPrestart { - return c.closedCh + if task.Lifecycle == nil { + return c.mainTaskCtx.Done() } switch task.Lifecycle.Hook { @@ -95,9 +95,6 @@ func (c *taskHookCoordinator) startConditionForTask(task *structs.Task) <-chan s default: return c.mainTaskCtx.Done() } - - return c.mainTaskCtx.Done() - } // This is not thread safe! This must only be called from one thread per alloc runner. From 9cf44295182df47d9a9166ad0c11688848aa7b20 Mon Sep 17 00:00:00 2001 From: Jasmine Dahilig Date: Wed, 29 Jul 2020 12:39:42 -0700 Subject: [PATCH 03/10] lifecycle: add allocrunner and task hook coordinator unit tests --- client/allocrunner/alloc_runner_test.go | 40 ++++++++++++++-- .../allocrunner/task_hook_coordinator_test.go | 46 +++++++++++++++++++ 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index a9d6dcf34cec..49c5af7de374 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -152,9 +152,9 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0 alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0 - // Create three tasks in the task group + // Create four tasks in the task group sidecar := alloc.Job.TaskGroups[0].Tasks[0].Copy() - sidecar.Name = "sidecar" + sidecar.Name = "prestart-sidecar" sidecar.Driver = "mock_driver" sidecar.KillTimeout = 10 * time.Millisecond sidecar.Lifecycle = &structs.TaskLifecycleConfig{ @@ -162,10 +162,24 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { Sidecar: true, } - sidecar.Config = map[string]interface{}{ + sidecar.Config = map[string]interface{}{ "run_for": "100s", } + poststart := alloc.Job.TaskGroups[0].Tasks[0].Copy() + poststart.Name = "poststart-sidecar" + poststart.Driver = "mock_driver" + poststart.KillTimeout = 10 * time.Millisecond + poststart.Lifecycle = &structs.TaskLifecycleConfig{ + Hook: structs.TaskLifecycleHookPoststart, + Sidecar: true, + } + + poststart.Config = map[string]interface{}{ + "run_for": "100s", + } + + // these two main tasks have the same name, is that ok? main1 := alloc.Job.TaskGroups[0].Tasks[0].Copy() main1.Name = "task2" main1.Driver = "mock_driver" @@ -183,6 +197,7 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { alloc.Job.TaskGroups[0].Tasks = []*structs.Task{sidecar, main1, main2} alloc.AllocatedResources.Tasks = map[string]*structs.AllocatedTaskResources{ sidecar.Name: tr, + poststart.Name: tr, main1.Name: tr, main2.Name: tr, } @@ -217,7 +232,7 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { var state *structs.TaskState - // Task1 should be killed because Task2 exited + // both sidecars should be killed because Task2 exited state = last.TaskStates[sidecar.Name] if state.State != structs.TaskStateDead { return false, fmt.Errorf("got state %v; want %v", state.State, structs.TaskStateDead) @@ -234,6 +249,23 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { return false, fmt.Errorf("Did not find event %v: %#+v", structs.TaskMainDead, state.Events) } + state = last.TaskStates[poststart.Name] + if state.State != structs.TaskStateDead { + return false, fmt.Errorf("got state %v; want %v", state.State, structs.TaskStateDead) + } + if state.FinishedAt.IsZero() || state.StartedAt.IsZero() { + return false, fmt.Errorf("expected to have a start and finish time") + } + if len(state.Events) < 2 { + // At least have a received and destroyed + return false, fmt.Errorf("Unexpected number of events") + } + + if !hasTaskMainEvent(state) { + return false, fmt.Errorf("Did not find event %v: %#+v", structs.TaskMainDead, state.Events) + } + + // main tasks should die naturely state = last.TaskStates[main1.Name] if state.State != structs.TaskStateDead { diff --git a/client/allocrunner/task_hook_coordinator_test.go b/client/allocrunner/task_hook_coordinator_test.go index 91405fbad634..ac82a9f00c77 100644 --- a/client/allocrunner/task_hook_coordinator_test.go +++ b/client/allocrunner/task_hook_coordinator_test.go @@ -224,6 +224,52 @@ func TestTaskHookCoordinator_SidecarNeverStarts(t *testing.T) { require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name) } +func TestTaskHookCoordinator_PoststartStartsAfterMain(t *testing.T) { + logger := testlog.HCLogger(t) + + alloc := mock.LifecycleAlloc() + tasks := alloc.Job.TaskGroups[0].Tasks + + mainTask := tasks[0] + sideTask := tasks[1] + postTask := tasks[2] + + postTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart + + coord := newTaskHookCoordinator(logger, tasks) + postCh := coord.startConditionForTask(postTask) + sideCh := coord.startConditionForTask(sideTask) + mainCh := coord.startConditionForTask(mainTask) + + require.Truef(t, isChannelClosed(sideCh), "%s channel was open, should be closed", sideTask.Name) + require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", mainTask.Name) + require.Falsef(t, isChannelClosed(mainCh), "%s channel was closed, should be open", postTask.Name) + + states := map[string]*structs.TaskState{ + postTask.Name: { + State: structs.TaskStatePending, + Failed: false, + }, + mainTask.Name: { + State: structs.TaskStateRunning, + Failed: false, + StartedAt: time.Now(), + }, + sideTask.Name: { + State: structs.TaskStateRunning, + Failed: false, + StartedAt: time.Now(), + }, + } + + coord.taskStateUpdated(states) + + require.Truef(t, isChannelClosed(postCh), "%s channel was open, should be closed", postTask.Name) + require.Truef(t, isChannelClosed(sideCh), "%s channel was open, should be closed", sideTask.Name) + require.Truef(t, isChannelClosed(mainCh), "%s channel was open, should be closed", mainTask.Name) +} + + func isChannelClosed(ch <-chan struct{}) bool { select { case <-ch: From 5e9a8bbd5275fb9c01974bcc60636c46318a309b Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 11 Aug 2020 09:47:18 -0700 Subject: [PATCH 04/10] client: don't restart poststart sidecars on success --- client/allocrunner/taskrunner/restarts/restarts.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/client/allocrunner/taskrunner/restarts/restarts.go b/client/allocrunner/taskrunner/restarts/restarts.go index f8aea3e61efb..6ee0056ccd8b 100644 --- a/client/allocrunner/taskrunner/restarts/restarts.go +++ b/client/allocrunner/taskrunner/restarts/restarts.go @@ -21,11 +21,19 @@ const ( ) func NewRestartTracker(policy *structs.RestartPolicy, jobType string, tlc *structs.TaskLifecycleConfig) *RestartTracker { + // Batch jobs should not restart if they exit successfully onSuccess := jobType != structs.JobTypeBatch + + // Prestart sidecars should get restarted on success if tlc != nil && tlc.Hook == structs.TaskLifecycleHookPrestart { onSuccess = tlc.Sidecar } + // Poststart sidecars should get restarted on success + if tlc != nil && tlc.Hook == structs.TaskLifecycleHookPoststart { + onSuccess = tlc.Sidecar + } + return &RestartTracker{ startTime: time.Now(), onSuccess: onSuccess, From 0d504ade1b55c2eb4d480644d302d1d1e51eeabf Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Tue, 11 Aug 2020 09:47:44 -0700 Subject: [PATCH 05/10] client: remove shortcircuit preventing poststart hooks from running --- client/allocrunner/task_hook_coordinator.go | 29 +++++++++------------ 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/client/allocrunner/task_hook_coordinator.go b/client/allocrunner/task_hook_coordinator.go index 42890f78f22f..7d69cea9f6eb 100644 --- a/client/allocrunner/task_hook_coordinator.go +++ b/client/allocrunner/task_hook_coordinator.go @@ -18,12 +18,12 @@ type taskHookCoordinator struct { mainTaskCtx context.Context mainTaskCtxCancel func() - poststartTaskCtx context.Context + poststartTaskCtx context.Context poststartTaskCtxCancel func() prestartSidecar map[string]struct{} prestartEphemeral map[string]struct{} - mainTasksPending map[string]struct{} + mainTasksPending map[string]struct{} } func newTaskHookCoordinator(logger hclog.Logger, tasks []*structs.Task) *taskHookCoordinator { @@ -34,14 +34,14 @@ func newTaskHookCoordinator(logger hclog.Logger, tasks []*structs.Task) *taskHoo poststartTaskCtx, poststartCancelFn := context.WithCancel(context.Background()) c := &taskHookCoordinator{ - logger: logger, - closedCh: closedCh, - mainTaskCtx: mainTaskCtx, - mainTaskCtxCancel: mainCancelFn, - prestartSidecar: map[string]struct{}{}, - prestartEphemeral: map[string]struct{}{}, - mainTasksPending: map[string]struct{}{}, - poststartTaskCtx: poststartTaskCtx, + logger: logger, + closedCh: closedCh, + mainTaskCtx: mainTaskCtx, + mainTaskCtxCancel: mainCancelFn, + prestartSidecar: map[string]struct{}{}, + prestartEphemeral: map[string]struct{}{}, + mainTasksPending: map[string]struct{}{}, + poststartTaskCtx: poststartTaskCtx, poststartTaskCtxCancel: poststartCancelFn, } c.setTasks(tasks) @@ -63,8 +63,10 @@ func (c *taskHookCoordinator) setTasks(tasks []*structs.Task) { } else { c.prestartEphemeral[task.Name] = struct{}{} } + case structs.TaskLifecycleHookPoststart: + // Poststart hooks don't need to be tracked. default: - c.logger.Error("invalid lifecycle hook", "hook", task.Lifecycle.Hook) + c.logger.Error("invalid lifecycle hook", "task", task.Name, "hook", task.Lifecycle.Hook) } } @@ -99,11 +101,6 @@ func (c *taskHookCoordinator) startConditionForTask(task *structs.Task) <-chan s // This is not thread safe! This must only be called from one thread per alloc runner. func (c *taskHookCoordinator) taskStateUpdated(states map[string]*structs.TaskState) { - if c.mainTaskCtx.Err() != nil { - // nothing to do here - return - } - for task := range c.prestartSidecar { st := states[task] if st == nil || st.StartedAt.IsZero() { From 599b56e054f6f183243d41673cb003ec2a206e50 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 12 Aug 2020 09:54:14 -0700 Subject: [PATCH 06/10] test: add allocrunner test for poststart hooks --- client/allocrunner/alloc_runner_test.go | 103 +++++++++++++++++- .../allocrunner/task_hook_coordinator_test.go | 14 +-- nomad/mock/mock.go | 2 + 3 files changed, 107 insertions(+), 12 deletions(-) diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 49c5af7de374..24344669a28d 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -142,6 +142,100 @@ func TestAllocRunner_TaskLeader_KillTG(t *testing.T) { }) } +// TestAllocRunner_Lifecycle_Poststart asserts that a service job with 2 +// poststart lifecycle hooks (1 sidecar, 1 ephemeral) starts all 3 tasks, only +// the ephemeral one finishes, and the other 2 exit when the alloc is stopped. +func TestAllocRunner_Lifecycle_Poststart(t *testing.T) { + alloc := mock.LifecycleAlloc() + + alloc.Job.Type = structs.JobTypeService + mainTask := alloc.Job.TaskGroups[0].Tasks[0] + mainTask.Config["run_for"] = "100s" + + sidecarTask := alloc.Job.TaskGroups[0].Tasks[1] + sidecarTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart + sidecarTask.Config["run_for"] = "100s" + + ephemeralTask := alloc.Job.TaskGroups[0].Tasks[2] + ephemeralTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + ar, err := NewAllocRunner(conf) + require.NoError(t, err) + defer destroy(ar) + go ar.Run() + + upd := conf.StateUpdater.(*MockStateUpdater) + + // Wait for main and sidecar tasks to be running, and that the + // ephemeral task ran and exited. + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + if last == nil { + return false, fmt.Errorf("No updates") + } + + if last.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus) + } + + if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateRunning { + return false, fmt.Errorf("expected main task to be running not %s", s) + } + + if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateRunning { + return false, fmt.Errorf("expected sidecar task to be running not %s", s) + } + + if s := last.TaskStates[ephemeralTask.Name].State; s != structs.TaskStateDead { + return false, fmt.Errorf("expected ephemeral task to be dead not %s", s) + } + + if last.TaskStates[ephemeralTask.Name].Failed { + return false, fmt.Errorf("expected ephemeral task to be successful not failed") + } + + return true, nil + }, func(err error) { + t.Fatalf("error waiting for initial state:\n%v", err) + }) + + // Tell the alloc to stop + stopAlloc := alloc.Copy() + stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop + ar.Update(stopAlloc) + + // Wait for main and sidecar tasks to stop. + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + + if last.ClientStatus != structs.AllocClientStatusComplete { + return false, fmt.Errorf("expected alloc to be running not %s", last.ClientStatus) + } + + if s := last.TaskStates[mainTask.Name].State; s != structs.TaskStateDead { + return false, fmt.Errorf("expected main task to be dead not %s", s) + } + + if last.TaskStates[mainTask.Name].Failed { + return false, fmt.Errorf("expected main task to be successful not failed") + } + + if s := last.TaskStates[sidecarTask.Name].State; s != structs.TaskStateDead { + return false, fmt.Errorf("expected sidecar task to be dead not %s", s) + } + + if last.TaskStates[sidecarTask.Name].Failed { + return false, fmt.Errorf("expected sidecar task to be successful not failed") + } + + return true, nil + }, func(err error) { + t.Fatalf("error waiting for initial state:\n%v", err) + }) +} + // TestAllocRunner_TaskMain_KillTG asserts that when main tasks die the // entire task group is killed. func TestAllocRunner_TaskMain_KillTG(t *testing.T) { @@ -162,7 +256,7 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { Sidecar: true, } - sidecar.Config = map[string]interface{}{ + sidecar.Config = map[string]interface{}{ "run_for": "100s", } @@ -196,10 +290,10 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { alloc.Job.TaskGroups[0].Tasks = []*structs.Task{sidecar, main1, main2} alloc.AllocatedResources.Tasks = map[string]*structs.AllocatedTaskResources{ - sidecar.Name: tr, + sidecar.Name: tr, poststart.Name: tr, - main1.Name: tr, - main2.Name: tr, + main1.Name: tr, + main2.Name: tr, } conf, cleanup := testAllocRunnerConfig(t, alloc) @@ -265,7 +359,6 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { return false, fmt.Errorf("Did not find event %v: %#+v", structs.TaskMainDead, state.Events) } - // main tasks should die naturely state = last.TaskStates[main1.Name] if state.State != structs.TaskStateDead { diff --git a/client/allocrunner/task_hook_coordinator_test.go b/client/allocrunner/task_hook_coordinator_test.go index ac82a9f00c77..e4343d915931 100644 --- a/client/allocrunner/task_hook_coordinator_test.go +++ b/client/allocrunner/task_hook_coordinator_test.go @@ -234,6 +234,7 @@ func TestTaskHookCoordinator_PoststartStartsAfterMain(t *testing.T) { sideTask := tasks[1] postTask := tasks[2] + // Make the the third task a poststart hook postTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart coord := newTaskHookCoordinator(logger, tasks) @@ -251,14 +252,14 @@ func TestTaskHookCoordinator_PoststartStartsAfterMain(t *testing.T) { Failed: false, }, mainTask.Name: { - State: structs.TaskStateRunning, - Failed: false, - StartedAt: time.Now(), + State: structs.TaskStateRunning, + Failed: false, + StartedAt: time.Now(), }, sideTask.Name: { - State: structs.TaskStateRunning, - Failed: false, - StartedAt: time.Now(), + State: structs.TaskStateRunning, + Failed: false, + StartedAt: time.Now(), }, } @@ -269,7 +270,6 @@ func TestTaskHookCoordinator_PoststartStartsAfterMain(t *testing.T) { require.Truef(t, isChannelClosed(mainCh), "%s channel was open, should be closed", mainTask.Name) } - func isChannelClosed(ch <-chan struct{}) bool { select { case <-ch: diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 2f99a60449da..57b68fe4f845 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -367,6 +367,7 @@ func VariableLifecycleJob(resources structs.Resources, main int, init int, side job.Canonicalize() return job } + func LifecycleJob() *structs.Job { job := &structs.Job{ Region: "global", @@ -454,6 +455,7 @@ func LifecycleJob() *structs.Job { job.Canonicalize() return job } + func LifecycleAlloc() *structs.Allocation { alloc := &structs.Allocation{ ID: uuid.Generate(), From c302c4169ad3a1d6b6cf11c88caac08b08f2af24 Mon Sep 17 00:00:00 2001 From: Jasmine Dahilig Date: Tue, 18 Aug 2020 10:49:50 -0700 Subject: [PATCH 07/10] task lifecycle: e2e tests --- e2e/e2e_test.go | 1 + e2e/lifecycle/inputs/batch.nomad | 127 +++++++++++++++++++++++ e2e/lifecycle/inputs/service.nomad | 160 +++++++++++++++++++++++++++++ e2e/lifecycle/lifecycle.go | 115 +++++++++++++++++++++ 4 files changed, 403 insertions(+) create mode 100644 e2e/lifecycle/inputs/batch.nomad create mode 100644 e2e/lifecycle/inputs/service.nomad create mode 100644 e2e/lifecycle/lifecycle.go diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go index 0b0f024cfbef..f8d4032d3a84 100644 --- a/e2e/e2e_test.go +++ b/e2e/e2e_test.go @@ -17,6 +17,7 @@ import ( _ "github.com/hashicorp/nomad/e2e/deployment" _ "github.com/hashicorp/nomad/e2e/example" _ "github.com/hashicorp/nomad/e2e/hostvolumes" + _ "github.com/hashicorp/nomad/e2e/lifecycle" _ "github.com/hashicorp/nomad/e2e/metrics" _ "github.com/hashicorp/nomad/e2e/nomad09upgrade" _ "github.com/hashicorp/nomad/e2e/nomadexec" diff --git a/e2e/lifecycle/inputs/batch.nomad b/e2e/lifecycle/inputs/batch.nomad new file mode 100644 index 000000000000..744bcf7b6213 --- /dev/null +++ b/e2e/lifecycle/inputs/batch.nomad @@ -0,0 +1,127 @@ +# lifecycle hook test job for batch jobs. touches, removes, and tests +# for the existence of files to assert the order of running tasks. +# all tasks should exit 0 and the alloc dir should contain the following +# files: ./init-ran, ./main-ran, ./poststart-run + +job "batch-lifecycle" { + + datacenters = ["dc1"] + + type = "batch" + + group "test" { + + task "init" { + + lifecycle { + hook = "prestart" + } + + driver = "docker" + + config { + image = "busybox:1" + command = "/bin/sh" + args = ["local/prestart.sh"] + } + + template { + data = < Date: Thu, 20 Aug 2020 08:07:18 -0700 Subject: [PATCH 08/10] task lifecycle: make e2e service job test block until poststart task has started --- e2e/lifecycle/inputs/service.nomad | 4 +++- e2e/lifecycle/lifecycle.go | 22 +++++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/e2e/lifecycle/inputs/service.nomad b/e2e/lifecycle/inputs/service.nomad index ce4929721a94..215278e831dc 100644 --- a/e2e/lifecycle/inputs/service.nomad +++ b/e2e/lifecycle/inputs/service.nomad @@ -8,7 +8,7 @@ job "service-lifecycle" { datacenters = ["dc1"] - type = "batch" + type = "service" group "test" { @@ -65,6 +65,7 @@ EOT template { data = < Date: Thu, 20 Aug 2020 08:49:58 -0700 Subject: [PATCH 09/10] task lifecycle: e2e fix more alloc stop races --- e2e/lifecycle/inputs/service.nomad | 6 ++---- e2e/lifecycle/lifecycle.go | 19 ++++++++++++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/e2e/lifecycle/inputs/service.nomad b/e2e/lifecycle/inputs/service.nomad index 215278e831dc..5a35fc86ac07 100644 --- a/e2e/lifecycle/inputs/service.nomad +++ b/e2e/lifecycle/inputs/service.nomad @@ -65,7 +65,6 @@ EOT template { data = < Date: Mon, 31 Aug 2020 13:22:41 -0700 Subject: [PATCH 10/10] task lifecycle poststart: code review fixes --- client/allocrunner/alloc_runner_test.go | 24 +++++++++++++++--------- e2e/lifecycle/lifecycle.go | 8 ++------ nomad/structs/structs.go | 2 +- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 24344669a28d..b64990e0497d 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -247,16 +247,16 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { alloc.Job.TaskGroups[0].Tasks[0].RestartPolicy.Attempts = 0 // Create four tasks in the task group - sidecar := alloc.Job.TaskGroups[0].Tasks[0].Copy() - sidecar.Name = "prestart-sidecar" - sidecar.Driver = "mock_driver" - sidecar.KillTimeout = 10 * time.Millisecond - sidecar.Lifecycle = &structs.TaskLifecycleConfig{ + prestart := alloc.Job.TaskGroups[0].Tasks[0].Copy() + prestart.Name = "prestart-sidecar" + prestart.Driver = "mock_driver" + prestart.KillTimeout = 10 * time.Millisecond + prestart.Lifecycle = &structs.TaskLifecycleConfig{ Hook: structs.TaskLifecycleHookPrestart, Sidecar: true, } - sidecar.Config = map[string]interface{}{ + prestart.Config = map[string]interface{}{ "run_for": "100s", } @@ -288,9 +288,9 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { "run_for": "2s", } - alloc.Job.TaskGroups[0].Tasks = []*structs.Task{sidecar, main1, main2} + alloc.Job.TaskGroups[0].Tasks = []*structs.Task{prestart, poststart, main1, main2} alloc.AllocatedResources.Tasks = map[string]*structs.AllocatedTaskResources{ - sidecar.Name: tr, + prestart.Name: tr, poststart.Name: tr, main1.Name: tr, main2.Name: tr, @@ -327,7 +327,10 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { var state *structs.TaskState // both sidecars should be killed because Task2 exited - state = last.TaskStates[sidecar.Name] + state = last.TaskStates[prestart.Name] + if state == nil { + return false, fmt.Errorf("could not find state for task %s", prestart.Name) + } if state.State != structs.TaskStateDead { return false, fmt.Errorf("got state %v; want %v", state.State, structs.TaskStateDead) } @@ -344,6 +347,9 @@ func TestAllocRunner_TaskMain_KillTG(t *testing.T) { } state = last.TaskStates[poststart.Name] + if state == nil { + return false, fmt.Errorf("could not find state for task %s", poststart.Name) + } if state.State != structs.TaskStateDead { return false, fmt.Errorf("got state %v; want %v", state.State, structs.TaskStateDead) } diff --git a/e2e/lifecycle/lifecycle.go b/e2e/lifecycle/lifecycle.go index 075612d652f9..89a42ff16cca 100644 --- a/e2e/lifecycle/lifecycle.go +++ b/e2e/lifecycle/lifecycle.go @@ -2,6 +2,7 @@ package lifecycle import ( "fmt" + "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/e2e/e2eutil" "github.com/hashicorp/nomad/e2e/framework" @@ -18,8 +19,7 @@ type LifecycleE2ETest struct { func init() { framework.AddSuites(&framework.TestSuite{ - Component: "Lifecycle", - // YOU COULD RUN THIS LOCALLY BC DIS FLAG + Component: "Lifecycle", CanRunLocal: true, Cases: []framework.TestCase{new(LifecycleE2ETest)}, }) @@ -61,10 +61,6 @@ func (tc *LifecycleE2ETest) TestBatchJob(f *framework.F) { require.Equal(expected, got) } -// TODO: cleanup == poststop -// q: what is a good example for a poststart? -// a: notify(-slack) - // TestServiceJob runs a service job with prestart and poststop hooks func (tc *LifecycleE2ETest) TestServiceJob(f *framework.F) { t := f.T() diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 437af243cd8c..4c4aefdb6333 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -4905,7 +4905,7 @@ func (d *DispatchPayloadConfig) Validate() error { } const ( - TaskLifecycleHookPrestart = "prestart" + TaskLifecycleHookPrestart = "prestart" TaskLifecycleHookPoststart = "poststart" )