diff --git a/client/alloc_endpoint_test.go b/client/alloc_endpoint_test.go index 272819ecc35c..9fc912e460bd 100644 --- a/client/alloc_endpoint_test.go +++ b/client/alloc_endpoint_test.go @@ -67,6 +67,45 @@ func TestAllocations_Restart(t *testing.T) { }) } +func TestAllocations_RestartAllTasks(t *testing.T) { + ci.Parallel(t) + + require := require.New(t) + client, cleanup := TestClient(t, nil) + defer cleanup() + + alloc := mock.LifecycleAlloc() + require.Nil(client.addAlloc(alloc, "")) + + // Can't restart all tasks while specifying a task name. + req := &nstructs.AllocRestartRequest{ + AllocID: alloc.ID, + AllTasks: true, + TaskName: "web", + } + var resp nstructs.GenericResponse + err := client.ClientRPC("Allocations.Restart", &req, &resp) + require.Error(err) + + // Good request. + req = &nstructs.AllocRestartRequest{ + AllocID: alloc.ID, + AllTasks: true, + } + + testutil.WaitForResult(func() (bool, error) { + var resp2 nstructs.GenericResponse + err := client.ClientRPC("Allocations.Restart", &req, &resp2) + if err != nil && strings.Contains(err.Error(), "not running") { + return false, err + } + + return true, nil + }, func(err error) { + t.Fatalf("err: %v", err) + }) +} + func TestAllocations_Restart_ACL(t *testing.T) { ci.Parallel(t) require := require.New(t) diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index 1b3ead14daa7..c148293d6aa3 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -10,6 +10,7 @@ import ( "time" "github.com/hashicorp/consul/api" + multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allochealth" "github.com/hashicorp/nomad/client/allocrunner/tasklifecycle" @@ -483,6 +484,468 @@ func TestAllocRunner_Lifecycle_Poststop(t *testing.T) { } +func TestAllocRunner_Lifecycle_Restart(t *testing.T) { + ci.Parallel(t) + + // test cases can use this default or override w/ taskDefs param + 
alloc := mock.LifecycleAllocFromTasks([]mock.LifecycleTaskDef{ + {"main", "100s", 0, "", false}, + {"prestart-oneshot", "1s", 0, "prestart", false}, + {"prestart-sidecar", "100s", 0, "prestart", true}, + {"poststart-oneshot", "1s", 0, "poststart", false}, + {"poststart-sidecar", "100s", 0, "poststart", true}, + {"poststop", "1s", 0, "poststop", false}, + }) + alloc.Job.Type = structs.JobTypeService + rp := &structs.RestartPolicy{ + Attempts: 1, + Interval: 10 * time.Minute, + Delay: 1 * time.Nanosecond, + Mode: structs.RestartPolicyModeFail, + } + + testCases := []struct { + name string + taskDefs []mock.LifecycleTaskDef + isBatch bool + hasLeader bool + action func(*allocRunner, *structs.Allocation) error + expectedErr string + expectedAfter map[string]structs.TaskState + }{ + { + name: "restart entire allocation", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + ev := &structs.TaskEvent{Type: structs.TaskRestartAllSignal} + return ar.RestartAll(ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "restart only running tasks", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + ev := &structs.TaskEvent{Type: structs.TaskRestartRunningSignal} + return ar.RestartRunning(ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + 
"poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "batch job restart entire allocation", + taskDefs: []mock.LifecycleTaskDef{ + {"main", "100s", 1, "", false}, + {"prestart-oneshot", "1s", 0, "prestart", false}, + {"prestart-sidecar", "100s", 0, "prestart", true}, + {"poststart-oneshot", "1s", 0, "poststart", false}, + {"poststart-sidecar", "100s", 0, "poststart", true}, + {"poststop", "1s", 0, "poststop", false}, + }, + isBatch: true, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + ev := &structs.TaskEvent{Type: structs.TaskRestartAllSignal} + return ar.RestartAll(ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "batch job restart only running tasks ", + taskDefs: []mock.LifecycleTaskDef{ + {"main", "100s", 1, "", false}, + {"prestart-oneshot", "1s", 0, "prestart", false}, + {"prestart-sidecar", "100s", 0, "prestart", true}, + {"poststart-oneshot", "1s", 0, "poststart", false}, + {"poststart-sidecar", "100s", 0, "poststart", true}, + {"poststop", "1s", 0, "poststop", false}, + }, + isBatch: true, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + ev := &structs.TaskEvent{Type: structs.TaskRestartRunningSignal} + return ar.RestartRunning(ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + 
"poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "restart entire allocation with leader", + hasLeader: true, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + ev := &structs.TaskEvent{Type: structs.TaskRestartAllSignal} + return ar.RestartAll(ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 1}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "stop from server", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + stopAlloc := alloc.Copy() + stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop + ar.Update(stopAlloc) + return nil + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "restart main task", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + ev := &structs.TaskEvent{Type: structs.TaskRestartSignal} + return ar.RestartTask("main", ev) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": 
structs.TaskState{State: "running", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 0}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "restart leader main task", + hasLeader: true, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return ar.RestartTask("main", &structs.TaskEvent{Type: structs.TaskRestartSignal}) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 0}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "main task fails and restarts once", + taskDefs: []mock.LifecycleTaskDef{ + {"main", "2s", 1, "", false}, + {"prestart-oneshot", "1s", 0, "prestart", false}, + {"prestart-sidecar", "100s", 0, "prestart", true}, + {"poststart-oneshot", "1s", 0, "poststart", false}, + {"poststart-sidecar", "100s", 0, "poststart", true}, + {"poststop", "1s", 0, "poststop", false}, + }, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + time.Sleep(3 * time.Second) // make sure main task has exited + return nil + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "leader main task fails and restarts once", + taskDefs: 
[]mock.LifecycleTaskDef{ + {"main", "2s", 1, "", false}, + {"prestart-oneshot", "1s", 0, "prestart", false}, + {"prestart-sidecar", "100s", 0, "prestart", true}, + {"poststart-oneshot", "1s", 0, "poststart", false}, + {"poststart-sidecar", "100s", 0, "poststart", true}, + {"poststop", "1s", 0, "poststop", false}, + }, + hasLeader: true, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + time.Sleep(3 * time.Second) // make sure main task has exited + return nil + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "main stopped unexpectedly and restarts once", + taskDefs: []mock.LifecycleTaskDef{ + {"main", "2s", 0, "", false}, + {"prestart-oneshot", "1s", 0, "prestart", false}, + {"prestart-sidecar", "100s", 0, "prestart", true}, + {"poststart-oneshot", "1s", 0, "poststart", false}, + {"poststart-sidecar", "100s", 0, "poststart", true}, + {"poststop", "1s", 0, "poststop", false}, + }, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + time.Sleep(3 * time.Second) // make sure main task has exited + return nil + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "leader main stopped unexpectedly and restarts once", 
+ taskDefs: []mock.LifecycleTaskDef{ + {"main", "2s", 0, "", false}, + {"prestart-oneshot", "1s", 0, "prestart", false}, + {"prestart-sidecar", "100s", 0, "prestart", true}, + {"poststart-oneshot", "1s", 0, "poststart", false}, + {"poststart-sidecar", "100s", 0, "poststart", true}, + {"poststop", "1s", 0, "poststop", false}, + }, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + time.Sleep(3 * time.Second) // make sure main task has exited + return nil + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "failed main task cannot be restarted", + taskDefs: []mock.LifecycleTaskDef{ + {"main", "2s", 1, "", false}, + {"prestart-oneshot", "1s", 0, "prestart", false}, + {"prestart-sidecar", "100s", 0, "prestart", true}, + {"poststart-oneshot", "1s", 0, "poststart", false}, + {"poststart-sidecar", "100s", 0, "poststart", true}, + {"poststop", "1s", 0, "poststop", false}, + }, + action: func(ar *allocRunner, alloc *structs.Allocation) error { + // make sure main task has had a chance to restart once on its + // own and fail again before we try to manually restart it + time.Sleep(5 * time.Second) + return ar.RestartTask("main", &structs.TaskEvent{Type: structs.TaskRestartSignal}) + }, + expectedErr: "Task not running", + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "dead", Restarts: 1}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + 
"poststart-sidecar": structs.TaskState{State: "dead", Restarts: 0}, + "poststop": structs.TaskState{State: "dead", Restarts: 0}, + }, + }, + { + name: "restart prestart-sidecar task", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return ar.RestartTask("prestart-sidecar", &structs.TaskEvent{Type: structs.TaskRestartSignal}) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 0}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 0}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + { + name: "restart poststart-sidecar task", + action: func(ar *allocRunner, alloc *structs.Allocation) error { + return ar.RestartTask("poststart-sidecar", &structs.TaskEvent{Type: structs.TaskRestartSignal}) + }, + expectedAfter: map[string]structs.TaskState{ + "main": structs.TaskState{State: "running", Restarts: 0}, + "prestart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "prestart-sidecar": structs.TaskState{State: "running", Restarts: 0}, + "poststart-oneshot": structs.TaskState{State: "dead", Restarts: 0}, + "poststart-sidecar": structs.TaskState{State: "running", Restarts: 1}, + "poststop": structs.TaskState{State: "pending", Restarts: 0}, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + ci.Parallel(t) + + alloc := alloc.Copy() + alloc.Job.TaskGroups[0].RestartPolicy = rp + if tc.taskDefs != nil { + alloc = mock.LifecycleAllocFromTasks(tc.taskDefs) + alloc.Job.Type = structs.JobTypeService + } + for _, task := range alloc.Job.TaskGroups[0].Tasks { + task.RestartPolicy = rp // tasks inherit the group policy + } + if tc.hasLeader { + for _, task := range alloc.Job.TaskGroups[0].Tasks { + if 
task.Name == "main" { + task.Leader = true + } + } + } + if tc.isBatch { + alloc.Job.Type = structs.JobTypeBatch + } + + conf, cleanup := testAllocRunnerConfig(t, alloc) + defer cleanup() + ar, err := NewAllocRunner(conf) + require.NoError(t, err) + defer destroy(ar) + go ar.Run() + + upd := conf.StateUpdater.(*MockStateUpdater) + + // assert our "before" states: + // - all one-shot tasks should be dead but not failed + // - all main tasks and sidecars should be running + // - no tasks should have restarted + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + if last == nil { + return false, fmt.Errorf("no update") + } + if last.ClientStatus != structs.AllocClientStatusRunning { + return false, fmt.Errorf( + "expected alloc to be running not %s", last.ClientStatus) + } + var errs *multierror.Error + + expectedBefore := map[string]string{ + "main": "running", + "prestart-oneshot": "dead", + "prestart-sidecar": "running", + "poststart-oneshot": "dead", + "poststart-sidecar": "running", + "poststop": "pending", + } + + for task, expected := range expectedBefore { + got, ok := last.TaskStates[task] + if !ok { + continue + } + if got.State != expected { + errs = multierror.Append(errs, fmt.Errorf( + "expected initial state of task %q to be %q not %q", + task, expected, got.State)) + } + if got.Restarts != 0 { + errs = multierror.Append(errs, fmt.Errorf( + "expected no initial restarts of task %q, not %q", + task, got.Restarts)) + } + if expected == "dead" && got.Failed { + errs = multierror.Append(errs, fmt.Errorf( + "expected ephemeral task %q to be dead but not failed", + task)) + } + + } + if errs.ErrorOrNil() != nil { + return false, errs.ErrorOrNil() + } + return true, nil + }, func(err error) { + require.NoError(t, err, "error waiting for initial state") + }) + + // perform the action + err = tc.action(ar, alloc.Copy()) + if tc.expectedErr != "" { + require.EqualError(t, err, tc.expectedErr) + } else { + require.NoError(t, err) + } + + // assert 
our "after" states + testutil.WaitForResult(func() (bool, error) { + last := upd.Last() + if last == nil { + return false, fmt.Errorf("no update") + } + var errs *multierror.Error + for task, expected := range tc.expectedAfter { + got, ok := last.TaskStates[task] + if !ok { + errs = multierror.Append(errs, fmt.Errorf( + "no final state found for task %q", task, + )) + } + if got.State != expected.State { + errs = multierror.Append(errs, fmt.Errorf( + "expected final state of task %q to be %q not %q", + task, expected.State, got.State)) + } + if expected.State == "dead" { + if got.FinishedAt.IsZero() || got.StartedAt.IsZero() { + errs = multierror.Append(errs, fmt.Errorf( + "expected final state of task %q to have start and finish time", task)) + } + if len(got.Events) < 2 { + errs = multierror.Append(errs, fmt.Errorf( + "expected final state of task %q to include at least 2 events", task)) + } + } + + if got.Restarts != expected.Restarts { + errs = multierror.Append(errs, fmt.Errorf( + "expected final restarts of task %q to be %v not %v", + task, expected.Restarts, got.Restarts)) + } + } + if errs.ErrorOrNil() != nil { + return false, errs.ErrorOrNil() + } + return true, nil + }, func(err error) { + require.NoError(t, err, "error waiting for final state") + }) + }) + } +} + func TestAllocRunner_TaskGroup_ShutdownDelay(t *testing.T) { ci.Parallel(t) diff --git a/client/allocrunner/alloc_runner_unix_test.go b/client/allocrunner/alloc_runner_unix_test.go index ab3c777dd20f..7afb6faeaa5c 100644 --- a/client/allocrunner/alloc_runner_unix_test.go +++ b/client/allocrunner/alloc_runner_unix_test.go @@ -154,9 +154,10 @@ func TestAllocRunner_Restore_CompletedBatch(t *testing.T) { alloc := mock.Alloc() alloc.Job.Type = structs.JobTypeBatch task := alloc.Job.TaskGroups[0].Tasks[0] - task.Driver = "mock_driver" + task.Driver = "raw_exec" task.Config = map[string]interface{}{ - "run_for": "2ms", + "command": "sleep", + "args": []string{"2"}, + } conf, cleanup := 
testAllocRunnerConfig(t, alloc.Copy()) @@ -207,18 +208,18 @@ func TestAllocRunner_Restore_CompletedBatch(t *testing.T) { go ar2.Run() defer destroy(ar2) - // AR waitCh must be closed even when task doesn't run again + // AR waitCh must be open as the task waits for a possible alloc restart. select { case <-ar2.WaitCh(): - case <-time.After(10 * time.Second): - require.Fail(t, "alloc.waitCh wasn't closed") + require.Fail(t, "alloc.waitCh was closed") + default: } - // TR waitCh must be closed too! + // TR waitCh must be open too! select { case <-ar2.tasks[task.Name].WaitCh(): - case <-time.After(10 * time.Second): - require.Fail(t, "tr.waitCh wasn't closed") + require.Fail(t, "tr.waitCh was closed") + default: } // Assert that events are unmodified, which they would if task re-run diff --git a/client/allocrunner/taskrunner/task_runner_test.go b/client/allocrunner/taskrunner/task_runner_test.go index 235f1c7ee6e7..18cacaf418d5 100644 --- a/client/allocrunner/taskrunner/task_runner_test.go +++ b/client/allocrunner/taskrunner/task_runner_test.go @@ -349,6 +349,60 @@ func TestTaskRunner_Restore_Running(t *testing.T) { assert.Equal(t, 1, started) } +// TestTaskRunner_Restore_Dead asserts restoring a dead task will cause it to +// block in alloc restart loop and it will be able to restart on request. +func TestTaskRunner_Restore_Dead(t *testing.T) { + ci.Parallel(t) + + alloc := mock.BatchAlloc() + alloc.Job.TaskGroups[0].Count = 1 + task := alloc.Job.TaskGroups[0].Tasks[0] + // use raw_exec because mock_driver restore doesn't behave as expected. 
+ task.Driver = "raw_exec" + task.Config = map[string]interface{}{ + "command": "sleep", + "args": []string{"2"}, + } + conf, cleanup := testTaskRunnerConfig(t, alloc, task.Name) + conf.StateDB = cstate.NewMemDB(conf.Logger) // "persist" state between task runners + defer cleanup() + + // Run the first TaskRunner + origTR, err := NewTaskRunner(conf) + require.NoError(t, err) + go origTR.Run() + defer origTR.Kill(context.Background(), structs.NewTaskEvent("cleanup")) + + // Wait for it to be dead + testWaitForTaskToDie(t, origTR) + + // Cause TR to exit without shutting down task + origTR.Shutdown() + + // Start a new TaskRunner and make sure it does not rerun the task + newTR, err := NewTaskRunner(conf) + require.NoError(t, err) + + // Do the Restore + require.NoError(t, newTR.Restore()) + + go newTR.Run() + defer newTR.Kill(context.Background(), structs.NewTaskEvent("cleanup")) + + // Verify that the TaskRunner is still active + select { + case <-newTR.WaitCh(): + require.Fail(t, "WaitCh is not blocking") + default: + } + + // Verify that we can restart task + ev := &structs.TaskEvent{Type: structs.TaskRestartAllSignal} + err = newTR.Restart(context.Background(), ev, false) + require.NoError(t, err) + testWaitForTaskToStart(t, newTR) +} + // setupRestoreFailureTest starts a service, shuts down the task runner, and // kills the task before restarting a new TaskRunner. The new TaskRunner is // returned once it is running and waiting in pending along with a cleanup diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 8795de7e3217..146c759ee619 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -697,6 +697,50 @@ func LifecycleAlloc() *structs.Allocation { return alloc } +type LifecycleTaskDef struct { + Name string + RunFor string + ExitCode int + Hook string + IsSidecar bool +} + +// LifecycleAllocFromTasks generates an Allocation with mock tasks that have +// the provided lifecycles. 
+func LifecycleAllocFromTasks(tasks []LifecycleTaskDef) *structs.Allocation { + alloc := LifecycleAlloc() + alloc.Job.TaskGroups[0].Tasks = []*structs.Task{} + for _, task := range tasks { + var lc *structs.TaskLifecycleConfig + if task.Hook != "" { + // TODO: task coordinator doesn't treat nil and empty structs the same + lc = &structs.TaskLifecycleConfig{ + Hook: task.Hook, + Sidecar: task.IsSidecar, + } + } + + alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, + &structs.Task{ + Name: task.Name, + Driver: "mock_driver", + Config: map[string]interface{}{ + "run_for": task.RunFor, + "exit_code": task.ExitCode}, + Lifecycle: lc, + LogConfig: structs.DefaultLogConfig(), + Resources: &structs.Resources{CPU: 100, MemoryMB: 256}, + }, + ) + alloc.TaskResources[task.Name] = &structs.Resources{CPU: 100, MemoryMB: 256} + alloc.AllocatedResources.Tasks[task.Name] = &structs.AllocatedTaskResources{ + Cpu: structs.AllocatedCpuResources{CpuShares: 100}, + Memory: structs.AllocatedMemoryResources{MemoryMB: 256}, + } + } + return alloc +} + func LifecycleJobWithPoststopDeploy() *structs.Job { job := &structs.Job{ Region: "global",