Skip to content

Commit

Permalink
lifecycle: unit test for lifecycle task behavior on restarts
Browse files Browse the repository at this point in the history
Test the restart behavior of tasks with lifecycles when the allocation or
tasks are restarted.
  • Loading branch information
tgross committed Jun 21, 2021
1 parent 6dcada4 commit 3ca0645
Show file tree
Hide file tree
Showing 3 changed files with 379 additions and 175 deletions.
25 changes: 24 additions & 1 deletion client/allochealth/tracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,30 @@ func TestTracker_Checks_Healthy(t *testing.T) {
func TestTracker_Checks_PendingPostStop_Healthy(t *testing.T) {
t.Parallel()

alloc := mock.LifecycleAllocWithPoststopDeploy()
alloc := mock.LifecycleAllocFromTasks([]mock.LifecycleTaskDef{
{
Name: "web",
RunFor: "1s",
},
{
Name: "side",
RunFor: "1s",
Hook: structs.TaskLifecycleHookPrestart,
IsSidecar: true,
},
{
Name: "post",
RunFor: "1s",
Hook: structs.TaskLifecycleHookPoststop,
},
{
Name: "init",
RunFor: "1s",
Hook: structs.TaskLifecycleHookPrestart,
IsSidecar: false,
},
})

alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up

// Synthesize running alloc and tasks
Expand Down
320 changes: 318 additions & 2 deletions client/allocrunner/alloc_runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"time"

"github.com/hashicorp/consul/api"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allochealth"
"github.com/hashicorp/nomad/client/allocwatcher"
cconsul "github.com/hashicorp/nomad/client/consul"
Expand Down Expand Up @@ -236,9 +237,9 @@ func TestAllocRunner_Lifecycle_Poststart(t *testing.T) {
})
}

// TestAllocRunner_TaskMain_KillTG asserts that when main tasks die the
// TestAllocRunner_Lifecycle_TaskMain_KillTG asserts that when main tasks die the
// entire task group is killed.
func TestAllocRunner_TaskMain_KillTG(t *testing.T) {
func TestAllocRunner_Lifecycle_TaskMain_KillTG(t *testing.T) {
t.Parallel()

alloc := mock.BatchAlloc()
Expand Down Expand Up @@ -1568,3 +1569,318 @@ func TestAllocRunner_PersistState_Destroyed(t *testing.T) {
require.NoError(t, err)
require.Nil(t, ts)
}

// TestAllocRunner_Lifecycle_Restart exercises the restart behavior of tasks
// with lifecycle hooks when the allocation or individual tasks are restarted,
// stopped, or exit on their own.
//
// Each case starts an alloc runner, waits for the alloc to report a running
// client status with the expected initial task states and no restarts, runs
// the case's action, and then polls until the last state update matches the
// case's expectedAfter map (task state and restart count per task).
func TestAllocRunner_Lifecycle_Restart(t *testing.T) {

	// test cases can use this default or override w/ taskDefs param
	alloc := mock.LifecycleAllocFromTasks([]mock.LifecycleTaskDef{
		{"main", "100s", 0, "", false},
		{"prestart-oneshot", "1s", 0, "prestart", false},
		{"prestart-sidecar", "100s", 0, "prestart", true},
		{"poststart-oneshot", "1s", 0, "poststart", false},
		{"poststart-sidecar", "100s", 0, "poststart", true},
		{"poststop", "1s", 0, "poststop", false},
	})
	alloc.Job.Type = structs.JobTypeService
	rp := &structs.RestartPolicy{
		Attempts: 1,
		Interval: 10 * time.Minute,
		Delay:    1 * time.Nanosecond,
		Mode:     structs.RestartPolicyModeFail,
	}

	testCases := []struct {
		name          string
		taskDefs      []mock.LifecycleTaskDef // optional override of the default tasks
		action        func(*allocRunner, *structs.Allocation) error
		expectedErr   string // exact error expected from action; empty means no error
		expectedAfter map[string]structs.TaskState
	}{
		{
			name: "restart entire allocation",
			action: func(ar *allocRunner, alloc *structs.Allocation) error {
				return ar.RestartAll(&structs.TaskEvent{})
			},
			// TODO: currently failed because we get an error here!
			// TODO: some of these we're unsure of?
			expectedAfter: map[string]structs.TaskState{
				"main":              {State: "running", Restarts: 1},
				"prestart-oneshot":  {State: "dead", Restarts: 0},
				"prestart-sidecar":  {State: "running", Restarts: 1},
				"poststart-oneshot": {State: "dead", Restarts: 0},
				"poststart-sidecar": {State: "running", Restarts: 1},
				"poststop":          {State: "pending", Restarts: 0},
			},
		},

		{
			name: "stop from server",
			action: func(ar *allocRunner, alloc *structs.Allocation) error {
				stopAlloc := alloc.Copy()
				stopAlloc.DesiredStatus = structs.AllocDesiredStatusStop
				ar.Update(stopAlloc)
				return nil
			},
			expectedAfter: map[string]structs.TaskState{
				"main":              {State: "dead", Restarts: 0},
				"prestart-oneshot":  {State: "dead", Restarts: 0},
				"prestart-sidecar":  {State: "dead", Restarts: 0},
				"poststart-oneshot": {State: "dead", Restarts: 0},
				"poststart-sidecar": {State: "dead", Restarts: 0},
				"poststop":          {State: "dead", Restarts: 0},
			},
		},

		{
			name: "restart main task",
			action: func(ar *allocRunner, alloc *structs.Allocation) error {
				return ar.RestartTask("main", &structs.TaskEvent{})
			},
			expectedAfter: map[string]structs.TaskState{
				"main":              {State: "running", Restarts: 1},
				"prestart-oneshot":  {State: "dead", Restarts: 0},
				"prestart-sidecar":  {State: "running", Restarts: 0},
				"poststart-oneshot": {State: "dead", Restarts: 0},
				// TODO: poststart-sidecar is showing 0!
				"poststart-sidecar": {State: "running", Restarts: 1},
				"poststop":          {State: "pending", Restarts: 0},
			},
		},

		{
			name: "main task fails and restarts once",
			taskDefs: []mock.LifecycleTaskDef{
				{"main", "5s", 1, "", false},
				{"prestart-oneshot", "1s", 0, "prestart", false},
				{"prestart-sidecar", "100s", 0, "prestart", true},
				{"poststart-oneshot", "1s", 0, "poststart", false},
				{"poststart-sidecar", "100s", 0, "poststart", true},
				{"poststop", "1s", 0, "poststop", false},
			},
			action: func(ar *allocRunner, alloc *structs.Allocation) error {
				time.Sleep(5 * time.Second) // make sure main task has exited
				return nil
			},
			expectedAfter: map[string]structs.TaskState{
				"main":              {State: "dead", Restarts: 1},
				"prestart-oneshot":  {State: "dead", Restarts: 0},
				"prestart-sidecar":  {State: "dead", Restarts: 0},
				"poststart-oneshot": {State: "dead", Restarts: 0},
				"poststart-sidecar": {State: "dead", Restarts: 1},
				"poststop":          {State: "pending", Restarts: 0},
			},
		},

		{
			name: "main stopped unexpectedly and restarts once",
			taskDefs: []mock.LifecycleTaskDef{
				{"main", "5s", 0, "", false},
				{"prestart-oneshot", "1s", 0, "prestart", false},
				{"prestart-sidecar", "100s", 0, "prestart", true},
				{"poststart-oneshot", "1s", 0, "poststart", false},
				{"poststart-sidecar", "100s", 0, "poststart", true},
				{"poststop", "1s", 0, "poststop", false},
			},
			action: func(ar *allocRunner, alloc *structs.Allocation) error {
				time.Sleep(5 * time.Second) // make sure main task has exited
				return nil
			},
			expectedAfter: map[string]structs.TaskState{
				"main":              {State: "dead", Restarts: 1},
				"prestart-oneshot":  {State: "dead", Restarts: 0},
				"prestart-sidecar":  {State: "dead", Restarts: 0},
				"poststart-oneshot": {State: "dead", Restarts: 0},
				"poststart-sidecar": {State: "dead", Restarts: 1},
				"poststop":          {State: "pending", Restarts: 0},
			},
		},

		{
			name: "failed main task cannot be restarted",
			taskDefs: []mock.LifecycleTaskDef{
				{"main", "5s", 1, "", false},
				{"prestart-oneshot", "1s", 0, "prestart", false},
				{"prestart-sidecar", "100s", 0, "prestart", true},
				{"poststart-oneshot", "1s", 0, "poststart", false},
				{"poststart-sidecar", "100s", 0, "poststart", true},
				{"poststop", "1s", 0, "poststop", false},
			},
			action: func(ar *allocRunner, alloc *structs.Allocation) error {
				time.Sleep(5 * time.Second) // make sure main task has exited
				return ar.RestartTask("main", &structs.TaskEvent{})
			},
			expectedErr: "Task not running",
			expectedAfter: map[string]structs.TaskState{
				"main":              {State: "dead", Restarts: 0},
				"prestart-oneshot":  {State: "dead", Restarts: 0},
				"prestart-sidecar":  {State: "dead", Restarts: 0},
				"poststart-oneshot": {State: "dead", Restarts: 0},
				"poststart-sidecar": {State: "dead", Restarts: 0},
				"poststop":          {State: "dead", Restarts: 0},
			},
		},

		{
			name: "restart prestart-sidecar task",
			action: func(ar *allocRunner, alloc *structs.Allocation) error {
				return ar.RestartTask("prestart-sidecar", &structs.TaskEvent{})
			},
			expectedAfter: map[string]structs.TaskState{
				"main":              {State: "running", Restarts: 0},
				"prestart-oneshot":  {State: "dead", Restarts: 0},
				"prestart-sidecar":  {State: "running", Restarts: 1},
				"poststart-oneshot": {State: "dead", Restarts: 0},
				"poststart-sidecar": {State: "running", Restarts: 0},
				"poststop":          {State: "pending", Restarts: 0},
			},
		},

		{
			name: "restart poststart-sidecar task",
			action: func(ar *allocRunner, alloc *structs.Allocation) error {
				return ar.RestartTask("poststart-sidecar", &structs.TaskEvent{})
			},
			expectedAfter: map[string]structs.TaskState{
				"main":              {State: "running", Restarts: 0},
				"prestart-oneshot":  {State: "dead", Restarts: 0},
				"prestart-sidecar":  {State: "running", Restarts: 0},
				"poststart-oneshot": {State: "dead", Restarts: 0},
				"poststart-sidecar": {State: "running", Restarts: 1},
				"poststop":          {State: "pending", Restarts: 0},
			},
		},
	}

	for _, tc := range testCases {
		tc := tc // capture the range variable for the parallel subtest
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			require := require.New(t)

			alloc := alloc.Copy()
			alloc.Job.TaskGroups[0].RestartPolicy = rp
			// NOTE(review): when taskDefs is set the alloc is replaced below,
			// so the group-level policy assigned above is not carried over;
			// only the task-level policy is applied in that case. Confirm
			// whether that is intended.
			if tc.taskDefs != nil {
				alloc = mock.LifecycleAllocFromTasks(tc.taskDefs)
				alloc.Job.Type = structs.JobTypeService
			}
			for _, task := range alloc.Job.TaskGroups[0].Tasks {
				task.RestartPolicy = rp // tasks inherit the group policy
			}

			conf, cleanup := testAllocRunnerConfig(t, alloc)
			defer cleanup()
			ar, err := NewAllocRunner(conf)
			require.NoError(err)
			defer destroy(ar)
			go ar.Run()

			upd := conf.StateUpdater.(*MockStateUpdater)

			// assert our "before" states:
			// - all one-shot tasks should be dead but not failed
			// - all main tasks and sidecars should be running
			// - no tasks should have restarted
			testutil.WaitForResult(func() (bool, error) {
				last := upd.Last()
				if last == nil {
					return false, fmt.Errorf("no update")
				}
				if last.ClientStatus != structs.AllocClientStatusRunning {
					return false, fmt.Errorf(
						"expected alloc to be running not %s", last.ClientStatus)
				}
				var errs *multierror.Error

				expectedBefore := map[string]string{
					"main":              "running",
					"prestart-oneshot":  "dead",
					"prestart-sidecar":  "running",
					"poststart-oneshot": "dead",
					"poststart-sidecar": "running",
					"poststop":          "pending",
				}

				for task, expected := range expectedBefore {
					got, ok := last.TaskStates[task]
					if !ok {
						// tasks without a reported state yet are not checked
						continue
					}
					if got.State != expected {
						errs = multierror.Append(errs, fmt.Errorf(
							"expected initial state of task %q to be %q not %q",
							task, expected, got.State))
					}
					if got.Restarts != 0 {
						// Restarts is numeric, so print it with %v; %q would
						// render it as a quoted character literal.
						errs = multierror.Append(errs, fmt.Errorf(
							"expected no initial restarts of task %q, not %v",
							task, got.Restarts))
					}
					if expected == "dead" && got.Failed {
						errs = multierror.Append(errs, fmt.Errorf(
							"expected ephemeral task %q to be dead but not failed",
							task))
					}

				}
				if err := errs.ErrorOrNil(); err != nil {
					return false, err
				}
				return true, nil
			}, func(err error) {
				require.NoError(err, "error waiting for initial state")
			})

			// perform the action
			err = tc.action(ar, alloc.Copy())
			if tc.expectedErr != "" {
				require.EqualError(err, tc.expectedErr)
			} else {
				require.NoError(err)
			}

			// assert our "after" states
			testutil.WaitForResult(func() (bool, error) {
				last := upd.Last()
				if last == nil {
					return false, fmt.Errorf("no update")
				}
				var errs *multierror.Error
				for task, expected := range tc.expectedAfter {
					got, ok := last.TaskStates[task]
					if !ok {
						errs = multierror.Append(errs, fmt.Errorf(
							"no final state found for task %q", task,
						))
						// nothing further can be checked for a task with no
						// state; skip it rather than reading a missing entry
						continue
					}
					if got.State != expected.State {
						errs = multierror.Append(errs, fmt.Errorf(
							"expected final state of task %q to be %q not %q",
							task, expected.State, got.State))
					}
					if expected.State == "dead" {
						if got.FinishedAt.IsZero() || got.StartedAt.IsZero() {
							errs = multierror.Append(errs, fmt.Errorf(
								"expected final state of task %q to have start and finish time", task))
						}
						if len(got.Events) < 2 {
							errs = multierror.Append(errs, fmt.Errorf(
								"expected final state of task %q to include at least 2 events", task))
						}
					}

					if got.Restarts != expected.Restarts {
						errs = multierror.Append(errs, fmt.Errorf(
							"expected final restarts of task %q to be %v not %v",
							task, expected.Restarts, got.Restarts))
					}
				}
				if err := errs.ErrorOrNil(); err != nil {
					return false, err
				}
				return true, nil
			}, func(err error) {
				require.NoError(err, "error waiting for final state")
			})
		})
	}
}
Loading

0 comments on commit 3ca0645

Please sign in to comment.