Skip to content

Commit

Permalink
ignore poststop when setting latest start time for allocation
Browse files Browse the repository at this point in the history
  • Loading branch information
drewbailey committed Dec 7, 2020
1 parent d272803 commit a9436ba
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 15 deletions.
18 changes: 8 additions & 10 deletions client/allochealth/tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"sync"
"time"

"github.com/davecgh/go-spew/spew"
"github.com/hashicorp/consul/api"
hclog "github.com/hashicorp/go-hclog"
cconsul "github.com/hashicorp/nomad/client/consul"
Expand Down Expand Up @@ -69,7 +68,7 @@ type Tracker struct {

// lifecycleTasks is a set of tasks with lifecycle hook set and may
// terminate without affecting alloc health
lifecycleTasks map[string]bool
lifecycleTasks map[string]string

// l is used to lock shared fields listed below
l sync.Mutex
Expand Down Expand Up @@ -111,16 +110,15 @@ func NewTracker(parentCtx context.Context, logger hclog.Logger, alloc *structs.A
consulClient: consulClient,
checkLookupInterval: consulCheckLookupInterval,
logger: logger,
lifecycleTasks: map[string]bool{},
lifecycleTasks: map[string]string{},
}

t.taskHealth = make(map[string]*taskHealthState, len(t.tg.Tasks))
for _, task := range t.tg.Tasks {
t.taskHealth[task.Name] = &taskHealthState{task: task}

if task.Lifecycle != nil && !task.Lifecycle.Sidecar {
spew.Dump("ADDING LIFECYCLE TASK ", task.Name)
t.lifecycleTasks[task.Name] = true
t.lifecycleTasks[task.Name] = task.Lifecycle.Hook
}

for _, s := range task.Services {
Expand Down Expand Up @@ -280,12 +278,14 @@ func (t *Tracker) watchTaskEvents() {
latestStartTime := time.Time{}
for taskName, state := range alloc.TaskStates {
// One of the tasks has failed so we can exit watching
if state.Failed || (!state.FinishedAt.IsZero() && !t.lifecycleTasks[taskName]) {
if state.Failed || (!state.FinishedAt.IsZero() && (t.lifecycleTasks[taskName]) == "") {
t.setTaskHealth(false, true)
return
}

if (state.State == structs.TaskStatePending) && !t.lifecycleTasks[taskName] {
// Ignore poststop since it will be pending until the main job exists
if (state.State == structs.TaskStatePending) &&
t.lifecycleTasks[taskName] != structs.TaskLifecycleHookPoststop {
latestStartTime = time.Time{}
break
} else if state.StartedAt.After(latestStartTime) {
Expand Down Expand Up @@ -482,9 +482,7 @@ func (t *taskHealthState) event(deadline time.Time, minHealthyTime time.Duration

switch t.state.State {
case structs.TaskStatePending:
if t.task.Lifecycle == nil || t.task.Lifecycle.Hook != structs.TaskLifecycleHookPoststop {
return "Task not running by deadline", true
}
return "Task not running by deadline", true
case structs.TaskStateDead:
// hook tasks are healthy when dead successfully
if t.task.Lifecycle == nil || t.task.Lifecycle.Sidecar {
Expand Down
40 changes: 40 additions & 0 deletions client/allochealth/tracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,46 @@ func TestTracker_Checks_Healthy(t *testing.T) {
}
}

func TestTracker_Checks_PendingPostStop_Healthy(t *testing.T) {
t.Parallel()

alloc := mock.LifecycleAlloc()
alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up

// Synthesize running alloc and tasks
alloc.ClientStatus = structs.AllocClientStatusRunning
alloc.TaskStates = map[string]*structs.TaskState{
"web": {
State: structs.TaskStateRunning,
StartedAt: time.Now(),
},
"post": {
State: structs.TaskStatePending,
},
}

logger := testlog.HCLogger(t)
b := cstructs.NewAllocBroadcaster(logger)
defer b.Close()

consul := consul.NewMockConsulServiceClient(t, logger)
ctx, cancelFn := context.WithCancel(context.Background())
defer cancelFn()

checkInterval := 10 * time.Millisecond
tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
time.Millisecond, true)
tracker.checkLookupInterval = checkInterval
tracker.Start()

select {
case <-time.After(4 * checkInterval):
require.Fail(t, "timed out while waiting for health")
case h := <-tracker.HealthyCh():
require.True(t, h)
}
}

func TestTracker_Checks_Unhealthy(t *testing.T) {
t.Parallel()

Expand Down
24 changes: 21 additions & 3 deletions client/allocrunner/task_hook_coordinator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ func TestTaskHookCoordinator_PrestartRunsBeforeMain(t *testing.T) {

mainTask := tasks[0]
sideTask := tasks[1]
initTask := tasks[2]
initTask := tasks[3]

require.Equal(t, "web", mainTask.Name)
require.Equal(t, "side", sideTask.Name)
require.Equal(t, "init", initTask.Name)

coord := newTaskHookCoordinator(logger, tasks)
initCh := coord.startConditionForTask(initTask)
Expand All @@ -55,7 +59,11 @@ func TestTaskHookCoordinator_MainRunsAfterPrestart(t *testing.T) {

mainTask := tasks[0]
sideTask := tasks[1]
initTask := tasks[2]
initTask := tasks[3]

require.Equal(t, "web", mainTask.Name)
require.Equal(t, "side", sideTask.Name)
require.Equal(t, "init", initTask.Name)

coord := newTaskHookCoordinator(logger, tasks)
initCh := coord.startConditionForTask(initTask)
Expand Down Expand Up @@ -189,7 +197,11 @@ func TestTaskHookCoordinator_SidecarNeverStarts(t *testing.T) {

mainTask := tasks[0]
sideTask := tasks[1]
initTask := tasks[2]
initTask := tasks[3]

require.Equal(t, "web", mainTask.Name)
require.Equal(t, "side", sideTask.Name)
require.Equal(t, "init", initTask.Name)

coord := newTaskHookCoordinator(logger, tasks)
initCh := coord.startConditionForTask(initTask)
Expand Down Expand Up @@ -234,8 +246,14 @@ func TestTaskHookCoordinator_PoststartStartsAfterMain(t *testing.T) {
sideTask := tasks[1]
postTask := tasks[2]

require.Equal(t, "web", mainTask.Name)
require.Equal(t, "side", sideTask.Name)
require.Equal(t, "post", postTask.Name)
require.Equal(t, structs.TaskLifecycleHookPoststop, postTask.Lifecycle.Hook)

// Make the the third task a poststart hook
postTask.Lifecycle.Hook = structs.TaskLifecycleHookPoststart
require.Equal(t, structs.TaskLifecycleHookPoststart, postTask.Lifecycle.Hook)

coord := newTaskHookCoordinator(logger, tasks)
postCh := coord.startConditionForTask(postTask)
Expand Down
32 changes: 30 additions & 2 deletions nomad/mock/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,8 +395,9 @@ func LifecycleJob() *structs.Job {
},
TaskGroups: []*structs.TaskGroup{
{
Name: "web",
Count: 1,
Name: "web",
Count: 1,
Migrate: structs.DefaultMigrateStrategy(),
RestartPolicy: &structs.RestartPolicy{
Attempts: 0,
Interval: 10 * time.Minute,
Expand Down Expand Up @@ -432,6 +433,21 @@ func LifecycleJob() *structs.Job {
MemoryMB: 256,
},
},
{
Name: "post",
Driver: "mock_driver",
Config: map[string]interface{}{
"run_for": "1s",
},
Lifecycle: &structs.TaskLifecycleConfig{
Hook: structs.TaskLifecycleHookPoststop,
},
LogConfig: structs.DefaultLogConfig(),
Resources: &structs.Resources{
CPU: 1000,
MemoryMB: 256,
},
},
{
Name: "init",
Driver: "mock_driver",
Expand Down Expand Up @@ -490,6 +506,10 @@ func LifecycleAlloc() *structs.Allocation {
CPU: 1000,
MemoryMB: 256,
},
"post": {
CPU: 1000,
MemoryMB: 256,
},
},

AllocatedResources: &structs.AllocatedResources{
Expand Down Expand Up @@ -518,6 +538,14 @@ func LifecycleAlloc() *structs.Allocation {
MemoryMB: 256,
},
},
"post": {
Cpu: structs.AllocatedCpuResources{
CpuShares: 1000,
},
Memory: structs.AllocatedMemoryResources{
MemoryMB: 256,
},
},
},
},
Job: LifecycleJob(),
Expand Down

0 comments on commit a9436ba

Please sign in to comment.