diff --git a/api/tasks.go b/api/tasks.go index 7ef9fcdb33fa..10629aa2d2bb 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -648,9 +648,8 @@ const ( ) type TaskLifecycle struct { - Hook string `mapstructure:"hook" hcl:"hook,optional"` - Sidecar bool `mapstructure:"sidecar" hcl:"sidecar,optional"` - IgnoreMinHealthyTime bool `mapstructure:"ignore_min_healthy_time" hcl:"ignore_min_healthy_time,optional"` + Hook string `mapstructure:"hook" hcl:"hook,optional"` + Sidecar bool `mapstructure:"sidecar" hcl:"sidecar,optional"` } // Determine if lifecycle has user-input values diff --git a/client/allochealth/tracker.go b/client/allochealth/tracker.go index 5e2e8fa609f8..7c17b7e364de 100644 --- a/client/allochealth/tracker.go +++ b/client/allochealth/tracker.go @@ -66,9 +66,9 @@ type Tracker struct { // not needed allocStopped chan struct{} - // lifecycleTasks is a map of ephemeral tasks and their lifecycle configs. + // lifecycleTasks is a map of ephemeral tasks and their lifecycle hooks. // These tasks may terminate without affecting alloc health - lifecycleTasks map[string]*structs.TaskLifecycleConfig + lifecycleTasks map[string]string // l is used to lock shared fields listed below l sync.Mutex @@ -110,7 +110,7 @@ func NewTracker(parentCtx context.Context, logger hclog.Logger, alloc *structs.A consulClient: consulClient, checkLookupInterval: consulCheckLookupInterval, logger: logger, - lifecycleTasks: map[string]*structs.TaskLifecycleConfig{}, + lifecycleTasks: map[string]string{}, } t.taskHealth = make(map[string]*taskHealthState, len(t.tg.Tasks)) @@ -118,7 +118,7 @@ func NewTracker(parentCtx context.Context, logger hclog.Logger, alloc *structs.A t.taskHealth[task.Name] = &taskHealthState{task: task} if task.Lifecycle != nil && !task.Lifecycle.Sidecar { - t.lifecycleTasks[task.Name] = task.Lifecycle + t.lifecycleTasks[task.Name] = task.Lifecycle.Hook } for _, s := range task.Services { @@ -277,37 +277,23 @@ func (t *Tracker) watchTaskEvents() { // Detect if the alloc is unhealthy or if all tasks have started yet latestStartTime := time.Time{} for taskName, state := range alloc.TaskStates { - if t.lifecycleTasks != nil && t.lifecycleTasks[taskName] != nil { - // If the task is a poststop task we do not want to evaluate it - // since it will remain pending until the main task has finished or - // exited. - if t.lifecycleTasks[taskName].Hook == structs.TaskLifecycleHookPoststop { - continue - } - - // If this is a poststart task which has already succeeded we - // want to check for two possible success conditions before - // attempting to evaluate it. - if t.lifecycleTasks[taskName].Hook == structs.TaskLifecycleHookPoststart && state.Successful() { - - // If the task was successful and it's runtime is at least - // t.minHealthyTime, skip evaluation. - if state.FinishedAt.Sub(state.StartedAt) >= t.minHealthyTime { - continue - } + // If the task is a poststop task we do not want to evaluate it + // since it will remain pending until the main task has finished + // or exited. + if t.lifecycleTasks[taskName] == structs.TaskLifecycleHookPoststop { + continue + } - // If the task was successful and the user set - // 'ignore_min_healthy_time' to 'true', skip evaluation. - if t.lifecycleTasks[taskName].IgnoreMinHealthyTime { - continue - } - } + // If this is a poststart task which has already succeeded, we + // should skip evaluation. + if t.lifecycleTasks[taskName] == structs.TaskLifecycleHookPoststart && state.Successful() { + continue + } - // One of the tasks has failed so we can exit watching - if state.Failed || (!state.FinishedAt.IsZero() && t.lifecycleTasks[taskName].Hook != structs.TaskLifecycleHookPrestart) { - t.setTaskHealth(false, true) - return - } + // One of the tasks has failed so we can exit watching + if state.Failed || (!state.FinishedAt.IsZero() && t.lifecycleTasks[taskName] != structs.TaskLifecycleHookPrestart) { + t.setTaskHealth(false, true) + return } if state.State == structs.TaskStatePending { diff --git a/client/allochealth/tracker_test.go b/client/allochealth/tracker_test.go index 095537a6219d..f4aec166d9e2 100644 --- a/client/allochealth/tracker_test.go +++ b/client/allochealth/tracker_test.go @@ -129,52 +129,11 @@ func TestTracker_Checks_PendingPostStop_Healthy(t *testing.T) { } } -func TestTracker_Checks_PendingPostStart_Healthy(t *testing.T) { +func TestTracker_Succeeded_PostStart_Healthy(t *testing.T) { t.Parallel() - alloc := mock.LifecycleAllocWithPoststartDeploy(false) - alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = time.Millisecond * 100 - // Synthesize running alloc and tasks - alloc.ClientStatus = structs.AllocClientStatusRunning - alloc.TaskStates = map[string]*structs.TaskState{ - "web": { - State: structs.TaskStateRunning, - StartedAt: time.Now(), - }, - "post": { - State: structs.TaskStateDead, - StartedAt: time.Now(), - FinishedAt: time.Now().Add(alloc.Job.TaskGroups[0].Migrate.MinHealthyTime), - }, - } - - logger := testlog.HCLogger(t) - b := cstructs.NewAllocBroadcaster(logger) - defer b.Close() - - consul := consul.NewMockConsulServiceClient(t, logger) - ctx, cancelFn := context.WithCancel(context.Background()) - defer cancelFn() - - checkInterval := 10 * time.Millisecond - tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, - alloc.Job.TaskGroups[0].Migrate.MinHealthyTime, true) - tracker.checkLookupInterval = checkInterval - tracker.Start() - - select { - case <-time.After(alloc.Job.TaskGroups[0].Migrate.MinHealthyTime * 2): - require.Fail(t, "timed out while waiting for health") - case h := <-tracker.HealthyCh(): - require.True(t, h) - } -} - -func TestTracker_Checks_PendingPostStartIgnoreMinHealthyTime_Healthy(t *testing.T) { - t.Parallel() - - alloc := mock.LifecycleAllocWithPoststartDeploy(true) - alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = time.Millisecond * 100 + alloc := mock.LifecycleAllocWithPoststartDeploy() + alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = time.Millisecond * 1 // Synthesize running alloc and tasks alloc.ClientStatus = structs.AllocClientStatusRunning alloc.TaskStates = map[string]*structs.TaskState{ diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index d4522ac191db..b3a4b6bd225a 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1172,9 +1172,8 @@ func ApiTaskToStructsTask(job *structs.Job, group *structs.TaskGroup, if apiTask.Lifecycle != nil { structsTask.Lifecycle = &structs.TaskLifecycleConfig{ - Hook: apiTask.Lifecycle.Hook, - Sidecar: apiTask.Lifecycle.Sidecar, - IgnoreMinHealthyTime: apiTask.Lifecycle.IgnoreMinHealthyTime, + Hook: apiTask.Lifecycle.Hook, + Sidecar: apiTask.Lifecycle.Sidecar, } } } diff --git a/jobspec/parse_task.go b/jobspec/parse_task.go index ace151d74c54..ff81b6ba66a8 100644 --- a/jobspec/parse_task.go +++ b/jobspec/parse_task.go @@ -339,7 +339,6 @@ func parseTask(item *ast.ObjectItem, keys []string) (*api.Task, error) { valid := []string{ "hook", "sidecar", - "ignore_min_healthy_time", } if err := checkHCLKeys(lifecycleBlock.Val, valid); err != nil { return nil, multierror.Prefix(err, "lifecycle ->") diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 995ed28e25a0..343545ef87ea 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -683,11 +683,7 @@ func LifecycleJobWithPoststopDeploy() *structs.Job { return job } -func LifecycleJobWithPoststartDeploy(ignoreMinHealthyTime bool) *structs.Job { - lifecycleConfig := &structs.TaskLifecycleConfig{ - Hook: structs.TaskLifecycleHookPoststart, - IgnoreMinHealthyTime: ignoreMinHealthyTime, - } +func LifecycleJobWithPoststartDeploy() *structs.Job { job := &structs.Job{ Region: "global", ID: fmt.Sprintf("mock-service-%s", uuid.Generate()), @@ -750,7 +746,9 @@ func LifecycleJobWithPoststartDeploy(ignoreMinHealthyTime bool) *structs.Job { Config: map[string]interface{}{ "run_for": "1s", }, - Lifecycle: lifecycleConfig, + Lifecycle: &structs.TaskLifecycleConfig{ + Hook: structs.TaskLifecycleHookPoststart, + }, LogConfig: structs.DefaultLogConfig(), Resources: &structs.Resources{ CPU: 1000, @@ -865,7 +863,7 @@ func LifecycleAllocWithPoststopDeploy() *structs.Allocation { return alloc } -func LifecycleAllocWithPoststartDeploy(ignoreMinHealthyTime bool) *structs.Allocation { +func LifecycleAllocWithPoststartDeploy() *structs.Allocation { alloc := &structs.Allocation{ ID: uuid.Generate(), EvalID: uuid.Generate(), @@ -933,7 +931,7 @@ func LifecycleAllocWithPoststartDeploy(ignoreMinHealthyTime bool) *structs.Alloc }, }, }, - Job: LifecycleJobWithPoststartDeploy(ignoreMinHealthyTime), + Job: LifecycleJobWithPoststartDeploy(), DesiredStatus: structs.AllocDesiredStatusRun, ClientStatus: structs.AllocClientStatusPending, } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 3dcb266359ac..ac061ab0997f 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -5333,9 +5333,8 @@ const ( ) type TaskLifecycleConfig struct { - Hook string - Sidecar bool - IgnoreMinHealthyTime bool + Hook string + Sidecar bool } func (d *TaskLifecycleConfig) Copy() *TaskLifecycleConfig { @@ -7770,9 +7769,9 @@ func (ts *TaskState) Copy() *TaskState { return newTS } -// Successful returns whether a task finished successfully. This doesn't really -// have meaning on a non-batch allocation because a service and system -// allocation should not finish. +// Successful returns whether a task finished successfully. Only meaningful for +// for batch allocations or ephemeral (non-sidecar) lifecycle tasks part of a +// service or system allocation. func (ts *TaskState) Successful() bool { return ts.State == TaskStateDead && !ts.Failed }