diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 2b5041abe69e..c450b1759893 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -551,10 +551,11 @@ func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *st if a.ClientTerminalStatus() { alloc := ar.Alloc() - // If we are part of a deployment and the task has failed, mark the + // If we are part of a deployment and the alloc has failed, mark the // alloc as unhealthy. This guards against the watcher not be started. + // If the health status is already set then terminal allocations should not if a.ClientStatus == structs.AllocClientStatusFailed && - alloc.DeploymentID != "" && !a.DeploymentStatus.IsUnhealthy() { + alloc.DeploymentID != "" && !a.DeploymentStatus.HasHealth() { a.DeploymentStatus = &structs.AllocDeploymentStatus{ Healthy: helper.BoolToPtr(false), } diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index c8c3249aee0f..e969a31c7c03 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -780,7 +780,7 @@ func TestAllocRunner_HandlesArtifactFailure(t *testing.T) { // Test that alloc runner kills tasks in task group when another task fails func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { - alloc := mock.BatchAlloc() + alloc := mock.Alloc() tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name] alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0 @@ -792,6 +792,22 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { task.Config = map[string]interface{}{ "run_for": "10s", } + // Set a service with check + task.Services = []*structs.Service{ + { + Name: "fakservice", + PortLabel: "http", + Checks: []*structs.ServiceCheck{ + { + Name: "fakecheck", + Type: structs.ServiceCheckScript, + Command: "true", + Interval: 30 * time.Second, + Timeout: 5 * time.Second, + }, + }, + }, + } task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy() task2.Name = "task 2" @@ -803,8 +819,38 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { alloc.AllocatedResources.Tasks[task.Name] = tr alloc.AllocatedResources.Tasks[task2.Name] = tr + // Make the alloc be part of a deployment + alloc.DeploymentID = uuid.Generate() + alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() + alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks + alloc.Job.TaskGroups[0].Update.MaxParallel = 1 + alloc.Job.TaskGroups[0].Update.MinHealthyTime = 10 * time.Millisecond + alloc.Job.TaskGroups[0].Update.HealthyDeadline = 2 * time.Second + + checkHealthy := &api.AgentCheck{ + CheckID: uuid.Generate(), + Status: api.HealthPassing, + } + conf, cleanup := testAllocRunnerConfig(t, alloc) defer cleanup() + + consulClient := conf.Consul.(*cconsul.MockConsulServiceClient) + consulClient.AllocRegistrationsFn = func(allocID string) (*consul.AllocRegistration, error) { + return &consul.AllocRegistration{ + Tasks: map[string]*consul.TaskRegistration{ + task.Name: { + Services: map[string]*consul.ServiceRegistration{ + "123": { + Service: &api.AgentService{Service: "fakeservice"}, + Checks: []*api.AgentCheck{checkHealthy}, + }, + }, + }, + }, + }, nil + } + ar, err := NewAllocRunner(conf) require.NoError(t, err) defer destroy(ar) @@ -850,6 +896,10 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { return false, fmt.Errorf("task2 should have failed") } + if !last.DeploymentStatus.HasHealth() { + return false, fmt.Errorf("Expected deployment health to be non nil") + } + return true, nil }, func(err error) { require.Fail(t, "err: %v", err)