From b4ecb448b3cd3a835f84344483a54b628e265e55 Mon Sep 17 00:00:00 2001 From: Preetha Appan Date: Thu, 2 May 2019 22:59:56 -0500 Subject: [PATCH 1/3] Update deployment health on failed allocations only if health is unset This fixes a confusing UX where a previously successful deployment's healthy/unhealthy count would get updated if any allocations failed after the deployment was already marked as successful. --- client/allocrunner/alloc_runner.go | 3 +- client/allocrunner/alloc_runner_test.go | 52 ++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 2b5041abe69e..0e43d3083389 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -553,8 +553,9 @@ func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *st // If we are part of a deployment and the task has failed, mark the // alloc as unhealthy. This guards against the watcher not be started. + // If the health status is already set then terminal allocations should not if a.ClientStatus == structs.AllocClientStatusFailed && - alloc.DeploymentID != "" && !a.DeploymentStatus.IsUnhealthy() { + alloc.DeploymentID != "" && !a.DeploymentStatus.HasHealth() && !a.DeploymentStatus.IsUnhealthy() { a.DeploymentStatus = &structs.AllocDeploymentStatus{ Healthy: helper.BoolToPtr(false), } diff --git a/client/allocrunner/alloc_runner_test.go b/client/allocrunner/alloc_runner_test.go index c8c3249aee0f..e969a31c7c03 100644 --- a/client/allocrunner/alloc_runner_test.go +++ b/client/allocrunner/alloc_runner_test.go @@ -780,7 +780,7 @@ func TestAllocRunner_HandlesArtifactFailure(t *testing.T) { // Test that alloc runner kills tasks in task group when another task fails func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { - alloc := mock.BatchAlloc() + alloc := mock.Alloc() tr := alloc.AllocatedResources.Tasks[alloc.Job.TaskGroups[0].Tasks[0].Name] alloc.Job.TaskGroups[0].RestartPolicy.Attempts = 0 @@ -792,6 +792,22 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { task.Config = map[string]interface{}{ "run_for": "10s", } + // Set a service with check + task.Services = []*structs.Service{ + { + Name: "fakservice", + PortLabel: "http", + Checks: []*structs.ServiceCheck{ + { + Name: "fakecheck", + Type: structs.ServiceCheckScript, + Command: "true", + Interval: 30 * time.Second, + Timeout: 5 * time.Second, + }, + }, + }, + } task2 := alloc.Job.TaskGroups[0].Tasks[0].Copy() task2.Name = "task 2" @@ -803,8 +819,38 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { alloc.AllocatedResources.Tasks[task.Name] = tr alloc.AllocatedResources.Tasks[task2.Name] = tr + // Make the alloc be part of a deployment + alloc.DeploymentID = uuid.Generate() + alloc.Job.TaskGroups[0].Update = structs.DefaultUpdateStrategy.Copy() + alloc.Job.TaskGroups[0].Update.HealthCheck = structs.UpdateStrategyHealthCheck_Checks + alloc.Job.TaskGroups[0].Update.MaxParallel = 1 + alloc.Job.TaskGroups[0].Update.MinHealthyTime = 10 * time.Millisecond + alloc.Job.TaskGroups[0].Update.HealthyDeadline = 2 * time.Second + + checkHealthy := &api.AgentCheck{ + CheckID: uuid.Generate(), + Status: api.HealthPassing, + } + conf, cleanup := testAllocRunnerConfig(t, alloc) defer cleanup() + + consulClient := conf.Consul.(*cconsul.MockConsulServiceClient) + consulClient.AllocRegistrationsFn = func(allocID string) (*consul.AllocRegistration, error) { + return &consul.AllocRegistration{ + Tasks: map[string]*consul.TaskRegistration{ + task.Name: { + Services: map[string]*consul.ServiceRegistration{ + "123": { + Service: &api.AgentService{Service: "fakeservice"}, + Checks: []*api.AgentCheck{checkHealthy}, + }, + }, + }, + }, + }, nil + } + ar, err := NewAllocRunner(conf) require.NoError(t, err) defer destroy(ar) @@ -850,6 +896,10 @@ func TestAllocRunner_TaskFailed_KillTG(t *testing.T) { return false, fmt.Errorf("task2 should have failed") } + if !last.DeploymentStatus.HasHealth() { + return false, fmt.Errorf("Expected deployment health to be non nil") + } + return true, nil }, func(err error) { require.Fail(t, "err: %v", err) From 96d69022dfc2c3d3a592138163f1e9986e130695 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 3 May 2019 10:00:17 -0500 Subject: [PATCH 2/3] Remove unnecessary boolean clause Co-Authored-By: preetapan --- client/allocrunner/alloc_runner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 0e43d3083389..f1d7e90feb90 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -555,7 +555,7 @@ func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *st // alloc as unhealthy. This guards against the watcher not be started. // If the health status is already set then terminal allocations should not if a.ClientStatus == structs.AllocClientStatusFailed && - alloc.DeploymentID != "" && !a.DeploymentStatus.HasHealth() && !a.DeploymentStatus.IsUnhealthy() { + alloc.DeploymentID != "" && !a.DeploymentStatus.HasHealth() { a.DeploymentStatus = &structs.AllocDeploymentStatus{ Healthy: helper.BoolToPtr(false), } From 5c43a16b037242c6cfb73d90a8ead4defe521988 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Fri, 3 May 2019 10:01:30 -0500 Subject: [PATCH 3/3] Fix comment Co-Authored-By: preetapan --- client/allocrunner/alloc_runner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index f1d7e90feb90..c450b1759893 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -551,7 +551,7 @@ func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *st if a.ClientTerminalStatus() { alloc := ar.Alloc() - // If we are part of a deployment and the task has failed, mark the + // If we are part of a deployment and the alloc has failed, mark the // alloc as unhealthy. This guards against the watcher not be started. // If the health status is already set then terminal allocations should not if a.ClientStatus == structs.AllocClientStatusFailed &&