Skip to content

Commit

Permalink
consul: able to set pass/fail thresholds on consul service checks
Browse files Browse the repository at this point in the history
This change adds the ability to set the fields `success_before_passing` and
`failures_before_critical` on Consul service check definitions. This is a
feature added to Consul v1.7.0 and later.
  https://www.consul.io/docs/agent/checks#success-failures-before-passing-critical

Nomad doesn't do much besides pass the fields through to Consul.

Fixes #6913
  • Loading branch information
shoenig committed Aug 10, 2020
1 parent 7dd307d commit 9424c55
Show file tree
Hide file tree
Showing 14 changed files with 394 additions and 136 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ FEATURES:
* **Multiple Vault Namespaces (Enterprise)**: Support for multiple Vault Namespaces [[GH-8453](https://github.com/hashicorp/nomad/issues/8453)]
* **Scaling Observability UI**: View changes in task group scale (both manual and automatic) over time. [[GH-8551](https://github.com/hashicorp/nomad/issues/8551)]

IMPROVEMENTS:

* consul: Added support for setting `success_before_passing` and `failures_before_critical` on consul service checks. [[GH-6913](https://github.com/hashicorp/nomad/issues/6913)]

BUG FIXES:

* core: Fixed a bug where `nomad job plan` reports success and no updates if the job contains a scaling policy [[GH-8567](https://github.com/hashicorp/nomad/issues/8567)]
Expand Down
50 changes: 30 additions & 20 deletions api/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,26 +73,28 @@ func (c *CheckRestart) Merge(o *CheckRestart) *CheckRestart {
// ServiceCheck represents the consul health check that Nomad registers.
type ServiceCheck struct {
//FIXME Id is unused. Remove?
Id string
Name string
Type string
Command string
Args []string
Path string
Protocol string
PortLabel string `mapstructure:"port"`
Expose bool
AddressMode string `mapstructure:"address_mode"`
Interval time.Duration
Timeout time.Duration
InitialStatus string `mapstructure:"initial_status"`
TLSSkipVerify bool `mapstructure:"tls_skip_verify"`
Header map[string][]string
Method string
CheckRestart *CheckRestart `mapstructure:"check_restart"`
GRPCService string `mapstructure:"grpc_service"`
GRPCUseTLS bool `mapstructure:"grpc_use_tls"`
TaskName string `mapstructure:"task"`
Id string
Name string
Type string
Command string
Args []string
Path string
Protocol string
PortLabel string `mapstructure:"port"`
Expose bool
AddressMode string `mapstructure:"address_mode"`
Interval time.Duration
Timeout time.Duration
InitialStatus string `mapstructure:"initial_status"`
TLSSkipVerify bool `mapstructure:"tls_skip_verify"`
Header map[string][]string
Method string
CheckRestart *CheckRestart `mapstructure:"check_restart"`
GRPCService string `mapstructure:"grpc_service"`
GRPCUseTLS bool `mapstructure:"grpc_use_tls"`
TaskName string `mapstructure:"task"`
SuccessBeforePassing int `mapstructure:"success_before_passing"`
FailuresBeforeCritical int `mapstructure:"failures_before_critical"`
}

// Service represents a Consul service definition.
Expand Down Expand Up @@ -136,6 +138,14 @@ func (s *Service) Canonicalize(t *Task, tg *TaskGroup, job *Job) {
for i, check := range s.Checks {
s.Checks[i].CheckRestart = s.CheckRestart.Merge(check.CheckRestart)
s.Checks[i].CheckRestart.Canonicalize()

if s.Checks[i].SuccessBeforePassing < 0 {
s.Checks[i].SuccessBeforePassing = 0
}

if s.Checks[i].FailuresBeforeCritical < 0 {
s.Checks[i].FailuresBeforeCritical = 0
}
}
}

Expand Down
34 changes: 34 additions & 0 deletions api/services_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,40 @@ import (
"github.com/stretchr/testify/require"
)

func TestService_Check_PassFail(t *testing.T) {
t.Parallel()

job := &Job{Name: stringToPtr("job")}
tg := &TaskGroup{Name: stringToPtr("group")}
task := &Task{Name: "task"}

t.Run("enforce minimums", func(t *testing.T) {
s := &Service{
Checks: []ServiceCheck{{
SuccessBeforePassing: -1,
FailuresBeforeCritical: -2,
}},
}

s.Canonicalize(task, tg, job)
require.Zero(t, s.Checks[0].SuccessBeforePassing)
require.Zero(t, s.Checks[0].FailuresBeforeCritical)
})

t.Run("normal", func(t *testing.T) {
s := &Service{
Checks: []ServiceCheck{{
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
}},
}

s.Canonicalize(task, tg, job)
require.Equal(t, 3, s.Checks[0].SuccessBeforePassing)
require.Equal(t, 4, s.Checks[0].FailuresBeforeCritical)
})
}

// TestService_CheckRestart asserts Service.CheckRestart settings are properly
// inherited by Checks.
func TestService_CheckRestart(t *testing.T) {
Expand Down
2 changes: 2 additions & 0 deletions command/agent/consul/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1290,6 +1290,8 @@ func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host
chkReg.Status = check.InitialStatus
chkReg.Timeout = check.Timeout.String()
chkReg.Interval = check.Interval.String()
chkReg.SuccessBeforePassing = check.SuccessBeforePassing
chkReg.FailuresBeforeCritical = check.FailuresBeforeCritical

// Require an address for http or tcp checks
if port == 0 && check.RequiresPort() {
Expand Down
34 changes: 18 additions & 16 deletions command/agent/job_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1045,22 +1045,24 @@ func ApiTaskToStructsTask(apiTask *api.Task, structsTask *structs.Task) {
structsTask.Services[i].Checks = make([]*structs.ServiceCheck, l)
for j, check := range service.Checks {
structsTask.Services[i].Checks[j] = &structs.ServiceCheck{
Name: check.Name,
Type: check.Type,
Command: check.Command,
Args: check.Args,
Path: check.Path,
Protocol: check.Protocol,
PortLabel: check.PortLabel,
AddressMode: check.AddressMode,
Interval: check.Interval,
Timeout: check.Timeout,
InitialStatus: check.InitialStatus,
TLSSkipVerify: check.TLSSkipVerify,
Header: check.Header,
Method: check.Method,
GRPCService: check.GRPCService,
GRPCUseTLS: check.GRPCUseTLS,
Name: check.Name,
Type: check.Type,
Command: check.Command,
Args: check.Args,
Path: check.Path,
Protocol: check.Protocol,
PortLabel: check.PortLabel,
AddressMode: check.AddressMode,
Interval: check.Interval,
Timeout: check.Timeout,
InitialStatus: check.InitialStatus,
TLSSkipVerify: check.TLSSkipVerify,
Header: check.Header,
Method: check.Method,
GRPCService: check.GRPCService,
GRPCUseTLS: check.GRPCUseTLS,
SuccessBeforePassing: check.SuccessBeforePassing,
FailuresBeforeCritical: check.FailuresBeforeCritical,
}
if check.CheckRestart != nil {
structsTask.Services[i].Checks[j].CheckRestart = &structs.CheckRestart{
Expand Down
58 changes: 31 additions & 27 deletions command/agent/job_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2024,20 +2024,22 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
},
Checks: []api.ServiceCheck{
{
Id: "hello",
Name: "bar",
Type: "http",
Command: "foo",
Args: []string{"a", "b"},
Path: "/check",
Protocol: "http",
PortLabel: "foo",
AddressMode: "driver",
GRPCService: "foo.Bar",
GRPCUseTLS: true,
Interval: 4 * time.Second,
Timeout: 2 * time.Second,
InitialStatus: "ok",
Id: "hello",
Name: "bar",
Type: "http",
Command: "foo",
Args: []string{"a", "b"},
Path: "/check",
Protocol: "http",
PortLabel: "foo",
AddressMode: "driver",
GRPCService: "foo.Bar",
GRPCUseTLS: true,
Interval: 4 * time.Second,
Timeout: 2 * time.Second,
InitialStatus: "ok",
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
CheckRestart: &api.CheckRestart{
Limit: 3,
IgnoreWarnings: true,
Expand Down Expand Up @@ -2391,19 +2393,21 @@ func TestJobs_ApiJobToStructsJob(t *testing.T) {
},
Checks: []*structs.ServiceCheck{
{
Name: "bar",
Type: "http",
Command: "foo",
Args: []string{"a", "b"},
Path: "/check",
Protocol: "http",
PortLabel: "foo",
AddressMode: "driver",
Interval: 4 * time.Second,
Timeout: 2 * time.Second,
InitialStatus: "ok",
GRPCService: "foo.Bar",
GRPCUseTLS: true,
Name: "bar",
Type: "http",
Command: "foo",
Args: []string{"a", "b"},
Path: "/check",
Protocol: "http",
PortLabel: "foo",
AddressMode: "driver",
Interval: 4 * time.Second,
Timeout: 2 * time.Second,
InitialStatus: "ok",
GRPCService: "foo.Bar",
GRPCUseTLS: true,
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
CheckRestart: &structs.CheckRestart{
Limit: 3,
Grace: 11 * time.Second,
Expand Down
2 changes: 2 additions & 0 deletions jobspec/parse_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,8 @@ func parseChecks(service *api.Service, checkObjs *ast.ObjectList) error {
"grpc_service",
"grpc_use_tls",
"task",
"success_before_passing",
"failures_before_critical",
}
if err := helper.CheckHCLKeys(co.Val, valid); err != nil {
return multierror.Prefix(err, "check ->")
Expand Down
31 changes: 31 additions & 0 deletions jobspec/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,37 @@ func TestParse(t *testing.T) {
},
false,
},
{
"service-check-pass-fail.hcl",
&api.Job{
ID: helper.StringToPtr("check_pass_fail"),
Name: helper.StringToPtr("check_pass_fail"),
Type: helper.StringToPtr("service"),
TaskGroups: []*api.TaskGroup{{
Name: helper.StringToPtr("group"),
Count: helper.IntToPtr(1),
Tasks: []*api.Task{{
Name: "task",
Services: []*api.Service{{
Name: "service",
PortLabel: "http",
Checks: []api.ServiceCheck{{
Name: "check-name",
Type: "http",
Path: "/",
Interval: 10 * time.Second,
Timeout: 2 * time.Second,
InitialStatus: capi.HealthPassing,
Method: "POST",
SuccessBeforePassing: 3,
FailuresBeforeCritical: 4,
}},
}},
}},
}},
},
false,
},
{
"service-check-bad-header.hcl",
nil,
Expand Down
26 changes: 26 additions & 0 deletions jobspec/test-fixtures/service-check-pass-fail.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
job "check_pass_fail" {
type = "service"

group "group" {
count = 1

task "task" {
service {
name = "service"
port = "http"

check {
name = "check-name"
type = "http"
path = "/"
method = "POST"
interval = "10s"
timeout = "2s"
initial_status = "passing"
success_before_passing = 3
failures_before_critical = 4
}
}
}
}
}
Loading

0 comments on commit 9424c55

Please sign in to comment.