Skip to content

Commit

Permalink
consul: register checks along with service on initial registration
Browse files Browse the repository at this point in the history
This PR updates Nomad's Consul service client to include checks in
an initial service registration, so that the checks associated with
the service are registered "atomically" with the service. Before, we
would only register the checks after the service registration, which
causes problems where the service is deemed healthy, even if one or
more checks are unhealthy - especially problematic in the case where
SuccessBeforePassing is configured.

Fixes #3935
  • Loading branch information
shoenig committed Oct 18, 2022
1 parent cc2b449 commit f84fd6f
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 29 deletions.
17 changes: 16 additions & 1 deletion command/agent/consul/catalog_testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,14 +275,29 @@ func (c *MockAgent) CheckRegister(check *api.AgentCheckRegistration) error {
if c.checks[check.Namespace] == nil {
c.checks[check.Namespace] = make(map[string]*api.AgentCheckRegistration)
}

c.checks[check.Namespace][check.ID] = check

// Be nice and make checks reachable-by-service
serviceCheck := check.AgentServiceCheck

if c.services[check.Namespace] == nil {
c.services[check.Namespace] = make(map[string]*api.AgentServiceRegistration)
}
c.services[check.Namespace][check.ServiceID].Checks = append(c.services[check.Namespace][check.ServiceID].Checks, &serviceCheck)

// replace existing check if one with same id already exists
replace := false
for i := 0; i < len(c.services[check.Namespace][check.ServiceID].Checks); i++ {
if c.services[check.Namespace][check.ServiceID].Checks[i].CheckID == check.CheckID {
c.services[check.Namespace][check.ServiceID].Checks[i] = &check.AgentServiceCheck
replace = true
break
}
}

if !replace {
c.services[check.Namespace][check.ServiceID].Checks = append(c.services[check.Namespace][check.ServiceID].Checks, &serviceCheck)
}
return nil
}

Expand Down
45 changes: 41 additions & 4 deletions command/agent/consul/service_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -973,8 +973,10 @@ func (c *ServiceClient) RegisterAgent(role string, services []*structs.Service)
// checks from a service. It returns a service registration object with the
// service and check IDs populated.
func (c *ServiceClient) serviceRegs(
ops *operations, service *structs.Service, workload *serviceregistration.WorkloadServices) (
*serviceregistration.ServiceRegistration, error) {
ops *operations,
service *structs.Service,
workload *serviceregistration.WorkloadServices,
) (*serviceregistration.ServiceRegistration, error) {

// Get the services ID
id := serviceregistration.MakeAllocServiceID(workload.AllocInfo.AllocID, workload.Name(), service)
Expand Down Expand Up @@ -1090,6 +1092,7 @@ func (c *ServiceClient) serviceRegs(
TaggedAddresses: taggedAddresses,
Connect: connect, // will be nil if no Connect stanza
Proxy: gateway, // will be nil if no Connect Gateway stanza
Checks: make([]*api.AgentServiceCheck, 0, len(service.Checks)),
}
ops.regServices = append(ops.regServices, serviceReg)

Expand All @@ -1098,17 +1101,48 @@ func (c *ServiceClient) serviceRegs(
if err != nil {
return nil, err
}

for _, registration := range checkRegs {
sreg.CheckIDs[registration.ID] = struct{}{}
ops.regChecks = append(ops.regChecks, registration)
serviceReg.Checks = append(serviceReg.Checks, apiCheckRegistrationToCheck(registration))
}

return sreg, nil
}

// apiCheckRegistrationToCheck converts a check registration to a check, so that
// we can include them in the initial service registration. It is expected the
// Nomad-conversion (e.g. turning script checks into ttl checks) has already been
// applied.
func apiCheckRegistrationToCheck(r *api.AgentCheckRegistration) *api.AgentServiceCheck {
return &api.AgentServiceCheck{
CheckID: r.ID,
Name: r.Name,
Interval: r.Interval,
Timeout: r.Timeout,
TTL: r.TTL,
HTTP: r.HTTP,
Header: maps.Clone(r.Header),
Method: r.Method,
Body: r.Body,
TCP: r.TCP,
Status: r.Status,
TLSSkipVerify: r.TLSSkipVerify,
GRPC: r.GRPC,
GRPCUseTLS: r.GRPCUseTLS,
SuccessBeforePassing: r.SuccessBeforePassing,
FailuresBeforeCritical: r.FailuresBeforeCritical,
}
}

// checkRegs creates check registrations for the given service
func (c *ServiceClient) checkRegs(serviceID string, service *structs.Service,
workload *serviceregistration.WorkloadServices, sreg *serviceregistration.ServiceRegistration) ([]*api.AgentCheckRegistration, error) {
func (c *ServiceClient) checkRegs(
serviceID string,
service *structs.Service,
workload *serviceregistration.WorkloadServices,
sreg *serviceregistration.ServiceRegistration,
) ([]*api.AgentCheckRegistration, error) {

registrations := make([]*api.AgentCheckRegistration, 0, len(service.Checks))
for _, check := range service.Checks {
Expand Down Expand Up @@ -1563,6 +1597,9 @@ func createCheckReg(serviceID, checkID string, check *structs.ServiceCheck, host
ServiceID: serviceID,
Namespace: normalizeNamespace(namespace),
}
chkReg.AgentServiceCheck.CheckID = checkID
chkReg.AgentServiceCheck.Name = check.Name

chkReg.Status = check.InitialStatus
chkReg.Timeout = check.Timeout.String()
chkReg.Interval = check.Interval.String()
Expand Down
46 changes: 22 additions & 24 deletions command/agent/consul/unit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
"github.com/kr/pretty"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -939,6 +940,8 @@ func TestCreateCheckReg_HTTP(t *testing.T) {
Name: "name",
ServiceID: serviceID,
AgentServiceCheck: api.AgentServiceCheck{
CheckID: checkID,
Name: "name",
Timeout: "0s",
Interval: "0s",
HTTP: fmt.Sprintf("http://%s:%d/path", host, port),
Expand Down Expand Up @@ -984,6 +987,8 @@ func TestCreateCheckReg_GRPC(t *testing.T) {
Name: "name",
ServiceID: serviceID,
AgentServiceCheck: api.AgentServiceCheck{
CheckID: checkID,
Name: "name",
Timeout: "1s",
Interval: "1m0s",
GRPC: "localhost:8080/foo.Bar",
Expand All @@ -999,17 +1004,13 @@ func TestCreateCheckReg_GRPC(t *testing.T) {

func TestConsul_ServiceName_Duplicates(t *testing.T) {
ci.Parallel(t)

ctx := setupFake(t)
require := require.New(t)

ctx.Workload.Services = []*structs.Service{
{
Name: "best-service",
PortLabel: "x",
Tags: []string{
"foo",
},
Tags: []string{"foo"},
Checks: []*structs.ServiceCheck{
{
Name: "check-a",
Expand All @@ -1022,12 +1023,10 @@ func TestConsul_ServiceName_Duplicates(t *testing.T) {
{
Name: "best-service",
PortLabel: "y",
Tags: []string{
"bar",
},
Tags: []string{"bar"},
Checks: []*structs.ServiceCheck{
{
Name: "checky-mccheckface",
Name: "check-b",
Type: "tcp",
Interval: time.Second,
Timeout: time.Second,
Expand All @@ -1040,21 +1039,20 @@ func TestConsul_ServiceName_Duplicates(t *testing.T) {
},
}

require.NoError(ctx.ServiceClient.RegisterWorkload(ctx.Workload))

require.NoError(ctx.syncOnce(syncNewOps))

require.Len(ctx.FakeConsul.services["default"], 3)

for _, v := range ctx.FakeConsul.services["default"] {
if v.Name == ctx.Workload.Services[0].Name && v.Port == xPort {
require.ElementsMatch(v.Tags, ctx.Workload.Services[0].Tags)
require.Len(v.Checks, 1)
} else if v.Name == ctx.Workload.Services[1].Name && v.Port == yPort {
require.ElementsMatch(v.Tags, ctx.Workload.Services[1].Tags)
require.Len(v.Checks, 1)
} else if v.Name == ctx.Workload.Services[2].Name {
require.Len(v.Checks, 0)
must.NoError(t, ctx.ServiceClient.RegisterWorkload(ctx.Workload))
must.NoError(t, ctx.syncOnce(syncNewOps))
must.MapLen(t, 3, ctx.FakeConsul.services["default"])

for _, s := range ctx.FakeConsul.services["default"] {
switch {
case s.Name == "best-service" && s.Port == xPort:
must.SliceContainsAll(t, s.Tags, ctx.Workload.Services[0].Tags)
must.SliceLen(t, 1, s.Checks)
case s.Name == "best-service" && s.Port == yPort:
must.SliceContainsAll(t, s.Tags, ctx.Workload.Services[1].Tags)
must.SliceLen(t, 1, s.Checks)
case s.Name == "worst-service":
must.SliceEmpty(t, s.Checks)
}
}
}
Expand Down

0 comments on commit f84fd6f

Please sign in to comment.