From c74de6a455c3d5618af88798bdd884723def0f31 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Thu, 12 Dec 2019 15:46:14 -0800 Subject: [PATCH 1/2] connect: canonicalize before adding sidecar Fixes #6853 Canonicalize jobs first before adding any sidecars. This fixes a bug where sidecar tasks were added without interpolated names and broke validation. Sidecar tasks must be canonicalized independently. Also adds a group network to the mock connect job because it wasn't a valid connect job before! --- nomad/job_endpoint.go | 2 +- nomad/job_endpoint_hook_connect.go | 15 +++++---- nomad/job_endpoint_hook_connect_test.go | 45 ++++++++++++++++++++----- nomad/mock/mock.go | 5 +++ nomad/structs/structs.go | 19 +++++++++-- nomad/structs/structs_test.go | 14 +++----- 6 files changed, 74 insertions(+), 26 deletions(-) diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 4e368386a083..5829d886188e 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -58,8 +58,8 @@ func NewJobEndpoints(s *Server) *Job { srv: s, logger: s.logger.Named("job"), mutators: []jobMutator{ - jobConnectHook{}, jobCanonicalizer{}, + jobConnectHook{}, jobImpliedConstraints{}, }, validators: []jobValidator{ diff --git a/nomad/job_endpoint_hook_connect.go b/nomad/job_endpoint_hook_connect.go index 9a670c5d8a84..b3392f415fa6 100644 --- a/nomad/job_endpoint_hook_connect.go +++ b/nomad/job_endpoint_hook_connect.go @@ -59,7 +59,7 @@ func (jobConnectHook) Mutate(job *structs.Job) (_ *structs.Job, warnings []error continue } - if err := groupConnectHook(g); err != nil { + if err := groupConnectHook(job, g); err != nil { return nil, nil, err } } @@ -96,7 +96,7 @@ func isSidecarForService(t *structs.Task, svc string) bool { return t.Kind == structs.TaskKind(fmt.Sprintf("%s:%s", structs.ConnectProxyPrefix, svc)) } -func groupConnectHook(g *structs.TaskGroup) error { +func groupConnectHook(job *structs.Job, g *structs.TaskGroup) error { for _, service := range g.Services { if service.Connect.HasSidecar() { // Check to see if the sidecar task already exists @@ -104,7 +104,7 @@ func groupConnectHook(g *structs.TaskGroup) error { // If the task doesn't already exist, create a new one and add it to the job if task == nil { - task = newConnectTask(service) + task = newConnectTask(service.Name) // If there happens to be a task defined with the same name // append an UUID fragment to the task name @@ -121,6 +121,9 @@ func groupConnectHook(g *structs.TaskGroup) error { service.Connect.SidecarTask.MergeIntoTask(task) } + // Canonicalize task since this mutator runs after job canonicalization + task.Canonicalize(job, g) + // port to be added for the sidecar task's proxy port port := structs.Port{ Label: fmt.Sprintf("%s-%s", structs.ConnectProxyPrefix, service.Name), @@ -147,11 +150,11 @@ func groupConnectHook(g *structs.TaskGroup) error { return nil } -func newConnectTask(service *structs.Service) *structs.Task { +func newConnectTask(serviceName string) *structs.Task { task := &structs.Task{ // Name is used in container name so must start with '[A-Za-z0-9]' - Name: fmt.Sprintf("%s-%s", structs.ConnectProxyPrefix, service.Name), - Kind: structs.TaskKind(fmt.Sprintf("%s:%s", structs.ConnectProxyPrefix, service.Name)), + Name: fmt.Sprintf("%s-%s", structs.ConnectProxyPrefix, serviceName), + Kind: structs.TaskKind(fmt.Sprintf("%s:%s", structs.ConnectProxyPrefix, serviceName)), Driver: "docker", Config: connectDriverConfig, ShutdownDelay: 5 * time.Second, diff --git a/nomad/job_endpoint_hook_connect_test.go b/nomad/job_endpoint_hook_connect_test.go index c8a3cb2c1957..7e48c4cda7ce 100644 --- a/nomad/job_endpoint_hook_connect_test.go +++ b/nomad/job_endpoint_hook_connect_test.go @@ -4,6 +4,8 @@ import ( "fmt" "testing" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/stretchr/testify/require" ) @@ -49,7 +51,8 @@ func Test_isSidecarForService(t *testing.T) { func Test_groupConnectHook(t *testing.T) { // Test that connect-proxy task is inserted for backend service - tgIn := &structs.TaskGroup{ + job := mock.Job() + job.TaskGroups[0] = &structs.TaskGroup{ Networks: structs.Networks{ { Mode: "bridge", @@ -73,11 +76,16 @@ func Test_groupConnectHook(t *testing.T) { }, } - tgOut := tgIn.Copy() + // Expected tasks + tgOut := job.TaskGroups[0].Copy() tgOut.Tasks = []*structs.Task{ - newConnectTask(tgOut.Services[0]), - newConnectTask(tgOut.Services[1]), + newConnectTask(tgOut.Services[0].Name), + newConnectTask(tgOut.Services[1].Name), } + + // Expect sidecar tasks to be properly canonicalized + tgOut.Tasks[0].Canonicalize(job, tgOut) + tgOut.Tasks[1].Canonicalize(job, tgOut) tgOut.Networks[0].DynamicPorts = []structs.Port{ { Label: fmt.Sprintf("%s-%s", structs.ConnectProxyPrefix, "backend"), @@ -89,10 +97,31 @@ func Test_groupConnectHook(t *testing.T) { }, } - require.NoError(t, groupConnectHook(tgIn)) - require.Exactly(t, tgOut, tgIn) + require.NoError(t, groupConnectHook(job, job.TaskGroups[0])) + require.Exactly(t, tgOut, job.TaskGroups[0]) // Test that hook is idempotent - require.NoError(t, groupConnectHook(tgIn)) - require.Exactly(t, tgOut, tgIn) + require.NoError(t, groupConnectHook(job, job.TaskGroups[0])) + require.Exactly(t, tgOut, job.TaskGroups[0]) +} + +// TestJobEndpoint_ConnectInterpolation asserts that when a Connect sidecar +// proxy task is being created for a group service with an interpolated name, +// the service name is interpolated *before the task is created. +// +// See https://github.com/hashicorp/nomad/issues/6853 +func TestJobEndpoint_ConnectInterpolation(t *testing.T) { + t.Parallel() + + server := &Server{logger: testlog.HCLogger(t)} + jobEndpoint := NewJobEndpoints(server) + + j := mock.ConnectJob() + j.TaskGroups[0].Services[0].Name = "${JOB}-api" + j, warnings, err := jobEndpoint.admissionMutators(j) + require.NoError(t, err) + require.Nil(t, warnings) + + require.Len(t, j.TaskGroups[0].Tasks, 2) + require.Equal(t, "connect-proxy-my-job-api", j.TaskGroups[0].Tasks[1].Name) } diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 63eeb5b6a36d..896b3f208830 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -385,6 +385,11 @@ func MaxParallelJob() *structs.Job { func ConnectJob() *structs.Job { job := Job() tg := job.TaskGroups[0] + tg.Networks = []*structs.NetworkResource{ + { + Mode: "bridge", + }, + } tg.Services = []*structs.Service{ { Name: "testconnect", diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index cf3bb5443830..454b7c6467ec 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -5552,6 +5552,8 @@ func (t *Task) Validate(ephemeralDisk *EphemeralDisk, jobType string, tgServices if t.Leader { mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have leader set")) } + + // Ensure the proxy task has a corresponding service entry serviceErr := ValidateConnectProxyService(t.Kind.Value(), tgServices) if serviceErr != nil { mErr.Errors = append(mErr.Errors, serviceErr) @@ -5746,15 +5748,28 @@ const ConnectProxyPrefix = "connect-proxy" // valid Connect config. func ValidateConnectProxyService(serviceName string, tgServices []*Service) error { found := false + names := make([]string, 0, len(tgServices)) for _, svc := range tgServices { - if svc.Name == serviceName && svc.Connect != nil && svc.Connect.SidecarService != nil { + if svc.Connect == nil || svc.Connect.SidecarService == nil { + continue + } + + if svc.Name == serviceName { found = true break } + + // Build up list of mismatched Connect service names for error + // reporting. + names = append(names, svc.Name) } if !found { - return fmt.Errorf("Connect proxy service name not found in services from task group") + if len(names) == 0 { + return fmt.Errorf("No Connect services in task group with Connect proxy (%q)", serviceName) + } else { + return fmt.Errorf("Connect proxy service name (%q) not found in Connect services from task group: %s", serviceName, names) + } } return nil diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index 808e768cc1bd..dcb2dfcc82b5 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -1599,12 +1599,12 @@ func TestTask_Validate_ConnectProxyKind(t *testing.T) { Service: &Service{ Name: "redis", }, - ErrContains: "Connect proxy service name not found in services from task group", + ErrContains: `No Connect services in task group with Connect proxy ("redis:test")`, }, { Desc: "Service name not found in group", Kind: "connect-proxy:redis", - ErrContains: "Connect proxy service name not found in services from task group", + ErrContains: `No Connect services in task group with Connect proxy ("redis")`, }, { Desc: "Connect stanza not configured in group", @@ -1612,7 +1612,7 @@ func TestTask_Validate_ConnectProxyKind(t *testing.T) { TgService: []*Service{{ Name: "redis", }}, - ErrContains: "Connect proxy service name not found in services from task group", + ErrContains: `No Connect services in task group with Connect proxy ("redis")`, }, { Desc: "Valid connect proxy kind", @@ -1640,12 +1640,8 @@ func TestTask_Validate_ConnectProxyKind(t *testing.T) { // Ok! return } - if err == nil { - t.Fatalf("no error returned. expected: %s", tc.ErrContains) - } - if !strings.Contains(err.Error(), tc.ErrContains) { - t.Fatalf("expected %q but found: %v", tc.ErrContains, err) - } + require.Errorf(t, err, "no error returned. expected: %s", tc.ErrContains) + require.Containsf(t, err.Error(), tc.ErrContains, "expected %q but found: %v", tc.ErrContains, err) }) } From 4f718f7ba66a614932320fc3d0404dcb2b3f216a Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Thu, 12 Dec 2019 20:58:49 -0800 Subject: [PATCH 2/2] docs: add #6855 to changelog Also make Connect related fixes more consistent in the changelog. I suspect users won't care if a Connect related fix is in the server's admission controller or in the client's groupservice hook or somewhere else, so I think grouping them by `consul/connect:` makes the most sense. --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5be710f1850c..89b49c52dd05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ IMPROVEMENTS: BUG FIXES: * cli: Fixed a bug where `nomad monitor -node-id` would cause a cli panic when no nodes where found. [[GH-6828](https://github.com/hashicorp/nomad/issues/6828)] + * consul/connect: Fixed a bug where Connect-enabled jobs failed to validate when service names used interpolation. [[GH-6855](https://github.com/hashicorp/nomad/issues/6855)] ## 0.10.2 (December 4, 2019) @@ -54,10 +55,10 @@ BUG FIXES: * cli: Fixed a bug where a cli user may fail to query FS/Allocation API endpoints if they lack `node:read` capability [[GH-6423](https://github.com/hashicorp/nomad/issues/6423)] * client: client: Return empty values when host stats fail [[GH-6349](https://github.com/hashicorp/nomad/issues/6349)] * client: Fixed a bug where a client may not restart dead internal processes upon client's restart on Windows [[GH-6426](https://github.com/hashicorp/nomad/issues/6426)] + * consul/connect: Fixed registering multiple Connect-enabled services in the same task group [[GH-6646](https://github.com/hashicorp/nomad/issues/6646)] * drivers: Fixed a bug where client may panic if a restored task failed to shutdown cleanly [[GH-6763](https://github.com/hashicorp/nomad/issues/6763)] * driver/exec: Fixed a bug where exec tasks can spawn processes that live beyond task lifecycle [[GH-6722](https://github.com/hashicorp/nomad/issues/6722)] * driver/docker: Added mechanism for detecting running unexpectedly running docker containers [[GH-6325](https://github.com/hashicorp/nomad/issues/6325)] - * nomad: Fixed registering multiple connect enabled services in the same task group [[GH-6646](https://github.com/hashicorp/nomad/issues/6646)] * scheduler: Changes to devices in resource stanza should cause rescheduling [[GH-6644](https://github.com/hashicorp/nomad/issues/6644)] * scheduler: Fixed a bug that allowed inplace updates after affinity or spread were changed [[GH-6703](https://github.com/hashicorp/nomad/issues/6703)] * vault: Allow overriding implicit Vault version constraint [[GH-6687](https://github.com/hashicorp/nomad/issues/6687)]