From af48777dddb89ed71a1d92796245776254c92798 Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Tue, 9 Feb 2021 09:44:48 -0600 Subject: [PATCH] consul/connect: enable custom sidecars to use expose checks This PR enables jobs configured with a custom sidecar_task to make use of the `service.expose` feature for creating checks on services in the service mesh. Before we would check that sidecar_task had not been set (indicating that something other than envoy may be in use, which would not support envoy's expose feature). However Consul has not added support for anything other than envoy and probably never will, so having the restriction in place seems like an unnecessary hindrance. If Consul ever does support something other than Envoy, they will likely find a way to provide the expose feature anyway. Fixes #9854 --- CHANGELOG.md | 1 + e2e/connect/connect.go | 45 +++++++++++++----- e2e/connect/input/demo.nomad | 4 +- e2e/connect/input/expose-custom.nomad | 46 +++++++++++++++++++ e2e/connect/input/native-demo.nomad | 4 +- e2e/e2e_test.go | 2 +- e2e/terraform/.terraform.lock.hcl | 0 nomad/job_endpoint_hook_expose_check.go | 27 +---------- nomad/job_endpoint_hook_expose_check_test.go | 36 ++------------- .../content/docs/job-specification/expose.mdx | 4 +- 10 files changed, 95 insertions(+), 74 deletions(-) create mode 100644 e2e/connect/input/expose-custom.nomad mode change 100755 => 100644 e2e/terraform/.terraform.lock.hcl diff --git a/CHANGELOG.md b/CHANGELOG.md index db71c678a94f..3c1cfae93ed5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ FEATURES: IMPROVEMENTS: * cli: Improved `scaling policy` commands with -verbose, auto-completion, and prefix-matching [[GH-9964](https://github.com/hashicorp/nomad/issues/9964)] * consul/connect: Made handling of sidecar task container image URLs consistent with the `docker` task driver. [[GH-9580](https://github.com/hashicorp/nomad/issues/9580)] + * consul/connect: Enable custom sidecar tasks to use connect expose checks [[GH-9995](https://github.com/hashicorp/nomad/pull/9995)] * drivers/exec+java: Added client plugin configuration to re-enable previous PID/IPC namespace behavior [[GH-9982](https://github.com/hashicorp/nomad/pull/9982)] BUG FIXES: diff --git a/e2e/connect/connect.go b/e2e/connect/connect.go index 3379a750b272..e6e578ceba3f 100644 --- a/e2e/connect/connect.go +++ b/e2e/connect/connect.go @@ -15,6 +15,10 @@ const ( // demoConnectJob is the example connect enabled job useful for testing demoConnectJob = "connect/input/demo.nomad" + // demoConnectCustomProxyExposed is a connect job with custom sidecar_task + // that also uses the expose check feature. + demoConnectCustomProxyExposed = "connect/input/expose-custom.nomad" + // demoConnectNativeJob is the example connect native enabled job useful for testing demoConnectNativeJob = "connect/input/native-demo.nomad" @@ -34,7 +38,7 @@ type ConnectE2ETest struct { } func init() { - // connect tests without Consul ACLs enabled + // Connect tests without Consul ACLs enabled. framework.AddSuites(&framework.TestSuite{ Component: "Connect", CanRunLocal: true, @@ -45,16 +49,22 @@ func init() { }, }) - // connect tests with Consul ACLs enabled - framework.AddSuites(&framework.TestSuite{ - Component: "ConnectACLs", - CanRunLocal: false, - Consul: true, - Parallel: false, - Cases: []framework.TestCase{ - new(ConnectACLsE2ETest), - }, - }) + // Connect tests with Consul ACLs enabled. These are now gated behind the + // NOMAD_TEST_CONNECT_ACLS environment variable, because they cause lots of + // problems for e2e test flakiness (due to restarting consul, nomad, etc.). + // + // Run these tests locally when working on Connect. + if os.Getenv("NOMAD_TEST_CONNECT_ACLS") == "1" { + framework.AddSuites(&framework.TestSuite{ + Component: "ConnectACLs", + CanRunLocal: false, + Consul: true, + Parallel: false, + Cases: []framework.TestCase{ + new(ConnectACLsE2ETest), + }, + }) + } } func (tc *ConnectE2ETest) BeforeAll(f *framework.F) { @@ -90,6 +100,19 @@ func (tc *ConnectE2ETest) TestConnectDemo(f *framework.F) { e2eutil.WaitForAllocsRunning(t, tc.Nomad(), allocIDs) } +// TestConnectCustomSidecarExposed tests that a connect sidecar with custom task +// definition can also make use of the expose service check feature. +func (tc *ConnectE2ETest) TestConnectCustomSidecarExposed(f *framework.F) { + t := f.T() + + jobID := connectJobID() + tc.jobIds = append(tc.jobIds, jobID) + + allocs := e2eutil.RegisterAndWaitForAllocs(t, tc.Nomad(), demoConnectCustomProxyExposed, jobID, "") + allocIDs := e2eutil.AllocIDsFromAllocationListStubs(allocs) + e2eutil.WaitForAllocsRunning(t, tc.Nomad(), allocIDs) +} + // TestConnectNativeDemo tests the demo job file used in Connect Native Integration examples. func (tc *ConnectE2ETest) TestConnectNativeDemo(f *framework.F) { t := f.T() diff --git a/e2e/connect/input/demo.nomad b/e2e/connect/input/demo.nomad index 632326d0bb06..405186c856c3 100644 --- a/e2e/connect/input/demo.nomad +++ b/e2e/connect/input/demo.nomad @@ -33,7 +33,7 @@ job "countdash" { driver = "docker" config { - image = "hashicorpnomad/counter-api:v2" + image = "hashicorpnomad/counter-api:v3" } } } @@ -72,7 +72,7 @@ job "countdash" { } config { - image = "hashicorpnomad/counter-dashboard:v2" + image = "hashicorpnomad/counter-dashboard:v3" } } } diff --git a/e2e/connect/input/expose-custom.nomad b/e2e/connect/input/expose-custom.nomad new file mode 100644 index 000000000000..e26a4ccf698c --- /dev/null +++ b/e2e/connect/input/expose-custom.nomad @@ -0,0 +1,46 @@ +job "expose-custom" { + datacenters = ["dc1"] + + constraint { + attribute = "${attr.kernel.name}" + value = "linux" + } + + group "api" { + network { + mode = "bridge" + } + + service { + name = "count-api" + port = "9001" + + connect { + sidecar_service {} + sidecar_task { + resources { + cpu = 133 + memory = 63 + } + } + } + + check { + expose = true + name = "api-health" + type = "http" + path = "/health" + interval = "10s" + timeout = "3s" + } + } + + task "web" { + driver = "docker" + + config { + image = "hashicorpnomad/counter-api:v3" + } + } + } +} diff --git a/e2e/connect/input/native-demo.nomad b/e2e/connect/input/native-demo.nomad index 3d0b0f784f15..54dbbe13cc85 100644 --- a/e2e/connect/input/native-demo.nomad +++ b/e2e/connect/input/native-demo.nomad @@ -25,7 +25,7 @@ job "cn-demo" { driver = "docker" config { - image = "hashicorpnomad/uuid-api:v1" + image = "hashicorpnomad/uuid-api:v5" network_mode = "host" } @@ -57,7 +57,7 @@ job "cn-demo" { driver = "docker" config { - image = "hashicorpnomad/uuid-fe:v1" + image = "hashicorpnomad/uuid-fe:v5" network_mode = "host" } diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go index d6a9c5156c61..deecc1c4dfc3 100644 --- a/e2e/e2e_test.go +++ b/e2e/e2e_test.go @@ -11,7 +11,7 @@ import ( _ "github.com/hashicorp/nomad/e2e/affinities" _ "github.com/hashicorp/nomad/e2e/clientstate" - // _ "github.com/hashicorp/nomad/e2e/connect" + _ "github.com/hashicorp/nomad/e2e/connect" _ "github.com/hashicorp/nomad/e2e/consul" _ "github.com/hashicorp/nomad/e2e/consultemplate" _ "github.com/hashicorp/nomad/e2e/csi" diff --git a/e2e/terraform/.terraform.lock.hcl b/e2e/terraform/.terraform.lock.hcl old mode 100755 new mode 100644 diff --git a/nomad/job_endpoint_hook_expose_check.go b/nomad/job_endpoint_hook_expose_check.go index 3ebab92200b1..e1bc6d1ee2cb 100644 --- a/nomad/job_endpoint_hook_expose_check.go +++ b/nomad/job_endpoint_hook_expose_check.go @@ -102,9 +102,9 @@ func tgValidateUseOfCheckExpose(tg *structs.TaskGroup) error { // validation for group services (which must use built-in connect proxy) for _, s := range tg.Services { for _, check := range s.Checks { - if check.Expose && !serviceUsesConnectEnvoy(s) { + if check.Expose && !s.Connect.HasSidecar() { return errors.Errorf( - "exposed service check %s->%s->%s requires use of Nomad's builtin Connect proxy", + "exposed service check %s->%s->%s requires use of sidecar_proxy", tg.Name, s.Name, check.Name, ) } @@ -155,29 +155,6 @@ func tgUsesExposeCheck(tg *structs.TaskGroup) bool { return false } -// serviceUsesConnectEnvoy returns true if the service is going to end up using -// the built-in envoy proxy. -// -// This implementation is kind of reading tea leaves - firstly Connect -// must be enabled, and second the sidecar_task must not be overridden. If these -// conditions are met, the preceding connect hook will have injected a Connect -// sidecar task, the configuration of which is interpolated at runtime. -func serviceUsesConnectEnvoy(s *structs.Service) bool { - // A non-nil connect stanza implies this service isn't connect enabled in - // the first place. - if s.Connect == nil { - return false - } - - // A non-nil connect.sidecar_task stanza implies the sidecar task is being - // overridden (i.e. the default Envoy is not being used). - if s.Connect.SidecarTask != nil { - return false - } - - return true -} - // checkIsExposable returns true if check is qualified for automatic generation // of connect proxy expose path configuration based on configured consul checks. // To qualify, the check must be of type "http" or "grpc", and must have a Path diff --git a/nomad/job_endpoint_hook_expose_check_test.go b/nomad/job_endpoint_hook_expose_check_test.go index a587efee3288..6196435c7fa2 100644 --- a/nomad/job_endpoint_hook_expose_check_test.go +++ b/nomad/job_endpoint_hook_expose_check_test.go @@ -13,34 +13,6 @@ func TestJobExposeCheckHook_Name(t *testing.T) { require.Equal(t, "expose-check", new(jobExposeCheckHook).Name()) } -func TestJobExposeCheckHook_serviceUsesConnectEnvoy(t *testing.T) { - t.Parallel() - - t.Run("connect is nil", func(t *testing.T) { - require.False(t, serviceUsesConnectEnvoy(&structs.Service{ - Connect: nil, - })) - }) - - t.Run("sidecar-task is overridden", func(t *testing.T) { - require.False(t, serviceUsesConnectEnvoy(&structs.Service{ - Connect: &structs.ConsulConnect{ - SidecarTask: &structs.SidecarTask{ - Name: "my-sidecar", - }, - }, - })) - }) - - t.Run("sidecar-task is nil", func(t *testing.T) { - require.True(t, serviceUsesConnectEnvoy(&structs.Service{ - Connect: &structs.ConsulConnect{ - SidecarTask: nil, - }, - })) - }) -} - func TestJobExposeCheckHook_tgUsesExposeCheck(t *testing.T) { t.Parallel() @@ -135,7 +107,7 @@ func TestJobExposeCheckHook_tgValidateUseOfCheckExpose(t *testing.T) { require.EqualError(t, tgValidateUseOfCheckExpose(&structs.TaskGroup{ Name: "g1", Services: []*structs.Service{withCustomProxyTask}, - }), `exposed service check g1->s1->s1-check1 requires use of Nomad's builtin Connect proxy`) + }), `exposed service check g1->s1->s1-check1 requires use of sidecar_proxy`) }) t.Run("group-service uses custom proxy but no expose", func(t *testing.T) { @@ -223,8 +195,10 @@ func TestJobExposeCheckHook_Validate(t *testing.T) { Mode: "bridge", }}, Services: []*structs.Service{{ - Name: "s1", - Connect: &structs.ConsulConnect{}, + Name: "s1", + Connect: &structs.ConsulConnect{ + SidecarService: &structs.ConsulSidecarService{}, + }, Checks: []*structs.ServiceCheck{{ Name: "check1", Type: "http", diff --git a/website/content/docs/job-specification/expose.mdx b/website/content/docs/job-specification/expose.mdx index 83720c30e45c..577be44e09a9 100644 --- a/website/content/docs/job-specification/expose.mdx +++ b/website/content/docs/job-specification/expose.mdx @@ -65,7 +65,7 @@ job "expose-check-example" { driver = "docker" config { - image = "hashicorpnomad/counter-api:v2" + image = "hashicorpnomad/counter-api:v3" } } } @@ -123,7 +123,7 @@ job "expose-example" { driver = "docker" config { - image = "hashicorpnomad/counter-api:v2" + image = "hashicorpnomad/counter-api:v3" } # e.g. reference ${NOMAD_PORT_api_expose_healthcheck} for other uses