From 66a720863725552d27d72f3d69dadee11edbef0d Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Fri, 9 Jul 2021 13:44:02 -0500 Subject: [PATCH] consul/connect: fix bug causing high cpu with multiple connect sidecars in group This PR fixes a bug where the underlying Envoy process of a Connect gateway would consume a full core of CPU if there is more than one sidecar or gateway in a group. The utilization was being caused by Consul injecting an envoy_ready_listener on 127.0.0.1:8443, of which only one of the Envoys would be able to bind to. The others would spin in a hot loop trying to bind the listener. As a workaround, we now specify -address during the Envoy bootstrap config step, which is how Consul maps this ready listener. Because there is already the envoy_admin_listener, and we need to continue supporting running gateways in host networking mode, and in those case we want to use the same port value coming from the service.port field, we now bind the admin listener to the 127.0.0.2 loop-back interface, and the ready listener takes 127.0.0.1. This shouldn't make a difference in the 99.999% use case where envoy is being run in its official docker container. Advanced users can reference ${NOMAD_ENVOY_ADMIN_ADDR_} (as they 'ought to) if needed, as well as the new variable ${NOMAD_ENVOY_READY_ADDR_} for the envoy_ready_listener. --- .../taskrunner/envoy_bootstrap_hook.go | 59 +++++++++++++------ .../taskrunner/envoy_bootstrap_hook_test.go | 30 ++++++---- website/content/partials/envvars.mdx | 20 +++++-- 3 files changed, 77 insertions(+), 32 deletions(-) diff --git a/client/allocrunner/taskrunner/envoy_bootstrap_hook.go b/client/allocrunner/taskrunner/envoy_bootstrap_hook.go index ee58b03a0bd6..3aaf2d2e019e 100644 --- a/client/allocrunner/taskrunner/envoy_bootstrap_hook.go +++ b/client/allocrunner/taskrunner/envoy_bootstrap_hook.go @@ -92,8 +92,10 @@ func newEnvoyBootstrapHookConfig(alloc *structs.Allocation, consul *config.Consu } const ( - envoyBaseAdminPort = 19000 + envoyBaseAdminPort = 19000 // Consul default (bridge only) + envoyBaseReadyPort = 19100 // Consul default (bridge only) envoyAdminBindEnvPrefix = "NOMAD_ENVOY_ADMIN_ADDR_" + envoyReadyBindEnvPrefix = "NOMAD_ENVOY_READY_ADDR_" ) const ( @@ -240,12 +242,17 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart h.logger.Debug("bootstrapping Consul "+serviceKind, "task", req.Task.Name, "service", serviceName) - // Envoy runs an administrative API on the loopback interface. There is no - // way to turn this feature off. + // Envoy runs an administrative listener. There is no way to turn this feature off. // https://github.com/envoyproxy/envoy/issues/1297 envoyAdminBind := buildEnvoyAdminBind(h.alloc, serviceName, req.Task.Name, req.TaskEnv) + + // Consul configures a ready listener. There is no way to turn this feature off. + envoyReadyBind := buildEnvoyReadyBind(h.alloc, serviceName, req.Task.Name, req.TaskEnv) + + // Set runtime environment variables for the envoy admin and ready listeners. resp.Env = map[string]string{ helper.CleanEnvVar(envoyAdminBindEnvPrefix+serviceName, '_'): envoyAdminBind, + helper.CleanEnvVar(envoyReadyBindEnvPrefix+serviceName, '_'): envoyReadyBind, } // Envoy bootstrap configuration may contain a Consul token, so write @@ -259,7 +266,7 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart } h.logger.Debug("check for SI token for task", "task", req.Task.Name, "exists", siToken != "") - bootstrap := h.newEnvoyBootstrapArgs(h.alloc.TaskGroup, service, grpcAddr, envoyAdminBind, siToken, bootstrapFilePath) + bootstrap := h.newEnvoyBootstrapArgs(h.alloc.TaskGroup, service, grpcAddr, envoyAdminBind, envoyReadyBind, siToken, bootstrapFilePath) bootstrapArgs := bootstrap.args() bootstrapEnv := bootstrap.env(os.Environ()) @@ -331,37 +338,50 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart return nil } -// buildEnvoyAdminBind determines a unique port for use by the envoy admin -// listener. +// buildEnvoyAdminBind determines a unique port for use by the envoy admin listener. +// +// This listener will be bound to 127.0.0.2. +func buildEnvoyAdminBind(alloc *structs.Allocation, service, task string, env *taskenv.TaskEnv) string { + return buildEnvoyBind(alloc, "127.0.0.2", service, task, env, envoyBaseAdminPort) +} + +// buildEnvoyAdminBind determines a unique port for use by the envoy ready listener. +// +// This listener will be bound to 127.0.0.1. +func buildEnvoyReadyBind(alloc *structs.Allocation, service, task string, env *taskenv.TaskEnv) string { + return buildEnvoyBind(alloc, "127.0.0.1", service, task, env, envoyBaseReadyPort) +} + +// buildEnvoyBind is used to determine a unique port for an envoy listener. // // In bridge mode, if multiple sidecars are running, the bind addresses need -// to be unique within the namespace, so we simply start at 19000 and increment +// to be unique within the namespace, so we simply start at basePort and increment // by the index of the task. // // In host mode, use the port provided through the service definition, which can // be a port chosen by Nomad. -func buildEnvoyAdminBind(alloc *structs.Allocation, serviceName, taskName string, taskEnv *taskenv.TaskEnv) string { +func buildEnvoyBind(alloc *structs.Allocation, ifce, service, task string, taskEnv *taskenv.TaskEnv, basePort int) string { tg := alloc.Job.LookupTaskGroup(alloc.TaskGroup) - port := envoyBaseAdminPort + port := basePort switch tg.Networks[0].Mode { case "host": interpolatedServices := taskenv.InterpolateServices(taskEnv, tg.Services) - for _, service := range interpolatedServices { - if service.Name == serviceName { - mapping := tg.Networks.Port(service.PortLabel) + for _, svc := range interpolatedServices { + if svc.Name == service { + mapping := tg.Networks.Port(svc.PortLabel) port = mapping.Value break } } default: - for idx, task := range tg.Tasks { - if task.Name == taskName { + for idx, tgTask := range tg.Tasks { + if tgTask.Name == task { port += idx break } } } - return fmt.Sprintf("localhost:%d", port) + return fmt.Sprintf("%s:%d", ifce, port) } func (h *envoyBootstrapHook) writeConfig(filename, config string) error { @@ -421,7 +441,7 @@ func (h *envoyBootstrapHook) proxyServiceID(group string, service *structs.Servi // https://www.consul.io/commands/connect/envoy#consul-connect-envoy func (h *envoyBootstrapHook) newEnvoyBootstrapArgs( group string, service *structs.Service, - grpcAddr, envoyAdminBind, siToken, filepath string, + grpcAddr, envoyAdminBind, envoyReadyBind, siToken, filepath string, ) envoyBootstrapArgs { var ( sidecarForID string // sidecar only @@ -450,8 +470,8 @@ func (h *envoyBootstrapHook) newEnvoyBootstrapArgs( h.logger.Info("bootstrapping envoy", "sidecar_for", service.Name, "bootstrap_file", filepath, "sidecar_for_id", sidecarForID, "grpc_addr", grpcAddr, - "admin_bind", envoyAdminBind, "gateway", gateway, - "proxy_id", proxyID, "namespace", namespace, + "admin_bind", envoyAdminBind, "ready_bind", envoyReadyBind, + "gateway", gateway, "proxy_id", proxyID, "namespace", namespace, ) return envoyBootstrapArgs{ @@ -459,6 +479,7 @@ func (h *envoyBootstrapHook) newEnvoyBootstrapArgs( sidecarFor: sidecarForID, grpcAddr: grpcAddr, envoyAdminBind: envoyAdminBind, + envoyReadyBind: envoyReadyBind, siToken: siToken, gateway: gateway, proxyID: proxyID, @@ -474,6 +495,7 @@ type envoyBootstrapArgs struct { sidecarFor string // sidecars only grpcAddr string envoyAdminBind string + envoyReadyBind string siToken string gateway string // gateways only proxyID string // gateways only @@ -489,6 +511,7 @@ func (e envoyBootstrapArgs) args() []string { "-grpc-addr", e.grpcAddr, "-http-addr", e.consulConfig.HTTPAddr, "-admin-bind", e.envoyAdminBind, + "-address", e.envoyReadyBind, "-bootstrap", } diff --git a/client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go b/client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go index 98d20440650e..4683ba32077f 100644 --- a/client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go +++ b/client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go @@ -123,13 +123,15 @@ func TestEnvoyBootstrapHook_envoyBootstrapArgs(t *testing.T) { sidecarFor: "s1", grpcAddr: "1.1.1.1", consulConfig: consulPlainConfig, - envoyAdminBind: "localhost:3333", + envoyAdminBind: "127.0.0.2:19000", + envoyReadyBind: "127.0.0.1:19100", } result := ebArgs.args() require.Equal(t, []string{"connect", "envoy", "-grpc-addr", "1.1.1.1", "-http-addr", "2.2.2.2", - "-admin-bind", "localhost:3333", + "-admin-bind", "127.0.0.2:19000", + "-address", "127.0.0.1:19100", "-bootstrap", "-sidecar-for", "s1", }, result) @@ -141,14 +143,16 @@ func TestEnvoyBootstrapHook_envoyBootstrapArgs(t *testing.T) { sidecarFor: "s1", grpcAddr: "1.1.1.1", consulConfig: consulPlainConfig, - envoyAdminBind: "localhost:3333", + envoyAdminBind: "127.0.0.2:19000", + envoyReadyBind: "127.0.0.1:19100", siToken: token, } result := ebArgs.args() require.Equal(t, []string{"connect", "envoy", "-grpc-addr", "1.1.1.1", "-http-addr", "2.2.2.2", - "-admin-bind", "localhost:3333", + "-admin-bind", "127.0.0.2:19000", + "-address", "127.0.0.1:19100", "-bootstrap", "-sidecar-for", "s1", "-token", token, @@ -160,13 +164,15 @@ func TestEnvoyBootstrapHook_envoyBootstrapArgs(t *testing.T) { sidecarFor: "s1", grpcAddr: "1.1.1.1", consulConfig: consulTLSConfig, - envoyAdminBind: "localhost:3333", + envoyAdminBind: "127.0.0.2:19000", + envoyReadyBind: "127.0.0.1:19100", } result := ebArgs.args() require.Equal(t, []string{"connect", "envoy", "-grpc-addr", "1.1.1.1", "-http-addr", "2.2.2.2", - "-admin-bind", "localhost:3333", + "-admin-bind", "127.0.0.2:19000", + "-address", "127.0.0.1:19100", "-bootstrap", "-sidecar-for", "s1", "-ca-file", "/etc/tls/ca-file", @@ -179,7 +185,8 @@ func TestEnvoyBootstrapHook_envoyBootstrapArgs(t *testing.T) { ebArgs := envoyBootstrapArgs{ consulConfig: consulPlainConfig, grpcAddr: "1.1.1.1", - envoyAdminBind: "localhost:3333", + envoyAdminBind: "127.0.0.2:19000", + envoyReadyBind: "127.0.0.1:19100", gateway: "my-ingress-gateway", proxyID: "_nomad-task-803cb569-881c-b0d8-9222-360bcc33157e-group-ig-ig-8080", } @@ -187,7 +194,8 @@ func TestEnvoyBootstrapHook_envoyBootstrapArgs(t *testing.T) { require.Equal(t, []string{"connect", "envoy", "-grpc-addr", "1.1.1.1", "-http-addr", "2.2.2.2", - "-admin-bind", "localhost:3333", + "-admin-bind", "127.0.0.2:19000", + "-address", "127.0.0.1:19100", "-bootstrap", "-gateway", "my-ingress-gateway", "-proxy-id", "_nomad-task-803cb569-881c-b0d8-9222-360bcc33157e-group-ig-ig-8080", @@ -198,7 +206,8 @@ func TestEnvoyBootstrapHook_envoyBootstrapArgs(t *testing.T) { ebArgs := envoyBootstrapArgs{ consulConfig: consulPlainConfig, grpcAddr: "1.1.1.1", - envoyAdminBind: "localhost:3333", + envoyAdminBind: "127.0.0.2:19000", + envoyReadyBind: "127.0.0.1:19100", gateway: "my-mesh-gateway", proxyID: "_nomad-task-803cb569-881c-b0d8-9222-360bcc33157e-group-mesh-mesh-8080", } @@ -206,7 +215,8 @@ func TestEnvoyBootstrapHook_envoyBootstrapArgs(t *testing.T) { require.Equal(t, []string{"connect", "envoy", "-grpc-addr", "1.1.1.1", "-http-addr", "2.2.2.2", - "-admin-bind", "localhost:3333", + "-admin-bind", "127.0.0.2:19000", + "-address", "127.0.0.1:19100", "-bootstrap", "-gateway", "my-mesh-gateway", "-proxy-id", "_nomad-task-803cb569-881c-b0d8-9222-360bcc33157e-group-mesh-mesh-8080", diff --git a/website/content/partials/envvars.mdx b/website/content/partials/envvars.mdx index 9ccb0cc555ea..6e8273c6e0cc 100644 --- a/website/content/partials/envvars.mdx +++ b/website/content/partials/envvars.mdx @@ -264,11 +264,23 @@ NOMAD_ENVOY_ADMIN_ADDR_<service> - Local address localhost:Port for the admin port of the + Local address 127.0.0.2:Port for the admin port of the envoy sidecar for the given service when defined as a - Consul Connect enabled service. - - + Consul Connect enabled service. Envoy runs inside the group network + namespace unless configured for host networking. + + + + + NOMAD_ENVOY_READY_ADDR_<service> + + + Local address 127.0.0.1:Port for the ready port of the + envoy sidecar for the given service when defined as a + Consul Connect enabled service. Envoy runs inside the group network + namespace unless configured for host networking. + + Consul-related Variables (only set for connect native tasks)