From 2a397dbda26f5e00c90834bc05b73bd750672cdd Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Tue, 1 Dec 2020 13:01:32 -0600 Subject: [PATCH] consul/connect: default envoy concurrency to 1 Previously, every Envoy Connect sidecar would spawn as many worker threads as logical CPU cores. That is Envoy's default behavior when `--concurrency` is not explicitly set. Nomad now sets the concurrency flag to 1, which is sensible for the default cpu = 250 MHz resources allocated for sidecar proxies. The concurrency value can be configured in Client configuration by setting `meta.connect.proxy_concurrency`. Closes #9341 --- CHANGELOG.md | 1 + client/client.go | 20 ++++-- nomad/job_endpoint_hook_connect.go | 1 + .../docs/job-specification/sidecar_task.mdx | 62 ++++++++++--------- .../pages/docs/upgrade/upgrade-specific.mdx | 17 ++++- 5 files changed, 66 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b2170ccfac9..7527f563a6bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ IMPROVEMENTS: * consul: Support Consul namespace (Consul Enterprise) in client configuration. [[GH-8849](https://github.com/hashicorp/nomad/pull/8849)] * consul/connect: Dynamically select envoy sidecar at runtime [[GH-8945](https://github.com/hashicorp/nomad/pull/8945)] * consul/connect: Enable setting `datacenter` field on connect upstreams [[GH-8964](https://github.com/hashicorp/nomad/issues/8964)] + * consul/connect: Envoy concurrency now defaults to 1 rather than the number of cores [[GH-9341](https://github.com/hashicorp/nomad/issues/9341)] * csi: Support `nomad volume detach` with previously garbage-collected nodes. [[GH-9057](https://github.com/hashicorp/nomad/issues/9057)] * csi: Relaxed validation requirements when checking volume capabilities with controller plugins, to accommodate existing plugin behaviors. 
[[GH-9049](https://github.com/hashicorp/nomad/issues/9049)] * driver/docker: Upgrade pause container and detect architecture [[GH-8957](https://github.com/hashicorp/nomad/pull/8957)] diff --git a/client/client.go b/client/client.go index 9a1e8e1d04e3..3f375c6a7e7d 100644 --- a/client/client.go +++ b/client/client.go @@ -98,17 +98,26 @@ const ( allocSyncRetryIntv = 5 * time.Second // defaultConnectSidecarImage is the image set in the node meta by default - // to be used by Consul Connect sidecar tasks - // Update sidecar_task.html when updating this. + // to be used by Consul Connect sidecar tasks. As of Nomad 1.0, this value + // is only used as a fallback when the version of Consul does not yet support + // dynamic envoy versions. defaultConnectSidecarImage = "envoyproxy/envoy:v1.11.2@sha256:a7769160c9c1a55bb8d07a3b71ce5d64f72b1f665f10d81aa1581bc3cf850d09" // defaultConnectGatewayImage is the image set in the node meta by default - // to be used by Consul Connect Gateway tasks. + // to be used by Consul Connect Gateway tasks. As of Nomad 1.0, this value + // is only used as a fallback when the version of Consul does not yet support + // dynamic envoy versions. defaultConnectGatewayImage = defaultConnectSidecarImage // defaultConnectLogLevel is the log level set in the node meta by default - // to be used by Consul Connect sidecar tasks + // to be used by Consul Connect sidecar tasks. defaultConnectLogLevel = "info" + + // defaultConnectProxyConcurrency is the default number of worker threads the + // connect sidecar should be configured to use. 
+ // + // https://www.envoyproxy.io/docs/envoy/latest/operations/cli#cmdoption-concurrency + defaultConnectProxyConcurrency = "1" ) var ( @@ -1403,6 +1412,9 @@ func (c *Client) setupNode() error { if _, ok := node.Meta["connect.log_level"]; !ok { node.Meta["connect.log_level"] = defaultConnectLogLevel } + if _, ok := node.Meta["connect.proxy_concurrency"]; !ok { + node.Meta["connect.proxy_concurrency"] = defaultConnectProxyConcurrency + } return nil } diff --git a/nomad/job_endpoint_hook_connect.go b/nomad/job_endpoint_hook_connect.go index 11901bb7e87a..36142ebb5acb 100644 --- a/nomad/job_endpoint_hook_connect.go +++ b/nomad/job_endpoint_hook_connect.go @@ -28,6 +28,7 @@ var ( "args": []interface{}{ "-c", structs.EnvoyBootstrapPath, "-l", "${meta.connect.log_level}", + "--concurrency", "${meta.connect.proxy_concurrency}", "--disable-hot-restart", }, } diff --git a/website/pages/docs/job-specification/sidecar_task.mdx b/website/pages/docs/job-specification/sidecar_task.mdx index 45d1d81e10c9..64c64ce0c901 100644 --- a/website/pages/docs/job-specification/sidecar_task.mdx +++ b/website/pages/docs/job-specification/sidecar_task.mdx @@ -61,40 +61,44 @@ group service has a [`sidecar_service`][sidecar_service] stanza. 
The default sidecar task is equivalent to: ```hcl - sidecar_task { - name = "connect-proxy-" +sidecar_task { + name = "connect-proxy-" - lifecycle { - hook = "prestart" - sidecar = true - } + lifecycle { + hook = "prestart" + sidecar = true + } - driver = "docker" - config { - image = "${meta.connect.sidecar_image}" - args = [ - "-c", - "${NOMAD_SECRETS_DIR}/envoy_bootstrap.json", - "-l", - "${meta.connect.log_level}" - ] - } + driver = "docker" + config { + image = "${meta.connect.sidecar_image}" + args = [ + "-c", + "${NOMAD_SECRETS_DIR}/envoy_bootstrap.json", + "-l", + "${meta.connect.log_level}", + "--concurrency", + "${meta.connect.proxy_concurrency}", + "--disable-hot-restart" + ] + } - logs { - max_files = 2 - max_file_size = 2 # MB - } + logs { + max_files = 2 + max_file_size = 2 # MB + } - resources { - cpu = 250 # MHz - memory = 128 # MB - } + resources { + cpu = 250 # MHz + memory = 128 # MB + } - shutdown_delay = "5s" - } + shutdown_delay = "5s" +} ``` -The `meta.connect.sidecar_image` and `meta.connect.log_level` are [_client_ +The `meta.connect.sidecar_image`, `meta.connect.log_level`, and +`meta.connect.proxy_concurrency` variables are [_client_ configurable][nodemeta] variables with the following defaults: - `sidecar_image` - `(string: "envoyproxy/envoy:v${NOMAD_envoy_version}")` - The official @@ -102,8 +106,9 @@ configurable][nodemeta] variables with the following defaults: by a query to Consul. - `log_level` - `(string: "info")` - Envoy sidecar log level. "`debug`" is useful for debugging Connect related issues. +- `proxy_concurrency` - `(string: "1")` - The number of [worker threads][worker_threads] the Envoy + sidecar will run. -`meta.connect.sidecar_image` can be configured at the job, group, or task level. Custom images can make use of Consul's preferred Envoy version by making use of Nomad's version interpolation, e.g. 
@@ -169,3 +174,4 @@ The following example configures resources for the sidecar task and other config [resources]: /docs/job-specification/resources 'Nomad resources Job Specification' [logs]: /docs/job-specification/logs 'Nomad logs Job Specification' [nodemeta]: /docs/configuration/client#meta +[worker_threads]: https://www.envoyproxy.io/docs/envoy/latest/operations/cli#cmdoption-concurrency diff --git a/website/pages/docs/upgrade/upgrade-specific.mdx b/website/pages/docs/upgrade/upgrade-specific.mdx index eaf3b5c23b2e..77b6d3829b31 100644 --- a/website/pages/docs/upgrade/upgrade-specific.mdx +++ b/website/pages/docs/upgrade/upgrade-specific.mdx @@ -19,7 +19,7 @@ standard upgrade flow. ### HCL2 for Job specification -Nomad 1.0.0 adopts HCL2 for parsing the job spec. HCL2 extends HCL with more +Nomad v1.0.0 adopts HCL2 for parsing the job spec. HCL2 extends HCL with more expression and reuse support, but adds some stricter schema for HCL blocks (a.k.a. stanzas). Check [HCL](/docs/job-specification/hcl2) for more details. ### Signal used when stopping Docker tasks @@ -73,12 +73,12 @@ Nomad. The specific configuration values replaced are: ### Envoy proxy versions -Nomad 1.0.0 changes the behavior around the selection of Envoy version used +Nomad v1.0.0 changes the behavior around the selection of Envoy version used for Connect sidecar proxies. Previously, Nomad always defaulted to Envoy v1.11.2 if neither the `meta.connect.sidecar_image` parameter or `sidecar_task` stanza were explicitly configured. Likewise the same version of Envoy would be used for Connect ingress gateways if `meta.connect.gateway_image` was unset. Starting with -Nomad 1.0.0, each Nomad Client will query Consul for a list of supported Envoy +Nomad v1.0.0, each Nomad Client will query Consul for a list of supported Envoy versions. Nomad will make use of the latest version of Envoy supported by the Consul agent when launching Envoy as a Connect sidecar proxy. 
If the version of the Consul agent is older than v1.7.8, v1.8.4, or v1.9.0, Nomad will fallback to @@ -94,6 +94,15 @@ the time of the upgrade for each node will ensure Connect workloads are properly rescheduled onto nodes in such a way that the Nomad Clients, Consul agents, and Envoy sidecar tasks maintain compatibility with one another. +### Envoy worker threads + +Nomad v1.0.0 changes the default behavior around the number of worker threads +created by the Envoy sidecar proxy when using Consul Connect. Previously, the +Envoy [`--concurrency`][envoy_concurrency] argument was left unset, which caused +Envoy to spawn as many worker threads as logical cores available on the CPU. The +`--concurrency` value now defaults to `1` and can be configured by setting the +[`meta.connect.proxy_concurrency`][proxy_concurrency] property in client configuration. + ## Nomad 0.12.8 ### Docker volume mounts @@ -859,6 +868,7 @@ deleted and then Nomad 0.3.0 can be launched. [drain-api]: /api-docs/nodes#drain-node [drain-cli]: /docs/commands/node/drain [dst]: /docs/job-specification/periodic#daylight-saving-time +[envoy_concurrency]: https://www.envoyproxy.io/docs/envoy/latest/operations/cli#cmdoption-concurrency [gh-6787]: https://github.com/hashicorp/nomad/issues/6787 [gh-8457]: https://github.com/hashicorp/nomad/issues/8457 [gh-9148]: https://github.com/hashicorp/nomad/issues/9148 @@ -870,6 +880,7 @@ deleted and then Nomad 0.3.0 can be launched. [plugins]: /docs/drivers/external [preemption-api]: /api-docs/operator#update-scheduler-configuration [preemption]: /docs/internals/scheduling/preemption +[proxy_concurrency]: /docs/job-specification/sidecar_task#proxy_concurrency [reserved]: /docs/configuration/client#reserved-parameters [task-config]: /docs/job-specification/task#config [tls-guide]: https://learn.hashicorp.com/tutorials/nomad/security-enable-tls