From 8f899ede91792c7e3318ff7fb855f15f9db30f5a Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Fri, 4 Sep 2020 12:50:11 -0500 Subject: [PATCH] consul/connect: dynamically select envoy sidecar at runtime As newer versions of Consul are released, the minimum version of Envoy it supports as a sidecar proxy also gets bumped. Starting with the upcoming Consul v1.9.X series, Envoy v1.11.X will no longer be supported. Current versions of Nomad hardcode a version of Envoy v1.11.2 to be used as the default implementation of Connect sidecar proxy. This PR introduces a change such that each Nomad Client will query its local Consul for a list of Envoy proxies that it supports (https://github.com/hashicorp/consul/pull/8545) and then launch the Connect sidecar proxy task using the latest supported version of Envoy. If the API component is not available, Nomad will fallback to the old version of Envoy supported by old versions of Consul. Setting the meta configuration option `meta.connect.sidecar_image` or setting the `connect.sidecar_task` stanza will take precedence as is the current behavior. 
Addresses #8585 #7665 --- client/allocrunner/alloc_runner.go | 10 +- client/allocrunner/config.go | 4 + ...tstrap_hook.go => envoy_bootstrap_hook.go} | 4 +- ...k_test.go => envoy_bootstrap_hook_test.go} | 0 .../taskrunner/envoy_version_hook.go | 145 ++++++++++++++++++ client/allocrunner/taskrunner/task_runner.go | 14 +- .../taskrunner/task_runner_hooks.go | 13 +- client/client.go | 8 +- client/client_test.go | 2 +- client/config/config.go | 14 +- client/consul/consul.go | 8 + client/consul/proxies.go | 8 + client/testing.go | 10 +- command/agent/agent.go | 25 +-- command/agent/consul/connect_proxies.go | 92 +++++++++++ .../consul/{client.go => service_client.go} | 0 ...{client_test.go => service_client_test.go} | 0 nomad/job_endpoint_hook_connect.go | 4 +- nomad/structs/config/consul.go | 3 + nomad/structs/connect.go | 4 + nomad/structs/structs.go | 6 +- .../pages/docs/upgrade/upgrade-specific.mdx | 23 +++ 22 files changed, 359 insertions(+), 38 deletions(-) rename client/allocrunner/taskrunner/{envoybootstrap_hook.go => envoy_bootstrap_hook.go} (98%) rename client/allocrunner/taskrunner/{envoybootstrap_hook_test.go => envoy_bootstrap_hook_test.go} (100%) create mode 100644 client/allocrunner/taskrunner/envoy_version_hook.go create mode 100644 client/consul/proxies.go create mode 100644 command/agent/consul/connect_proxies.go rename command/agent/consul/{client.go => service_client.go} (100%) rename command/agent/consul/{client_test.go => service_client_test.go} (100%) create mode 100644 nomad/structs/connect.go diff --git a/client/allocrunner/alloc_runner.go b/client/allocrunner/alloc_runner.go index 0972d77dce17..0b75239f1751 100644 --- a/client/allocrunner/alloc_runner.go +++ b/client/allocrunner/alloc_runner.go @@ -64,6 +64,10 @@ type allocRunner struct { // registering services and checks consulClient consul.ConsulServiceAPI + // consulProxiesClient is the client used by the envoy version hook for + // looking up supported envoy versions of the consul agent. 
+ consulProxiesClient consul.SupportedProxiesAPI + // sidsClient is the client used by the service identity hook for // managing SI tokens sidsClient consul.ServiceIdentityAPI @@ -186,6 +190,7 @@ func NewAllocRunner(config *Config) (*allocRunner, error) { alloc: alloc, clientConfig: config.ClientConfig, consulClient: config.Consul, + consulProxiesClient: config.ConsulProxies, sidsClient: config.ConsulSI, vaultClient: config.Vault, tasks: make(map[string]*taskrunner.TaskRunner, len(tg.Tasks)), @@ -236,7 +241,7 @@ func NewAllocRunner(config *Config) (*allocRunner, error) { // initTaskRunners creates task runners but does *not* run them. func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error { for _, task := range tasks { - config := &taskrunner.Config{ + trConfig := &taskrunner.Config{ Alloc: ar.alloc, ClientConfig: ar.clientConfig, Task: task, @@ -246,6 +251,7 @@ func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error { StateUpdater: ar, DynamicRegistry: ar.dynamicRegistry, Consul: ar.consulClient, + ConsulProxies: ar.consulProxiesClient, ConsulSI: ar.sidsClient, Vault: ar.vaultClient, DeviceStatsReporter: ar.deviceStatsReporter, @@ -257,7 +263,7 @@ func (ar *allocRunner) initTaskRunners(tasks []*structs.Task) error { } // Create, but do not Run, the task runner - tr, err := taskrunner.NewTaskRunner(config) + tr, err := taskrunner.NewTaskRunner(trConfig) if err != nil { return fmt.Errorf("failed creating runner for task %q: %v", task.Name, err) } diff --git a/client/allocrunner/config.go b/client/allocrunner/config.go index 8eb013edeac3..fba790b5e6db 100644 --- a/client/allocrunner/config.go +++ b/client/allocrunner/config.go @@ -32,6 +32,10 @@ type Config struct { // Consul is the Consul client used to register task services and checks Consul consul.ConsulServiceAPI + // ConsulProxies is the Consul client used to lookup supported envoy versions + // of the Consul agent. 
+ ConsulProxies consul.SupportedProxiesAPI + // ConsulSI is the Consul client used to manage service identity tokens. ConsulSI consul.ServiceIdentityAPI diff --git a/client/allocrunner/taskrunner/envoybootstrap_hook.go b/client/allocrunner/taskrunner/envoy_bootstrap_hook.go similarity index 98% rename from client/allocrunner/taskrunner/envoybootstrap_hook.go rename to client/allocrunner/taskrunner/envoy_bootstrap_hook.go index ea3010638d02..64500cd54176 100644 --- a/client/allocrunner/taskrunner/envoybootstrap_hook.go +++ b/client/allocrunner/taskrunner/envoy_bootstrap_hook.go @@ -12,7 +12,7 @@ import ( "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/client/allocdir" - "github.com/hashicorp/nomad/client/allocrunner/interfaces" + ifs "github.com/hashicorp/nomad/client/allocrunner/interfaces" agentconsul "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/structs" @@ -150,7 +150,7 @@ func (h *envoyBootstrapHook) lookupService(svcKind, svcName, tgName string) (*st // Prestart creates an envoy bootstrap config file. // // Must be aware of both launching envoy as a sidecar proxy, as well as a connect gateway. 
-func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { +func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestartRequest, resp *ifs.TaskPrestartResponse) error { if !req.Task.Kind.IsConnectProxy() && !req.Task.Kind.IsAnyConnectGateway() { // Not a Connect proxy sidecar resp.Done = true diff --git a/client/allocrunner/taskrunner/envoybootstrap_hook_test.go b/client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go similarity index 100% rename from client/allocrunner/taskrunner/envoybootstrap_hook_test.go rename to client/allocrunner/taskrunner/envoy_bootstrap_hook_test.go diff --git a/client/allocrunner/taskrunner/envoy_version_hook.go b/client/allocrunner/taskrunner/envoy_version_hook.go new file mode 100644 index 000000000000..43e2fb724785 --- /dev/null +++ b/client/allocrunner/taskrunner/envoy_version_hook.go @@ -0,0 +1,145 @@ +package taskrunner + +import ( + "context" + "fmt" + + hclog "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-version" + ifs "github.com/hashicorp/nomad/client/allocrunner/interfaces" + "github.com/hashicorp/nomad/client/consul" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/pkg/errors" +) + +const ( + // envoyVersionHookName is the name of this hook and appears in logs. + envoyVersionHookName = "envoy_version" + + // envoyLegacyImage is used when the version of Consul is too old to support + // the SupportedProxies field in the self API. + // + // This is the version defaulted by Nomad before v1.0. + envoyLegacyImage = "envoyproxy/envoy:v1.11.2@sha256:a7769160c9c1a55bb8d07a3b71ce5d64f72b1f665f10d81aa1581bc3cf850d09" + + // envoyImageFormat is the format string used for official envoy Docker images + // with the tag being the semver of the version of envoy. 
+ envoyImageFormat = "envoyproxy/envoy:%s" +) + +type envoyVersionHookConfig struct { + alloc *structs.Allocation + proxiesClient consul.SupportedProxiesAPI + logger hclog.Logger +} + +func newEnvoyVersionHookConfig(alloc *structs.Allocation, proxiesClient consul.SupportedProxiesAPI, logger hclog.Logger) *envoyVersionHookConfig { + return &envoyVersionHookConfig{ + alloc: alloc, + logger: logger, + proxiesClient: proxiesClient, + } +} + +type envoyVersionHook struct { + // alloc is the allocation with the envoy task being rewritten. + alloc *structs.Allocation + + // proxiesClient is the subset of the Consul API for getting information + // from Consul about the versions of Envoy it supports. + proxiesClient consul.SupportedProxiesAPI + + // logger is used to log things. + logger hclog.Logger +} + +func newEnvoyVersionHook(c *envoyVersionHookConfig) *envoyVersionHook { + return &envoyVersionHook{ + alloc: c.alloc, + proxiesClient: c.proxiesClient, + logger: c.logger.Named(envoyVersionHookName), + } +} + +func (envoyVersionHook) Name() string { + return envoyVersionHookName +} + +func (h *envoyVersionHook) Prestart(ctx context.Context, request *ifs.TaskPrestartRequest, response *ifs.TaskPrestartResponse) error { + if h.skip(request) { + response.Done = true + return nil + } + + // it's either legacy or manageable, need to know consul version + proxies, err := h.proxiesClient.Proxies() + if err != nil { + return err + } + + image, err := h.image(proxies) + if err != nil { + return err + } + + h.logger.Trace("setting task envoy image", "image", image) + request.Task.Config["image"] = image + response.Done = true + return nil +} + +// skip will return true if the request does not contain a task that should have +// its envoy proxy version resolved automatically. 
+func (h *envoyVersionHook) skip(request *ifs.TaskPrestartRequest) bool { + switch { + case request.Task.Driver != "docker": + return true + case !request.Task.UsesConnectSidecar(): + return true + case !h.isSentinel(request.Task.Config): + return true + } + return false +} + +// isSentinel returns true if the docker.config.image value has been left to +// Nomad's default sentinel value, indicating that Nomad and Consul should work +// together to determine the best Envoy version to use. +func (_ *envoyVersionHook) isSentinel(config map[string]interface{}) bool { + if len(config) == 0 { + return false + } + + image, ok := config["image"].(string) + if !ok { + return false + } + + return image == structs.ConnectEnvoySentinel +} + +// image determines the best Envoy version to use. if supported is nil or empty +// Nomad will fallback to the legacy envoy image used before Nomad v1.0. +func (_ *envoyVersionHook) image(supported map[string][]string) (string, error) { + versions := supported["envoy"] + if len(versions) == 0 { + return envoyLegacyImage, nil + } + + latest, err := semver(versions[0]) + if err != nil { + return "", err + } + + return fmt.Sprintf(envoyImageFormat, latest), nil +} + +// semver sanitizes the envoy version string coming from Consul into the format +// used by the Envoy project when publishing images (i.e. proper semver). 
+func semver(chosen string) (string, error) { + v, err := version.NewVersion(chosen) + if err != nil { + return "", errors.Wrap(err, "unexpected envoy version format") + } + return "v" + v.String(), nil +} diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index 55bf98ac7de3..d5d5d5f8f109 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -158,7 +158,12 @@ type TaskRunner struct { // consulClient is the client used by the consul service hook for // registering services and checks - consulClient consul.ConsulServiceAPI + consulServiceClient consul.ConsulServiceAPI + + // consulProxiesClient is the client used by the envoy version hook for + // asking consul what version of envoy nomad should inject into the connect + // sidecar or gateway task. + consulProxiesClient consul.SupportedProxiesAPI // sidsClient is the client used by the service identity hook for managing // service identity tokens @@ -234,6 +239,10 @@ type Config struct { // Consul is the client to use for managing Consul service registrations Consul consul.ConsulServiceAPI + // ConsulProxies is the client to use for looking up supported envoy versions + // from Consul. 
+ ConsulProxies consul.SupportedProxiesAPI + // ConsulSI is the client to use for managing Consul SI tokens ConsulSI consul.ServiceIdentityAPI @@ -302,7 +311,8 @@ func NewTaskRunner(config *Config) (*TaskRunner, error) { taskLeader: config.Task.Leader, envBuilder: envBuilder, dynamicRegistry: config.DynamicRegistry, - consulClient: config.Consul, + consulServiceClient: config.Consul, + consulProxiesClient: config.ConsulProxies, siClient: config.ConsulSI, vaultClient: config.Vault, state: tstate, diff --git a/client/allocrunner/taskrunner/task_runner_hooks.go b/client/allocrunner/taskrunner/task_runner_hooks.go index c9c2f752ec10..058a099d3235 100644 --- a/client/allocrunner/taskrunner/task_runner_hooks.go +++ b/client/allocrunner/taskrunner/task_runner_hooks.go @@ -106,7 +106,7 @@ func (tr *TaskRunner) initHooks() { tr.runnerHooks = append(tr.runnerHooks, newServiceHook(serviceHookConfig{ alloc: tr.Alloc(), task: tr.Task(), - consul: tr.consulClient, + consul: tr.consulServiceClient, restarter: tr, logger: hookLogger, })) @@ -127,10 +127,11 @@ func (tr *TaskRunner) initHooks() { })) } - if task.Kind.IsConnectProxy() || task.Kind.IsAnyConnectGateway() { - tr.runnerHooks = append(tr.runnerHooks, newEnvoyBootstrapHook( - newEnvoyBootstrapHookConfig(alloc, tr.clientConfig.ConsulConfig, hookLogger), - )) + if task.UsesConnectSidecar() { + tr.runnerHooks = append(tr.runnerHooks, + newEnvoyVersionHook(newEnvoyVersionHookConfig(alloc, tr.consulProxiesClient, hookLogger)), + newEnvoyBootstrapHook(newEnvoyBootstrapHookConfig(alloc, tr.clientConfig.ConsulConfig, hookLogger)), + ) } else if task.Kind.IsConnectNative() { tr.runnerHooks = append(tr.runnerHooks, newConnectNativeHook( newConnectNativeHookConfig(alloc, tr.clientConfig.ConsulConfig, hookLogger), @@ -142,7 +143,7 @@ func (tr *TaskRunner) initHooks() { scriptCheckHook := newScriptCheckHook(scriptCheckHookConfig{ alloc: tr.Alloc(), task: tr.Task(), - consul: tr.consulClient, + consul: tr.consulServiceClient, logger: 
hookLogger, }) tr.runnerHooks = append(tr.runnerHooks, scriptCheckHook) diff --git a/client/client.go b/client/client.go index 16339d25eb0f..c393b84827a9 100644 --- a/client/client.go +++ b/client/client.go @@ -224,6 +224,10 @@ type Client struct { // and checks. consulService consulApi.ConsulServiceAPI + // consulProxies is Nomad's custom Consul client for looking up supported + // envoy versions + consulProxies consulApi.SupportedProxiesAPI + // consulCatalog is the subset of Consul's Catalog API Nomad uses. consulCatalog consul.CatalogAPI @@ -306,7 +310,7 @@ var ( ) // NewClient is used to create a new client from the given configuration -func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulService consulApi.ConsulServiceAPI) (*Client, error) { +func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxies consulApi.SupportedProxiesAPI, consulService consulApi.ConsulServiceAPI) (*Client, error) { // Create the tls wrapper var tlsWrap tlsutil.RegionWrapper if cfg.TLSConfig.EnableRPC { @@ -331,6 +335,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulServic c := &Client{ config: cfg, consulCatalog: consulCatalog, + consulProxies: consulProxies, consulService: consulService, start: time.Now(), connPool: pool.NewPool(logger, clientRPCCache, clientMaxStreams, tlsWrap), @@ -2382,6 +2387,7 @@ func (c *Client) addAlloc(alloc *structs.Allocation, migrateToken string) error ClientConfig: c.configCopy, StateDB: c.stateDB, Consul: c.consulService, + ConsulProxies: c.consulProxies, ConsulSI: c.tokensClient, Vault: c.vaultClient, StateUpdater: c, diff --git a/client/client_test.go b/client/client_test.go index 96bf55f44a73..c8e472d06c7c 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -622,7 +622,7 @@ func TestClient_SaveRestoreState(t *testing.T) { c1.config.PluginLoader = catalog.TestPluginLoaderWithOptions(t, "", c1.config.Options, nil) c1.config.PluginSingletonLoader = 
singleton.NewSingletonLoader(logger, c1.config.PluginLoader) - c2, err := NewClient(c1.config, consulCatalog, mockService) + c2, err := NewClient(c1.config, consulCatalog, nil, mockService) // todo(shoenig) if err != nil { t.Fatalf("err: %v", err) } diff --git a/client/config/config.go b/client/config/config.go index ab282691f4ee..eafcde6e3c79 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -13,7 +13,7 @@ import ( "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pluginutils/loader" "github.com/hashicorp/nomad/nomad/structs" - "github.com/hashicorp/nomad/nomad/structs/config" + structsc "github.com/hashicorp/nomad/nomad/structs/config" "github.com/hashicorp/nomad/plugins/base" "github.com/hashicorp/nomad/version" ) @@ -144,10 +144,10 @@ type Config struct { Version *version.VersionInfo // ConsulConfig is this Agent's Consul configuration - ConsulConfig *config.ConsulConfig + ConsulConfig *structsc.ConsulConfig // VaultConfig is this Agent's Vault configuration - VaultConfig *config.VaultConfig + VaultConfig *structsc.VaultConfig // StatsCollectionInterval is the interval at which the Nomad client // collects resource usage stats @@ -162,7 +162,7 @@ type Config struct { PublishAllocationMetrics bool // TLSConfig holds various TLS related configurations - TLSConfig *config.TLSConfig + TLSConfig *structsc.TLSConfig // GCInterval is the time interval at which the client triggers garbage // collection @@ -303,12 +303,12 @@ func (c *Config) Copy() *Config { func DefaultConfig() *Config { return &Config{ Version: version.GetVersion(), - VaultConfig: config.DefaultVaultConfig(), - ConsulConfig: config.DefaultConsulConfig(), + VaultConfig: structsc.DefaultVaultConfig(), + ConsulConfig: structsc.DefaultConsulConfig(), LogOutput: os.Stderr, Region: "global", StatsCollectionInterval: 1 * time.Second, - TLSConfig: &config.TLSConfig{}, + TLSConfig: &structsc.TLSConfig{}, LogLevel: "DEBUG", GCInterval: 1 * time.Minute, 
GCParallelDestroys: 2, diff --git a/client/consul/consul.go b/client/consul/consul.go index 5cb2ef165207..2322dcafeed3 100644 --- a/client/consul/consul.go +++ b/client/consul/consul.go @@ -42,3 +42,11 @@ type ServiceIdentityAPI interface { // identity tokens be generated for tasks in the allocation. DeriveSITokens(alloc *structs.Allocation, tasks []string) (map[string]string, error) } + +// SupportedProxiesAPI is the interface the Nomad Client uses to request from +// Consul the set of supported proxies to use for Consul Connect. +// +// No ACL requirements +type SupportedProxiesAPI interface { + Proxies() (map[string][]string, error) +} diff --git a/client/consul/proxies.go b/client/consul/proxies.go new file mode 100644 index 000000000000..57653539f4cd --- /dev/null +++ b/client/consul/proxies.go @@ -0,0 +1,8 @@ +package consul + +import "github.com/hashicorp/go-hclog" + +// Implementation of SupportedProxiesAPI used to interact with Consul +type proxiesClient struct { + logger hclog.Logger +} diff --git a/client/testing.go b/client/testing.go index 8b881c0e9cb0..6ce3ddd29e08 100644 --- a/client/testing.go +++ b/client/testing.go @@ -5,9 +5,9 @@ import ( "time" "github.com/hashicorp/nomad/client/config" - consulApi "github.com/hashicorp/nomad/client/consul" + consulapi "github.com/hashicorp/nomad/client/consul" "github.com/hashicorp/nomad/client/fingerprint" - "github.com/hashicorp/nomad/command/agent/consul" + agentconsul "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper/pluginutils/catalog" "github.com/hashicorp/nomad/helper/pluginutils/singleton" "github.com/hashicorp/nomad/helper/testlog" @@ -44,9 +44,9 @@ func TestClient(t testing.T, cb func(c *config.Config)) (*Client, func() error) if conf.PluginSingletonLoader == nil { conf.PluginSingletonLoader = singleton.NewSingletonLoader(logger, conf.PluginLoader) } - catalog := consul.NewMockCatalog(logger) - mockService := consulApi.NewMockConsulServiceClient(t, logger) - client, 
err := NewClient(conf, catalog, mockService) + mockCatalog := agentconsul.NewMockCatalog(logger) + mockService := consulapi.NewMockConsulServiceClient(t, logger) + client, err := NewClient(conf, mockCatalog, nil, mockService) if err != nil { cleanup() t.Fatalf("err: %v", err) diff --git a/command/agent/agent.go b/command/agent/agent.go index 909b7c9f73ba..16bfb2179e98 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -15,7 +15,7 @@ import ( "time" metrics "github.com/armon/go-metrics" - "github.com/hashicorp/consul/api" + consulapi "github.com/hashicorp/consul/api" "github.com/hashicorp/consul/lib" log "github.com/hashicorp/go-hclog" uuidparse "github.com/hashicorp/go-uuid" @@ -74,10 +74,13 @@ type Agent struct { // and checks. consulService *consul.ServiceClient + // consulProxies is the subset of Consul's Agent API Nomad uses. + consulProxies *consul.ConnectProxies + // consulCatalog is the subset of Consul's Catalog API Nomad uses. consulCatalog consul.CatalogAPI - // consulConfigEntries is the subset of Consul's Configuration Entires API Nomad uses. + // consulConfigEntries is the subset of Consul's Configuration Entries API Nomad uses. consulConfigEntries consul.ConfigAPI // consulACLs is Nomad's subset of Consul's ACL API Nomad uses. 
@@ -845,11 +848,11 @@ func (a *Agent) setupClient() error { conf.StateDBFactory = state.GetStateDBFactory(conf.DevMode) } - client, err := client.NewClient(conf, a.consulCatalog, a.consulService) + nomadClient, err := client.NewClient(conf, a.consulCatalog, a.consulProxies, a.consulService) if err != nil { return fmt.Errorf("client setup failed: %v", err) } - a.client = client + a.client = nomadClient // Create the Nomad Client services for Consul if *a.config.Consul.AutoAdvertise { @@ -1119,26 +1122,30 @@ func (a *Agent) setupConsul(consulConfig *config.ConsulConfig) error { if err != nil { return err } - client, err := api.NewClient(apiConf) + + consulClient, err := consulapi.NewClient(apiConf) if err != nil { return err } // Create Consul Catalog client for service discovery. - a.consulCatalog = client.Catalog() + a.consulCatalog = consulClient.Catalog() // Create Consul ConfigEntries client for managing Config Entries. - a.consulConfigEntries = client.ConfigEntries() + a.consulConfigEntries = consulClient.ConfigEntries() // Create Consul ACL client for managing tokens. - a.consulACLs = client.ACL() + a.consulACLs = consulClient.ACL() // Create Consul Service client for service advertisement and checks. isClient := false if a.config.Client != nil && a.config.Client.Enabled { isClient = true } - a.consulService = consul.NewServiceClient(client.Agent(), a.logger, isClient) + // Create Consul Agent client for looking up info about the agent. 
+ consulAgentClient := consulClient.Agent() + a.consulService = consul.NewServiceClient(consulAgentClient, a.logger, isClient) + a.consulProxies = consul.NewConnectProxiesClient(consulAgentClient) // Run the Consul service client's sync'ing main loop go a.consulService.Run() diff --git a/command/agent/consul/connect_proxies.go b/command/agent/consul/connect_proxies.go new file mode 100644 index 000000000000..91cb03fc5acc --- /dev/null +++ b/command/agent/consul/connect_proxies.go @@ -0,0 +1,92 @@ +package consul + +import ( + "errors" +) + +// ConnectProxies implements SupportedProxiesAPI by using the Consul Agent API. +type ConnectProxies struct { + agentAPI AgentAPI +} + +func NewConnectProxiesClient(agentAPI AgentAPI) *ConnectProxies { + return &ConnectProxies{ + agentAPI: agentAPI, + } +} + +// Proxies returns a map of the supported proxies. The proxies are sorted from +// Consul with the most preferred version as the 0th element. +// +// If Consul is of a version that does not support the API, a nil map is returned +// with no error. +// +// If Consul cannot be reached an error is returned. +func (c *ConnectProxies) Proxies() (map[string][]string, error) { + // Based on the Consul query: + // $ curl -s localhost:8500/v1/agent/self | jq .xDS + // { + // "SupportedProxies": { + // "envoy": [ + // "1.15.0", + // "1.14.4", + // "1.13.4", + // "1.12.6" + // ] + // } + // } + + self, err := c.agentAPI.Self() + if err != nil { + // this should not fail as long as we can reach consul + return nil, err + } + + // if consul does not return a map of the supported consul proxies, it + // must be a version from before when the API was added in versions + // 1.9.0, 1.8.3, 1.7.7. Earlier versions in the same point release as well + // as all of 1.6.X support Connect, but not the supported proxies API. + // For these cases, we can simply fallback to the old version of Envoy + // that Nomad defaulted to in the before time - but not here. 
Instead, + // return nil so we can choose what to do at the caller. + + xds, xdsExists := self["xDS"] + if !xdsExists { + return nil, nil + } + + proxies, proxiesExists := xds["SupportedProxies"] + if !proxiesExists { + return nil, nil + } + + // convert interface{} to map[string]interface{} + + intermediate, ok := proxies.(map[string]interface{}) + if !ok { + return nil, errors.New("unexpected response format from Consul") + } + + // convert map[string]interface{} to map[string][]string + + result := make(map[string][]string, len(intermediate)) + for k, v := range intermediate { + + // convert interface{} to []interface{} + + if si, ok := v.([]interface{}); ok { + ss := make([]string, 0, len(si)) + for _, z := range si { + + // convert interface{} to string + + if s, ok := z.(string); ok { + ss = append(ss, s) + } + } + result[k] = ss + } + } + + return result, nil +} diff --git a/command/agent/consul/client.go b/command/agent/consul/service_client.go similarity index 100% rename from command/agent/consul/client.go rename to command/agent/consul/service_client.go diff --git a/command/agent/consul/client_test.go b/command/agent/consul/service_client_test.go similarity index 100% rename from command/agent/consul/client_test.go rename to command/agent/consul/service_client_test.go diff --git a/nomad/job_endpoint_hook_connect.go b/nomad/job_endpoint_hook_connect.go index 6b8b9269eeb0..381cc4d3ae7a 100644 --- a/nomad/job_endpoint_hook_connect.go +++ b/nomad/job_endpoint_hook_connect.go @@ -24,7 +24,7 @@ var ( // connect proxy sidecar task. connectSidecarDriverConfig = func() map[string]interface{} { return map[string]interface{}{ - "image": "${meta.connect.sidecar_image}", + "image": structs.ConnectEnvoySentinel, "args": []interface{}{ "-c", structs.EnvoyBootstrapPath, "-l", "${meta.connect.log_level}", @@ -40,7 +40,7 @@ var ( // networking is being used the network_mode driver configuration is set here. 
connectGatewayDriverConfig = func(hostNetwork bool) map[string]interface{} { m := map[string]interface{}{ - "image": "${meta.connect.gateway_image}", + "image": structs.ConnectEnvoySentinel, "args": []interface{}{ "-c", structs.EnvoyBootstrapPath, "-l", "${meta.connect.log_level}", diff --git a/nomad/structs/config/consul.go b/nomad/structs/config/consul.go index 538cd2f111a8..145bfd5ba38f 100644 --- a/nomad/structs/config/consul.go +++ b/nomad/structs/config/consul.go @@ -17,6 +17,9 @@ import ( // - Bootstrap this Nomad Client with the list of Nomad Servers registered // with Consul // +// - Establish how this Nomad Client will resolve Envoy Connect Sidecar +// images. +// // Both the Agent and the executor need to be able to import ConsulConfig. type ConsulConfig struct { // ServerServiceName is the name of the service that Nomad uses to register diff --git a/nomad/structs/connect.go b/nomad/structs/connect.go new file mode 100644 index 000000000000..87cd5d7a221a --- /dev/null +++ b/nomad/structs/connect.go @@ -0,0 +1,4 @@ +package structs + +// todo(shoenig): move existing connect stuff over from services.go +const ConnectEnvoySentinel = "hashicorpnomad:envoy:sentinel" diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index a5a8615d944c..da68a560c925 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6229,7 +6229,11 @@ type Task struct { // Task, which exports known types of Tasks. UsesConnect will be true if the // task is a connect proxy, connect native, or is a connect gateway. 
func (t *Task) UsesConnect() bool { - return t.Kind.IsConnectProxy() || t.Kind.IsConnectNative() || t.Kind.IsAnyConnectGateway() + return t.Kind.IsConnectNative() || t.UsesConnectSidecar() +} + +func (t *Task) UsesConnectSidecar() bool { + return t.Kind.IsConnectProxy() || t.Kind.IsAnyConnectGateway() } func (t *Task) Copy() *Task { diff --git a/website/pages/docs/upgrade/upgrade-specific.mdx b/website/pages/docs/upgrade/upgrade-specific.mdx index d26629a65856..06d4d9c8362d 100644 --- a/website/pages/docs/upgrade/upgrade-specific.mdx +++ b/website/pages/docs/upgrade/upgrade-specific.mdx @@ -15,6 +15,29 @@ details provided for their upgrades as a result of new features or changed behavior. This page is used to document those details separately from the standard upgrade flow. +## Nomad 0.13.0 + +### Envoy proxy versions + +Nomad 0.13.0 changes the behavior around the selection of Envoy version used +for Connect sidecar proxies. Previously, Nomad always defaulted to Envoy v1.11.2 +if neither the `meta.connect.sidecar_image` parameter nor `sidecar_task` stanza +was explicitly configured. Starting with Nomad 0.13.0, each Nomad Client will +query Consul for a list of supported Envoy versions. Nomad will make use of the +latest version of Envoy supported by the Consul agent when launching Envoy as a +Connect sidecar proxy. If the version of the Consul agent is older than v1.7.8, +v1.8.4, or v1.9.0, Nomad will fall back to the v1.11.2 version of Envoy. +As before, if the `meta.connect.sidecar_image` or `sidecar_task` stanza are set, +those settings take precedence. + +When upgrading Nomad Clients from a previous version to v0.13.0 and above, it is +recommended to also upgrade the Consul agents to v1.9.0 or newer. 
Upgrading +Nomad and Consul to versions that support the new behavior while also doing a +full [node drain](https://www.nomadproject.io/docs/upgrade#5-upgrade-clients) at +the time of the upgrade for each node will ensure Connect workloads are properly +rescheduled onto nodes in such a way that the Nomad Clients, Consul agents, and +Envoy sidecar tasks maintain compatibility with one another. + ## Nomad 0.12.0 ### Enterprise Licensing