diff --git a/.changelog/15309.txt b/.changelog/15309.txt new file mode 100644 index 000000000000..73621fea253a --- /dev/null +++ b/.changelog/15309.txt @@ -0,0 +1,3 @@ +```release-note:bug +fingerprint: Ensure Nomad can correctly fingerprint Consul gRPC where the Consul agent is running v1.14.0 or greater +``` diff --git a/client/allocrunner/alloc_runner_hooks.go b/client/allocrunner/alloc_runner_hooks.go index 37d9b4a49735..c2d69ca0ebb4 100644 --- a/client/allocrunner/alloc_runner_hooks.go +++ b/client/allocrunner/alloc_runner_hooks.go @@ -161,7 +161,7 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error { logger: hookLogger, shutdownDelayCtx: ar.shutdownDelayCtx, }), - newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig), + newConsulGRPCSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig, config.Node.Attributes), newConsulHTTPSocketHook(hookLogger, alloc, ar.allocDir, config.ConsulConfig), newCSIHook(alloc, hookLogger, ar.csiManager, ar.rpcClient, ar, hrs, ar.clientConfig.Node.SecretID), newChecksHook(hookLogger, alloc, ar.checkStore, ar), diff --git a/client/allocrunner/consul_grpc_sock_hook.go b/client/allocrunner/consul_grpc_sock_hook.go index 4a7cf4e34c67..a9e7c2ff1b4a 100644 --- a/client/allocrunner/consul_grpc_sock_hook.go +++ b/client/allocrunner/consul_grpc_sock_hook.go @@ -25,6 +25,11 @@ const ( // socketProxyStopWaitTime is the amount of time to wait for a socket proxy // to stop before assuming something went awry and return a timeout error. socketProxyStopWaitTime = 3 * time.Second + + // consulGRPCFallbackPort is the last resort fallback port to use in + // combination with the Consul HTTP config address when creating the + // socket. + consulGRPCFallbackPort = "8502" ) var ( @@ -45,10 +50,20 @@ type consulGRPCSocketHook struct { proxy *grpcSocketProxy } -func newConsulGRPCSocketHook(logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *consulGRPCSocketHook { +func newConsulGRPCSocketHook( + logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, + config *config.ConsulConfig, nodeAttrs map[string]string) *consulGRPCSocketHook { + + // Attempt to find the gRPC port via the node attributes, otherwise use the + // default fallback. + consulGRPCPort, ok := nodeAttrs["consul.grpc"] + if !ok { + consulGRPCPort = consulGRPCFallbackPort + } + return &consulGRPCSocketHook{ alloc: alloc, - proxy: newGRPCSocketProxy(logger, allocDir, config), + proxy: newGRPCSocketProxy(logger, allocDir, config, consulGRPCPort), logger: logger.Named(consulGRPCSockHookName), } } @@ -119,21 +134,29 @@ type grpcSocketProxy struct { allocDir *allocdir.AllocDir config *config.ConsulConfig + // consulGRPCFallbackPort is the port to use if the operator did not + // specify a gRPC config address. + consulGRPCFallbackPort string + ctx context.Context cancel func() doneCh chan struct{} runOnce bool } -func newGRPCSocketProxy(logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *grpcSocketProxy { +func newGRPCSocketProxy( + logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig, + consulGRPCFallbackPort string) *grpcSocketProxy { + ctx, cancel := context.WithCancel(context.Background()) return &grpcSocketProxy{ - allocDir: allocDir, - config: config, - ctx: ctx, - cancel: cancel, - doneCh: make(chan struct{}), - logger: logger, + allocDir: allocDir, + config: config, + consulGRPCFallbackPort: consulGRPCFallbackPort, + ctx: ctx, + cancel: cancel, + doneCh: make(chan struct{}), + logger: logger, } } @@ -173,7 +196,7 @@ func (p *grpcSocketProxy) run(alloc *structs.Allocation) error { p.config.Addr, err) } - destAddr = net.JoinHostPort(host, "8502") + destAddr = net.JoinHostPort(host, p.consulGRPCFallbackPort) } hostGRPCSocketPath := filepath.Join(p.allocDir.AllocDir, allocdir.AllocGRPCSocket) diff --git a/client/allocrunner/consul_grpc_sock_hook_test.go b/client/allocrunner/consul_grpc_sock_hook_test.go index 9f7855ddf412..6e8e1ed069ff 100644 --- a/client/allocrunner/consul_grpc_sock_hook_test.go +++ b/client/allocrunner/consul_grpc_sock_hook_test.go @@ -42,7 +42,7 @@ func TestConsulGRPCSocketHook_PrerunPostrun_Ok(t *testing.T) { defer cleanup() // Start the unix socket proxy - h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig, map[string]string{}) require.NoError(t, h.Prerun()) gRPCSock := filepath.Join(allocDir.AllocDir, allocdir.AllocGRPCSocket) @@ -116,7 +116,7 @@ func TestConsulGRPCSocketHook_Prerun_Error(t *testing.T) { { // An alloc without a Connect proxy sidecar should not return // an error. - h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig, map[string]string{}) require.NoError(t, h.Prerun()) // Postrun should be a noop @@ -126,7 +126,7 @@ func TestConsulGRPCSocketHook_Prerun_Error(t *testing.T) { { // An alloc *with* a Connect proxy sidecar *should* return an error // when Consul is not configured. - h := newConsulGRPCSocketHook(logger, connectAlloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, connectAlloc, allocDir, consulConfig, map[string]string{}) require.EqualError(t, h.Prerun(), "consul address must be set on nomad client") // Postrun should be a noop @@ -136,7 +136,7 @@ func TestConsulGRPCSocketHook_Prerun_Error(t *testing.T) { { // Updating an alloc without a sidecar to have a sidecar should // error when the sidecar is added. - h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig) + h := newConsulGRPCSocketHook(logger, alloc, allocDir, consulConfig, map[string]string{}) require.NoError(t, h.Prerun()) req := &interfaces.RunnerUpdateRequest{ diff --git a/client/fingerprint/consul.go b/client/fingerprint/consul.go index e1b2b449d904..15f8ce25e524 100644 --- a/client/fingerprint/consul.go +++ b/client/fingerprint/consul.go @@ -3,10 +3,12 @@ package fingerprint import ( "fmt" "strconv" + "strings" "time" consulapi "github.com/hashicorp/consul/api" log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-version" agentconsul "github.com/hashicorp/nomad/command/agent/consul" ) @@ -15,6 +17,14 @@ const ( consulUnavailable = "unavailable" ) +var ( + // consulGRPCPortChangeVersion is the Consul version which made a breaking + // change to the way gRPC API listeners are created. This means Nomad must + // perform different fingerprinting depending on which version of Consul it + // is communicating with. + consulGRPCPortChangeVersion = version.Must(version.NewVersion("1.14.0")) +) + // ConsulFingerprint is used to fingerprint for Consul type ConsulFingerprint struct { logger log.Logger @@ -97,7 +107,7 @@ func (f *ConsulFingerprint) initialize(req *FingerprintRequest) error { "consul.datacenter": f.dc, "consul.segment": f.segment, "consul.connect": f.connect, - "consul.grpc": f.grpc, + "consul.grpc": f.grpc(consulConfig.Scheme), "consul.ft.namespaces": f.namespaces, } } @@ -173,11 +183,50 @@ func (f *ConsulFingerprint) connect(info agentconsul.Self) (string, bool) { return strconv.FormatBool(c), ok } -func (f *ConsulFingerprint) grpc(info agentconsul.Self) (string, bool) { +func (f *ConsulFingerprint) grpc(scheme string) func(info agentconsul.Self) (string, bool) { + return func(info agentconsul.Self) (string, bool) { + + // The version is needed in order to understand which config object to + // query. This is because Consul 1.14.0 added a new gRPC port which + // broke the previous behaviour. + v, ok := info["Config"]["Version"].(string) + if !ok { + return "", false + } + + consulVersion, err := version.NewVersion(v) + if err != nil { + return "", false + } + + // If the Consul agent being fingerprinted is running a version less + // than 1.14.0 we use the original single gRPC port. + if consulVersion.Core().LessThan(consulGRPCPortChangeVersion.Core()) { + return f.grpcPort(info) + } + + // Now that we know we are querying a Consul agent running v1.14.0 or + // greater, we need to select the correct port parameter from the + // config depending on whether we have been asked to speak TLS or not. + switch strings.ToLower(scheme) { + case "https": + return f.grpcTLSPort(info) + default: + return f.grpcPort(info) + } + } +} + +func (f *ConsulFingerprint) grpcPort(info agentconsul.Self) (string, bool) { p, ok := info["DebugConfig"]["GRPCPort"].(float64) return fmt.Sprintf("%d", int(p)), ok } +func (f *ConsulFingerprint) grpcTLSPort(info agentconsul.Self) (string, bool) { + p, ok := info["DebugConfig"]["GRPCTLSPort"].(float64) + return fmt.Sprintf("%d", int(p)), ok +} + func (f *ConsulFingerprint) namespaces(info agentconsul.Self) (string, bool) { return strconv.FormatBool(agentconsul.Namespaces(info)), true } diff --git a/client/fingerprint/consul_test.go b/client/fingerprint/consul_test.go index b8b322fb0882..00d6ace47232 100644 --- a/client/fingerprint/consul_test.go +++ b/client/fingerprint/consul_test.go @@ -296,29 +296,91 @@ func TestConsulFingerprint_grpc(t *testing.T) { fp := newConsulFingerPrint(t) - t.Run("grpc set", func(t *testing.T) { - s, ok := fp.grpc(agentconsul.Self{ + t.Run("grpc set pre-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, "DebugConfig": {"GRPCPort": 8502.0}, // JSON numbers are floats }) require.True(t, ok) require.Equal(t, "8502", s) }) - t.Run("grpc disabled", func(t *testing.T) { - s, ok := fp.grpc(agentconsul.Self{ + t.Run("grpc disabled pre-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, "DebugConfig": {"GRPCPort": -1.0}, // JSON numbers are floats }) require.True(t, ok) require.Equal(t, "-1", s) }) - t.Run("grpc missing", func(t *testing.T) { - _, ok := fp.grpc(agentconsul.Self{ + t.Run("grpc set pre-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, + "DebugConfig": {"GRPCPort": 8502.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "8502", s) + }) + + t.Run("grpc disabled pre-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.13.3"}, + "DebugConfig": {"GRPCPort": -1.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "-1", s) + }) + + t.Run("grpc set post-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCPort": 8502.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "8502", s) + }) + + t.Run("grpc disabled post-1.14 http", func(t *testing.T) { + s, ok := fp.grpc("http")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCPort": -1.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "-1", s) + }) + + t.Run("grpc disabled post-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCTLSPort": -1.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "-1", s) + }) + + t.Run("grpc set post-1.14 https", func(t *testing.T) { + s, ok := fp.grpc("https")(agentconsul.Self{ + "Config": {"Version": "1.14.0"}, + "DebugConfig": {"GRPCTLSPort": 8503.0}, // JSON numbers are floats + }) + require.True(t, ok) + require.Equal(t, "8503", s) + }) + + t.Run("grpc missing http", func(t *testing.T) { + _, ok := fp.grpc("http")(agentconsul.Self{ "DebugConfig": {}, }) require.False(t, ok) }) + t.Run("grpc missing https", func(t *testing.T) { + _, ok := fp.grpc("https")(agentconsul.Self{ + "DebugConfig": {}, + }) + require.False(t, ok) + }) } func TestConsulFingerprint_namespaces(t *testing.T) { diff --git a/website/content/docs/configuration/consul.mdx b/website/content/docs/configuration/consul.mdx index 6858a2d5ec7d..3c3fc3d5c362 100644 --- a/website/content/docs/configuration/consul.mdx +++ b/website/content/docs/configuration/consul.mdx @@ -90,8 +90,8 @@ configuring Nomad to talk to Consul via DNS such as consul.service.consul - `grpc_address` `(string: "127.0.0.1:8502")` - Specifies the address to the local Consul agent for `gRPC` requests, given in the format `host:port`. Note that - Consul does not enable the [`grpc`](https://developer.hashicorp.com/consul/docs/agent/config/config-files#_grpc_port) - listener by default. + Consul does not enable the [`grpc`][grpc_port] or [`grpc_tls`][grpctls_port] + listeners by default. - `key_file` `(string: "")` - Specifies the path to the private key used for Consul communication. If this is set then you need to also set `cert_file`. @@ -227,3 +227,5 @@ namespace "nomad-ns" { [consul]: https://www.consul.io/ 'Consul by HashiCorp' [bootstrap]: https://learn.hashicorp.com/tutorials/nomad/clustering 'Automatic Bootstrapping' [go-sockaddr/template]: https://pkg.go.dev/github.com/hashicorp/go-sockaddr/template +[grpc_port]: https://developer.hashicorp.com/consul/docs/agent/config/config-files#grpc_port +[grpctls_port]: https://developer.hashicorp.com/consul/docs/agent/config/config-files#grpc_tls_port diff --git a/website/content/docs/integrations/consul-connect.mdx b/website/content/docs/integrations/consul-connect.mdx index 75e314c02f89..ea8490d4a84a 100644 --- a/website/content/docs/integrations/consul-connect.mdx +++ b/website/content/docs/integrations/consul-connect.mdx @@ -63,7 +63,10 @@ $ consul agent -dev To use service mesh on a non-dev Consul agent, you will minimally need to enable the GRPC port and set `connect` to enabled by adding some additional information to -your Consul client configurations, depending on format. +your Consul client configurations, depending on format. Consul agents running TLS +and a version greater than [1.14.0](https://releases.hashicorp.com/consul/1.14.0) +should set the `grpc_tls` configuration parameter instead of `grpc`. Please see +the Consul [port documentation](consul_ports) for further reference material. For HCL configurations: @@ -352,3 +355,4 @@ dashes (`-`) are converted to underscores (`_`) in environment variables so [gh-9907]: https://github.com/hashicorp/nomad/issues/9907 [`Local`]: https://developer.hashicorp.com/consul/docs/security/acl/acl-tokens#token-attributes [anon_token]: https://developer.hashicorp.com/consul/docs/security/acl/acl-tokens#special-purpose-tokens +[consul_ports]: https://developer.hashicorp.com/consul/docs/agent/config/config-files#ports