diff --git a/.changelog/17755.txt b/.changelog/17755.txt new file mode 100644 index 000000000000..7edf7b26e159 --- /dev/null +++ b/.changelog/17755.txt @@ -0,0 +1,3 @@ +```release-note:improvement +mesh: Stop jwt providers referenced by intentions from being deleted. +``` \ No newline at end of file diff --git a/.changelog/17757.txt b/.changelog/17757.txt new file mode 100644 index 000000000000..e207438cf843 --- /dev/null +++ b/.changelog/17757.txt @@ -0,0 +1,3 @@ +```release-note:improvement +connect: Improve transparent proxy support for virtual services and failovers. +``` diff --git a/.changelog/17759.txt b/.changelog/17759.txt new file mode 100644 index 000000000000..0836608ae1f2 --- /dev/null +++ b/.changelog/17759.txt @@ -0,0 +1,3 @@ +```release-note:improvement +extensions: Improve validation and error feedback for `property-override` builtin Envoy extension +``` diff --git a/.changelog/17775.txt b/.changelog/17775.txt new file mode 100644 index 000000000000..8060cfa128ce --- /dev/null +++ b/.changelog/17775.txt @@ -0,0 +1,3 @@ +```release-note:bug +connect: Fix issue where changes to service exports were not reflected in proxies. +``` diff --git a/.github/workflows/jira-issues.yaml b/.github/workflows/jira-issues.yaml index c136dfd69a78..6e9b2b9e959b 100644 --- a/.github/workflows/jira-issues.yaml +++ b/.github/workflows/jira-issues.yaml @@ -91,14 +91,14 @@ jobs: - name: Close ticket if: ( github.event.action == 'closed' || github.event.action == 'deleted' ) && steps.search.outputs.issue - uses: atlassian/gajira-transition@4749176faf14633954d72af7a44d7f2af01cc92b # v3 + uses: atlassian/gajira-transition@38fc9cd61b03d6a53dd35fcccda172fe04b36de3 # v3 with: issue: ${{ steps.search.outputs.issue }} transition: "Closed" - name: Reopen ticket if: github.event.action == 'reopened' && steps.search.outputs.issue - uses: atlassian/gajira-transition@4749176faf14633954d72af7a44d7f2af01cc92b # v3 + uses: atlassian/gajira-transition@38fc9cd61b03d6a53dd35fcccda172fe04b36de3 # v3 with: issue: ${{ steps.search.outputs.issue }} transition: "To Do" diff --git a/.github/workflows/jira-pr.yaml b/.github/workflows/jira-pr.yaml index f63f7af53162..e18559a022b3 100644 --- a/.github/workflows/jira-pr.yaml +++ b/.github/workflows/jira-pr.yaml @@ -105,14 +105,14 @@ jobs: - name: Close ticket if: ( github.event.action == 'closed' || github.event.action == 'deleted' ) && steps.search.outputs.issue - uses: atlassian/gajira-transition@4749176faf14633954d72af7a44d7f2af01cc92b # v3 + uses: atlassian/gajira-transition@38fc9cd61b03d6a53dd35fcccda172fe04b36de3 # v3 with: issue: ${{ steps.search.outputs.issue }} transition: "Closed" - name: Reopen ticket if: github.event.action == 'reopened' && steps.search.outputs.issue - uses: atlassian/gajira-transition@4749176faf14633954d72af7a44d7f2af01cc92b # v3 + uses: atlassian/gajira-transition@38fc9cd61b03d6a53dd35fcccda172fe04b36de3 # v3 with: issue: ${{ steps.search.outputs.issue }} transition: "To Do" diff --git a/GNUmakefile b/GNUmakefile index 3443b71db7b7..fe554b3c5432 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -3,6 +3,7 @@ SHELL = bash + GO_MODULES := $(shell find . 
-name go.mod -exec dirname {} \; | grep -v "proto-gen-rpc-glue/e2e" | sort) ### @@ -72,6 +73,7 @@ CI_DEV_DOCKER_NAMESPACE?=hashicorpdev CI_DEV_DOCKER_IMAGE_NAME?=consul CI_DEV_DOCKER_WORKDIR?=bin/ ################ +CONSUL_VERSION?=$(shell cat version/VERSION) TEST_MODCACHE?=1 TEST_BUILDCACHE?=1 @@ -188,6 +190,8 @@ dev-docker: linux dev-build @docker buildx use default && docker buildx build -t 'consul:local' -t '$(CONSUL_DEV_IMAGE)' \ --platform linux/$(GOARCH) \ --build-arg CONSUL_IMAGE_VERSION=$(CONSUL_IMAGE_VERSION) \ + --label org.opencontainers.image.version=$(CONSUL_VERSION) \ + --label version=$(CONSUL_VERSION) \ --load \ -f $(CURDIR)/build-support/docker/Consul-Dev-Multiarch.dockerfile $(CURDIR)/pkg/bin/ @@ -208,6 +212,8 @@ remote-docker: check-remote-dev-image-env @docker buildx use consul-builder && docker buildx build -t '$(REMOTE_DEV_IMAGE)' \ --platform linux/amd64,linux/arm64 \ --build-arg CONSUL_IMAGE_VERSION=$(CONSUL_IMAGE_VERSION) \ + --label org.opencontainers.image.version=$(CONSUL_VERSION) \ + --label version=$(CONSUL_VERSION) \ --push \ -f $(CURDIR)/build-support/docker/Consul-Dev-Multiarch.dockerfile $(CURDIR)/pkg/bin/ @@ -351,16 +357,17 @@ lint/%: @echo "--> Running enumcover ($*)" @cd $* && GOWORK=off enumcover ./... +# check that the test-container module only imports allowlisted packages +# from the root consul module. Generally we don't want to allow these imports. +# In a few specific instances though it is okay to import test definitions and +# helpers from some of the packages in the root module. .PHONY: lint-container-test-deps lint-container-test-deps: @echo "--> Checking container tests for bad dependencies" - @cd test/integration/consul-container && ( \ - found="$$(go list -m all | grep -c '^github.com/hashicorp/consul ')" ; \ - if [[ "$$found" != "0" ]]; then \ - echo "test/integration/consul-container: This project should not depend on the root consul module" >&2 ; \ - exit 1 ; \ - fi \ - ) + @cd test/integration/consul-container && \ + $(CURDIR)/build-support/scripts/check-allowed-imports.sh \ + github.com/hashicorp/consul \ + internal/catalog/catalogtest # Build the static web ui inside a Docker container. For local testing only; do not commit these assets. ui: ui-docker diff --git a/agent/agent.go b/agent/agent.go index 7f68bd8d080b..54a266a95856 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -4564,7 +4564,11 @@ func (a *Agent) proxyDataSources() proxycfg.DataSources { sources.ExportedPeeredServices = proxycfgglue.ServerExportedPeeredServices(deps) sources.FederationStateListMeshGateways = proxycfgglue.ServerFederationStateListMeshGateways(deps) sources.GatewayServices = proxycfgglue.ServerGatewayServices(deps) - sources.Health = proxycfgglue.ServerHealth(deps, proxycfgglue.ClientHealth(a.rpcClientHealth)) + // We do not use this health check currently due to a bug with the way that service exports + // interact with ACLs and the streaming backend. See comments in `proxycfgglue.ServerHealthBlocking` + // for more details. 
+ // sources.Health = proxycfgglue.ServerHealth(deps, proxycfgglue.ClientHealth(a.rpcClientHealth)) + sources.Health = proxycfgglue.ServerHealthBlocking(deps, proxycfgglue.ClientHealth(a.rpcClientHealth), server.FSM().State()) sources.HTTPChecks = proxycfgglue.ServerHTTPChecks(deps, a.config.NodeName, proxycfgglue.CacheHTTPChecks(a.cache), a.State) sources.Intentions = proxycfgglue.ServerIntentions(deps) sources.IntentionUpstreams = proxycfgglue.ServerIntentionUpstreams(deps) diff --git a/agent/config/builder.go b/agent/config/builder.go index 665688b8c864..8e0bb37ef999 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -984,7 +984,7 @@ func (b *builder) build() (rt RuntimeConfig, err error) { AutoEncryptIPSAN: autoEncryptIPSAN, AutoEncryptAllowTLS: autoEncryptAllowTLS, AutoConfig: autoConfig, - Cloud: b.cloudConfigVal(c.Cloud), + Cloud: b.cloudConfigVal(c), ConnectEnabled: connectEnabled, ConnectCAProvider: connectCAProvider, ConnectCAConfig: connectCAConfig, @@ -1290,6 +1290,10 @@ func (b *builder) validate(rt RuntimeConfig) error { "1 and 63 bytes.", rt.NodeName) } + if err := rt.StructLocality().Validate(); err != nil { + return fmt.Errorf("locality is invalid: %s", err) + } + if ipaddr.IsAny(rt.AdvertiseAddrLAN.IP) { return fmt.Errorf("Advertise address cannot be 0.0.0.0, :: or [::]") } @@ -2541,21 +2545,26 @@ func validateAutoConfigAuthorizer(rt RuntimeConfig) error { return nil } -func (b *builder) cloudConfigVal(v *CloudConfigRaw) hcpconfig.CloudConfig { +func (b *builder) cloudConfigVal(v Config) hcpconfig.CloudConfig { val := hcpconfig.CloudConfig{ ResourceID: os.Getenv("HCP_RESOURCE_ID"), } - if v == nil { + // Node id might get overriden in setup.go:142 + nodeID := stringVal(v.NodeID) + val.NodeID = types.NodeID(nodeID) + val.NodeName = b.nodeName(v.NodeName) + + if v.Cloud == nil { return val } - val.ClientID = stringVal(v.ClientID) - val.ClientSecret = stringVal(v.ClientSecret) - val.AuthURL = stringVal(v.AuthURL) - val.Hostname = stringVal(v.Hostname) - val.ScadaAddress = stringVal(v.ScadaAddress) + val.ClientID = stringVal(v.Cloud.ClientID) + val.ClientSecret = stringVal(v.Cloud.ClientSecret) + val.AuthURL = stringVal(v.Cloud.AuthURL) + val.Hostname = stringVal(v.Cloud.Hostname) + val.ScadaAddress = stringVal(v.Cloud.ScadaAddress) - if resourceID := stringVal(v.ResourceID); resourceID != "" { + if resourceID := stringVal(v.Cloud.ResourceID); resourceID != "" { val.ResourceID = resourceID } return val diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index c1cd85ac502f..beaf214dba71 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -619,6 +619,7 @@ func TestLoad_IntegrationWithFlags(t *testing.T) { rt.NodeName = "a" rt.TLS.NodeName = "a" rt.DataDir = dataDir + rt.Cloud.NodeName = "a" }, }) run(t, testCase{ @@ -630,6 +631,7 @@ func TestLoad_IntegrationWithFlags(t *testing.T) { expected: func(rt *RuntimeConfig) { rt.NodeID = "a" rt.DataDir = dataDir + rt.Cloud.NodeID = "a" }, }) run(t, testCase{ @@ -1036,6 +1038,13 @@ func TestLoad_IntegrationWithFlags(t *testing.T) { }, }, }) + run(t, testCase{ + desc: "locality invalid", + args: []string{`-data-dir=` + dataDir}, + json: []string{`{"locality": {"zone": "us-west-1a"}}`}, + hcl: []string{`locality { zone = "us-west-1a" }`}, + expectedErr: "locality is invalid: zone cannot be set without region", + }) run(t, testCase{ desc: "client addr and ports == 0", args: []string{`-data-dir=` + dataDir}, @@ -2319,6 +2328,8 @@ func TestLoad_IntegrationWithFlags(t 
*testing.T) { rt.Cloud = hcpconfig.CloudConfig{ // ID is only populated from env if not populated from other sources. ResourceID: "env-id", + NodeName: "thehostname", + NodeID: "", } // server things @@ -2359,6 +2370,7 @@ func TestLoad_IntegrationWithFlags(t *testing.T) { rt.Cloud = hcpconfig.CloudConfig{ // ID is only populated from env if not populated from other sources. ResourceID: "file-id", + NodeName: "thehostname", } // server things @@ -6317,6 +6329,8 @@ func TestLoad_FullConfig(t *testing.T) { Hostname: "DH4bh7aC", AuthURL: "332nCdR2", ScadaAddress: "aoeusth232", + NodeID: types.NodeID("AsUIlw99"), + NodeName: "otlLxGaI", }, DNSAddrs: []net.Addr{tcpAddr("93.95.95.81:7001"), udpAddr("93.95.95.81:7001")}, DNSARecordLimit: 29907, diff --git a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden index b6ee9a98129f..6bb08ff95fed 100644 --- a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden +++ b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden @@ -134,7 +134,9 @@ "ManagementToken": "hidden", "ResourceID": "cluster1", "ScadaAddress": "", - "TLSConfig": null + "TLSConfig": null, + "NodeID": "", + "NodeName": "" }, "ConfigEntryBootstrap": [], "ConnectCAConfig": {}, diff --git a/agent/consul/state/config_entry.go b/agent/consul/state/config_entry.go index 340a53f1192c..9abaafc390d3 100644 --- a/agent/consul/state/config_entry.go +++ b/agent/consul/state/config_entry.go @@ -634,6 +634,12 @@ func validateProposedConfigEntryInGraph( case structs.TCPRoute: case structs.RateLimitIPConfig: case structs.JWTProvider: + if newEntry == nil && existingEntry != nil { + err := validateJWTProviderIsReferenced(tx, kindName, existingEntry) + if err != nil { + return err + } + } default: return fmt.Errorf("unhandled kind %q during validation of %q", kindName.Kind, kindName.Name) } @@ -704,6 +710,66 @@ func getReferencedProviderNames(j *structs.IntentionJWTRequirement, s []*structs return providerNames } +// validateJWTProviderIsReferenced iterates over intentions to determine if the provider being +// deleted is referenced by any intention. +// +// This could be an expensive operation based on the number of intentions. We purposely set this to only +// run on delete and don't expect this to be called often. +func validateJWTProviderIsReferenced(tx ReadTxn, kn configentry.KindName, ce structs.ConfigEntry) error { + meta := acl.NewEnterpriseMetaWithPartition( + kn.EnterpriseMeta.PartitionOrDefault(), + acl.DefaultNamespaceName, + ) + entry, ok := ce.(*structs.JWTProviderConfigEntry) + if !ok { + return fmt.Errorf("invalid jwt provider config entry: %T", entry) + } + + _, ixnEntries, err := configEntriesByKindTxn(tx, nil, structs.ServiceIntentions, &meta) + if err != nil { + return err + } + + err = findJWTProviderNameReferences(ixnEntries, entry.Name) + if err != nil { + return err + } + + return nil +} + +func findJWTProviderNameReferences(entries []structs.ConfigEntry, pName string) error { + errMsg := "cannot delete jwt provider config entry referenced by an intention. 
Provider name: %s, intention name: %s" + for _, entry := range entries { + ixn, ok := entry.(*structs.ServiceIntentionsConfigEntry) + if !ok { + return fmt.Errorf("type %T is not a service intentions config entry", entry) + } + + if ixn.JWT != nil { + for _, prov := range ixn.JWT.Providers { + if prov.Name == pName { + return fmt.Errorf(errMsg, pName, ixn.Name) + } + } + } + + for _, s := range ixn.Sources { + for _, perm := range s.Permissions { + if perm.JWT == nil { + continue + } + for _, prov := range perm.JWT.Providers { + if prov.Name == pName { + return fmt.Errorf(errMsg, pName, ixn.Name) + } + } + } + } + } + return nil +} + // This fetches all the jwt-providers config entries and iterates over them // to validate that any provider referenced exists. // This is okay because we assume there are very few jwt-providers per partition diff --git a/agent/consul/state/config_entry_test.go b/agent/consul/state/config_entry_test.go index 572719dc4b1f..d72f12c87689 100644 --- a/agent/consul/state/config_entry_test.go +++ b/agent/consul/state/config_entry_test.go @@ -3714,3 +3714,178 @@ func TestStateStore_DiscoveryChain_AttachVirtualIPs(t *testing.T) { require.Equal(t, []string{"2.2.2.2", "3.3.3.3"}, chain.ManualVirtualIPs) } + +func TestFindJWTProviderNameReferences(t *testing.T) { + oktaProvider := structs.IntentionJWTProvider{Name: "okta"} + auth0Provider := structs.IntentionJWTProvider{Name: "auth0"} + cases := map[string]struct { + entries []structs.ConfigEntry + providerName string + expectedError string + }{ + "no jwt at any level": { + entries: []structs.ConfigEntry{}, + providerName: "okta", + }, + "provider not referenced": { + entries: []structs.ConfigEntry{ + &structs.ServiceIntentionsConfigEntry{ + Kind: "service-intentions", + Name: "api-intention", + JWT: &structs.IntentionJWTRequirement{ + Providers: []*structs.IntentionJWTProvider{&oktaProvider, &auth0Provider}, + }, + }, + }, + providerName: "fake-provider", + }, + "only top level jwt with no permissions": { + entries: []structs.ConfigEntry{ + &structs.ServiceIntentionsConfigEntry{ + Kind: "service-intentions", + Name: "api-intention", + JWT: &structs.IntentionJWTRequirement{ + Providers: []*structs.IntentionJWTProvider{&oktaProvider, &auth0Provider}, + }, + }, + }, + providerName: "okta", + expectedError: "cannot delete jwt provider config entry referenced by an intention. Provider name: okta, intention name: api-intention", + }, + "top level jwt with permissions": { + entries: []structs.ConfigEntry{ + &structs.ServiceIntentionsConfigEntry{ + Kind: "service-intentions", + Name: "api-intention", + JWT: &structs.IntentionJWTRequirement{ + Providers: []*structs.IntentionJWTProvider{&oktaProvider}, + }, + Sources: []*structs.SourceIntention{ + { + Name: "api", + Action: "allow", + Permissions: []*structs.IntentionPermission{ + { + Action: "allow", + JWT: &structs.IntentionJWTRequirement{ + Providers: []*structs.IntentionJWTProvider{&oktaProvider}, + }, + }, + }, + }, + { + Name: "serv", + Action: "allow", + Permissions: []*structs.IntentionPermission{ + { + Action: "allow", + JWT: &structs.IntentionJWTRequirement{ + Providers: []*structs.IntentionJWTProvider{&auth0Provider}, + }, + }, + }, + }, + { + Name: "web", + Action: "allow", + Permissions: []*structs.IntentionPermission{ + {Action: "allow"}, + }, + }, + }, + }, + }, + providerName: "auth0", + expectedError: "cannot delete jwt provider config entry referenced by an intention. 
Provider name: auth0, intention name: api-intention", + }, + "no top level jwt and existing permissions": { + entries: []structs.ConfigEntry{ + &structs.ServiceIntentionsConfigEntry{ + Kind: "service-intentions", + Name: "api-intention", + Sources: []*structs.SourceIntention{ + { + Name: "api", + Action: "allow", + Permissions: []*structs.IntentionPermission{ + { + Action: "allow", + JWT: &structs.IntentionJWTRequirement{ + Providers: []*structs.IntentionJWTProvider{&oktaProvider}, + }, + }, + }, + }, + { + Name: "serv", + Action: "allow", + Permissions: []*structs.IntentionPermission{ + { + Action: "allow", + JWT: &structs.IntentionJWTRequirement{ + Providers: []*structs.IntentionJWTProvider{&auth0Provider}, + }, + }, + }, + }, + { + Name: "web", + Action: "allow", + Permissions: []*structs.IntentionPermission{ + {Action: "allow"}, + }, + }, + }, + }, + }, + providerName: "okta", + expectedError: "cannot delete jwt provider config entry referenced by an intention. Provider name: okta, intention name: api-intention", + }, + } + + for name, tt := range cases { + tt := tt + t.Run(name, func(t *testing.T) { + err := findJWTProviderNameReferences(tt.entries, tt.providerName) + + if tt.expectedError != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tt.expectedError) + } else { + require.NoError(t, err) + } + }) + } +} + +func TestStore_ValidateJWTProviderIsReferenced(t *testing.T) { + s := testStateStore(t) + + // First create a config entry + provider := &structs.JWTProviderConfigEntry{ + Kind: structs.JWTProvider, + Name: "okta", + } + require.NoError(t, s.EnsureConfigEntry(0, provider)) + + // create a service intention referencing the config entry + ixn := &structs.ServiceIntentionsConfigEntry{ + Name: "api", + JWT: &structs.IntentionJWTRequirement{ + Providers: []*structs.IntentionJWTProvider{ + {Name: provider.Name}, + }, + }, + } + require.NoError(t, s.EnsureConfigEntry(1, ixn)) + + // attempt deleting a referenced provider + err := s.DeleteConfigEntry(0, structs.JWTProvider, provider.Name, nil) + require.Error(t, err) + require.Contains(t, err.Error(), `cannot delete jwt provider config entry referenced by an intention. Provider name: okta, intention name: api`) + + // delete the intention + require.NoError(t, s.DeleteConfigEntry(1, structs.ServiceIntentions, ixn.Name, nil)) + // successfully delete the provider after deleting the intention + require.NoError(t, s.DeleteConfigEntry(0, structs.JWTProvider, provider.Name, nil)) +} diff --git a/agent/consul/watch/server_local.go b/agent/consul/watch/server_local.go index f407d2c1648f..5937ba1c6a10 100644 --- a/agent/consul/watch/server_local.go +++ b/agent/consul/watch/server_local.go @@ -16,8 +16,9 @@ import ( ) var ( - ErrorNotFound = errors.New("no data found for query") - ErrorNotChanged = errors.New("data did not change for query") + ErrorNotFound = errors.New("no data found for query") + ErrorNotChanged = errors.New("data did not change for query") + ErrorACLResetData = errors.New("an acl update forced a state reset") errNilContext = errors.New("cannot call ServerLocalNotify with a nil context") errNilGetStore = errors.New("cannot call ServerLocalNotify without a callback to get a StateStore") @@ -320,8 +321,15 @@ func serverLocalNotifyRoutine[ResultType any, StoreType StateStore]( return } + // An ACL reset error can be raised so that the index greater-than check is + // bypassed. We should not propagate it to the caller. 
+ forceReset := errors.Is(err, ErrorACLResetData) + if forceReset { + err = nil + } + // Check the index to see if we should call notify - if minIndex == 0 || minIndex < index { + if minIndex == 0 || minIndex < index || forceReset { notify(ctx, correlationID, result, err) minIndex = index } diff --git a/agent/envoyextensions/builtin/property-override/property_override.go b/agent/envoyextensions/builtin/property-override/property_override.go index 51d78368523f..41e98074b7a2 100644 --- a/agent/envoyextensions/builtin/property-override/property_override.go +++ b/agent/envoyextensions/builtin/property-override/property_override.go @@ -191,6 +191,10 @@ func (f *ResourceFilter) validate() error { return err } + if len(f.Services) > 0 && f.TrafficDirection != extensioncommon.TrafficDirectionOutbound { + return fmt.Errorf("patch contains non-empty ResourceFilter.Services but ResourceFilter.TrafficDirection is not %q", + extensioncommon.TrafficDirectionOutbound) + } for i := range f.Services { sn := f.Services[i] sn.normalize() @@ -255,9 +259,9 @@ func (p *propertyOverride) validate() error { } var resultErr error - for _, patch := range p.Patches { + for i, patch := range p.Patches { if err := patch.validate(p.Debug); err != nil { - resultErr = multierror.Append(resultErr, err) + resultErr = multierror.Append(resultErr, fmt.Errorf("invalid Patches[%d]: %w", i, err)) } } diff --git a/agent/envoyextensions/builtin/property-override/property_override_test.go b/agent/envoyextensions/builtin/property-override/property_override_test.go index 21889d840f4a..0e4317f9ddb7 100644 --- a/agent/envoyextensions/builtin/property-override/property_override_test.go +++ b/agent/envoyextensions/builtin/property-override/property_override_test.go @@ -63,6 +63,7 @@ func TestConstructor(t *testing.T) { expected propertyOverride ok bool errMsg string + errFunc func(*testing.T, error) } validTestCase := func(o Op, d extensioncommon.TrafficDirection, t ResourceType) testCase { @@ -216,6 +217,50 @@ func TestConstructor(t *testing.T) { ok: false, errMsg: fmt.Sprintf("field Value is not supported for %s operation", OpRemove), }, + "multiple patches includes indexed errors": { + arguments: makeArguments(map[string]any{"Patches": []map[string]any{ + makePatch(map[string]any{ + "Op": OpRemove, + "Value": 0, + }), + makePatch(map[string]any{ + "Op": OpAdd, + "Value": nil, + }), + makePatch(map[string]any{ + "Op": OpAdd, + "Path": "/foo", + }), + }}), + ok: false, + errFunc: func(t *testing.T, err error) { + require.ErrorContains(t, err, "invalid Patches[0]: field Value is not supported for remove operation") + require.ErrorContains(t, err, "invalid Patches[1]: non-nil Value is required") + require.ErrorContains(t, err, "invalid Patches[2]: no match for field 'foo'") + }, + }, + "multiple patches single error contains correct index": { + arguments: makeArguments(map[string]any{"Patches": []map[string]any{ + makePatch(map[string]any{ + "Op": OpAdd, + "Value": "foo", + }), + makePatch(map[string]any{ + "Op": OpRemove, + "Value": 1, + }), + makePatch(map[string]any{ + "Op": OpAdd, + "Value": "bar", + }), + }}), + ok: false, + errFunc: func(t *testing.T, err error) { + require.ErrorContains(t, err, "invalid Patches[1]: field Value is not supported for remove operation") + require.NotContains(t, err.Error(), "invalid Patches[0]") + require.NotContains(t, err.Error(), "invalid Patches[2]") + }, + }, "empty service name": { arguments: makeArguments(map[string]any{"Patches": []map[string]any{ makePatch(map[string]any{ @@ -229,6 +274,20 @@ 
func TestConstructor(t *testing.T) { ok: false, errMsg: "service name is required", }, + "non-empty services with invalid traffic direction": { + arguments: makeArguments(map[string]any{"Patches": []map[string]any{ + makePatch(map[string]any{ + "ResourceFilter": makeResourceFilter(map[string]any{ + "TrafficDirection": extensioncommon.TrafficDirectionInbound, + "Services": []map[string]any{ + {"Name:": "foo"}, + }, + }), + }), + }}), + ok: false, + errMsg: "patch contains non-empty ResourceFilter.Services but ResourceFilter.TrafficDirection is not \"outbound\"", + }, // See decode.HookWeakDecodeFromSlice for more details. In practice, we can end up // with a "Patches" field decoded to the single "Patch" value contained in the // serialized slice (raised from the containing slice). Using WeakDecode solves @@ -333,7 +392,13 @@ func TestConstructor(t *testing.T) { require.NoError(t, err) require.Equal(t, &extensioncommon.BasicEnvoyExtender{Extension: &tc.expected}, e) } else { - require.ErrorContains(t, err, tc.errMsg) + require.Error(t, err) + if tc.errMsg != "" { + require.ErrorContains(t, err, tc.errMsg) + } + if tc.errFunc != nil { + tc.errFunc(t, err) + } } }) } diff --git a/agent/envoyextensions/builtin/property-override/structpatcher.go b/agent/envoyextensions/builtin/property-override/structpatcher.go index 3a54ca25e40a..91de4cf7f86d 100644 --- a/agent/envoyextensions/builtin/property-override/structpatcher.go +++ b/agent/envoyextensions/builtin/property-override/structpatcher.go @@ -75,7 +75,7 @@ func findTargetMessageAndField(m protoreflect.Message, parsedPath []string, patc } // Check whether we have a non-terminal (parent) field in the path for which we - // don't support child lookup. + // don't support child operations. switch { case fieldDesc.IsList(): return nil, nil, fmt.Errorf("path contains member of repeated field '%s'; repeated field member access is not supported", @@ -83,6 +83,21 @@ func findTargetMessageAndField(m protoreflect.Message, parsedPath []string, patc case fieldDesc.IsMap(): return nil, nil, fmt.Errorf("path contains member of map field '%s'; map field member access is not supported", fieldName) + case fieldDesc.Message() != nil && fieldDesc.Message().FullName() == "google.protobuf.Any": + // Return a more helpful error for Any fields early. + // + // Doing this here prevents confusing two-step errors, e.g. "no match for field @type" + // on Any, when in fact we don't support variant proto message fields like Any in general. + // Because Any is a Message, we'd fail on invalid child fields or unsupported bytes target + // fields first. + // + // In the future, we could support Any by using the type field to initialize a struct for + // the nested message value. + return nil, nil, fmt.Errorf("variant-type message fields (google.protobuf.Any) are not supported") + case !(fieldDesc.Kind() == protoreflect.MessageKind): + // Non-Any fields that could be used to serialize protos as bytes will get a clear error message + // in this scenario. This also catches accidental use of non-complex fields as parent fields. + return nil, nil, fmt.Errorf("path contains member of non-message field '%s' (type '%s'); this type does not support child fields", fieldName, fieldDesc.Kind()) } fieldM := m.Get(fieldDesc).Message() @@ -137,6 +152,10 @@ func applyAdd(parentM protoreflect.Message, fieldDesc protoreflect.FieldDescript // similar to a list (repeated field). This map handling is specific to _our_ patch semantics for // updating multiple message fields at once. 
if isMapValue && !fieldDesc.IsMap() { + if fieldDesc.Kind() != protoreflect.MessageKind { + return fmt.Errorf("non-message field type '%s' cannot be set via a map", fieldDesc.Kind()) + } + // Get a fresh copy of the target field's message, then set the children indicated by the patch. fieldM := parentM.Get(fieldDesc).Message().New() for k, v := range mapValue { @@ -151,6 +170,7 @@ func applyAdd(parentM protoreflect.Message, fieldDesc protoreflect.FieldDescript fieldM.Set(targetFieldDesc, val) } parentM.Set(fieldDesc, protoreflect.ValueOf(fieldM)) + } else { // Just set the field directly, as our patch value is not a map. val, err := toProtoValue(parentM, fieldDesc, patch.Value) @@ -280,6 +300,9 @@ func toProtoValue(parentM protoreflect.Message, fieldDesc protoreflect.FieldDesc case float64: return toProtoNumericValue(fieldDesc, val) } + case protoreflect.BytesKind, + protoreflect.GroupKind: + return unsupportedTargetTypeErr(fieldDesc) } // Fall back to protoreflect.ValueOf, which may panic if an unexpected type is passed. diff --git a/agent/envoyextensions/builtin/property-override/structpatcher_test.go b/agent/envoyextensions/builtin/property-override/structpatcher_test.go index 579f0f71c98a..ac7379f9f186 100644 --- a/agent/envoyextensions/builtin/property-override/structpatcher_test.go +++ b/agent/envoyextensions/builtin/property-override/structpatcher_test.go @@ -2,6 +2,7 @@ package propertyoverride import ( "fmt" + "google.golang.org/protobuf/types/known/anypb" "testing" envoy_cluster_v3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" @@ -592,6 +593,31 @@ func TestPatchStruct(t *testing.T) { }, ok: true, }, + "remove single field: Any": { + args: args{ + k: &envoy_cluster_v3.Cluster{ + ClusterDiscoveryType: &envoy_cluster_v3.Cluster_ClusterType{ + ClusterType: &envoy_cluster_v3.Cluster_CustomClusterType{ + TypedConfig: &anypb.Any{ + TypeUrl: "foo", + }, + }, + }, + }, + patches: []Patch{ + makeRemovePatch( + "/cluster_type/typed_config", + ), + }, + }, + // Invalid actual config, but used as an example of removing Any field directly + expected: &envoy_cluster_v3.Cluster{ + ClusterDiscoveryType: &envoy_cluster_v3.Cluster_ClusterType{ + ClusterType: &envoy_cluster_v3.Cluster_CustomClusterType{}, + }, + }, + ok: true, + }, "remove single field deeply nested": { args: args{ k: &envoy_cluster_v3.Cluster{ @@ -858,6 +884,69 @@ func TestPatchStruct(t *testing.T) { ok: false, errMsg: "unsupported target field type 'map'", }, + "add unsupported target: non-message field via map": { + args: args{ + k: &envoy_cluster_v3.Cluster{}, + patches: []Patch{ + makeAddPatch( + "/name", + map[string]any{ + "cluster_refresh_rate": "5s", + "cluster_refresh_timeout": "3s", + "redirect_refresh_interval": "5s", + "redirect_refresh_threshold": 5, + }, + ), + }, + }, + ok: false, + errMsg: "non-message field type 'string' cannot be set via a map", + }, + "add unsupported target: non-message parent field via single value": { + args: args{ + k: &envoy_cluster_v3.Cluster{}, + patches: []Patch{ + makeAddPatch( + "/name/foo", + "bar", + ), + }, + }, + ok: false, + errMsg: "path contains member of non-message field 'name' (type 'string'); this type does not support child fields", + }, + "add unsupported target: non-message parent field via map": { + args: args{ + k: &envoy_cluster_v3.Cluster{}, + patches: []Patch{ + makeAddPatch( + "/name/foo", + map[string]any{ + "cluster_refresh_rate": "5s", + "cluster_refresh_timeout": "3s", + "redirect_refresh_interval": "5s", + "redirect_refresh_threshold": 5, + }, 
+ ), + }, + }, + ok: false, + errMsg: "path contains member of non-message field 'name' (type 'string'); this type does not support child fields", + }, + "add unsupported target: Any field": { + args: args{ + k: &envoy_cluster_v3.Cluster{}, + patches: []Patch{ + makeAddPatch( + // Purposefully use a wrong-but-reasonable field name to ensure special error is returned + "/cluster_type/typed_config/@type", + "foo", + ), + }, + }, + ok: false, + errMsg: "variant-type message fields (google.protobuf.Any) are not supported", + }, "add unsupported target: repeated message": { args: args{ k: &envoy_cluster_v3.Cluster{}, diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index 212647c51e87..1c49fd792471 100644 --- a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -313,9 +313,14 @@ func (t *TelemetryConfig) Enabled() (string, bool) { } // DefaultLabels returns a set of string pairs that must be added as attributes to all exported telemetry data. -func (t *TelemetryConfig) DefaultLabels(nodeID string) map[string]string { - labels := map[string]string{ - "node_id": nodeID, // used to delineate Consul nodes in graphs +func (t *TelemetryConfig) DefaultLabels(cfg config.CloudConfig) map[string]string { + labels := make(map[string]string) + nodeID := string(cfg.NodeID) + if nodeID != "" { + labels["node_id"] = nodeID + } + if cfg.NodeName != "" { + labels["node_name"] = cfg.NodeName } for k, v := range t.Labels { diff --git a/agent/hcp/client/client_test.go b/agent/hcp/client/client_test.go index 8c8a6addd70c..0292fa3fab22 100644 --- a/agent/hcp/client/client_test.go +++ b/agent/hcp/client/client_test.go @@ -4,6 +4,8 @@ import ( "context" "testing" + "github.com/hashicorp/consul/agent/hcp/config" + "github.com/hashicorp/consul/types" "github.com/hashicorp/hcp-sdk-go/clients/cloud-consul-telemetry-gateway/preview/2023-04-14/client/consul_telemetry_service" "github.com/hashicorp/hcp-sdk-go/clients/cloud-consul-telemetry-gateway/preview/2023-04-14/models" "github.com/stretchr/testify/mock" @@ -147,3 +149,53 @@ func TestConvertTelemetryConfig(t *testing.T) { }) } } + +func Test_DefaultLabels(t *testing.T) { + for name, tc := range map[string]struct { + cfg config.CloudConfig + expectedLabels map[string]string + }{ + "Success": { + cfg: config.CloudConfig{ + NodeID: types.NodeID("nodeyid"), + NodeName: "nodey", + }, + expectedLabels: map[string]string{ + "node_id": "nodeyid", + "node_name": "nodey", + }, + }, + + "NoNodeID": { + cfg: config.CloudConfig{ + NodeID: types.NodeID(""), + NodeName: "nodey", + }, + expectedLabels: map[string]string{ + "node_name": "nodey", + }, + }, + "NoNodeName": { + cfg: config.CloudConfig{ + NodeID: types.NodeID("nodeyid"), + NodeName: "", + }, + expectedLabels: map[string]string{ + "node_id": "nodeyid", + }, + }, + "Empty": { + cfg: config.CloudConfig{ + NodeID: "", + NodeName: "", + }, + expectedLabels: map[string]string{}, + }, + } { + t.Run(name, func(t *testing.T) { + tCfg := &TelemetryConfig{} + labels := tCfg.DefaultLabels(tc.cfg) + require.Equal(t, labels, tc.expectedLabels) + }) + } +} diff --git a/agent/hcp/client/metrics_client.go b/agent/hcp/client/metrics_client.go index 7e19c9857a97..0bcb90b81ce2 100644 --- a/agent/hcp/client/metrics_client.go +++ b/agent/hcp/client/metrics_client.go @@ -32,6 +32,10 @@ const ( // defaultRetryMax is set to 0 to turn off retry functionality, until dynamic configuration is possible. // This is to circumvent any spikes in load that may cause or exacerbate server-side issues for now. 
defaultRetryMax = 0 + + // defaultErrRespBodyLength refers to the max character length of the body on a failure to export metrics. + // anything beyond we will truncate. + defaultErrRespBodyLength = 100 ) // MetricsClient exports Consul metrics in OTLP format to the HCP Telemetry Gateway. @@ -54,7 +58,7 @@ type otlpClient struct { // NewMetricsClient returns a configured MetricsClient. // The current implementation uses otlpClient to provide retry functionality. -func NewMetricsClient(cfg CloudConfig, ctx context.Context) (MetricsClient, error) { +func NewMetricsClient(ctx context.Context, cfg CloudConfig) (MetricsClient, error) { if cfg == nil { return nil, fmt.Errorf("failed to init telemetry client: provide valid cloudCfg (Cloud Configuration for TLS)") } @@ -150,8 +154,18 @@ func (o *otlpClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.R } if resp.StatusCode != http.StatusOK { - return fmt.Errorf("failed to export metrics: code %d: %s", resp.StatusCode, string(body)) + truncatedBody := truncate(respData.String(), defaultErrRespBodyLength) + return fmt.Errorf("failed to export metrics: code %d: %s", resp.StatusCode, truncatedBody) } return nil } + +func truncate(text string, width uint) string { + if len(text) <= int(width) { + return text + } + r := []rune(text) + trunc := r[:width] + return string(trunc) + "..." +} diff --git a/agent/hcp/client/metrics_client_test.go b/agent/hcp/client/metrics_client_test.go index e80996fcf5eb..4119e326e9dc 100644 --- a/agent/hcp/client/metrics_client_test.go +++ b/agent/hcp/client/metrics_client_test.go @@ -3,6 +3,7 @@ package client import ( "context" "fmt" + "math/rand" "net/http" "net/http/httptest" "testing" @@ -51,7 +52,7 @@ func TestNewMetricsClient(t *testing.T) { }, } { t.Run(name, func(t *testing.T) { - client, err := NewMetricsClient(test.cfg, test.ctx) + client, err := NewMetricsClient(test.ctx, test.cfg) if test.wantErr != "" { require.Error(t, err) require.Contains(t, err.Error(), test.wantErr) @@ -64,10 +65,21 @@ func TestNewMetricsClient(t *testing.T) { } } +var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZäöüÄÖÜ世界") + +func randStringRunes(n int) string { + b := make([]rune, n) + for i := range b { + b[i] = letterRunes[rand.Intn(len(letterRunes))] + } + return string(b) +} + func TestExportMetrics(t *testing.T) { for name, test := range map[string]struct { - wantErr string - status int + wantErr string + status int + largeBodyError bool }{ "success": { status: http.StatusOK, @@ -76,8 +88,14 @@ func TestExportMetrics(t *testing.T) { status: http.StatusBadRequest, wantErr: "failed to export metrics: code 400", }, + "failsWithNonRetryableErrorWithLongError": { + status: http.StatusBadRequest, + wantErr: "failed to export metrics: code 400", + largeBodyError: true, + }, } { t.Run(name, func(t *testing.T) { + randomBody := randStringRunes(1000) srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { require.Equal(t, r.Header.Get("content-type"), "application/x-protobuf") require.Equal(t, r.Header.Get("x-hcp-resource-id"), testResourceID) @@ -91,11 +109,16 @@ func TestExportMetrics(t *testing.T) { w.Header().Set("Content-Type", "application/x-protobuf") w.WriteHeader(test.status) - w.Write(bytes) + if test.largeBodyError { + w.Write([]byte(randomBody)) + } else { + w.Write(bytes) + } + })) defer srv.Close() - client, err := NewMetricsClient(MockCloudCfg{}, context.Background()) + client, err := NewMetricsClient(context.Background(), MockCloudCfg{}) 
require.NoError(t, err) ctx := context.Background() @@ -105,6 +128,10 @@ func TestExportMetrics(t *testing.T) { if test.wantErr != "" { require.Error(t, err) require.Contains(t, err.Error(), test.wantErr) + if test.largeBodyError { + truncatedBody := truncate(randomBody, defaultErrRespBodyLength) + require.Contains(t, err.Error(), truncatedBody) + } return } @@ -112,3 +139,37 @@ func TestExportMetrics(t *testing.T) { }) } } + +func TestTruncate(t *testing.T) { + for name, tc := range map[string]struct { + body string + expectedSize int + }{ + "ZeroSize": { + body: "", + expectedSize: 0, + }, + "LessThanSize": { + body: "foobar", + expectedSize: 6, + }, + "defaultSize": { + body: "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis vel tincidunt nunc, sed tristique risu", + expectedSize: 100, + }, + "greaterThanSize": { + body: "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis vel tincidunt nunc, sed tristique risus", + expectedSize: 103, + }, + "greaterThanSizeWithUnicode": { + body: randStringRunes(1000), + expectedSize: 103, + }, + } { + t.Run(name, func(t *testing.T) { + truncatedBody := truncate(tc.body, defaultErrRespBodyLength) + truncatedRunes := []rune(truncatedBody) + require.Equal(t, len(truncatedRunes), tc.expectedSize) + }) + } +} diff --git a/agent/hcp/client/mock_metrics_client.go b/agent/hcp/client/mock_metrics_client.go new file mode 100644 index 000000000000..a30b1f1c62c0 --- /dev/null +++ b/agent/hcp/client/mock_metrics_client.go @@ -0,0 +1,5 @@ +package client + +type MockMetricsClient struct { + MetricsClient +} diff --git a/agent/hcp/config/config.go b/agent/hcp/config/config.go index 8d1358fa4adf..319c39e40e94 100644 --- a/agent/hcp/config/config.go +++ b/agent/hcp/config/config.go @@ -6,6 +6,7 @@ package config import ( "crypto/tls" + "github.com/hashicorp/consul/types" hcpcfg "github.com/hashicorp/hcp-sdk-go/config" "github.com/hashicorp/hcp-sdk-go/resource" ) @@ -25,6 +26,9 @@ type CloudConfig struct { // TlsConfig for testing. 
TLSConfig *tls.Config + + NodeID types.NodeID + NodeName string } func (c *CloudConfig) WithTLSConfig(cfg *tls.Config) { diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index f4ad161daba4..e3e83dec9657 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -14,7 +14,6 @@ import ( "github.com/hashicorp/consul/agent/hcp/config" "github.com/hashicorp/consul/agent/hcp/scada" "github.com/hashicorp/consul/agent/hcp/telemetry" - "github.com/hashicorp/consul/types" "github.com/hashicorp/go-hclog" ) @@ -25,10 +24,13 @@ type Deps struct { Sink metrics.MetricSink } -func NewDeps(cfg config.CloudConfig, logger hclog.Logger, nodeID types.NodeID) (Deps, error) { +func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (Deps, error) { + ctx := context.Background() + ctx = hclog.WithContext(ctx, logger) + client, err := hcpclient.NewClient(cfg) if err != nil { - return Deps{}, fmt.Errorf("failed to init client: %w:", err) + return Deps{}, fmt.Errorf("failed to init client: %w", err) } provider, err := scada.New(cfg, logger.Named("scada")) @@ -36,7 +38,13 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger, nodeID types.NodeID) ( return Deps{}, fmt.Errorf("failed to init scada: %w", err) } - sink := sink(client, &cfg, logger.Named("sink"), nodeID) + metricsClient, err := hcpclient.NewMetricsClient(ctx, &cfg) + if err != nil { + logger.Error("failed to init metrics client", "error", err) + return Deps{}, fmt.Errorf("failed to init metrics client: %w", err) + } + + sink := sink(ctx, client, metricsClient, cfg) return Deps{ Client: client, @@ -48,10 +56,13 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger, nodeID types.NodeID) ( // sink provides initializes an OTELSink which forwards Consul metrics to HCP. // The sink is only initialized if the server is registered with the management plane (CCM). // This step should not block server initialization, so errors are logged, but not returned. 
-func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger, nodeID types.NodeID) metrics.MetricSink { - ctx := context.Background() - ctx = hclog.WithContext(ctx, logger) - +func sink( + ctx context.Context, + hcpClient hcpclient.Client, + metricsClient hcpclient.MetricsClient, + cfg config.CloudConfig, +) metrics.MetricSink { + logger := hclog.FromContext(ctx).Named("sink") reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() @@ -72,16 +83,10 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo return nil } - metricsClient, err := hcpclient.NewMetricsClient(cfg, ctx) - if err != nil { - logger.Error("failed to init metrics client", "error", err) - return nil - } - sinkOpts := &telemetry.OTELSinkOpts{ Ctx: ctx, Reader: telemetry.NewOTELReader(metricsClient, u, telemetry.DefaultExportInterval), - Labels: telemetryCfg.DefaultLabels(string(nodeID)), + Labels: telemetryCfg.DefaultLabels(cfg), Filters: telemetryCfg.MetricsConfig.Filters, } diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 54ec7b6de478..9a90c26d50ad 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -1,10 +1,11 @@ package hcp import ( + "context" "fmt" "testing" - "github.com/hashicorp/go-hclog" + "github.com/hashicorp/consul/agent/hcp/config" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -16,7 +17,7 @@ func TestSink(t *testing.T) { t.Parallel() for name, test := range map[string]struct { expect func(*client.MockClient) - mockCloudCfg client.CloudConfig + cloudCfg config.CloudConfig expectedSink bool }{ "success": { @@ -28,7 +29,10 @@ func TestSink(t *testing.T) { }, }, nil) }, - mockCloudCfg: client.MockCloudCfg{}, + cloudCfg: config.CloudConfig{ + NodeID: types.NodeID("nodeyid"), + NodeName: "nodey", + }, expectedSink: true, }, "noSinkWhenServerNotRegisteredWithCCM": { @@ -40,26 +44,13 @@ func TestSink(t *testing.T) { }, }, nil) }, - mockCloudCfg: client.MockCloudCfg{}, + cloudCfg: config.CloudConfig{}, }, "noSinkWhenCCMVerificationFails": { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(nil, fmt.Errorf("fetch failed")) }, - mockCloudCfg: client.MockCloudCfg{}, - }, - "noSinkWhenMetricsClientInitFails": { - mockCloudCfg: client.MockCloudCfg{ - ConfigErr: fmt.Errorf("test bad hcp config"), - }, - expect: func(mockClient *client.MockClient) { - mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ - Endpoint: "https://test.com", - MetricsConfig: &client.MetricsConfig{ - Endpoint: "", - }, - }, nil) - }, + cloudCfg: config.CloudConfig{}, }, "failsWithFetchTelemetryFailure": { expect: func(mockClient *client.MockClient) { @@ -93,14 +84,17 @@ func TestSink(t *testing.T) { t.Run(name, func(t *testing.T) { t.Parallel() c := client.NewMockClient(t) - l := hclog.NewNullLogger() + mc := client.MockMetricsClient{} + test.expect(c) - sinkOpts := sink(c, test.mockCloudCfg, l, types.NodeID("server1234")) + ctx := context.Background() + + s := sink(ctx, c, mc, test.cloudCfg) if !test.expectedSink { - require.Nil(t, sinkOpts) + require.Nil(t, s) return } - require.NotNil(t, sinkOpts) + require.NotNil(t, s) }) } } diff --git a/agent/proxycfg-glue/health_blocking.go b/agent/proxycfg-glue/health_blocking.go new file mode 100644 index 000000000000..0a47a920d157 --- /dev/null +++ b/agent/proxycfg-glue/health_blocking.go @@ -0,0 +1,164 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +package proxycfgglue + +import ( + "context" + "fmt" + "time" + + "github.com/hashicorp/go-bexpr" + "github.com/hashicorp/go-memdb" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/consul/watch" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/agent/structs/aclfilter" +) + +// ServerHealthBlocking exists due to a bug with the streaming backend and its interaction with ACLs. +// Whenever an exported-services config entry is modified, this is effectively an ACL change. +// Assume the following situation: +// - no services are exported +// - an upstream watch to service X is spawned +// - the streaming backend filters out data for service X (because it's not exported yet) +// - service X is finally exported +// +// In this situation, the streaming backend does not trigger a refresh of its data. +// This means that any events that were supposed to have been received prior to the export are NOT backfilled, +// and the watches never see service X spawning. +// +// We currently have decided to not trigger a stream refresh in this situation due to the potential for a +// thundering herd effect (touching exports would cause a re-fetch of all watches for that partition, potentially). +// Therefore, this local blocking-query approach exists for agentless. +// +// It's also worth noting that the streaming subscription is currently bypassed most of the time with agentful, +// because proxycfg has a `req.Source.Node != ""` which prevents the `streamingEnabled` check from passing. +// This means that while agents should technically have this same issue, they don't experience it with mesh health +// watches. +func ServerHealthBlocking(deps ServerDataSourceDeps, remoteSource proxycfg.Health, state *state.Store) *serverHealthBlocking { + return &serverHealthBlocking{deps, remoteSource, state, 5 * time.Minute} +} + +type serverHealthBlocking struct { + deps ServerDataSourceDeps + remoteSource proxycfg.Health + state *state.Store + watchTimeout time.Duration +} + +// Notify is mostly a copy of the function in `agent/consul/health_endpoint.go` with a few minor tweaks. +// Most notably, some query features unnecessary for mesh have been stripped out. 
+func (h *serverHealthBlocking) Notify(ctx context.Context, args *structs.ServiceSpecificRequest, correlationID string, ch chan<- proxycfg.UpdateEvent) error { + if args.Datacenter != h.deps.Datacenter { + return h.remoteSource.Notify(ctx, args, correlationID, ch) + } + + // Verify the arguments + if args.ServiceName == "" { + return fmt.Errorf("Must provide service name") + } + if args.EnterpriseMeta.PartitionOrDefault() == acl.WildcardName { + return fmt.Errorf("Wildcards are not allowed in the partition field") + } + + // Determine the function we'll call + var f func(memdb.WatchSet, *state.Store, *structs.ServiceSpecificRequest) (uint64, structs.CheckServiceNodes, error) + switch { + case args.Connect: + f = serviceNodesConnect + case args.Ingress: + f = serviceNodesIngress + default: + f = serviceNodesDefault + } + + filter, err := bexpr.CreateFilter(args.Filter, nil, structs.CheckServiceNode{}) + if err != nil { + return err + } + + var hadResults bool = false + return watch.ServerLocalNotify(ctx, correlationID, h.deps.GetStore, + func(ws memdb.WatchSet, store Store) (uint64, *structs.IndexedCheckServiceNodes, error) { + // This is necessary so that service export changes are eventually picked up, since + // they won't trigger the watch themselves. + timeoutCh := make(chan struct{}) + time.AfterFunc(h.watchTimeout, func() { + close(timeoutCh) + }) + ws.Add(timeoutCh) + + authzContext := acl.AuthorizerContext{ + Peer: args.PeerName, + } + authz, err := h.deps.ACLResolver.ResolveTokenAndDefaultMeta(args.Token, &args.EnterpriseMeta, &authzContext) + if err != nil { + return 0, nil, err + } + // If we're doing a connect or ingress query, we need read access to the service + // we're trying to find proxies for, so check that. + if args.Connect || args.Ingress { + if authz.ServiceRead(args.ServiceName, &authzContext) != acl.Allow { + // If access was somehow revoked (via token deletion or unexporting), then we clear the + // last-known results before triggering an error. This way, the proxies will actually update + // their data, rather than holding onto the last-known list of healthy nodes indefinitely. + if hadResults { + hadResults = false + return 0, &structs.IndexedCheckServiceNodes{}, watch.ErrorACLResetData + } + return 0, nil, acl.ErrPermissionDenied + } + } + + var thisReply structs.IndexedCheckServiceNodes + thisReply.Index, thisReply.Nodes, err = f(ws, h.state, args) + if err != nil { + return 0, nil, err + } + + raw, err := filter.Execute(thisReply.Nodes) + if err != nil { + return 0, nil, err + } + thisReply.Nodes = raw.(structs.CheckServiceNodes) + + // Note: we filter the results with ACLs *after* applying the user-supplied + // bexpr filter, to ensure QueryMeta.ResultsFilteredByACLs does not include + // results that would be filtered out even if the user did have permission. 
+ if err := h.filterACL(&authzContext, args.Token, &thisReply); err != nil { + return 0, nil, err + } + + hadResults = true + return thisReply.Index, &thisReply, nil + }, + dispatchBlockingQueryUpdate[*structs.IndexedCheckServiceNodes](ch), + ) +} + +func (h *serverHealthBlocking) filterACL(authz *acl.AuthorizerContext, token string, subj *structs.IndexedCheckServiceNodes) error { + // Get the ACL from the token + var entMeta acl.EnterpriseMeta + authorizer, err := h.deps.ACLResolver.ResolveTokenAndDefaultMeta(token, &entMeta, authz) + if err != nil { + return err + } + aclfilter.New(authorizer, h.deps.Logger).Filter(subj) + return nil +} + +func serviceNodesConnect(ws memdb.WatchSet, s *state.Store, args *structs.ServiceSpecificRequest) (uint64, structs.CheckServiceNodes, error) { + return s.CheckConnectServiceNodes(ws, args.ServiceName, &args.EnterpriseMeta, args.PeerName) +} + +func serviceNodesIngress(ws memdb.WatchSet, s *state.Store, args *structs.ServiceSpecificRequest) (uint64, structs.CheckServiceNodes, error) { + return s.CheckIngressServiceNodes(ws, args.ServiceName, &args.EnterpriseMeta) +} + +func serviceNodesDefault(ws memdb.WatchSet, s *state.Store, args *structs.ServiceSpecificRequest) (uint64, structs.CheckServiceNodes, error) { + return s.CheckServiceNodes(ws, args.ServiceName, &args.EnterpriseMeta, args.PeerName) +} diff --git a/agent/proxycfg-glue/health_blocking_test.go b/agent/proxycfg-glue/health_blocking_test.go new file mode 100644 index 000000000000..3dcdaf17d614 --- /dev/null +++ b/agent/proxycfg-glue/health_blocking_test.go @@ -0,0 +1,183 @@ +package proxycfgglue + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/hashicorp/consul/acl" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/proxycfg" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/sdk/testutil" + "github.com/stretchr/testify/require" +) + +func TestServerHealthBlocking(t *testing.T) { + t.Run("remote queries are delegated to the remote source", func(t *testing.T) { + var ( + ctx = context.Background() + req = &structs.ServiceSpecificRequest{Datacenter: "dc2"} + correlationID = "correlation-id" + ch = make(chan<- proxycfg.UpdateEvent) + result = errors.New("KABOOM") + ) + + remoteSource := newMockHealth(t) + remoteSource.On("Notify", ctx, req, correlationID, ch).Return(result) + + store := state.NewStateStore(nil) + dataSource := ServerHealthBlocking(ServerDataSourceDeps{Datacenter: "dc1"}, remoteSource, store) + err := dataSource.Notify(ctx, req, correlationID, ch) + require.Equal(t, result, err) + }) + + t.Run("services notify correctly", func(t *testing.T) { + const ( + datacenter = "dc1" + serviceName = "web" + ) + + ctx, cancel := context.WithCancel(context.Background()) + t.Cleanup(cancel) + + store := state.NewStateStore(nil) + aclResolver := newStaticResolver(acl.ManageAll()) + dataSource := ServerHealthBlocking(ServerDataSourceDeps{ + GetStore: func() Store { return store }, + Datacenter: datacenter, + ACLResolver: aclResolver, + Logger: testutil.Logger(t), + }, nil, store) + dataSource.watchTimeout = 1 * time.Second + + // Watch for all events + eventCh := make(chan proxycfg.UpdateEvent) + require.NoError(t, dataSource.Notify(ctx, &structs.ServiceSpecificRequest{ + Datacenter: datacenter, + ServiceName: serviceName, + }, "", eventCh)) + + // Watch for a subset of events + filteredCh := make(chan proxycfg.UpdateEvent) + require.NoError(t, dataSource.Notify(ctx, &structs.ServiceSpecificRequest{ + Datacenter: 
datacenter, + ServiceName: serviceName, + QueryOptions: structs.QueryOptions{ + Filter: "Service.ID == \"web1\"", + }, + }, "", filteredCh)) + + testutil.RunStep(t, "initial state", func(t *testing.T) { + result := getEventResult[*structs.IndexedCheckServiceNodes](t, eventCh) + require.Empty(t, result.Nodes) + result = getEventResult[*structs.IndexedCheckServiceNodes](t, filteredCh) + require.Empty(t, result.Nodes) + }) + + testutil.RunStep(t, "register services", func(t *testing.T) { + require.NoError(t, store.EnsureRegistration(10, &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Service: &structs.NodeService{ + ID: serviceName + "1", + Service: serviceName, + Port: 80, + }, + })) + result := getEventResult[*structs.IndexedCheckServiceNodes](t, eventCh) + require.Len(t, result.Nodes, 1) + result = getEventResult[*structs.IndexedCheckServiceNodes](t, filteredCh) + require.Len(t, result.Nodes, 1) + + require.NoError(t, store.EnsureRegistration(11, &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Service: &structs.NodeService{ + ID: serviceName + "2", + Service: serviceName, + Port: 81, + }, + })) + result = getEventResult[*structs.IndexedCheckServiceNodes](t, eventCh) + require.Len(t, result.Nodes, 2) + result = getEventResult[*structs.IndexedCheckServiceNodes](t, filteredCh) + require.Len(t, result.Nodes, 1) + require.Equal(t, "web1", result.Nodes[0].Service.ID) + }) + + testutil.RunStep(t, "deregister service", func(t *testing.T) { + require.NoError(t, store.DeleteService(12, "foo", serviceName+"1", nil, "")) + result := getEventResult[*structs.IndexedCheckServiceNodes](t, eventCh) + require.Len(t, result.Nodes, 1) + result = getEventResult[*structs.IndexedCheckServiceNodes](t, filteredCh) + require.Len(t, result.Nodes, 0) + }) + + testutil.RunStep(t, "acl enforcement", func(t *testing.T) { + require.NoError(t, store.EnsureRegistration(11, &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Service: &structs.NodeService{ + Service: serviceName + "-sidecar-proxy", + Kind: structs.ServiceKindConnectProxy, + Proxy: structs.ConnectProxyConfig{ + DestinationServiceName: serviceName, + }, + }, + })) + + authzDeny := policyAuthorizer(t, ``) + authzAllow := policyAuthorizer(t, ` + node_prefix "" { policy = "read" } + service_prefix "web" { policy = "read" } + `) + + // Start a stream where insufficient permissions are denied + aclDenyCh := make(chan proxycfg.UpdateEvent) + aclResolver.SwapAuthorizer(authzDeny) + require.NoError(t, dataSource.Notify(ctx, &structs.ServiceSpecificRequest{ + Connect: true, + Datacenter: datacenter, + ServiceName: serviceName, + }, "", aclDenyCh)) + require.ErrorContains(t, getEventError(t, aclDenyCh), "Permission denied") + + // Adding ACL permissions will send valid data + aclResolver.SwapAuthorizer(authzAllow) + time.Sleep(dataSource.watchTimeout) + result := getEventResult[*structs.IndexedCheckServiceNodes](t, aclDenyCh) + require.Len(t, result.Nodes, 1) + require.Equal(t, "web-sidecar-proxy", result.Nodes[0].Service.Service) + + // Start a stream where sufficient permissions are allowed + aclAllowCh := make(chan proxycfg.UpdateEvent) + aclResolver.SwapAuthorizer(authzAllow) + require.NoError(t, dataSource.Notify(ctx, &structs.ServiceSpecificRequest{ + Connect: true, + Datacenter: datacenter, + ServiceName: serviceName, + }, "", aclAllowCh)) + result = getEventResult[*structs.IndexedCheckServiceNodes](t, aclAllowCh) + require.Len(t, result.Nodes, 1) + 
require.Equal(t, "web-sidecar-proxy", result.Nodes[0].Service.Service) + + // Removing ACL permissions will send empty data + aclResolver.SwapAuthorizer(authzDeny) + time.Sleep(dataSource.watchTimeout) + result = getEventResult[*structs.IndexedCheckServiceNodes](t, aclAllowCh) + require.Len(t, result.Nodes, 0) + + // Adding ACL permissions will send valid data + aclResolver.SwapAuthorizer(authzAllow) + time.Sleep(dataSource.watchTimeout) + result = getEventResult[*structs.IndexedCheckServiceNodes](t, aclAllowCh) + require.Len(t, result.Nodes, 1) + require.Equal(t, "web-sidecar-proxy", result.Nodes[0].Service.Service) + }) + }) +} diff --git a/agent/setup.go b/agent/setup.go index bf1b4d004077..4d5d0feed7a1 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -138,7 +138,10 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl var extraSinks []metrics.MetricSink if cfg.IsCloudEnabled() { - d.HCP, err = hcp.NewDeps(cfg.Cloud, d.Logger.Named("hcp"), cfg.NodeID) + // This values is set late within newNodeIDFromConfig above + cfg.Cloud.NodeID = cfg.NodeID + + d.HCP, err = hcp.NewDeps(cfg.Cloud, d.Logger.Named("hcp")) if err != nil { return d, err } diff --git a/agent/structs/structs.go b/agent/structs/structs.go index b356a31aeaf9..1499c35eb80d 100644 --- a/agent/structs/structs.go +++ b/agent/structs/structs.go @@ -1480,6 +1480,10 @@ func (s *NodeService) IsGateway() bool { func (s *NodeService) Validate() error { var result error + if err := s.Locality.Validate(); err != nil { + result = multierror.Append(result, err) + } + if s.Kind == ServiceKindConnectProxy { if s.Port == 0 && s.SocketPath == "" { result = multierror.Append(result, fmt.Errorf("Port or SocketPath must be set for a %s", s.Kind)) @@ -3111,3 +3115,15 @@ func (l *Locality) GetRegion() string { } return l.Region } + +func (l *Locality) Validate() error { + if l == nil { + return nil + } + + if l.Region == "" && l.Zone != "" { + return fmt.Errorf("zone cannot be set without region") + } + + return nil +} diff --git a/agent/structs/structs_test.go b/agent/structs/structs_test.go index 6d887da9ac77..668f5fb08fae 100644 --- a/agent/structs/structs_test.go +++ b/agent/structs/structs_test.go @@ -592,6 +592,43 @@ func TestStructs_ServiceNode_Conversions(t *testing.T) { } } +func TestStructs_Locality_Validate(t *testing.T) { + type testCase struct { + locality *Locality + err string + } + cases := map[string]testCase{ + "nil": { + nil, + "", + }, + "region only": { + &Locality{Region: "us-west-1"}, + "", + }, + "region and zone": { + &Locality{Region: "us-west-1", Zone: "us-west-1a"}, + "", + }, + "zone only": { + &Locality{Zone: "us-west-1a"}, + "zone cannot be set without region", + }, + } + + for name, tc := range cases { + t.Run(name, func(t *testing.T) { + err := tc.locality.Validate() + if tc.err == "" { + require.NoError(t, err) + } else { + require.Error(t, err) + require.Contains(t, err.Error(), tc.err) + } + }) + } +} + func TestStructs_NodeService_ValidateMeshGateway(t *testing.T) { type testCase struct { Modify func(*NodeService) @@ -1152,6 +1189,13 @@ func TestStructs_NodeService_ValidateConnectProxy(t *testing.T) { }, "", }, + { + "connect-proxy: invalid locality", + func(x *NodeService) { + x.Locality = &Locality{Zone: "bad"} + }, + "zone cannot be set without region", + }, } for _, tc := range cases { diff --git a/agent/xds/delta.go b/agent/xds/delta.go index 5e4cf702090a..f84b633a852b 100644 --- a/agent/xds/delta.go +++ b/agent/xds/delta.go @@ -492,6 +492,7 @@ func 
applyEnvoyExtension(logger hclog.Logger, cfgSnap *proxycfg.ConfigSnapshot, extender, err := envoyextensions.ConstructExtension(ext) metrics.MeasureSinceWithLabels([]string{"envoy_extension", "validate_arguments"}, now, getMetricLabels(err)) if err != nil { + errorParams = append(errorParams, "error", err) logFn("failed to construct extension", errorParams...) if ext.Required { @@ -507,6 +508,7 @@ func applyEnvoyExtension(logger hclog.Logger, cfgSnap *proxycfg.ConfigSnapshot, if err != nil { errorParams = append(errorParams, "error", err) logFn("failed to validate extension arguments", errorParams...) + if ext.Required { return status.Errorf(codes.InvalidArgument, "failed to validate arguments for extension %q for service %q", ext.Name, svc.Name) } @@ -517,9 +519,13 @@ func applyEnvoyExtension(logger hclog.Logger, cfgSnap *proxycfg.ConfigSnapshot, now = time.Now() _, err = extender.Extend(resources, &runtimeConfig) metrics.MeasureSinceWithLabels([]string{"envoy_extension", "extend"}, now, getMetricLabels(err)) - logFn("failed to apply envoy extension", errorParams...) - if err != nil && ext.Required { - return status.Errorf(codes.InvalidArgument, "failed to patch xDS resources in the %q extension: %v", ext.Name, err) + if err != nil { + errorParams = append(errorParams, "error", err) + logFn("failed to apply envoy extension", errorParams...) + + if ext.Required { + return status.Errorf(codes.InvalidArgument, "failed to patch xDS resources in the %q extension: %v", ext.Name, err) + } } return nil diff --git a/build-support/scripts/check-allowed-imports.sh b/build-support/scripts/check-allowed-imports.sh new file mode 100755 index 000000000000..fb0280e6ff08 --- /dev/null +++ b/build-support/scripts/check-allowed-imports.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: MPL-2.0 + + +readonly SCRIPT_NAME="$(basename ${BASH_SOURCE[0]})" +readonly SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")" +readonly SOURCE_DIR="$(dirname "$(dirname "${SCRIPT_DIR}")")" +readonly FN_DIR="$(dirname "${SCRIPT_DIR}")/functions" + +source "${SCRIPT_DIR}/functions.sh" + + +set -uo pipefail + +usage() { +cat <<-EOF +Usage: ${SCRIPT_NAME} [...] + +Description: + Verifies that only the specified packages may be imported from the given module + +Options: + -h | --help Print this help text. +EOF +} + +function err_usage { + err "$1" + err "" + err "$(usage)" +} + +function main { + local module_root="" + declare -a allowed_packages=() + while test $# -gt 0 + do + case "$1" in + -h | --help ) + usage + return 0 + ;; + * ) + if test -z "$module_root" + then + module_root="$1" + else + allowed_packages+="$1" + fi + shift + esac + done + + # If we could guarantee this ran with bash 4.2+ then the final argument could + # be just ${allowed_packages[@]}. However that with older versions of bash + # in combination with set -u causes bash to emit errors about using unbound + # variables when no allowed packages have been specified (i.e. the module should + # generally be disallowed with no exceptions). This syntax is very strange + # but seems to be the prescribed workaround I found. + check_imports "$module_root" ${allowed_packages[@]+"${allowed_packages[@]}"} + return $? +} + +function check_imports { + local module_root="$1" + shift + local allowed_packages="$@" + + module_imports=$( go list -test -f '{{join .TestImports "\n"}}' ./... | grep "$module_root" | sort | uniq) + module_test_imports=$( go list -test -f '{{join .TestImports "\n"}}' ./... 
| grep "$module_root" | sort | uniq) + + any_error=0 + + for imp in $module_imports + do + is_import_allowed "$imp" "$module_root" $allowed_packages + allowed=$? + + if test $any_error -ne 1 + then + any_error=$allowed + fi + done + + if test $any_error -eq 1 + then + echo "Only the following direct imports are allowed from module $module_root:" + for pkg in $allowed_packages + do + echo " * $pkg" + done + fi + + return $any_error +} + +function is_import_allowed { + local pkg_import=$1 + shift + local module_root=$1 + shift + local allowed_packages="$@" + + # check if the import path is a part of the module we are restricting imports for + if test "$( go list -f '{{.Module.Path}}' $pkg_import)" != "$module_root" + then + return 0 + fi + + for pkg in $allowed_packages + do + if test "${module_root}/$pkg" == "$pkg_import" + then + return 0 + fi + done + + err "Import of package $pkg_import is not allowed" + return 1 +} + +main "$@" +exit $? \ No newline at end of file diff --git a/internal/catalog/catalogtest/run_test.go b/internal/catalog/catalogtest/run_test.go index 7c17052d8246..defaad2a16d6 100644 --- a/internal/catalog/catalogtest/run_test.go +++ b/internal/catalog/catalogtest/run_test.go @@ -37,3 +37,8 @@ func TestControllers_Integration(t *testing.T) { client := runInMemResourceServiceAndControllers(t, catalog.DefaultControllerDependencies()) RunCatalogV1Alpha1IntegrationTest(t, client) } + +func TestControllers_Lifecycle(t *testing.T) { + client := runInMemResourceServiceAndControllers(t, catalog.DefaultControllerDependencies()) + RunCatalogV1Alpha1LifecycleIntegrationTest(t, client) +} diff --git a/internal/catalog/catalogtest/test_integration_v1alpha1.go b/internal/catalog/catalogtest/test_integration_v1alpha1.go index 8a7f4cd9a248..19be6d7a4846 100644 --- a/internal/catalog/catalogtest/test_integration_v1alpha1.go +++ b/internal/catalog/catalogtest/test_integration_v1alpha1.go @@ -698,6 +698,7 @@ func expectedGRPCApiServiceEndpoints(t *testing.T, c *rtest.Client) *pbcatalog.S } func verifyServiceEndpoints(t *testing.T, c *rtest.Client, id *pbresource.ID, expected *pbcatalog.ServiceEndpoints) { + t.Helper() c.WaitForResourceState(t, id, func(t rtest.T, res *pbresource.Resource) { var actual pbcatalog.ServiceEndpoints err := res.Data.UnmarshalTo(&actual) diff --git a/internal/catalog/catalogtest/test_lifecycle_v1alpha1.go b/internal/catalog/catalogtest/test_lifecycle_v1alpha1.go new file mode 100644 index 000000000000..d7529a6ec48c --- /dev/null +++ b/internal/catalog/catalogtest/test_lifecycle_v1alpha1.go @@ -0,0 +1,706 @@ +package catalogtest + +import ( + "testing" + + "github.com/hashicorp/consul/internal/catalog" + rtest "github.com/hashicorp/consul/internal/resource/resourcetest" + pbcatalog "github.com/hashicorp/consul/proto-public/pbcatalog/v1alpha1" + "github.com/hashicorp/consul/proto-public/pbresource" + "github.com/hashicorp/consul/sdk/testutil" +) + +// RunCatalogV1Alpha1LifecycleIntegrationTest intends to excercise functionality of +// managing catalog resources over their normal lifecycle where they will be modified +// several times, change state etc. 
+func RunCatalogV1Alpha1LifecycleIntegrationTest(t *testing.T, client pbresource.ResourceServiceClient) { + t.Helper() + + testutil.RunStep(t, "node-lifecycle", func(t *testing.T) { + RunCatalogV1Alpha1NodeLifecycleIntegrationTest(t, client) + }) + + testutil.RunStep(t, "workload-lifecycle", func(t *testing.T) { + RunCatalogV1Alpha1WorkloadLifecycleIntegrationTest(t, client) + }) + + testutil.RunStep(t, "endpoints-lifecycle", func(t *testing.T) { + RunCatalogV1Alpha1EndpointsLifecycleIntegrationTest(t, client) + }) +} + +// RunCatalogV1Alpha1NodeLifecycleIntegrationTest verifies correct functionality of +// the node-health controller. This test will exercise the following behaviors: +// +// * Creating a Node without associated HealthStatuses will mark the node as passing +// * Associating a HealthStatus with a Node will cause recomputation of the Health +// * Changing HealthStatus to a worse health will cause recomputation of the Health +// * Changing HealthStatus to a better health will cause recomputation of the Health +// * Deletion of associated HealthStatuses will recompute the Health (back to passing) +// * Deletion of the node will cause deletion of associated health statuses +func RunCatalogV1Alpha1NodeLifecycleIntegrationTest(t *testing.T, client pbresource.ResourceServiceClient) { + c := rtest.NewClient(client) + + nodeName := "test-lifecycle" + nodeHealthName := "test-lifecycle-node-status" + + // initial node creation + node := rtest.Resource(catalog.NodeV1Alpha1Type, nodeName). + WithData(t, &pbcatalog.Node{ + Addresses: []*pbcatalog.NodeAddress{ + {Host: "172.16.2.3"}, + {Host: "198.18.2.3", External: true}, + }, + }). + Write(t, c) + + // wait for the node health controller to mark the node as healthy + c.WaitForStatusCondition(t, node.Id, + catalog.NodeHealthStatusKey, + catalog.NodeHealthConditions[pbcatalog.Health_HEALTH_PASSING]) + + // Its easy enough to simply repeatedly set the health status and it proves + // that going both from better to worse health and worse to better all + // happen as expected. We leave the health in a warning state to allow for + // the subsequent health status deletion to cause the health to go back + // to passing. + healthChanges := []pbcatalog.Health{ + pbcatalog.Health_HEALTH_PASSING, + pbcatalog.Health_HEALTH_WARNING, + pbcatalog.Health_HEALTH_CRITICAL, + pbcatalog.Health_HEALTH_MAINTENANCE, + pbcatalog.Health_HEALTH_CRITICAL, + pbcatalog.Health_HEALTH_WARNING, + pbcatalog.Health_HEALTH_PASSING, + pbcatalog.Health_HEALTH_WARNING, + } + + // This will be set within the loop and used afterwards to delete the health status + var nodeHealth *pbresource.Resource + + // Iterate through the various desired health statuses, updating + // a HealthStatus resource owned by the node and waiting for + // reconciliation at each point + for _, health := range healthChanges { + // update the health check + nodeHealth = setHealthStatus(t, c, node.Id, nodeHealthName, health) + + // wait for reconciliation to kick in and put the node into the right + // health status. 
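+ // catalog.NodeHealthConditions maps each pbcatalog.Health value to the status
+ // condition the node-health controller is expected to report for that health.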
+ c.WaitForStatusCondition(t, node.Id, + catalog.NodeHealthStatusKey, + catalog.NodeHealthConditions[health]) + } + + // now delete the health status and ensure things go back to passing + c.MustDelete(t, nodeHealth.Id) + + // wait for the node health controller to mark the node as healthy + c.WaitForStatusCondition(t, node.Id, + catalog.NodeHealthStatusKey, + catalog.NodeHealthConditions[pbcatalog.Health_HEALTH_PASSING]) + + // Add the health status back once more, the actual status doesn't matter. + // It just must be owned by the node so that we can show cascading + // deletions of owned health statuses working. + healthStatus := setHealthStatus(t, c, node.Id, nodeHealthName, pbcatalog.Health_HEALTH_CRITICAL) + + // Delete the node and wait for the health status to be deleted. + c.MustDelete(t, node.Id) + c.WaitForDeletion(t, healthStatus.Id) +} + +// RunCatalogV1Alpha1WorkloadLifecycleIntegrationTest verifies correct functionality of +// the workload-health controller. This test will exercise the following behaviors: +// +// - Associating a workload with a node causes recomputation of the health and takes +// into account the nodes health +// - Modifying the workloads associated node causes health recomputation and takes into +// account the new nodes health +// - Removal of the node association causes recomputation of health and for no node health +// to be taken into account. +// - Creating a workload without associated health statuses or node association will +// be marked passing +// - Creating a workload without associated health statuses but with a node will +// inherit its health from the node. +// - Changing HealthStatus to a worse health will cause recompuation of the Health +// - Changing HealthStatus to a better health will cause recompuation of the Health +// - Overall health is computed as the worst health amongst the nodes health and all +// of the workloads associated HealthStatuses +// - Deletion of the workload will cause deletion of all associated health statuses. +func RunCatalogV1Alpha1WorkloadLifecycleIntegrationTest(t *testing.T, client pbresource.ResourceServiceClient) { + c := rtest.NewClient(client) + testutil.RunStep(t, "nodeless-workload", func(t *testing.T) { + runV1Alpha1NodelessWorkloadLifecycleIntegrationTest(t, c) + }) + + testutil.RunStep(t, "node-associated-workload", func(t *testing.T) { + runV1Alpha1NodeAssociatedWorkloadLifecycleIntegrationTest(t, c) + }) +} + +// runV1Alpha1NodelessWorkloadLifecycleIntegrationTest verifies correct functionality of +// the workload-health controller for workloads without node associations. In particular +// the following behaviors are being tested +// +// - Creating a workload without associated health statuses or node association will +// be marked passing +// - Changing HealthStatus to a worse health will cause recompuation of the Health +// - Changing HealthStatus to a better health will cause recompuation of the Health +// - Deletion of associated HealthStatus for a nodeless workload will be set back to passing +// - Deletion of the workload will cause deletion of all associated health statuses. +func runV1Alpha1NodelessWorkloadLifecycleIntegrationTest(t *testing.T, c *rtest.Client) { + workloadName := "test-lifecycle-workload" + workloadHealthName := "test-lifecycle-workload-status" + + // create a workload without a node association or health statuses yet + workload := rtest.Resource(catalog.WorkloadV1Alpha1Type, workloadName). 
+ WithData(t, &pbcatalog.Workload{ + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "198.18.9.8"}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "http": {Port: 8080, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}, + }, + Identity: "test-lifecycle", + }). + Write(t, c) + + // wait for the workload health controller to mark the workload as healthy + c.WaitForStatusCondition(t, workload.Id, + catalog.WorkloadHealthStatusKey, + catalog.WorkloadHealthConditions[pbcatalog.Health_HEALTH_PASSING]) + + // We may not need to iterate through all of these states but its easy + // enough and quick enough to do so. The general rationale is that we + // should move through changing the workloads associated health status + // in this progression. We can prove that moving from better to worse + // health or worse to better both function correctly. + healthChanges := []pbcatalog.Health{ + pbcatalog.Health_HEALTH_PASSING, + pbcatalog.Health_HEALTH_WARNING, + pbcatalog.Health_HEALTH_CRITICAL, + pbcatalog.Health_HEALTH_MAINTENANCE, + pbcatalog.Health_HEALTH_CRITICAL, + pbcatalog.Health_HEALTH_WARNING, + pbcatalog.Health_HEALTH_PASSING, + pbcatalog.Health_HEALTH_WARNING, + } + + var workloadHealth *pbresource.Resource + // Iterate through the various desired health statuses, updating + // a HealthStatus resource owned by the workload and waiting for + // reconciliation at each point + for _, health := range healthChanges { + // update the health status + workloadHealth = setHealthStatus(t, c, workload.Id, workloadHealthName, health) + + // wait for reconciliation to kick in and put the workload into + // the right health status. + c.WaitForStatusCondition(t, workload.Id, + catalog.WorkloadHealthStatusKey, + catalog.WorkloadHealthConditions[health]) + } + + // Now delete the health status, things should go back to passing status + c.MustDelete(t, workloadHealth.Id) + + // ensure the workloads health went back to passing + c.WaitForStatusCondition(t, workload.Id, + catalog.WorkloadHealthStatusKey, + catalog.WorkloadHealthConditions[pbcatalog.Health_HEALTH_PASSING]) + + // Reset the workload health. The actual health is irrelevant, we just want it + // to exist to provde that Health Statuses get deleted along with the workload + // when its deleted. + workloadHealth = setHealthStatus(t, c, workload.Id, workloadHealthName, pbcatalog.Health_HEALTH_WARNING) + + // Delete the workload and wait for the HealthStatus to also be deleted + c.MustDelete(t, workload.Id) + c.WaitForDeletion(t, workloadHealth.Id) +} + +// runV1Alpha1NodeAssociatedWorkloadLifecycleIntegrationTest verifies correct functionality of +// the workload-health controller. This test will exercise the following behaviors: +// +// - Associating a workload with a node causes recomputation of the health and takes +// into account the nodes health +// - Modifying the workloads associated node causes health recomputation and takes into +// account the new nodes health +// - Removal of the node association causes recomputation of health and for no node health +// to be taken into account. +// - Creating a workload without associated health statuses but with a node will +// inherit its health from the node. 
+// - Overall health is computed as the worst health amongst the nodes health and all +// of the workloads associated HealthStatuses +func runV1Alpha1NodeAssociatedWorkloadLifecycleIntegrationTest(t *testing.T, c *rtest.Client) { + workloadName := "test-lifecycle" + workloadHealthName := "test-lifecycle" + nodeName1 := "test-lifecycle-1" + nodeName2 := "test-lifecycle-2" + nodeHealthName1 := "test-lifecycle-node-1" + nodeHealthName2 := "test-lifecycle-node-2" + + // Insert a some nodes to link the workloads to at various points throughout the test + node1 := rtest.Resource(catalog.NodeV1Alpha1Type, nodeName1). + WithData(t, &pbcatalog.Node{ + Addresses: []*pbcatalog.NodeAddress{{Host: "172.17.9.10"}}, + }). + Write(t, c) + node2 := rtest.Resource(catalog.NodeV1Alpha1Type, nodeName2). + WithData(t, &pbcatalog.Node{ + Addresses: []*pbcatalog.NodeAddress{{Host: "172.17.9.11"}}, + }). + Write(t, c) + + // Set some non-passing health statuses for those nodes. Using non-passing will make + // it easy to see that changing a passing workloads node association appropriately + // impacts the overall workload health. + setHealthStatus(t, c, node1.Id, nodeHealthName1, pbcatalog.Health_HEALTH_CRITICAL) + setHealthStatus(t, c, node2.Id, nodeHealthName2, pbcatalog.Health_HEALTH_WARNING) + + // Add the workload but don't immediately associate with any node. + workload := rtest.Resource(catalog.WorkloadV1Alpha1Type, workloadName). + WithData(t, &pbcatalog.Workload{ + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "198.18.9.8"}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "http": {Port: 8080, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}, + }, + Identity: "test-lifecycle", + }). + Write(t, c) + + // wait for the workload health controller to mark the workload as healthy + c.WaitForStatusCondition(t, workload.Id, + catalog.WorkloadHealthStatusKey, + catalog.WorkloadHealthConditions[pbcatalog.Health_HEALTH_PASSING]) + + // now modify the workload to associate it with node 1 (currently with CRITICAL health) + workload = rtest.ResourceID(workload.Id). + WithData(t, &pbcatalog.Workload{ + Addresses: []*pbcatalog.WorkloadAddress{{Host: "198.18.9.8"}}, + Ports: map[string]*pbcatalog.WorkloadPort{"http": {Port: 8080, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}}, + Identity: "test-lifecycle", + // this is the only difference from the previous write + NodeName: node1.Id.Name, + }). + Write(t, c) + + // wait for the workload health controller to mark the workload as critical (due to node 1 having critical health) + c.WaitForStatusCondition(t, workload.Id, + catalog.WorkloadHealthStatusKey, + catalog.WorkloadAndNodeHealthConditions[pbcatalog.Health_HEALTH_PASSING][pbcatalog.Health_HEALTH_CRITICAL]) + + // Now reassociate the workload with node 2. This should cause recalculation of its health into the warning state + workload = rtest.ResourceID(workload.Id). + WithData(t, &pbcatalog.Workload{ + Addresses: []*pbcatalog.WorkloadAddress{{Host: "198.18.9.8"}}, + Ports: map[string]*pbcatalog.WorkloadPort{"http": {Port: 8080, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}}, + Identity: "test-lifecycle", + // this is the only difference from the previous write + NodeName: node2.Id.Name, + }). 
+ Write(t, c) + + // Wait for the workload health controller to mark the workload as warning (due to node 2 having warning health) + c.WaitForStatusCondition(t, workload.Id, + catalog.WorkloadHealthStatusKey, + catalog.WorkloadAndNodeHealthConditions[pbcatalog.Health_HEALTH_PASSING][pbcatalog.Health_HEALTH_WARNING]) + + // Delete the node, this should cause the health to be recalculated as critical because the node association + // is broken. + c.MustDelete(t, node2.Id) + + // Wait for the workload health controller to mark the workload as critical due to the missing node + c.WaitForStatusCondition(t, workload.Id, + catalog.WorkloadHealthStatusKey, + catalog.WorkloadAndNodeHealthConditions[pbcatalog.Health_HEALTH_PASSING][pbcatalog.Health_HEALTH_CRITICAL]) + + // Now fixup the node association to point at node 1 + workload = rtest.ResourceID(workload.Id). + WithData(t, &pbcatalog.Workload{ + Addresses: []*pbcatalog.WorkloadAddress{{Host: "198.18.9.8"}}, + Ports: map[string]*pbcatalog.WorkloadPort{"http": {Port: 8080, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}}, + Identity: "test-lifecycle", + // this is the only difference from the previous write + NodeName: node1.Id.Name, + }). + Write(t, c) + + // Also set node 1 health down to WARNING + setHealthStatus(t, c, node1.Id, nodeHealthName1, pbcatalog.Health_HEALTH_WARNING) + + // Wait for the workload health controller to mark the workload as warning (due to node 1 having warning health now) + c.WaitForStatusCondition(t, workload.Id, + catalog.WorkloadHealthStatusKey, + catalog.WorkloadAndNodeHealthConditions[pbcatalog.Health_HEALTH_PASSING][pbcatalog.Health_HEALTH_WARNING]) + + // Now add a critical workload health check to ensure that both node and workload health are accounted for. + setHealthStatus(t, c, workload.Id, workloadHealthName, pbcatalog.Health_HEALTH_CRITICAL) + + // Wait for the workload health to be recomputed and put into the critical status. + c.WaitForStatusCondition(t, workload.Id, + catalog.WorkloadHealthStatusKey, + catalog.WorkloadAndNodeHealthConditions[pbcatalog.Health_HEALTH_CRITICAL][pbcatalog.Health_HEALTH_WARNING]) + + // Reset the workloads health to passing. We expect the overall health to go back to warning + setHealthStatus(t, c, workload.Id, workloadHealthName, pbcatalog.Health_HEALTH_PASSING) + c.WaitForStatusCondition(t, workload.Id, + catalog.WorkloadHealthStatusKey, + catalog.WorkloadAndNodeHealthConditions[pbcatalog.Health_HEALTH_PASSING][pbcatalog.Health_HEALTH_WARNING]) + + // Remove the node association and wait for the health to go back to passing + workload = rtest.ResourceID(workload.Id). + WithData(t, &pbcatalog.Workload{ + Addresses: []*pbcatalog.WorkloadAddress{{Host: "198.18.9.8"}}, + Ports: map[string]*pbcatalog.WorkloadPort{"http": {Port: 8080, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}}, + Identity: "test-lifecycle", + }). + Write(t, c) + c.WaitForStatusCondition(t, workload.Id, + catalog.WorkloadHealthStatusKey, + catalog.WorkloadHealthConditions[pbcatalog.Health_HEALTH_PASSING]) +} + +// RunCatalogV1Alpha1EndpointsLifecycleIntegrationTest verifies the correct functionality of +// the endpoints controller. 
This test will exercise the following behaviors: +// +// * Services without a selector get marked with status indicating their endpoints are unmanaged +// * Services with a selector get marked with status indicating their endpoints are managed +// * Deleting a service will delete the associated endpoints (regardless of them being managed or not) +// * Moving from managed to unmanaged endpoints will delete the managed endpoints +// * Moving from unmanaged to managed endpoints will overwrite any previous endpoints. +// * A service with a selector that matches no workloads will still have the endpoints object written. +// * Adding ports to a service will recalculate the endpoints +// * Removing ports from a service will recalculate the endpoints +// * Changing the workload will recalculate the endpoints (ports, addresses, or health) +func RunCatalogV1Alpha1EndpointsLifecycleIntegrationTest(t *testing.T, client pbresource.ResourceServiceClient) { + c := rtest.NewClient(client) + serviceName := "test-lifecycle" + + // Create the service without a selector. We should not see endpoints generated but we should see the + // status updated to note endpoints are not being managed. + service := rtest.Resource(catalog.ServiceV1Alpha1Type, serviceName). + WithData(t, &pbcatalog.Service{ + Ports: []*pbcatalog.ServicePort{{TargetPort: "http", Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}}, + }). + Write(t, c) + + // Wait to ensure the status is updated accordingly + c.WaitForStatusCondition(t, service.Id, catalog.EndpointsStatusKey, catalog.EndpointsStatusConditionUnmanaged) + + // Verify that no endpoints were created. + endpointsID := rtest.Resource(catalog.ServiceEndpointsV1Alpha1Type, serviceName).ID() + c.RequireResourceNotFound(t, endpointsID) + + // Add some empty endpoints (type validations enforce that they are owned by the service) + rtest.ResourceID(endpointsID). + WithData(t, &pbcatalog.ServiceEndpoints{}). + WithOwner(service.Id). + Write(t, c) + + // Now delete the service and ensure that they are cleaned up. + c.MustDelete(t, service.Id) + c.WaitForDeletion(t, endpointsID) + + // Add some workloads to eventually select by the service + + // api-1 has all ports (http, grpc and mesh). It also has a mixture of Addresses + // that select individual ports and one that selects all ports implicitly + api1 := rtest.Resource(catalog.WorkloadV1Alpha1Type, "api-1"). + WithData(t, &pbcatalog.Workload{ + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1"}, + {Host: "::1", Ports: []string{"grpc"}}, + {Host: "127.0.0.2", Ports: []string{"http"}}, + {Host: "172.17.1.1", Ports: []string{"mesh"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "mesh": {Port: 10000, Protocol: pbcatalog.Protocol_PROTOCOL_MESH}, + "http": {Port: 8080, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}, + "grpc": {Port: 9090, Protocol: pbcatalog.Protocol_PROTOCOL_GRPC}, + }, + Identity: "api", + }). + Write(t, c) + + // api-2 has only grpc and mesh ports. It also has a mixture of Addresses that + // select individual ports and one that selects all ports implicitly + api2 := rtest.Resource(catalog.WorkloadV1Alpha1Type, "api-2"). 
+ WithData(t, &pbcatalog.Workload{ + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1"}, + {Host: "::1", Ports: []string{"grpc"}}, + {Host: "172.17.1.2", Ports: []string{"mesh"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "mesh": {Port: 10000, Protocol: pbcatalog.Protocol_PROTOCOL_MESH}, + "grpc": {Port: 9090, Protocol: pbcatalog.Protocol_PROTOCOL_GRPC}, + }, + Identity: "api", + }). + Write(t, c) + + // api-3 has the mesh and HTTP ports. It also has a mixture of Addresses that + // select individual ports and one that selects all ports. + api3 := rtest.Resource(catalog.WorkloadV1Alpha1Type, "api-3"). + WithData(t, &pbcatalog.Workload{ + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1"}, + {Host: "172.17.1.3", Ports: []string{"mesh"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "mesh": {Port: 10000, Protocol: pbcatalog.Protocol_PROTOCOL_MESH}, + "http": {Port: 8080, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}, + }, + Identity: "api", + }). + Write(t, c) + + // Now create a service with unmanaged endpoints again + service = rtest.Resource(catalog.ServiceV1Alpha1Type, serviceName). + WithData(t, &pbcatalog.Service{ + Ports: []*pbcatalog.ServicePort{{TargetPort: "http", Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}}, + }). + Write(t, c) + + // Inject the endpoints resource. We want to prove that transition from unmanaged to + // managed endpoints results in overwriting of the old endpoints + rtest.ResourceID(endpointsID). + WithData(t, &pbcatalog.ServiceEndpoints{ + Endpoints: []*pbcatalog.Endpoint{ + { + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "198.18.1.1", External: true}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "http": {Port: 443, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}, + }, + HealthStatus: pbcatalog.Health_HEALTH_PASSING, + }, + }, + }). + WithOwner(service.Id). + Write(t, c) + + // Wait to ensure the status is updated accordingly + c.WaitForStatusCondition(t, service.Id, catalog.EndpointsStatusKey, catalog.EndpointsStatusConditionUnmanaged) + + // Now move the service to having managed endpoints + service = rtest.ResourceID(service.Id). + WithData(t, &pbcatalog.Service{ + Workloads: &pbcatalog.WorkloadSelector{Names: []string{"bar"}}, + Ports: []*pbcatalog.ServicePort{{TargetPort: "http", Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}}, + }). + Write(t, c) + + // Verify that this status is updated to show this service as having managed endpoints + c.WaitForStatusCondition(t, service.Id, catalog.EndpointsStatusKey, catalog.EndpointsStatusConditionManaged) + + // Verify that the service endpoints are created. In this case they will be empty + verifyServiceEndpoints(t, c, endpointsID, &pbcatalog.ServiceEndpoints{}) + + // Rewrite the service to select the API workloads - just select the singular port for now + service = rtest.ResourceID(service.Id). + WithData(t, &pbcatalog.Service{ + Workloads: &pbcatalog.WorkloadSelector{Prefixes: []string{"api-"}}, + Ports: []*pbcatalog.ServicePort{{TargetPort: "http", Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}}, + }). + Write(t, c) + + // Wait for the status to be updated. 
The condition itself will remain unchanged but we are waiting for + // the generations to match to know that the endpoints would have been regenerated + c.WaitForStatusCondition(t, service.Id, catalog.EndpointsStatusKey, catalog.EndpointsStatusConditionManaged) + + // ensure that api-1 and api-3 are selected but api-2 is excluded due to not having the desired port + verifyServiceEndpoints(t, c, endpointsID, &pbcatalog.ServiceEndpoints{ + Endpoints: []*pbcatalog.Endpoint{ + { + TargetRef: api1.Id, + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1", Ports: []string{"http"}}, + {Host: "127.0.0.2", Ports: []string{"http"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "http": {Port: 8080, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}, + }, + HealthStatus: pbcatalog.Health_HEALTH_PASSING, + }, + { + TargetRef: api3.Id, + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1", Ports: []string{"http"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "http": {Port: 8080, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}, + }, + HealthStatus: pbcatalog.Health_HEALTH_PASSING, + }, + }, + }) + + // Rewrite the service to select the API workloads - changing from selecting the HTTP port to the gRPC port + service = rtest.ResourceID(service.Id). + WithData(t, &pbcatalog.Service{ + Workloads: &pbcatalog.WorkloadSelector{Prefixes: []string{"api-"}}, + Ports: []*pbcatalog.ServicePort{{TargetPort: "grpc", Protocol: pbcatalog.Protocol_PROTOCOL_GRPC}}, + }). + Write(t, c) + + // Wait for the status to be updated. The condition itself will remain unchanged but we are waiting for + // the generations to match to know that the endpoints would have been regenerated + c.WaitForStatusCondition(t, service.Id, catalog.EndpointsStatusKey, catalog.EndpointsStatusConditionManaged) + + // Check that the endpoints were generated as expected + verifyServiceEndpoints(t, c, endpointsID, &pbcatalog.ServiceEndpoints{ + Endpoints: []*pbcatalog.Endpoint{ + { + TargetRef: api1.Id, + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1", Ports: []string{"grpc"}}, + {Host: "::1", Ports: []string{"grpc"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "grpc": {Port: 9090, Protocol: pbcatalog.Protocol_PROTOCOL_GRPC}, + }, + HealthStatus: pbcatalog.Health_HEALTH_PASSING, + }, + { + TargetRef: api2.Id, + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1", Ports: []string{"grpc"}}, + {Host: "::1", Ports: []string{"grpc"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "grpc": {Port: 9090, Protocol: pbcatalog.Protocol_PROTOCOL_GRPC}, + }, + HealthStatus: pbcatalog.Health_HEALTH_PASSING, + }, + }, + }) + + // Update the service to change the ports used. This should result in the workload being removed + // from the endpoints + rtest.ResourceID(api2.Id). + WithData(t, &pbcatalog.Workload{ + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1"}, + {Host: "::1", Ports: []string{"http"}}, + {Host: "172.17.1.2", Ports: []string{"mesh"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "mesh": {Port: 10000, Protocol: pbcatalog.Protocol_PROTOCOL_MESH}, + "http": {Port: 8080, Protocol: pbcatalog.Protocol_PROTOCOL_HTTP}, + }, + Identity: "api", + }). 
+ Write(t, c) + + // Verify that api-2 was removed from the service endpoints as it no longer has a grpc port + verifyServiceEndpoints(t, c, endpointsID, &pbcatalog.ServiceEndpoints{ + Endpoints: []*pbcatalog.Endpoint{ + { + TargetRef: api1.Id, + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1", Ports: []string{"grpc"}}, + {Host: "::1", Ports: []string{"grpc"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "grpc": {Port: 9090, Protocol: pbcatalog.Protocol_PROTOCOL_GRPC}, + }, + HealthStatus: pbcatalog.Health_HEALTH_PASSING, + }, + }, + }) + + // Remove the ::1 address from workload api1 which should result in recomputing endpoints + rtest.ResourceID(api1.Id). + WithData(t, &pbcatalog.Workload{ + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1"}, + {Host: "172.17.1.1", Ports: []string{"mesh"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "mesh": {Port: 10000, Protocol: pbcatalog.Protocol_PROTOCOL_MESH}, + "grpc": {Port: 9090, Protocol: pbcatalog.Protocol_PROTOCOL_GRPC}, + }, + Identity: "api", + }). + Write(t, c) + + // Verify that api-1 had its addresses modified appropriately + verifyServiceEndpoints(t, c, endpointsID, &pbcatalog.ServiceEndpoints{ + Endpoints: []*pbcatalog.Endpoint{ + { + TargetRef: api1.Id, + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1", Ports: []string{"grpc"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "grpc": {Port: 9090, Protocol: pbcatalog.Protocol_PROTOCOL_GRPC}, + }, + HealthStatus: pbcatalog.Health_HEALTH_PASSING, + }, + }, + }) + + // Add a failing health status to the api1 workload to force recomputation of endpoints + setHealthStatus(t, c, api1.Id, "api-failed", pbcatalog.Health_HEALTH_CRITICAL) + + // Verify that api-1 within the endpoints has the expected health + verifyServiceEndpoints(t, c, endpointsID, &pbcatalog.ServiceEndpoints{ + Endpoints: []*pbcatalog.Endpoint{ + { + TargetRef: api1.Id, + Addresses: []*pbcatalog.WorkloadAddress{ + {Host: "127.0.0.1", Ports: []string{"grpc"}}, + }, + Ports: map[string]*pbcatalog.WorkloadPort{ + "grpc": {Port: 9090, Protocol: pbcatalog.Protocol_PROTOCOL_GRPC}, + }, + HealthStatus: pbcatalog.Health_HEALTH_CRITICAL, + }, + }, + }) + + // Move the service to being unmanaged. We should see the ServiceEndpoints being removed. + service = rtest.ResourceID(service.Id). + WithData(t, &pbcatalog.Service{ + Ports: []*pbcatalog.ServicePort{{TargetPort: "grpc", Protocol: pbcatalog.Protocol_PROTOCOL_GRPC}}, + }). + Write(t, c) + + // Wait for the endpoints controller to inform us that the endpoints are not being managed + c.WaitForStatusCondition(t, service.Id, catalog.EndpointsStatusKey, catalog.EndpointsStatusConditionUnmanaged) + // Ensure that the managed endpoints were deleted + c.WaitForDeletion(t, endpointsID) + + // Put the service back into managed mode. + service = rtest.ResourceID(service.Id). + WithData(t, &pbcatalog.Service{ + Workloads: &pbcatalog.WorkloadSelector{Prefixes: []string{"api-"}}, + Ports: []*pbcatalog.ServicePort{{TargetPort: "grpc", Protocol: pbcatalog.Protocol_PROTOCOL_GRPC}}, + }). 
+ Write(t, c) + + // Wait for the service endpoints to be regenerated + c.WaitForStatusCondition(t, service.Id, catalog.EndpointsStatusKey, catalog.EndpointsStatusConditionManaged) + c.RequireResourceExists(t, endpointsID) + + // Now delete the service and ensure that the endpoints eventually are deleted as well + c.MustDelete(t, service.Id) + c.WaitForDeletion(t, endpointsID) + +} + +func setHealthStatus(t *testing.T, client *rtest.Client, owner *pbresource.ID, name string, health pbcatalog.Health) *pbresource.Resource { + return rtest.Resource(catalog.HealthStatusV1Alpha1Type, name). + WithData(t, &pbcatalog.HealthStatus{ + Type: "synthetic", + Status: health, + }). + WithOwner(owner). + Write(t, client) +} diff --git a/internal/catalog/exports.go b/internal/catalog/exports.go index 61247091be1c..e0373bf7079b 100644 --- a/internal/catalog/exports.go +++ b/internal/catalog/exports.go @@ -5,6 +5,9 @@ package catalog import ( "github.com/hashicorp/consul/internal/catalog/internal/controllers" + "github.com/hashicorp/consul/internal/catalog/internal/controllers/endpoints" + "github.com/hashicorp/consul/internal/catalog/internal/controllers/nodehealth" + "github.com/hashicorp/consul/internal/catalog/internal/controllers/workloadhealth" "github.com/hashicorp/consul/internal/catalog/internal/mappers/nodemapper" "github.com/hashicorp/consul/internal/catalog/internal/mappers/selectiontracker" "github.com/hashicorp/consul/internal/catalog/internal/types" @@ -40,6 +43,21 @@ var ( HealthStatusV1Alpha1Type = types.HealthStatusV1Alpha1Type HealthChecksV1Alpha1Type = types.HealthChecksV1Alpha1Type DNSPolicyV1Alpha1Type = types.DNSPolicyV1Alpha1Type + + // Controller Statuses + NodeHealthStatusKey = nodehealth.StatusKey + NodeHealthStatusConditionHealthy = nodehealth.StatusConditionHealthy + NodeHealthConditions = nodehealth.Conditions + + WorkloadHealthStatusKey = workloadhealth.StatusKey + WorkloadHealthStatusConditionHealthy = workloadhealth.StatusConditionHealthy + WorkloadHealthConditions = workloadhealth.WorkloadConditions + WorkloadAndNodeHealthConditions = workloadhealth.NodeAndWorkloadConditions + + EndpointsStatusKey = endpoints.StatusKey + EndpointsStatusConditionEndpointsManaged = endpoints.StatusConditionEndpointsManaged + EndpointsStatusConditionManaged = endpoints.ConditionManaged + EndpointsStatusConditionUnmanaged = endpoints.ConditionUnmanaged ) // RegisterTypes adds all resource types within the "catalog" API group diff --git a/internal/resource/resourcetest/builder.go b/internal/resource/resourcetest/builder.go index 7355f38824ec..749ff4fea27e 100644 --- a/internal/resource/resourcetest/builder.go +++ b/internal/resource/resourcetest/builder.go @@ -1,11 +1,16 @@ package resourcetest import ( - "context" + "strings" + "github.com/hashicorp/consul/internal/storage" "github.com/hashicorp/consul/proto-public/pbresource" + "github.com/hashicorp/consul/sdk/testutil" + "github.com/hashicorp/consul/sdk/testutil/retry" "github.com/oklog/ulid/v2" "github.com/stretchr/testify/require" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/reflect/protoreflect" "google.golang.org/protobuf/types/known/anypb" @@ -37,6 +42,14 @@ func Resource(rtype *pbresource.Type, name string) *resourceBuilder { } } +func ResourceID(id *pbresource.ID) *resourceBuilder { + return &resourceBuilder{ + resource: &pbresource.Resource{ + Id: id, + }, + } +} + func (b *resourceBuilder) WithData(t T, data protoreflect.ProtoMessage) 
*resourceBuilder { t.Helper() @@ -108,22 +121,37 @@ func (b *resourceBuilder) ID() *pbresource.ID { func (b *resourceBuilder) Write(t T, client pbresource.ResourceServiceClient) *pbresource.Resource { t.Helper() + ctx := testutil.TestContext(t) + res := b.resource - rsp, err := client.Write(context.Background(), &pbresource.WriteRequest{ - Resource: res, - }) + var rsp *pbresource.WriteResponse + var err error - require.NoError(t, err) + // Retry any writes where the error is a UID mismatch and the UID was not specified. This is indicative + // of using a follower to rewrite an object who is not perfectly in-sync with the leader. + retry.Run(t, func(r *retry.R) { + rsp, err = client.Write(ctx, &pbresource.WriteRequest{ + Resource: res, + }) + + if err == nil || res.Id.Uid != "" || status.Code(err) != codes.FailedPrecondition { + return + } + + if strings.Contains(err.Error(), storage.ErrWrongUid.Error()) { + r.Fatalf("resource write failed due to uid mismatch - most likely a transient issue when talking to a non-leader") + } else { + // other errors are unexpected and should cause an immediate failure + r.Stop(err) + } + }) if !b.dontCleanup { - cleaner, ok := t.(CleanupT) - require.True(t, ok, "T does not implement a Cleanup method and cannot be used with automatic resource cleanup") - cleaner.Cleanup(func() { - _, err := client.Delete(context.Background(), &pbresource.DeleteRequest{ - Id: rsp.Resource.Id, - }) - require.NoError(t, err) + id := proto.Clone(rsp.Resource.Id).(*pbresource.ID) + id.Uid = "" + t.Cleanup(func() { + NewClient(client).MustDelete(t, id) }) } @@ -136,7 +164,7 @@ func (b *resourceBuilder) Write(t T, client pbresource.ResourceServiceClient) *p ObservedGeneration: rsp.Resource.Generation, Conditions: original.Conditions, } - _, err := client.WriteStatus(context.Background(), &pbresource.WriteStatusRequest{ + _, err := client.WriteStatus(ctx, &pbresource.WriteStatusRequest{ Id: rsp.Resource.Id, Key: key, Status: status, @@ -144,7 +172,7 @@ func (b *resourceBuilder) Write(t T, client pbresource.ResourceServiceClient) *p require.NoError(t, err) } - readResp, err := client.Read(context.Background(), &pbresource.ReadRequest{ + readResp, err := client.Read(ctx, &pbresource.ReadRequest{ Id: rsp.Resource.Id, }) diff --git a/internal/resource/resourcetest/client.go b/internal/resource/resourcetest/client.go index dab5b03c3adb..5047406d0585 100644 --- a/internal/resource/resourcetest/client.go +++ b/internal/resource/resourcetest/client.go @@ -1,12 +1,13 @@ package resourcetest import ( - "context" + "fmt" "math/rand" "time" "github.com/hashicorp/consul/internal/resource" "github.com/hashicorp/consul/proto-public/pbresource" + "github.com/hashicorp/consul/sdk/testutil" "github.com/hashicorp/consul/sdk/testutil/retry" "github.com/stretchr/testify/require" "golang.org/x/exp/slices" @@ -35,11 +36,14 @@ func (client *Client) SetRetryerConfig(timeout time.Duration, wait time.Duration } func (client *Client) retry(t T, fn func(r *retry.R)) { + t.Helper() retryer := &retry.Timer{Timeout: client.timeout, Wait: client.wait} retry.RunWith(retryer, t, fn) } func (client *Client) PublishResources(t T, resources []*pbresource.Resource) { + ctx := testutil.TestContext(t) + // Randomize the order of insertion. Generally insertion order shouldn't matter as the // controllers should eventually converge on the desired state. 
The exception to this // is that you cannot insert resources with owner refs before the resource they are @@ -74,12 +78,17 @@ func (client *Client) PublishResources(t T, resources []*pbresource.Resource) { } t.Logf("Writing resource %s with type %s", res.Id.Name, resource.ToGVK(res.Id.Type)) - _, err := client.Write(context.Background(), &pbresource.WriteRequest{ + rsp, err := client.Write(ctx, &pbresource.WriteRequest{ Resource: res, }) require.NoError(t, err) - // track the number o + id := rsp.Resource.Id + t.Cleanup(func() { + client.MustDelete(t, id) + }) + + // track the number of resources published published += 1 written = append(written, res.Id) } @@ -101,7 +110,7 @@ func (client *Client) PublishResources(t T, resources []*pbresource.Resource) { func (client *Client) RequireResourceNotFound(t T, id *pbresource.ID) { t.Helper() - rsp, err := client.Read(context.Background(), &pbresource.ReadRequest{Id: id}) + rsp, err := client.Read(testutil.TestContext(t), &pbresource.ReadRequest{Id: id}) require.Error(t, err) require.Equal(t, codes.NotFound, status.Code(err)) require.Nil(t, rsp) @@ -110,7 +119,7 @@ func (client *Client) RequireResourceNotFound(t T, id *pbresource.ID) { func (client *Client) RequireResourceExists(t T, id *pbresource.ID) *pbresource.Resource { t.Helper() - rsp, err := client.Read(context.Background(), &pbresource.ReadRequest{Id: id}) + rsp, err := client.Read(testutil.TestContext(t), &pbresource.ReadRequest{Id: id}) require.NoError(t, err, "error reading %s with type %s", id.Name, resource.ToGVK(id.Type)) require.NotNil(t, rsp) return rsp.Resource @@ -181,7 +190,7 @@ func (client *Client) WaitForStatusCondition(t T, id *pbresource.ID, statusKey s var res *pbresource.Resource client.retry(t, func(r *retry.R) { - res = client.RequireStatusConditionForCurrentGen(t, id, statusKey, condition) + res = client.RequireStatusConditionForCurrentGen(r, id, statusKey, condition) }) return res @@ -209,6 +218,14 @@ func (client *Client) WaitForResourceState(t T, id *pbresource.ID, verify func(T return res } +func (client *Client) WaitForDeletion(t T, id *pbresource.ID) { + t.Helper() + + client.retry(t, func(r *retry.R) { + client.RequireResourceNotFound(r, id) + }) +} + // ResolveResourceID will read the specified resource and returns its full ID. // This is mainly useful to get the ID with the Uid filled out. func (client *Client) ResolveResourceID(t T, id *pbresource.ID) *pbresource.ID { @@ -216,3 +233,24 @@ func (client *Client) ResolveResourceID(t T, id *pbresource.ID) *pbresource.ID { return client.RequireResourceExists(t, id).Id } + +func (client *Client) MustDelete(t T, id *pbresource.ID) { + t.Helper() + ctx := testutil.TestContext(t) + + client.retry(t, func(r *retry.R) { + _, err := client.Delete(ctx, &pbresource.DeleteRequest{Id: id}) + if status.Code(err) == codes.NotFound { + return + } + + // codes.Aborted indicates a CAS failure and that the delete request should + // be retried. Anything else should be considered an unrecoverable error. 
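+ // r.Stop aborts any further retry attempts and fails the test with the given error.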
+ if err != nil && status.Code(err) != codes.Aborted { + r.Stop(fmt.Errorf("failed to delete the resource: %w", err)) + return + } + + require.NoError(r, err) + }) +} diff --git a/internal/resource/resourcetest/testing.go b/internal/resource/resourcetest/testing.go index d02b70da9d03..1c774082b369 100644 --- a/internal/resource/resourcetest/testing.go +++ b/internal/resource/resourcetest/testing.go @@ -9,9 +9,5 @@ type T interface { Errorf(format string, args ...interface{}) Fatalf(format string, args ...interface{}) FailNow() -} - -type CleanupT interface { - T Cleanup(func()) } diff --git a/proto/private/prototest/testing.go b/proto/private/prototest/testing.go index 28341012afa6..b423478155d1 100644 --- a/proto/private/prototest/testing.go +++ b/proto/private/prototest/testing.go @@ -100,5 +100,5 @@ func AssertContainsElement[V any](t TestingT, list []V, element V, opts ...cmp.O } } - t.Fatalf("assertion failed: list does not contain element\n--- list\n%#v\n--- element: %#v", list, element) + t.Fatalf("assertion failed: list does not contain element\n--- list\n%+v\n--- element: %+v", list, element) } diff --git a/sdk/testutil/context.go b/sdk/testutil/context.go index 257f205aa298..47ff794c96c6 100644 --- a/sdk/testutil/context.go +++ b/sdk/testutil/context.go @@ -5,10 +5,14 @@ package testutil import ( "context" - "testing" ) -func TestContext(t *testing.T) context.Context { +type CleanerT interface { + Helper() + Cleanup(func()) +} + +func TestContext(t CleanerT) context.Context { t.Helper() ctx, cancel := context.WithCancel(context.Background()) t.Cleanup(cancel) diff --git a/sdk/testutil/retry/counter.go b/sdk/testutil/retry/counter.go new file mode 100644 index 000000000000..96a37ab9d2fc --- /dev/null +++ b/sdk/testutil/retry/counter.go @@ -0,0 +1,23 @@ +package retry + +import "time" + +// Counter repeats an operation a given number of +// times and waits between subsequent operations. +type Counter struct { + Count int + Wait time.Duration + + count int +} + +func (r *Counter) Continue() bool { + if r.count == r.Count { + return false + } + if r.count > 0 { + time.Sleep(r.Wait) + } + r.count++ + return true +} diff --git a/sdk/testutil/retry/retry.go b/sdk/testutil/retry/retry.go index 30045f0c629d..af468460d592 100644 --- a/sdk/testutil/retry/retry.go +++ b/sdk/testutil/retry/retry.go @@ -53,6 +53,8 @@ type R struct { // and triggers t.FailNow() done bool output []string + + cleanups []func() } func (r *R) Logf(format string, args ...interface{}) { @@ -65,6 +67,41 @@ func (r *R) Log(args ...interface{}) { func (r *R) Helper() {} +// Cleanup register a function to be run to cleanup resources that +// were allocated during the retry attempt. These functions are executed +// after a retry attempt. If they panic, it will not stop further retry +// attempts but will be cause for the overall test failure. +func (r *R) Cleanup(fn func()) { + r.cleanups = append(r.cleanups, fn) +} + +func (r *R) runCleanup() { + + // Make sure that if a cleanup function panics, + // we still run the remaining cleanup functions. 
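+ // The deferred recover below converts a panicking cleanup into a test failure via
+ // r.Stop and then re-enters runCleanup so that any cleanups which have not yet run
+ // are still drained.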
+ defer func() { + err := recover() + if err != nil { + r.Stop(fmt.Errorf("error when performing test cleanup: %v", err)) + } + if len(r.cleanups) > 0 { + r.runCleanup() + } + }() + + for len(r.cleanups) > 0 { + var cleanup func() + if len(r.cleanups) > 0 { + last := len(r.cleanups) - 1 + cleanup = r.cleanups[last] + r.cleanups = r.cleanups[:last] + } + if cleanup != nil { + cleanup() + } + } +} + // runFailed is a sentinel value to indicate that the func itself // didn't panic, rather that `FailNow` was called. type runFailed struct{} @@ -190,6 +227,7 @@ func run(r Retryer, t Failer, f func(r *R)) { // run f(rr), but if recover yields a runFailed value, we know // FailNow was called. func() { + defer rr.runCleanup() defer func() { if p := recover(); p != nil && p != (runFailed{}) { panic(p) @@ -216,16 +254,6 @@ func DefaultFailer() *Timer { return &Timer{Timeout: 7 * time.Second, Wait: 25 * time.Millisecond} } -// TwoSeconds repeats an operation for two seconds and waits 25ms in between. -func TwoSeconds() *Timer { - return &Timer{Timeout: 2 * time.Second, Wait: 25 * time.Millisecond} -} - -// ThreeTimes repeats an operation three times and waits 25ms in between. -func ThreeTimes() *Counter { - return &Counter{Count: 3, Wait: 25 * time.Millisecond} -} - // Retryer provides an interface for repeating operations // until they succeed or an exit condition is met. type Retryer interface { @@ -233,47 +261,3 @@ type Retryer interface { // returns false to indicate retrying should stop. Continue() bool } - -// Counter repeats an operation a given number of -// times and waits between subsequent operations. -type Counter struct { - Count int - Wait time.Duration - - count int -} - -func (r *Counter) Continue() bool { - if r.count == r.Count { - return false - } - if r.count > 0 { - time.Sleep(r.Wait) - } - r.count++ - return true -} - -// Timer repeats an operation for a given amount -// of time and waits between subsequent operations. -type Timer struct { - Timeout time.Duration - Wait time.Duration - - // stop is the timeout deadline. - // TODO: Next()? - // Set on the first invocation of Next(). 
- stop time.Time -} - -func (r *Timer) Continue() bool { - if r.stop.IsZero() { - r.stop = time.Now().Add(r.Timeout) - return true - } - if time.Now().After(r.stop) { - return false - } - time.Sleep(r.Wait) - return true -} diff --git a/sdk/testutil/retry/retry_test.go b/sdk/testutil/retry/retry_test.go index 1f7eda7b3133..77bc2d4d9f96 100644 --- a/sdk/testutil/retry/retry_test.go +++ b/sdk/testutil/retry/retry_test.go @@ -128,6 +128,69 @@ func TestRunWith(t *testing.T) { }) } +func TestCleanup(t *testing.T) { + t.Run("basic", func(t *testing.T) { + ft := &fakeT{} + cleanupsExecuted := 0 + RunWith(&Counter{Count: 2, Wait: time.Millisecond}, ft, func(r *R) { + r.Cleanup(func() { + cleanupsExecuted += 1 + }) + }) + + require.Equal(t, 0, ft.fails) + require.Equal(t, 1, cleanupsExecuted) + }) + t.Run("cleanup-panic-recovery", func(t *testing.T) { + ft := &fakeT{} + cleanupsExecuted := 0 + RunWith(&Counter{Count: 2, Wait: time.Millisecond}, ft, func(r *R) { + r.Cleanup(func() { + cleanupsExecuted += 1 + }) + + r.Cleanup(func() { + cleanupsExecuted += 1 + panic(fmt.Errorf("fake test error")) + }) + + r.Cleanup(func() { + cleanupsExecuted += 1 + }) + + // test is successful but should fail due to the cleanup panicing + }) + + require.Equal(t, 3, cleanupsExecuted) + require.Equal(t, 1, ft.fails) + require.Contains(t, ft.out[0], "fake test error") + }) + + t.Run("cleanup-per-retry", func(t *testing.T) { + ft := &fakeT{} + iter := 0 + cleanupsExecuted := 0 + RunWith(&Counter{Count: 3, Wait: time.Millisecond}, ft, func(r *R) { + if cleanupsExecuted != iter { + r.Stop(fmt.Errorf("cleanups not executed between retries")) + return + } + iter += 1 + + r.Cleanup(func() { + cleanupsExecuted += 1 + }) + + r.FailNow() + }) + + require.Equal(t, 3, cleanupsExecuted) + // ensure that r.Stop hadn't been called. If it was then we would + // have log output + require.Len(t, ft.out, 0) + }) +} + type fakeT struct { fails int out []string diff --git a/sdk/testutil/retry/timer.go b/sdk/testutil/retry/timer.go new file mode 100644 index 000000000000..a26593ddd72e --- /dev/null +++ b/sdk/testutil/retry/timer.go @@ -0,0 +1,37 @@ +package retry + +import "time" + +// TwoSeconds repeats an operation for two seconds and waits 25ms in between. +func TwoSeconds() *Timer { + return &Timer{Timeout: 2 * time.Second, Wait: 25 * time.Millisecond} +} + +// ThreeTimes repeats an operation three times and waits 25ms in between. +func ThreeTimes() *Counter { + return &Counter{Count: 3, Wait: 25 * time.Millisecond} +} + +// Timer repeats an operation for a given amount +// of time and waits between subsequent operations. +type Timer struct { + Timeout time.Duration + Wait time.Duration + + // stop is the timeout deadline. + // TODO: Next()? + // Set on the first invocation of Next(). 
+ stop time.Time +} + +func (r *Timer) Continue() bool { + if r.stop.IsZero() { + r.stop = time.Now().Add(r.Timeout) + return true + } + if time.Now().After(r.stop) { + return false + } + time.Sleep(r.Wait) + return true +} diff --git a/test/integration/consul-container/go.mod b/test/integration/consul-container/go.mod index 27d1357bd1df..b1d092af136b 100644 --- a/test/integration/consul-container/go.mod +++ b/test/integration/consul-container/go.mod @@ -7,9 +7,11 @@ require ( github.com/avast/retry-go v3.0.0+incompatible github.com/docker/docker v23.0.6+incompatible github.com/docker/go-connections v0.4.0 - github.com/hashicorp/consul/api v1.20.0 - github.com/hashicorp/consul/envoyextensions v0.1.2 - github.com/hashicorp/consul/sdk v0.13.1 + github.com/hashicorp/consul v0.0.0-00010101000000-000000000000 + github.com/hashicorp/consul/api v1.22.0-rc1 + github.com/hashicorp/consul/envoyextensions v0.3.0-rc1 + github.com/hashicorp/consul/proto-public v0.4.0-rc1 + github.com/hashicorp/consul/sdk v0.14.0-rc1 github.com/hashicorp/go-cleanhttp v0.5.2 github.com/hashicorp/go-multierror v1.1.1 github.com/hashicorp/go-uuid v1.0.3 @@ -34,6 +36,7 @@ require ( github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect github.com/Microsoft/go-winio v0.6.1 // indirect github.com/armon/go-metrics v0.4.1 // indirect + github.com/armon/go-radix v1.0.0 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect github.com/cncf/xds/go v0.0.0-20230310173818-32f1caf87195 // indirect github.com/containerd/containerd v1.7.1 // indirect @@ -47,6 +50,7 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/btree v1.0.1 // indirect + github.com/google/go-cmp v0.5.9 // indirect github.com/google/uuid v1.3.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-hclog v1.5.0 // indirect @@ -55,6 +59,7 @@ require ( github.com/hashicorp/go-rootcerts v1.0.2 // indirect github.com/hashicorp/go-sockaddr v1.0.2 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect + github.com/hashicorp/hcl v1.0.0 // indirect github.com/hashicorp/memberlist v0.5.0 // indirect github.com/imdario/mergo v0.3.15 // indirect github.com/itchyny/timefmt-go v0.1.4 // indirect @@ -70,28 +75,29 @@ require ( github.com/moby/sys/sequential v0.5.0 // indirect github.com/moby/term v0.5.0 // indirect github.com/morikuni/aec v1.0.0 // indirect + github.com/oklog/ulid/v2 v2.1.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.0-rc3 // indirect github.com/opencontainers/runc v1.1.7 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect github.com/sirupsen/logrus v1.9.0 // indirect + github.com/stretchr/objx v0.5.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/net v0.10.0 // indirect + golang.org/x/sync v0.2.0 // indirect golang.org/x/sys v0.8.0 // indirect golang.org/x/text v0.9.0 // indirect - golang.org/x/time v0.3.0 // indirect golang.org/x/tools v0.9.1 // indirect google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 // indirect google.golang.org/protobuf v1.30.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - gotest.tools/v3 v3.4.0 // indirect ) -replace github.com/hashicorp/consul/api => ../../../api - -replace github.com/hashicorp/consul/sdk => ../../../sdk - -replace github.com/hashicorp/consul => ../../.. 
- -replace github.com/hashicorp/consul/envoyextensions => ../../../envoyextensions +replace ( + github.com/hashicorp/consul => ../../.. + github.com/hashicorp/consul/api => ../../../api + github.com/hashicorp/consul/envoyextensions => ../../../envoyextensions + github.com/hashicorp/consul/proto-public => ../../../proto-public + github.com/hashicorp/consul/sdk => ../../../sdk +) diff --git a/test/integration/consul-container/go.sum b/test/integration/consul-container/go.sum index 44574893cd96..c4bfd7944078 100644 --- a/test/integration/consul-container/go.sum +++ b/test/integration/consul-container/go.sum @@ -13,6 +13,7 @@ fortio.org/version v1.0.2/go.mod h1:2JQp9Ax+tm6QKiGuzR5nJY63kFeANcgrZ0osoQFDVm0= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/DataDog/datadog-go v3.2.0+incompatible h1:qSG2N4FghB1He/r2mFrWKCaL7dXCilEuNEeAn20fdD4= github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= @@ -26,18 +27,25 @@ github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmV github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= github.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= +github.com/armon/go-radix v1.0.0 h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI= github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0= github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY= +github.com/aws/aws-sdk-go v1.42.34 h1:fqGAiKmCSRY1rEa4G9VqgkKKbNmLKYq5dKmLtQkvYi8= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible h1:C29Ae4G5GtYyYMm1aztcyj/J5ckgJm2zwdDajFbx1NY= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= 
+github.com/circonus-labs/circonusllhist v0.1.3 h1:TJH+oke8D16535+jHExHj4nQvzlZrj7ug5D7I/orNUA= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/xds/go v0.0.0-20230310173818-32f1caf87195 h1:58f1tJ1ra+zFINPlwLWvQsR9CzAKt2e+EWV2yX9oXQ4= @@ -95,12 +103,16 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= +github.com/hashicorp/consul-net-rpc v0.0.0-20221205195236-156cfab66a69 h1:wzWurXrxfSyG1PHskIZlfuXlTSCj1Tsyatp9DtaasuY= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-bexpr v0.1.2 h1:ijMXI4qERbzxbCnkxmfUtwMyjrrk3y+Vt0MxojNCbBs= github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= @@ -109,6 +121,7 @@ github.com/hashicorp/go-hclog v1.5.0/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVH github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-immutable-radix v1.3.1 h1:DKHmCUm2hRBK510BaiZlwvpD40f8bJFeZnpfm2KLowc= github.com/hashicorp/go-immutable-radix v1.3.1/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= +github.com/hashicorp/go-memdb v1.3.4 h1:XSL3NR682X/cVk2IeV0d70N4DZ9ljI885xAEU8IoK3c= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI= github.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= @@ -117,11 +130,13 @@ github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+ github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= +github.com/hashicorp/go-retryablehttp v0.6.7 h1:8/CAEZt/+F7kR7GevNHulKkUjLht3CPmn7egmhieNKo= github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc= github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= github.com/hashicorp/go-sockaddr v1.0.2 h1:ztczhD1jLxIRjVejw8gFomI1BQZOe2WoVOu0SyteCQc= 
github.com/hashicorp/go-sockaddr v1.0.2/go.mod h1:rB4wwRAUzs07qva3c5SdrY/NEtAUjGlgmH/UkBUC97A= +github.com/hashicorp/go-syslog v1.0.0 h1:KaodqZuhUoZereWVIYmpUgZysurB1kBLX2j0MwMrUAE= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= @@ -132,12 +147,18 @@ github.com/hashicorp/go-version v1.2.1/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09 github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hashicorp/hil v0.0.0-20200423225030-a18a1cd20038 h1:n9J0rwVWXDpNd5iZnwY7w4WZyq53/rROeI7OVvLW8Ok= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.4/go.mod h1:mtBihi+LeNXGtG8L9dX59gAEa12BDtBQSp4v/YAJqrc= github.com/hashicorp/memberlist v0.5.0 h1:EtYPN8DpAURiapus508I4n9CzHs2W+8NZGbmmR/prTM= github.com/hashicorp/memberlist v0.5.0/go.mod h1:yvyXLpo0QaGE59Y7hDTsTzDD25JYBZ4mHgHUZ8lrOI0= +github.com/hashicorp/raft v1.5.0 h1:uNs9EfJ4FwiArZRxxfd/dQ5d33nV31/CdCHArH89hT8= +github.com/hashicorp/raft-autopilot v0.1.6 h1:C1q3RNF2FfXNZfHWbvVAu0QixaQK8K5pX4O5lh+9z4I= github.com/hashicorp/serf v0.10.1 h1:Z1H2J60yRKvfDYAOZLd2MU0ND4AH/WDz7xYHDWQsIPY= github.com/hashicorp/serf v0.10.1/go.mod h1:yL2t6BqATOLGc5HF7qbFkTfXoPIY0WZdWHfEvMqbG+4= +github.com/hashicorp/yamux v0.0.0-20211028200310-0bc27b27de87 h1:xixZ2bWeofWV68J+x6AzmKuVM/JWCQwkWm6GW/MUR6I= github.com/imdario/mergo v0.3.15 h1:M8XP7IuFNsqUx6VPK2P9OSmsYsI/YFaGil0uD21V3dM= github.com/imdario/mergo v0.3.15/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/itchyny/gojq v0.12.9 h1:biKpbKwMxVYhCU1d6mR7qMr3f0Hn9F5k5YykCVb3gmM= @@ -176,6 +197,7 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/ github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.41 h1:WMszZWJG0XmzbK9FEmzH2TVcqYzFesusSIB41b8KHxY= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= @@ -185,7 +207,9 @@ github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa1 github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-testing-interface v1.14.0 h1:/x0XQ6h+3U3nAyk1yx+bHPURrKa9sVVvYbuqZ7pIAtI= github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= +github.com/mitchellh/hashstructure 
v0.0.0-20170609045927-2bca23e0e452 h1:hOY53G+kBFhbYFpRVxHl5eS7laP6B1+Cq+Z9Dry1iMU= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= @@ -204,6 +228,8 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/oklog/ulid/v2 v2.1.0 h1:+9lhoxAP56we25tyYETBBY1YLA2SaoLvUFgrP2miPJU= +github.com/oklog/ulid/v2 v2.1.0/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0-rc3 h1:fzg1mXZFj8YdPeNkRXMg+zb88BFV0Ys52cJydRwBkb8= @@ -216,6 +242,7 @@ github.com/otiai10/mint v1.5.1 h1:XaPLeE+9vGbuyEHem1JNk3bYc7KKqyI/na0/mLd/Kks= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= +github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -228,15 +255,19 @@ github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSg github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= +github.com/prometheus/client_golang v1.14.0 h1:nJdhIvne2eSX/XRAFV9PcvFFRbrjbcTUj0VP62TMhnw= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= +github.com/prometheus/common v0.37.0 h1:ccBbHCgIiT9uSoFY0vX8H3zsNR5eLt17/RQLUvn8pXE= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs 
v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= +github.com/prometheus/procfs v0.8.0 h1:ODq8ZFEaYeCaZOJlZZdJA2AbQR98dSHSM1KW/You5mo= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= @@ -248,18 +279,23 @@ github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0 github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY= github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/teris-io/shortid v0.0.0-20220617161101-71ec9f2aa569 h1:xzABM9let0HLLqFypcxvLmlvEciCHL7+Lv+4vwZqecI= github.com/teris-io/shortid v0.0.0-20220617161101-71ec9f2aa569/go.mod h1:2Ly+NIftZN4de9zRmENdYbvPQeaVIYKWpLFStLFEBgI= github.com/testcontainers/testcontainers-go v0.20.1 h1:mK15UPJ8c5P+NsQKmkqzs/jMdJt6JMs5vlw2y4j92c0= github.com/testcontainers/testcontainers-go v0.20.1/go.mod h1:zb+NOlCQBkZ7RQp4QI+YMIHyO2CQ/qsXzNF5eLJ24SY= +github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926 h1:G3dpKMzFDjgEh2q1Z7zUUtKa8ViPtH+ocF0bE0g00O8= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -268,6 +304,7 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392/go.mod h1:/lpIB1dKB+9EgE3H3cr1v9wB50oz8l4C4h62xy7jSTY= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.1.0 h1:MDRAIl0xIo9Io2xV565hzXHw3zVseKrJKodhohM5CjU= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20230321023759-10a507213a29 h1:ooxPy7fPvB4kwsA2h+iBNHkAbp/4JxTSwCmvdjEYmug= golang.org/x/exp v0.0.0-20230321023759-10a507213a29/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= @@ -302,6 
+339,7 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.2.0 h1:PUR+T4wwASmuSTYdKjYHI5TD22Wy5ogLU5qZCOLxBrI= +golang.org/x/sync v0.2.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -318,7 +356,6 @@ golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210303074136-134d130e1a04/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -338,7 +375,6 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= -golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -348,7 +384,6 @@ golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -382,6 +417,5 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o= -gotest.tools/v3 v3.4.0/go.mod h1:CtbdzLSsqVhDgMtKsx03ird5YTGB3ar27v0u/yKBW5g= 
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/test/integration/consul-container/libs/cluster/agent.go b/test/integration/consul-container/libs/cluster/agent.go index b360ee444fe2..c6e4a2a002ea 100644 --- a/test/integration/consul-container/libs/cluster/agent.go +++ b/test/integration/consul-container/libs/cluster/agent.go @@ -8,6 +8,7 @@ import ( "io" "github.com/testcontainers/testcontainers-go" + "google.golang.org/grpc" "github.com/hashicorp/consul/api" @@ -36,6 +37,7 @@ type Agent interface { Upgrade(ctx context.Context, config Config) error Exec(ctx context.Context, cmd []string) (string, error) DataDir() string + GetGRPCConn() *grpc.ClientConn } // Config is a set of configurations required to create a Agent diff --git a/test/integration/consul-container/libs/cluster/container.go b/test/integration/consul-container/libs/cluster/container.go index 7ed88b0d824f..a371404bafe0 100644 --- a/test/integration/consul-container/libs/cluster/container.go +++ b/test/integration/consul-container/libs/cluster/container.go @@ -8,6 +8,7 @@ import ( "encoding/json" "fmt" "io" + "net/url" "os" "path/filepath" "strconv" @@ -15,11 +16,14 @@ import ( goretry "github.com/avast/retry-go" dockercontainer "github.com/docker/docker/api/types/container" + "github.com/docker/go-connections/nat" "github.com/hashicorp/go-multierror" "github.com/otiai10/copy" "github.com/pkg/errors" "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/wait" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" "github.com/hashicorp/consul/api" @@ -58,6 +62,8 @@ type consulContainerNode struct { clientCACertFile string ip string + grpcConn *grpc.ClientConn + nextAdminPortOffset int nextConnectPortOffset int @@ -172,7 +178,8 @@ func NewConsulContainer(ctx context.Context, config Config, cluster *Cluster, po clientAddr string clientCACertFile string - info AgentInfo + info AgentInfo + grpcConn *grpc.ClientConn ) debugURI := "" if utils.Debug { @@ -236,6 +243,28 @@ func NewConsulContainer(ctx context.Context, config Config, cluster *Cluster, po info.CACertFile = clientCACertFile } + // TODO: Support gRPC+TLS port. 
+ if pc.Ports.GRPC > 0 { + port, err := nat.NewPort("tcp", strconv.Itoa(pc.Ports.GRPC)) + if err != nil { + return nil, fmt.Errorf("failed to parse gRPC TLS port: %w", err) + } + endpoint, err := podContainer.PortEndpoint(ctx, port, "tcp") + if err != nil { + return nil, fmt.Errorf("failed to get gRPC TLS endpoint: %w", err) + } + url, err := url.Parse(endpoint) + if err != nil { + return nil, fmt.Errorf("failed to parse gRPC endpoint URL: %w", err) + } + conn, err := grpc.Dial(url.Host, grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + return nil, fmt.Errorf("failed to dial gRPC connection: %w", err) + } + deferClean.Add(func() { _ = conn.Close() }) + grpcConn = conn + } + ip, err := podContainer.ContainerIP(ctx) if err != nil { return nil, err @@ -282,6 +311,7 @@ func NewConsulContainer(ctx context.Context, config Config, cluster *Cluster, po name: name, ip: ip, info: info, + grpcConn: grpcConn, } if httpPort > 0 || httpsPort > 0 { @@ -376,6 +406,10 @@ func (c *consulContainerNode) GetClient() *api.Client { return c.client } +func (c *consulContainerNode) GetGRPCConn() *grpc.ClientConn { + return c.grpcConn +} + // NewClient returns an API client by making a new one based on the provided token // - updateDefault: if true update the default client func (c *consulContainerNode) NewClient(token string, updateDefault bool) (*api.Client, error) { @@ -508,6 +542,10 @@ func (c *consulContainerNode) terminate(retainPod bool, skipFuncs bool) error { continue } } + + // if the pod is retained and therefore the IP then the grpc conn + // should handle reconnecting so there is no reason to close it. + c.closeGRPC() } var merr error @@ -529,6 +567,16 @@ func (c *consulContainerNode) terminate(retainPod bool, skipFuncs bool) error { return merr } +func (c *consulContainerNode) closeGRPC() error { + if c.grpcConn != nil { + if err := c.grpcConn.Close(); err != nil { + return err + } + c.grpcConn = nil + } + return nil +} + func (c *consulContainerNode) DataDir() string { return c.dataDir } @@ -565,6 +613,7 @@ func newContainerRequest(config Config, opts containerOpts, ports ...int) (podRe ExposedPorts: []string{ "8500/tcp", // Consul HTTP API "8501/tcp", // Consul HTTPs API + "8502/tcp", // Consul gRPC API "8443/tcp", // Envoy Gateway Listener diff --git a/test/integration/consul-container/libs/cluster/network.go b/test/integration/consul-container/libs/cluster/network.go index e0ee10f4e35f..6e170b3dabc1 100644 --- a/test/integration/consul-container/libs/cluster/network.go +++ b/test/integration/consul-container/libs/cluster/network.go @@ -20,6 +20,7 @@ func createNetwork(t TestingT, name string) (testcontainers.Network, error) { Name: name, Attachable: true, CheckDuplicate: true, + SkipReaper: isRYUKDisabled(), }, } first := true diff --git a/test/integration/consul-container/libs/utils/docker.go b/test/integration/consul-container/libs/utils/docker.go index 109205855cd5..6be46d91aee1 100644 --- a/test/integration/consul-container/libs/utils/docker.go +++ b/test/integration/consul-container/libs/utils/docker.go @@ -9,6 +9,9 @@ import ( "io" "os" "os/exec" + "strings" + + "github.com/hashicorp/go-version" ) // DockerExec simply shell out to the docker CLI binary on your host. @@ -16,6 +19,18 @@ func DockerExec(args []string, stdout io.Writer) error { return cmdExec("docker", "docker", args, stdout, "") } +// DockerImageVersion retrieves the value of the org.opencontainers.image.version label from the specified image. 
+func DockerImageVersion(imageName string) (*version.Version, error) { + var b strings.Builder + err := cmdExec("docker", "docker", []string{"image", "inspect", "--format", `{{index .Config.Labels "org.opencontainers.image.version"}}`, imageName}, &b, "") + if err != nil { + return nil, err + } + output := b.String() + + return version.NewVersion(strings.TrimSpace(output)) +} + func cmdExec(name, binary string, args []string, stdout io.Writer, dir string) error { if binary == "" { panic("binary named " + name + " was not detected") diff --git a/test/integration/consul-container/test/catalog/catalog_test.go b/test/integration/consul-container/test/catalog/catalog_test.go new file mode 100644 index 000000000000..8520e5a647e8 --- /dev/null +++ b/test/integration/consul-container/test/catalog/catalog_test.go @@ -0,0 +1,35 @@ +package catalog + +import ( + "testing" + + "github.com/stretchr/testify/require" + + libcluster "github.com/hashicorp/consul/test/integration/consul-container/libs/cluster" + libtopology "github.com/hashicorp/consul/test/integration/consul-container/libs/topology" + + "github.com/hashicorp/consul/internal/catalog/catalogtest" + pbresource "github.com/hashicorp/consul/proto-public/pbresource" +) + +func TestCatalog(t *testing.T) { + t.Parallel() + + cluster, _, _ := libtopology.NewCluster(t, &libtopology.ClusterConfig{ + NumServers: 3, + BuildOpts: &libcluster.BuildOptions{Datacenter: "dc1"}, + Cmd: `-hcl=experiments=["resource-apis"]`, + }) + + followers, err := cluster.Followers() + require.NoError(t, err) + client := pbresource.NewResourceServiceClient(followers[0].GetGRPCConn()) + + t.Run("one-shot", func(t *testing.T) { + catalogtest.RunCatalogV1Alpha1IntegrationTest(t, client) + }) + + t.Run("lifecycle", func(t *testing.T) { + catalogtest.RunCatalogV1Alpha1LifecycleIntegrationTest(t, client) + }) +} diff --git a/test/integration/consul-container/test/upgrade/catalog/catalog_test.go b/test/integration/consul-container/test/upgrade/catalog/catalog_test.go new file mode 100644 index 000000000000..ef2de3edeb24 --- /dev/null +++ b/test/integration/consul-container/test/upgrade/catalog/catalog_test.go @@ -0,0 +1,87 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package catalog + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/hashicorp/consul/internal/catalog/catalogtest" + "github.com/hashicorp/consul/proto-public/pbresource" + libcluster "github.com/hashicorp/consul/test/integration/consul-container/libs/cluster" + "github.com/hashicorp/consul/test/integration/consul-container/libs/topology" + "github.com/hashicorp/consul/test/integration/consul-container/libs/utils" + "github.com/hashicorp/go-version" +) + +var minCatalogResourceVersion = version.Must(version.NewVersion("v1.16.0")) + +const ( + versionUndetermined = ` +Cannot determine the actual version the starting image represents. +Scrutinze test failures to ensure that the starting version should +actually be able to be used for creating the initial data set. 
+ ` +) + +func maybeSkipUpgradeTest(t *testing.T, minVersion *version.Version) { + t.Helper() + + image := utils.DockerImage(utils.GetLatestImageName(), utils.LatestVersion) + latestVersion, err := utils.DockerImageVersion(image) + + if latestVersion != nil && latestVersion.LessThan(minVersion) { + t.Skipf("Upgrade test isn't applicable with version %q as the starting version", latestVersion.String()) + } + + if err != nil || latestVersion == nil { + t.Log(versionUndetermined) + } +} + +// Test upgrade a cluster of latest version to the target version and ensure that the catalog still +// functions properly. Note +func TestCatalogUpgrade(t *testing.T) { + maybeSkipUpgradeTest(t, minCatalogResourceVersion) + t.Parallel() + + const numServers = 1 + buildOpts := &libcluster.BuildOptions{ + ConsulImageName: utils.GetLatestImageName(), + ConsulVersion: utils.LatestVersion, + Datacenter: "dc1", + InjectAutoEncryption: true, + } + + cluster, _, _ := topology.NewCluster(t, &topology.ClusterConfig{ + NumServers: 1, + BuildOpts: buildOpts, + ApplyDefaultProxySettings: true, + Cmd: `-hcl=experiments=["resource-apis"]`, + }) + + client := cluster.APIClient(0) + + libcluster.WaitForLeader(t, cluster, client) + libcluster.WaitForMembers(t, client, numServers) + + leader, err := cluster.Leader() + require.NoError(t, err) + rscClient := pbresource.NewResourceServiceClient(leader.GetGRPCConn()) + + // Initialize some data + catalogtest.PublishCatalogV1Alpha1IntegrationTestData(t, rscClient) + + // upgrade the cluster to the Target version + t.Logf("initiating standard upgrade to version=%q", utils.TargetVersion) + err = cluster.StandardUpgrade(t, context.Background(), utils.GetTargetImageName(), utils.TargetVersion) + + require.NoError(t, err) + libcluster.WaitForLeader(t, cluster, client) + libcluster.WaitForMembers(t, client, numServers) + + catalogtest.VerifyCatalogV1Alpha1IntegrationTestResults(t, rscClient) +} diff --git a/website/content/commands/license.mdx b/website/content/commands/license.mdx index 50cee37544ad..762e66df43d8 100644 --- a/website/content/commands/license.mdx +++ b/website/content/commands/license.mdx @@ -167,7 +167,8 @@ Licensed Features: Corresponding HTTP API Endpoint: [\[GET\] /v1/operator/license](/consul/api-docs/operator/license#getting-the-consul-license) -This command gets the Consul Enterprise license. +This command gets the Consul Enterprise license. If the leader hasn't been updated with the newer license, the followers +will display the outdated license in their GET output. The table below shows this command's [required ACLs](/consul/api-docs/api-structure#authentication). Configuration of [blocking queries](/consul/api-docs/features/blocking) and [agent caching](/consul/api-docs/features/caching) diff --git a/website/content/commands/operator/raft.mdx b/website/content/commands/operator/raft.mdx index 25dea30ba62f..1b2464cb8b29 100644 --- a/website/content/commands/operator/raft.mdx +++ b/website/content/commands/operator/raft.mdx @@ -66,6 +66,10 @@ Raft configuration. `Voter` is "true" or "false", indicating if the server has a vote in the Raft configuration. +`Commit Index` is the last log index the server has a record of in its Raft log. + +`Trails Leader By` is the number of commits a follower trails the leader by. + #### Command Options - `-stale` - Enables non-leader servers to provide cluster state information. @@ -109,7 +113,7 @@ The return code will indicate success or failure. 
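As a standalone sketch of the version gate used by `maybeSkipUpgradeTest` above — the comparison leans on `hashicorp/go-version`, and the hard-coded floor and starting versions here are illustrative placeholders rather than values taken from this change (in the test itself the starting version comes from `utils.DockerImageVersion`):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/go-version"
)

func main() {
	// Floor version for the feature under test (placeholder value).
	minVersion := version.Must(version.NewVersion("v1.16.0"))

	// Placeholder for the value read from the image's
	// org.opencontainers.image.version label.
	starting, err := version.NewVersion("1.15.3")
	if err != nil {
		fmt.Println("cannot determine the starting image version:", err)
		return
	}

	if starting.LessThan(minVersion) {
		fmt.Printf("skipping: starting version %s is older than %s\n", starting, minVersion)
		return
	}
	fmt.Printf("running upgrade test from %s\n", starting)
}
```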
Corresponding HTTP API Endpoint: [\[POST\] /v1/operator/raft/transfer-leader](/consul/api-docs/operator/raft#transfer-raft-leadership) -This command transfers Raft leadership to another server agent. If an `id` is provided, Consul transfers leadership to the server with that id. +This command transfers Raft leadership to another server agent. If an `id` is provided, Consul transfers leadership to the server with that id. Use this command to change leadership without restarting the leader node, which maintains quorum and workload capacity. diff --git a/website/content/docs/connect/config-entries/jwt-provider.mdx b/website/content/docs/connect/config-entries/jwt-provider.mdx index 5fc75da4d300..b31427af4fff 100644 --- a/website/content/docs/connect/config-entries/jwt-provider.mdx +++ b/website/content/docs/connect/config-entries/jwt-provider.mdx @@ -108,53 +108,53 @@ Kind = "jwt-provider" # required Name = "" # required Issuer = "" # required JSONWebKeySet = { # required - Local = { # cannot specify with JWKS{}.Remote - JWKS = "" # cannot specify with JWKS{}.Local{}.Filename - Filename = "" # cannot specify with JWKS{}.Local{}.String - } + Local = { # cannot specify with JWKS{}.Remote + JWKS = "" # cannot specify with JWKS{}.Local{}.Filename + Filename = "" # cannot specify with JWKS{}.Local{}.String + } } JSONWebKeySet = { - Remote = { # cannot specify with JWKS{}.Local - URI = "" - RequestTimeoutMs = 1500 - CacheDuration = "5m" - FetchAsynchronously = false - RetryPolicy = { - NumRetries = 0 - RetryPolicyBackoff = { - BaseInterval = "1s" - MaxInterval = "10s" + Remote = { # cannot specify with JWKS{}.Local + URI = "" + RequestTimeoutMs = 1500 + CacheDuration = "5m" + FetchAsynchronously = false + RetryPolicy = { + NumRetries = 0 + RetryPolicyBackoff = { + BaseInterval = "1s" + MaxInterval = "10s" + } + } + } } - } - } -} Audiences = [""] Locations = [ - { - Header = { - Name = "" - ValuePrefix = "" - Forward = false - } - }, - { - QueryParam = { - Name = "" - } - }, - { - Cookie = { - Name = "" + { + Header = { + Name = "" + ValuePrefix = "" + Forward = false + } + }, + { + QueryParam = { + Name = "" + } + }, + { + Cookie = { + Name = "" + } } - } ] Forwarding = { - HeaderName = "" - PadForwardPayloadHeader = false + HeaderName = "" + PadForwardPayloadHeader = false } ClockSkewSeconds = 30 CacheConfig = { - Size = 0 + Size = 0 } ``` @@ -164,58 +164,58 @@ CacheConfig = { ```json { - "Kind": "jwt-provider", // required - "Name": "", // required - "Issuer": "", // required - "JSONWebKeySet": { // required - "Local": { // cannot specify with JWKS.Remote - "JWKS": "", // cannot specify with JWKS.Local.Filename - "Filename": "" // cannot specify with JWKS.Local.String +"Kind": "jwt-provider", // required +"Name": "", // required +"Issuer": "", // required +"JSONWebKeySet": { // required + "Local": { // cannot specify with JWKS.Remote + "JWKS": "", // cannot specify with JWKS.Local.Filename + "Filename": "" // cannot specify with JWKS.Local.String } - }, - "JSONWebKeySet": { - "Remote": { // cannot specify with JWKS.Local - "URI": "", - "RequestTimeoutMs": "1500", - "CacheDuration": "5m", - "FetchAsynchronously": "false", - "RetryPolicy": { - "NumRetries": "0", - "RetryPolicyBackOff": { - "BaseInterval": "1s", - "MaxInterval": "10s" +}, +"JSONWebKeySet": { + "Remote": { // cannot specify with JWKS.Local + "URI": "", + "RequestTimeoutMs": "1500", + "CacheDuration": "5m", + "FetchAsynchronously": "false", + "RetryPolicy": { + "NumRetries": "0", + "RetryPolicyBackOff": { + "BaseInterval": "1s", + 
"MaxInterval": "10s" + } + } } - } - } - }, - "Audiences": [""], - "Locations": [ +}, +"Audiences": [""], +"Locations": [ { - "Header": { - "Name": "", - "ValuePrefix": "", - "Forward": "false" - } + "Header": { + "Name": "", + "ValuePrefix": "", + "Forward": "false" + } }, { - "QueryParam": { - "Name":"", - } + "QueryParam": { + "Name":"", + } }, { - "Cookie": { - "Name": "" - } + "Cookie": { + "Name": "" + } } - ], - "Forwarding": { - "HeaderName": "", - "PadForwardPayloadHeader": "false" - }, - "ClockSkewSeconds": "30", - "CacheConfig": { +], +"Forwarding": { + "HeaderName": "", + "PadForwardPayloadHeader": "false" +}, +"ClockSkewSeconds": "30", +"CacheConfig": { "Size": "0" - } +} } ``` @@ -1014,7 +1014,7 @@ metadata: name: okta spec: issuer: okta - jsonWebKeySet: + jsonwebkeyset: remote: uri: https://.okta.com/oauth2/default/v1/keys cacheDuration: 30m diff --git a/website/content/docs/connect/config-entries/service-intentions.mdx b/website/content/docs/connect/config-entries/service-intentions.mdx index f8afda6e41da..180e3aaabd96 100644 --- a/website/content/docs/connect/config-entries/service-intentions.mdx +++ b/website/content/docs/connect/config-entries/service-intentions.mdx @@ -1506,64 +1506,64 @@ Sources = [ ``` ```yaml -apiVersion: consul.hashicorp.com/v1alpha1 -kind: ServiceIntentions -metadata: - name: backend -spec: - sources: - name: frontend - permissions: + apiVersion: consul.hashicorp.com/v1alpha1 + kind: ServiceIntentions + metadata: + name: backend + spec: + sources: + name: frontend + permissions: + http: + pathExact: /admin + jwt: + providers: + name: okta + verifyClaims: + path: + - perms + - role + value: admin + action: allow http: - pathExact: /admin - jwt: - providers: - name: okta - verifyClaims: - path: - - perms - - role - value: admin - action: allow - http: - pathPrefix: / + pathPrefix: / ``` ```json { - "Kind": "service-intentions", - "Name": "backend", - "Sources": [ - { - "Name": "frontend", - "Permissions": [ - { - "HTTP": { - "PathExact": "/admin" - }, - "JWT": { - "Providers": [ - { - "Name": "okta", - "VerifyClaims": [ - { - "Path": ["perms", "role"], - "Value": "admin" - } - ] - } - ] - } +"Kind": "service-intentions", +"Name": "backend", +"Sources": [ + { + "Name": "frontend", + "Permissions": [ + { + "HTTP": { + "PathExact": "/admin" }, - { - "Action": "allow", - "HTTP": { - "PathPrefix": "/" - } + "JWT": { + "Providers": [ + { + "Name": "okta", + "VerifyClaims": [ + { + "Path": ["perms", "role"], + "Value": "admin" + } + ] + } + ] } - ] - } - ] + }, + { + "Action": "allow", + "HTTP": { + "PathPrefix": "/" + } + } + ] + } +] } ``` diff --git a/website/content/docs/connect/failover/index.mdx b/website/content/docs/connect/failover/index.mdx index 2d9690644098..a4c51b799731 100644 --- a/website/content/docs/connect/failover/index.mdx +++ b/website/content/docs/connect/failover/index.mdx @@ -21,9 +21,11 @@ The following table compares these strategies in deployments with multiple datac | Failover Strategy | Supports WAN Federation | Supports Cluster Peering | Multi-Datacenter Failover Strength | Multi-Datacenter Usage Scenario | | :---------------: | :---------------------: | :----------------------: | :--------------------------------- | :------------------------------ | | `Failover` stanza | ✅ | ✅ | Enables more granular logic for failover targeting | Configuring failover for a single service or service subset, especially for testing or debugging purposes | -| Prepared query | ✅ | ✅ | Central policies that can automatically target the nearest 
datacenter | WAN-federated deployments where a primary datacenter is configured. Prepared queries are not replicated over peer connections. | +| Prepared query | ✅ | ❌ | Central policies that can automatically target the nearest datacenter | WAN-federated deployments where a primary datacenter is configured. | | Sameness groups | ❌ | ✅ | Group size changes without edits to existing member configurations | Cluster peering deployments with consistently named services and namespaces | +Although cluster peering connections support the [`Failover` field of the prepared query request schema](/consul/api-docs/query#failover) when using Consul's service discovery features to [perform dynamic DNS queries](/consul/docs/services/discovery/dns-dynamic-lookups), they do not support prepared queries for service mesh failover scenarios. + ### Failover configurations for a service mesh with a single datacenter You can implement a service resolver configuration entry and specify a pool of failover service instances that other services can exchange messages with when the primary service becomes unhealthy or unreachable. We recommend adopting this strategy as a minimum baseline when implementing Consul service mesh and layering additional failover strategies to build resilience into your application network. @@ -32,9 +34,9 @@ Refer to the [`Failover` configuration ](/consul/docs/connect/config-entries/ser ### Failover configuration for WAN-federated datacenters -If your network has multiple Consul datacenters that are WAN-federated, you can configure your applications to look for failover services with prepared queries. [Prepared queries](/consul/api-docs/) are configurations that enable you to define complex service discovery lookups. This strategy hinges on the secondary datacenter containing service instances that have the same name and residing in the same namespace as their counterparts in the primary datacenter. +If your network has multiple Consul datacenters that are WAN-federated, you can configure your applications to look for failover services with prepared queries. [Prepared queries](/consul/api-docs/) are configurations that enable you to define complex service discovery lookups. This strategy hinges on the secondary datacenter containing service instances that have the same name and reside in the same namespace as their counterparts in the primary datacenter. -Refer to the [Automate geo-failover with prepared queries tutorial](/consul/tutorials/developer-discovery/automate-geo-failover) for additional information. +Refer to the [Automate geo-failover with prepared queries tutorial](/consul/tutorials/developer-discovery/automate-geo-failover) for additional information. ### Failover configuration for peered clusters and partitions diff --git a/website/content/docs/connect/gateways/index.mdx b/website/content/docs/connect/gateways/index.mdx index 344b63dd0ad2..b333615c4ed0 100644 --- a/website/content/docs/connect/gateways/index.mdx +++ b/website/content/docs/connect/gateways/index.mdx @@ -31,7 +31,7 @@ Mesh gateways enable the following scenarios: - **Service-to-service communication across WAN-federated datacenters**. Refer to [Enabling Service-to-service Traffic Across Datacenters](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-wan-datacenters) for additional information. - **Service-to-service communication across admin partitions**. Since Consul 1.11.0, you can create administrative boundaries for single Consul deployments called "admin partitions".
You can use mesh gateways to facilitate cross-partition communication. Refer to [Enabling Service-to-service Traffic Across Admin Partitions](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-partitions) for additional information. - **Bridge multiple datacenters using Cluster Peering**. Since Consul 1.14.0, mesh gateways can be used to route peering control-plane traffic between peered Consul Servers. See [Mesh Gateways for Peering Control Plane Traffic](/consul/docs/connect/gateways/mesh-gateway/peering-via-mesh-gateways) for more information. -- **Service-to-service communication across peered datacenters**. Refer to [Mesh Gateways between Peered Clusters](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-peers) for more information. +- **Service-to-service communication across peered datacenters**. Refer to [Establish cluster peering connections](/consul/docs/connect/cluster-peering/usage/establish-cluster-peering) for more information. -> **Mesh gateway tutorial**: Follow the [mesh gateway tutorial](/consul/tutorials/developer-mesh/service-mesh-gateways) to learn concepts associated with mesh gateways. diff --git a/website/content/docs/connect/gateways/mesh-gateway/index.mdx b/website/content/docs/connect/gateways/mesh-gateway/index.mdx index 89d64b8d1ebe..bcac5555278b 100644 --- a/website/content/docs/connect/gateways/mesh-gateway/index.mdx +++ b/website/content/docs/connect/gateways/mesh-gateway/index.mdx @@ -18,7 +18,7 @@ Mesh gateways can be used with any of the following Consul configrations for man * [Mesh gateways can be used to route service-to-service traffic between datacenters](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-wan-datacenters) * [Mesh gateways can be used to route all WAN traffic, including from Consul servers](/consul/docs/connect/gateways/mesh-gateway/wan-federation-via-mesh-gateways) 2. Cluster Peering - * [Mesh gateways can be used to route service-to-service traffic between datacenters](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-peers) + * [Mesh gateways can be used to route service-to-service traffic between datacenters](/consul/docs/connect/cluster-peering/usage/establish-cluster-peering) * [Mesh gateways can be used to route control-plane traffic from Consul servers](/consul/docs/connect/gateways/mesh-gateway/peering-via-mesh-gateways) 3. Admin Partitions * [Mesh gateways can be used to route service-to-service traffic between admin partitions in the same Consul datacenter](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-partitions) diff --git a/website/content/docs/connect/gateways/mesh-gateway/peering-via-mesh-gateways.mdx b/website/content/docs/connect/gateways/mesh-gateway/peering-via-mesh-gateways.mdx index b46a18bef2e4..97045649b2ff 100644 --- a/website/content/docs/connect/gateways/mesh-gateway/peering-via-mesh-gateways.mdx +++ b/website/content/docs/connect/gateways/mesh-gateway/peering-via-mesh-gateways.mdx @@ -7,7 +7,7 @@ description: >- # Enabling Peering Control Plane Traffic -In addition to [service-to-service traffic routing](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-peers), +In addition to [service-to-service traffic routing](/consul/docs/connect/cluster-peering/usage/establish-cluster-peering), we recommend routing control plane traffic between cluster peers through mesh gateways to simplfy networking requirements. 
@@ -59,7 +59,7 @@ For Consul Enterprise clusters, mesh gateways must be registered in the "default -In addition to the [ACL Configuration](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-peers#acl-configuration) necessary for service-to-service traffic, mesh gateways that route peering control plane traffic must be granted `peering:read` access to all peerings. +In addition to the [ACL Configuration](/consul/docs/connect/cluster-peering/tech-specs#acl-specifications) necessary for service-to-service traffic, mesh gateways that route peering control plane traffic must be granted `peering:read` access to all peerings. This access allows the mesh gateway to list all peerings in a Consul cluster and generate unique routing per peered datacenter. @@ -80,7 +80,7 @@ peering = "read" -In addition to the [ACL Configuration](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-peers#acl-configuration) necessary for service-to-service traffic, mesh gateways that route peering control plane traffic must be granted `peering:read` access to all peerings in all partitions. +In addition to the [ACL Configuration](/consul/docs/connect/cluster-peering/tech-specs#acl-specifications) necessary for service-to-service traffic, mesh gateways that route peering control plane traffic must be granted `peering:read` access to all peerings in all partitions. This access allows the mesh gateway to list all peerings in a Consul cluster and generate unique routing per peered partition. diff --git a/website/content/docs/connect/intentions/jwt-authorization.mdx b/website/content/docs/connect/intentions/jwt-authorization.mdx index a58bd3af3e19..9a8458054ea2 100644 --- a/website/content/docs/connect/intentions/jwt-authorization.mdx +++ b/website/content/docs/connect/intentions/jwt-authorization.mdx @@ -98,4 +98,4 @@ After you update the service intention, write the configuration to Consul so tha ```shell-session $ consul config write web-intention.hcl -``` \ No newline at end of file +``` diff --git a/website/content/docs/connect/proxies/envoy.mdx b/website/content/docs/connect/proxies/envoy.mdx index e6759113c2c7..bbdda8b82fcd 100644 --- a/website/content/docs/connect/proxies/envoy.mdx +++ b/website/content/docs/connect/proxies/envoy.mdx @@ -39,18 +39,19 @@ Consul supports **four major Envoy releases** at the beginning of each major Con | Consul Version | Compatible Envoy Versions | | ------------------- | -----------------------------------------------------------------------------------| +| 1.16.x | 1.26.2, 1.25.7, 1.24.8, 1.23.10 | | 1.15.x | 1.25.6, 1.24.7, 1.23.9, 1.22.11 | | 1.14.x | 1.24.0, 1.23.1, 1.22.5, 1.21.5 | -| 1.13.x | 1.23.1, 1.22.5, 1.21.5, 1.20.7 | ### Envoy and Consul Dataplane The Consul dataplane component was introduced in Consul v1.14 as a way to manage Envoy proxies without the use of Consul clients. Each new minor version of Consul is released with a new minor version of Consul dataplane, which packages both Envoy and the `consul-dataplane` binary in a single container image. For backwards compatability reasons, each new minor version of Consul will also support the previous minor version of Consul dataplane to allow for seamless upgrades. In addition, each minor version of Consul will support the next minor version of Consul dataplane to allow for extended dataplane support via newer versions of Envoy. 
-| Consul Version | Consul Dataplane Version (Bundled Envoy Version) | -| ------------------- | ------------------------------------------------- | -| 1.15.x | 1.1.x (Envoy 1.25.x), 1.0.x (Envoy 1.24.x) | -| 1.14.x | 1.1.x (Envoy 1.25.x), 1.0.x (Envoy 1.24.x) | +| Consul Version | Default `consul-dataplane` Version | Other compatible `consul-dataplane` Versions | +| ------------------- | ------------------------------------------------------------|----------------------------------------------| +| 1.16.x | 1.2.x (Envoy 1.26.x) | 1.1.x (Envoy 1.25.x) | +| 1.15.x | 1.1.x (Envoy 1.25.x) | 1.2.x (Envoy 1.26.x), 1.0.x (Envoy 1.24.x) | +| 1.14.x | 1.0.x (Envoy 1.24.x) | 1.1.x (Envoy 1.25.x) | ## Getting Started diff --git a/website/content/docs/enterprise/admin-partitions.mdx b/website/content/docs/enterprise/admin-partitions.mdx index 9be5a70d2e7e..023e38ee31e7 100644 --- a/website/content/docs/enterprise/admin-partitions.mdx +++ b/website/content/docs/enterprise/admin-partitions.mdx @@ -67,8 +67,6 @@ You can configure services to be discoverable by downstream services in any part You can use [cluster peering](/consul/docs/connect/cluster-peering/) between two admin partitions to connect clusters owned by different operators. Without Consul Enterprise, cluster peering is limited to the `default` partitions in each datacenter. Enterprise users can [establish cluster peering connections](/consul/docs/connect/cluster-peering/usage/establish-cluster-peering) between any two admin partitions as long as the partitions are in separate datacenters. It is not possible to establish cluster peering connections between two partitions in a single datacenter. -To use mesh gateways with admin partitions and cluster peering, refer to [Mesh Gateways between Peered Clusters](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-peers). - ## Requirements Your Consul configuration must meet the following requirements to use admin partitions. diff --git a/website/content/docs/k8s/upgrade/index.mdx b/website/content/docs/k8s/upgrade/index.mdx index b506e17c11f4..529815df9769 100644 --- a/website/content/docs/k8s/upgrade/index.mdx +++ b/website/content/docs/k8s/upgrade/index.mdx @@ -228,15 +228,13 @@ If you upgrade Consul from a version that uses client agents to a version the us type: OnDelete ``` -1. Add `consul.hashicorp.com/consul-k8s-version: 1.0.0` to the annotations for each pod you upgrade. - 1. Follow our [recommended procedures to upgrade servers](#upgrade-consul-servers) on Kubernetes deployments to upgrade Helm values for the new version of Consul. 1. Run `kubectl rollout restart` to restart your service mesh applications. Restarting service mesh application causes Kubernetes to re-inject them with the webhook for dataplanes. 1. Restart all gateways in your service mesh. -1. Disable client agents in your Helm chart by deleting the `client` stanza or setting `client.enabled` to `false`. +1. Disable client agents in your Helm chart by deleting the `client` stanza or setting `client.enabled` to `false` and running a `consul-k8s` or Helm upgrade. 
## Configuring TLS on an existing cluster diff --git a/website/content/docs/lambda/invoke-from-lambda.mdx b/website/content/docs/lambda/invoke-from-lambda.mdx index 4c29e4b8d9cd..fd0da60776d5 100644 --- a/website/content/docs/lambda/invoke-from-lambda.mdx +++ b/website/content/docs/lambda/invoke-from-lambda.mdx @@ -88,7 +88,7 @@ The mesh gateway must be running and registered to the Lambda function’s Consu - [Mesh Gateways between WAN-Federated Datacenters](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-wan-datacenters) - [Mesh Gateways between Admin Partitions](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-partitions) -- [Mesh Gateways between Peered Clusters](/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-peers) +- [Establish cluster peering connections](/consul/docs/connect/cluster-peering/usage/establish-cluster-peering) - [Connect Services Across Datacenters with Mesh Gateways](/consul/tutorials/developer-mesh/service-mesh-gateways) ## Deploy the Lambda extension layer diff --git a/website/content/docs/upgrading/instructions/general-process.mdx b/website/content/docs/upgrading/instructions/general-process.mdx index 84da7327dc03..0c560f71357a 100644 --- a/website/content/docs/upgrading/instructions/general-process.mdx +++ b/website/content/docs/upgrading/instructions/general-process.mdx @@ -107,13 +107,7 @@ Take note of which agent is the leader. binary with the new one. **3.** The following steps must be done in order on the server agents, leaving the leader -agent for last. First force the server agent to leave the cluster with the following command: - -``` -consul leave -``` - -Then, use a service management system (e.g., systemd, upstart, etc.) to restart the Consul service. If +agent for last. First, use a service management system (e.g., systemd, upstart, etc.) to restart the Consul service. If you are not using a service management system, you must restart the agent manually. To validate that the agent has rejoined the cluster and is in sync with the leader, issue the diff --git a/website/redirects.js b/website/redirects.js index 0468a19c60ca..b9be8b5f2c61 100644 --- a/website/redirects.js +++ b/website/redirects.js @@ -53,4 +53,11 @@ module.exports = [ '/consul/docs/1.16.x/agent/limits/usage/set-global-traffic-rate-limits', permanent: true, }, + { + source: + '/consul/docs/connect/gateways/mesh-gateway/service-to-service-traffic-peers', + destination: + '/consul/docs/connect/cluster-peering/usage/establish-cluster-peering', + permanent: true, + }, ]