From b8a05ec0246091a046afd25db6fc6dbf9429f599 Mon Sep 17 00:00:00 2001 From: Alex Leong Date: Thu, 23 Jul 2020 14:32:50 -0700 Subject: [PATCH] Make service mirror controller per target cluster (#4710) This PR moves the service mirror controller from `linkerd mc install` to `linkerd mc link`, as described in https://github.com/linkerd/rfc/pull/31. For fuller context, please see that RFC. Basic multicluster functionality works here including: * `linkerd mc install` installs the Link CRD but not any service mirror controllers * `linkerd mc link` creates a Link resource and installs a service mirror controller which uses that Link * The service mirror controller creates and manages mirror services, a gateway mirror, and their endpoints. * The `linkerd mc gateways` command lists all linked target clusters, their liveness, and probe latencies. * The `linkerd check` multicluster checks have been updated for the new architecture. Several checks have been rendered obsolete by the new architecture and have been removed. The following are known issues requiring further work: * the service mirror controller uses the existing `mirror.linkerd.io/gateway-name` and `mirror.linkerd.io/gateway-ns` annotations to select which services to mirror. It does not yet support configuring a label selector. 
* an unlink command is needed for removing multicluster links: see https://github.com/linkerd/linkerd2/issues/4707 * an mc uninstall command is needed for uninstalling the multicluster addon: see https://github.com/linkerd/linkerd2/issues/4708 Signed-off-by: Alex Leong Signed-off-by: Eric Solomon --- bin/helm-build | 2 + charts/linkerd2-multicluster-link/.helmignore | 22 + charts/linkerd2-multicluster-link/Chart.yaml | 7 + charts/linkerd2-multicluster-link/README.md | 40 + .../templates/gateway-mirror.yaml | 14 + .../templates/service-mirror.yaml | 37 +- charts/linkerd2-multicluster-link/values.yaml | 9 + charts/linkerd2-multicluster/Chart.yaml | 4 +- .../templates/link-crd.yaml | 22 + charts/linkerd2-multicluster/values.yaml | 9 - cli/cmd/check.go | 5 +- cli/cmd/multicluster.go | 242 +++-- controller/api/public/gateways.go | 39 +- .../cmd/service-mirror/cluster_watcher.go | 756 ++++------------ .../cluster_watcher_mirroring_test.go | 310 +------ .../cluster_watcher_test_util.go | 518 +++-------- .../cmd/service-mirror/config_watcher.go | 161 ---- .../cmd/service-mirror/events_formatting.go | 51 +- controller/cmd/service-mirror/main.go | 184 ++-- controller/cmd/service-mirror/metrics.go | 28 +- .../cmd/service-mirror/probe_manager.go | 271 ------ controller/cmd/service-mirror/probe_worker.go | 20 +- pkg/charts/multicluster/values.go | 22 +- pkg/flags/flags.go | 2 +- pkg/healthcheck/healthcheck.go | 36 +- pkg/healthcheck/healthcheck_multicluster.go | 852 ++++++++---------- pkg/k8s/authz.go | 19 + pkg/k8s/k8s.go | 5 + pkg/multicluster/link.go | 273 ++++++ test/integration/install_test.go | 1 - .../testdata/check.multicluster.golden | 15 +- .../testdata/check.multicluster.proxy.golden | 15 +- testutil/test_helper.go | 3 +- 33 files changed, 1476 insertions(+), 2518 deletions(-) create mode 100644 charts/linkerd2-multicluster-link/.helmignore create mode 100644 charts/linkerd2-multicluster-link/Chart.yaml create mode 100644 charts/linkerd2-multicluster-link/README.md 
create mode 100644 charts/linkerd2-multicluster-link/templates/gateway-mirror.yaml rename charts/{linkerd2-multicluster => linkerd2-multicluster-link}/templates/service-mirror.yaml (59%) create mode 100644 charts/linkerd2-multicluster-link/values.yaml create mode 100644 charts/linkerd2-multicluster/templates/link-crd.yaml delete mode 100644 controller/cmd/service-mirror/config_watcher.go delete mode 100644 controller/cmd/service-mirror/probe_manager.go create mode 100644 pkg/multicluster/link.go diff --git a/bin/helm-build b/bin/helm-build index 4f4b9a477fbab..f4be94e7a26e4 100755 --- a/bin/helm-build +++ b/bin/helm-build @@ -20,6 +20,7 @@ bindir=$( cd "${BASH_SOURCE[0]%/*}" && pwd ) rootdir=$( cd "$bindir"/.. && pwd ) "$bindir"/helm lint "$rootdir"/charts/linkerd2-multicluster +"$bindir"/helm lint "$rootdir"/charts/linkerd2-multicluster-link "$bindir"/helm lint "$rootdir"/charts/partials "$bindir"/helm dep up "$rootdir"/charts/linkerd2-cni "$bindir"/helm lint "$rootdir"/charts/linkerd2-cni @@ -50,6 +51,7 @@ if [ "$1" = package ]; then "$bindir"/helm --version "$version" --app-version "$tag" -d "$rootdir"/target/helm package "$rootdir"/charts/linkerd2 "$bindir"/helm --version "$version" --app-version "$tag" -d "$rootdir"/target/helm package "$rootdir"/charts/linkerd2-cni "$bindir"/helm --version "$version" --app-version "$tag" -d "$rootdir"/target/helm package "$rootdir"/charts/linkerd2-multicluster + "$bindir"/helm --version "$version" --app-version "$tag" -d "$rootdir"/target/helm package "$rootdir"/charts/linkerd2-multicluster-link mv "$rootdir"/target/helm/index-pre.yaml "$rootdir"/target/helm/index-pre-"$version".yaml "$bindir"/helm repo index --url "https://helm.linkerd.io/$repo/" --merge "$rootdir"/target/helm/index-pre-"$version".yaml "$rootdir"/target/helm diff --git a/charts/linkerd2-multicluster-link/.helmignore b/charts/linkerd2-multicluster-link/.helmignore new file mode 100644 index 0000000000000..79c90a8063116 --- /dev/null +++ 
b/charts/linkerd2-multicluster-link/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +OWNERS +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/charts/linkerd2-multicluster-link/Chart.yaml b/charts/linkerd2-multicluster-link/Chart.yaml new file mode 100644 index 0000000000000..c6ec380a9b727 --- /dev/null +++ b/charts/linkerd2-multicluster-link/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +appVersion: edge-XX.X.X +description: A helm chart containing the resources to enable mirroring of services from a remote cluster +kubeVersion: ">=1.13.0-0" +icon: https://linkerd.io/images/logo-only-200h.png +name: "linkerd2-multicluster-link" +version: 0.1.0 diff --git a/charts/linkerd2-multicluster-link/README.md b/charts/linkerd2-multicluster-link/README.md new file mode 100644 index 0000000000000..3500c5285444a --- /dev/null +++ b/charts/linkerd2-multicluster-link/README.md @@ -0,0 +1,40 @@ + +# Linkerd2-multicluster-link Helm Chart + +Linkerd is a *service mesh*, designed to give platform-wide observability, +reliability, and security without requiring configuration or code changes. This +chart provides the components needed to enable communication between clusters. + +## Configuration + +The following table lists the configurable parameters of the +linkerd2-multicluster-link chart and their default values. + +| Parameter | Description | Default | +|---------------------------------|---------------------------------------------------------------------------------------------|----------------------------------------------| +|`controllerComponentLabel` | Control plane label. 
Do not edit |`linkerd.io/control-plane-component` | +|`controllerImage` | Docker image for the Service mirror component (uses the Linkerd controller image) |`gcr.io/linkerd-io/controller` | +|`controllerImageVersion` | Tag for the Service Mirror container Docker image |`latest version` | +|`createdByAnnotation` | Annotation label for the proxy create. Do not edit. |`linkerd.io/created-by` | +|`gateway` | If the gateway component should be installed |`true` | +|`gatewayLocalProbePath` | The path that will be used by the local liveness checks to ensure the gateway is alive |`/health-local` | +|`gatewayLocalProbePort` | The port that will be used by the local liveness checks to ensure the gateway is alive |`8888` | +|`gatewayName` | The name of the gateway that will be installed |`linkerd-gateway` | +|`gatewayNginxImage` | The Nginx image |`nginx` | +|`gatewayNginxImageVersion` | The version of the Nginx image |`1.17` | +|`gatewayPort` | The port on which all the gateway will accept incoming traffic |`4143` | +|`gatewayProbePath` | The path that will be used by remote clusters for determining whether the gateway is alive |`/health` | +|`gatewayProbePort` | The port used for liveliness probing |`4181` | +|`gatewayProbeSeconds` | The interval (in seconds) between liveness probes |`3` | +|`identityTrustDomain` | Trust domain used for identity of the existing linkerd installation |`cluster.local` | +|`installNamespace` | If the namespace should be installed |`true` | +|`linkerdNamespace` | The namespace of the existing Linkerd installation |`linkerd` | +|`linkerdVersion` | Control plane version | latest version | +|`namespace` | Service Mirror component namespace |`linkerd-multicluster` | +|`proxyOutboundPort` | The port on which the proxy accepts outbound traffic |`4140` | +|`remoteMirrorServiceAccountName` | The name of the service account used to allow remote clusters to mirror local services |`linkerd-service-mirror-remote-access-default`| 
+|`remoteMirrorServiceAccount` | If the remote mirror service account should be installed |`true` | +|`serviceMirror` | If the service mirror component should be installed |`true` | +|`logLevel` | Log level for the Multicluster components |`info` | +|`serviceMirrorRetryLimit` | Number of times update from the remote cluster is allowed to be requeued (retried) |`3` | +|`serviceMirrorUID` | User id under which the Service Mirror shall be ran |`2103` | diff --git a/charts/linkerd2-multicluster-link/templates/gateway-mirror.yaml b/charts/linkerd2-multicluster-link/templates/gateway-mirror.yaml new file mode 100644 index 0000000000000..7806d3e470760 --- /dev/null +++ b/charts/linkerd2-multicluster-link/templates/gateway-mirror.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: probe-gateway-{{.Values.targetClusterName}} + namespace: {{.Values.namespace}} + labels: + mirror.linkerd.io/mirrored-gateway: "true" + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} +spec: + ports: + - name: mc-probe + port: {{.Values.gatewayProbePort}} + protocol: TCP diff --git a/charts/linkerd2-multicluster/templates/service-mirror.yaml b/charts/linkerd2-multicluster-link/templates/service-mirror.yaml similarity index 59% rename from charts/linkerd2-multicluster/templates/service-mirror.yaml rename to charts/linkerd2-multicluster-link/templates/service-mirror.yaml index cd797bb7fc2e7..a337660d23881 100644 --- a/charts/linkerd2-multicluster/templates/service-mirror.yaml +++ b/charts/linkerd2-multicluster-link/templates/service-mirror.yaml @@ -1,11 +1,11 @@ -{{if .Values.serviceMirror -}} --- kind: ClusterRole apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: linkerd-service-mirror-access-local-resources + name: linkerd-service-mirror-access-local-resources-{{.Values.targetClusterName}} labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} rules: - apiGroups: [""] 
resources: ["endpoints", "services"] @@ -17,72 +17,83 @@ rules: kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: linkerd-service-mirror-access-local-resources + name: linkerd-service-mirror-access-local-resources-{{.Values.targetClusterName}} labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: linkerd-service-mirror-access-local-resources + name: linkerd-service-mirror-access-local-resources-{{.Values.targetClusterName}} subjects: - kind: ServiceAccount - name: linkerd-service-mirror + name: linkerd-service-mirror-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} --- kind: Role apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: linkerd-service-mirror-read-remote-creds + name: linkerd-service-mirror-read-remote-creds-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} rules: - apiGroups: [""] resources: ["secrets"] + resourceNames: ["cluster-credentials-{{.Values.targetClusterName}}"] + verbs: ["list", "get", "watch"] + - apiGroups: ["multicluster.linkerd.io"] + resources: ["links"] verbs: ["list", "get", "watch"] --- kind: RoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: linkerd-service-mirror-read-remote-creds + name: linkerd-service-mirror-read-remote-creds-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} roleRef: apiGroup: rbac.authorization.k8s.io kind: Role - name: linkerd-service-mirror-read-remote-creds + name: linkerd-service-mirror-read-remote-creds-{{.Values.targetClusterName}} subjects: - kind: ServiceAccount - name: linkerd-service-mirror + 
name: linkerd-service-mirror-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} --- kind: ServiceAccount apiVersion: v1 metadata: - name: linkerd-service-mirror + name: linkerd-service-mirror-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} --- apiVersion: apps/v1 kind: Deployment metadata: labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror - name: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} + name: linkerd-service-mirror-{{.Values.targetClusterName}} namespace: {{.Values.namespace}} spec: replicas: 1 selector: matchLabels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} template: metadata: annotations: linkerd.io/inject: enabled labels: {{.Values.controllerComponentLabel}}: linkerd-service-mirror + mirror.linkerd.io/cluster-name: {{.Values.targetClusterName}} spec: containers: - args: @@ -90,6 +101,7 @@ spec: - -log-level={{.Values.logLevel}} - -event-requeue-limit={{.Values.serviceMirrorRetryLimit}} - -namespace={{.Values.namespace}} + - {{.Values.targetClusterName}} image: {{.Values.controllerImage}}:{{.Values.controllerImageVersion}} name: service-mirror securityContext: @@ -97,5 +109,4 @@ spec: ports: - containerPort: 9999 name: admin-http - serviceAccountName: linkerd-service-mirror -{{end -}} + serviceAccountName: linkerd-service-mirror-{{.Values.targetClusterName}} diff --git a/charts/linkerd2-multicluster-link/values.yaml b/charts/linkerd2-multicluster-link/values.yaml new file mode 100644 index 0000000000000..df34d0d83c42f --- /dev/null +++ b/charts/linkerd2-multicluster-link/values.yaml @@ -0,0 +1,9 @@ +controllerComponentLabel: linkerd.io/control-plane-component +controllerImage: gcr.io/linkerd-io/controller +controllerImageVersion: linkerdVersionValue 
+createdByAnnotation: linkerd.io/created-by +gatewayProbePort: 4181 +namespace: linkerd-multicluster +logLevel: info +serviceMirrorRetryLimit: 3 +serviceMirrorUID: 2103 diff --git a/charts/linkerd2-multicluster/Chart.yaml b/charts/linkerd2-multicluster/Chart.yaml index 377b564f958e4..0870b69269002 100644 --- a/charts/linkerd2-multicluster/Chart.yaml +++ b/charts/linkerd2-multicluster/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 appVersion: edge-XX.X.X -description: A helm chart containing the resources to enable mirroring of services on remote clusters +description: A helm chart containing the resources to support multicluster linking to remote clusters kubeVersion: ">=1.13.0-0" icon: https://linkerd.io/images/logo-only-200h.png name: "linkerd2-multicluster" -version: 0.1.0 \ No newline at end of file +version: 0.1.0 diff --git a/charts/linkerd2-multicluster/templates/link-crd.yaml b/charts/linkerd2-multicluster/templates/link-crd.yaml new file mode 100644 index 0000000000000..c2efffa956377 --- /dev/null +++ b/charts/linkerd2-multicluster/templates/link-crd.yaml @@ -0,0 +1,22 @@ +--- +### +### Link CRD +### +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: links.multicluster.linkerd.io + annotations: + {{.Values.createdByAnnotation}}: {{default (printf "linkerd/helm %s" .Values.linkerdVersion) .Values.cliVersion}} +spec: + group: multicluster.linkerd.io + versions: + - name: v1alpha1 + served: true + storage: true + scope: Namespaced + names: + plural: links + singular: link + kind: Link diff --git a/charts/linkerd2-multicluster/values.yaml b/charts/linkerd2-multicluster/values.yaml index 6d260c2416989..78d17be59e569 100644 --- a/charts/linkerd2-multicluster/values.yaml +++ b/charts/linkerd2-multicluster/values.yaml @@ -1,6 +1,3 @@ -controllerComponentLabel: linkerd.io/control-plane-component -controllerImage: gcr.io/linkerd-io/controller -controllerImageVersion: linkerdVersionValue createdByAnnotation: 
linkerd.io/created-by gateway: true gatewayLocalProbePath: /health-local @@ -12,15 +9,9 @@ gatewayPort: 4143 gatewayProbePath: /health gatewayProbePort: 4181 gatewayProbeSeconds: 3 -identityTrustDomain: cluster.local installNamespace: true -linkerdNamespace: linkerd linkerdVersion: linkerdVersionValue namespace: linkerd-multicluster proxyOutboundPort: 4140 -serviceMirror: true -logLevel: info -serviceMirrorRetryLimit: 3 -serviceMirrorUID: 2103 remoteMirrorServiceAccount: true remoteMirrorServiceAccountName: linkerd-service-mirror-remote-access-default diff --git a/cli/cmd/check.go b/cli/cmd/check.go index 3c16eed02c315..807ac167a1054 100644 --- a/cli/cmd/check.go +++ b/cli/cmd/check.go @@ -188,8 +188,7 @@ func configureAndRunChecks(wout io.Writer, werr io.Writer, stage string, options } checks = append(checks, healthcheck.LinkerdCNIPluginChecks) checks = append(checks, healthcheck.LinkerdHAChecks) - checks = append(checks, healthcheck.LinkerdMulticlusterSourceChecks) - checks = append(checks, healthcheck.LinkerdMulticlusterTargetChecks) + checks = append(checks, healthcheck.LinkerdMulticlusterChecks) checks = append(checks, healthcheck.AddOnCategories...) 
} @@ -208,8 +207,6 @@ func configureAndRunChecks(wout io.Writer, werr io.Writer, stage string, options RetryDeadline: time.Now().Add(options.wait), CNIEnabled: options.cniEnabled, InstallManifest: installManifest, - SourceCluster: options.multicluster, - TargetCluster: options.multicluster, }) success := runChecks(wout, werr, hc, options.output) diff --git a/cli/cmd/multicluster.go b/cli/cmd/multicluster.go index f6a50b9762ba7..4062c876ce669 100644 --- a/cli/cmd/multicluster.go +++ b/cli/cmd/multicluster.go @@ -18,6 +18,7 @@ import ( mccharts "github.com/linkerd/linkerd2/pkg/charts/multicluster" "github.com/linkerd/linkerd2/pkg/healthcheck" "github.com/linkerd/linkerd2/pkg/k8s" + mc "github.com/linkerd/linkerd2/pkg/multicluster" "github.com/linkerd/linkerd2/pkg/version" log "github.com/sirupsen/logrus" "github.com/spf13/cobra" @@ -33,10 +34,12 @@ import ( ) const ( - defaultMulticlusterNamespace = "linkerd-multicluster" - helmMulticlusterDefaultChartName = "linkerd2-multicluster" - tokenKey = "token" - defaultServiceAccountName = "linkerd-service-mirror-remote-access-default" + defaultMulticlusterNamespace = "linkerd-multicluster" + defaultGatewayName = "linkerd-gateway" + helmMulticlusterDefaultChartName = "linkerd2-multicluster" + helmMulticlusterLinkDefaultChartName = "linkerd2-multicluster-link" + tokenKey = "token" + defaultServiceAccountName = "linkerd-service-mirror-remote-access-default" ) type ( @@ -52,21 +55,23 @@ type ( gatewayProbeSeconds uint32 gatewayProbePort uint32 namespace string - serviceMirror bool - serviceMirrorRetryLimit uint32 - logLevel string gatewayNginxImage string gatewayNginxVersion string - controlPlaneVersion string dockerRegistry string remoteMirrorCredentials bool } linkOptions struct { - namespace string - clusterName string - apiServerAddress string - serviceAccountName string + namespace string + clusterName string + apiServerAddress string + serviceAccountName string + gatewayName string + gatewayNamespace string + 
serviceMirrorRetryLimit uint32 + logLevel string + controlPlaneVersion string + dockerRegistry string } exportServiceOptions struct { @@ -82,7 +87,7 @@ type ( ) func newMulticlusterInstallOptionsWithDefault() (*multiclusterInstallOptions, error) { - defaults, err := mccharts.NewValues() + defaults, err := mccharts.NewInstallValues() if err != nil { return nil, err } @@ -93,16 +98,26 @@ func newMulticlusterInstallOptionsWithDefault() (*multiclusterInstallOptions, er gatewayProbeSeconds: defaults.GatewayProbeSeconds, gatewayProbePort: defaults.GatewayProbePort, namespace: defaults.Namespace, - serviceMirror: defaults.ServiceMirror, - serviceMirrorRetryLimit: defaults.ServiceMirrorRetryLimit, - logLevel: defaults.LogLevel, gatewayNginxImage: defaults.GatewayNginxImage, gatewayNginxVersion: defaults.GatewayNginxImageVersion, - controlPlaneVersion: version.Version, dockerRegistry: defaultDockerRegistry, remoteMirrorCredentials: true, }, nil +} + +func newLinkOptionsWithDefault() (*linkOptions, error) { + defaults, err := mccharts.NewLinkValues() + if err != nil { + return nil, err + } + return &linkOptions{ + controlPlaneVersion: version.Version, + namespace: defaults.Namespace, + dockerRegistry: defaultDockerRegistry, + serviceMirrorRetryLimit: defaults.ServiceMirrorRetryLimit, + logLevel: defaults.LogLevel, + }, nil } func getLinkerdConfigMap() (*configPb.All, error) { @@ -119,15 +134,7 @@ func getLinkerdConfigMap() (*configPb.All, error) { return global, nil } -func buildMulticlusterInstallValues(opts *multiclusterInstallOptions) (*multicluster.Values, error) { - - global, err := getLinkerdConfigMap() - if err != nil { - if kerrors.IsNotFound(err) { - return nil, errors.New("you need Linkerd to be installed in order to install multicluster addons") - } - return nil, err - } +func buildServiceMirrorValues(opts *linkOptions) (*multicluster.Values, error) { if !alphaNumDashDot.MatchString(opts.controlPlaneVersion) { return nil, fmt.Errorf("%s is not a valid version", 
opts.controlPlaneVersion) @@ -145,13 +152,42 @@ func buildMulticlusterInstallValues(opts *multiclusterInstallOptions) (*multiclu return nil, fmt.Errorf("--log-level must be one of: panic, fatal, error, warn, info, debug") } - defaults, err := mccharts.NewValues() + defaults, err := mccharts.NewLinkValues() + if err != nil { + return nil, err + } + + defaults.TargetClusterName = opts.clusterName + defaults.Namespace = opts.namespace + defaults.ServiceMirrorRetryLimit = opts.serviceMirrorRetryLimit + defaults.LogLevel = opts.logLevel + defaults.ControllerImageVersion = opts.controlPlaneVersion + defaults.ControllerImage = fmt.Sprintf("%s/controller", opts.dockerRegistry) + + return defaults, nil +} + +func buildMulticlusterInstallValues(opts *multiclusterInstallOptions) (*multicluster.Values, error) { + + global, err := getLinkerdConfigMap() if err != nil { + if kerrors.IsNotFound(err) { + return nil, errors.New("you need Linkerd to be installed in order to install multicluster addons") + } return nil, err } - if opts.gatewayProbePort == defaults.GatewayLocalProbePort { - return nil, fmt.Errorf("The probe port needs to be different from %d which is the multicluster probe port", opts.gatewayProbePort) + if opts.namespace == "" { + return nil, errors.New("you need to specify a namespace") + } + + if opts.namespace == controlPlaneNamespace { + return nil, errors.New("you need to setup the multicluster addons in a namespace different than the Linkerd one") + } + + defaults, err := mccharts.NewInstallValues() + if err != nil { + return nil, err } defaults.Namespace = opts.namespace @@ -159,17 +195,12 @@ func buildMulticlusterInstallValues(opts *multiclusterInstallOptions) (*multiclu defaults.GatewayPort = opts.gatewayPort defaults.GatewayProbeSeconds = opts.gatewayProbeSeconds defaults.GatewayProbePort = opts.gatewayProbePort - defaults.ServiceMirror = opts.serviceMirror - defaults.ServiceMirrorRetryLimit = opts.serviceMirrorRetryLimit - defaults.LogLevel = opts.logLevel 
defaults.GatewayNginxImage = opts.gatewayNginxImage defaults.GatewayNginxImageVersion = opts.gatewayNginxVersion defaults.IdentityTrustDomain = global.Global.IdentityContext.TrustDomain defaults.LinkerdNamespace = controlPlaneNamespace defaults.ProxyOutboundPort = global.Proxy.OutboundPort.Port defaults.LinkerdVersion = version.Version - defaults.ControllerImageVersion = opts.controlPlaneVersion - defaults.ControllerImage = fmt.Sprintf("%s/controller", opts.dockerRegistry) defaults.RemoteMirrorServiceAccount = opts.remoteMirrorCredentials return defaults, nil @@ -194,7 +225,7 @@ func buildMulticlusterAllowValues(opts *allowOptions) (*mccharts.Values, error) return nil, errors.New("you need to setup the multicluster addons in a namespace different than the Linkerd one") } - defaults, err := mccharts.NewValues() + defaults, err := mccharts.NewInstallValues() if err != nil { return nil, err } @@ -336,8 +367,8 @@ func newMulticlusterInstallCommand() *cobra.Command { {Name: chartutil.ChartfileName}, {Name: "templates/namespace.yaml"}, {Name: "templates/gateway.yaml"}, - {Name: "templates/service-mirror.yaml"}, {Name: "templates/remote-access-service-mirror-rbac.yaml"}, + {Name: "templates/link-crd.yaml"}, } chart := &charts.Chart{ @@ -363,12 +394,8 @@ func newMulticlusterInstallCommand() *cobra.Command { cmd.Flags().Uint32Var(&options.gatewayPort, "gateway-port", options.gatewayPort, "The port on the gateway used for all incoming traffic") cmd.Flags().Uint32Var(&options.gatewayProbeSeconds, "gateway-probe-seconds", options.gatewayProbeSeconds, "The interval at which the gateway will be checked for being alive in seconds") cmd.Flags().Uint32Var(&options.gatewayProbePort, "gateway-probe-port", options.gatewayProbePort, "The liveness check port of the gateway") - cmd.Flags().BoolVar(&options.serviceMirror, "service-mirror", options.serviceMirror, "If the service-mirror component should be installed") - cmd.Flags().Uint32Var(&options.serviceMirrorRetryLimit, 
"service-mirror-retry-limit", options.serviceMirrorRetryLimit, "The number of times a failed update from the target cluster is allowed to be retried") - cmd.Flags().StringVar(&options.logLevel, "log-level", options.logLevel, "Log level for the Multicluster components") cmd.Flags().StringVar(&options.gatewayNginxImage, "gateway-nginx-image", options.gatewayNginxImage, "The nginx image to be used") cmd.Flags().StringVar(&options.gatewayNginxVersion, "gateway-nginx-image-version", options.gatewayNginxVersion, "The version of nginx to be used") - cmd.Flags().StringVarP(&options.controlPlaneVersion, "control-plane-version", "", options.controlPlaneVersion, "(Development) Tag to be used for the control plane component images") cmd.Flags().StringVar(&options.dockerRegistry, "registry", options.dockerRegistry, "Docker registry to pull images from") cmd.Flags().BoolVar(&options.remoteMirrorCredentials, "service-mirror-credentials", options.remoteMirrorCredentials, "Whether to install the service account which can be used by service mirror components in source clusters to discover exported servivces") @@ -387,7 +414,11 @@ func newMulticlusterInstallCommand() *cobra.Command { } func newLinkCommand() *cobra.Command { - opts := linkOptions{} + opts, err := newLinkOptionsWithDefault() + if err != nil { + fmt.Fprintf(os.Stderr, "%s", err) + os.Exit(1) + } cmd := &cobra.Command{ Use: "link", @@ -497,11 +528,93 @@ func newLinkCommand() *cobra.Command { }, } - out, err := yaml.Marshal(creds) + credsOut, err := yaml.Marshal(creds) + if err != nil { + return err + } + + gateway, err := k.CoreV1().Services(opts.gatewayNamespace).Get(opts.gatewayName, metav1.GetOptions{}) + if err != nil { + return err + } + + gatewayAddresses := []string{} + for _, ingress := range gateway.Status.LoadBalancer.Ingress { + gatewayAddresses = append(gatewayAddresses, ingress.IP) + } + if len(gatewayAddresses) == 0 { + return fmt.Errorf("Gateway %s.%s has no ingress addresses", gateway.Name, 
gateway.Namespace) + } + + gatewayIdentity, ok := gateway.Annotations[k8s.GatewayIdentity] + if !ok || gatewayIdentity == "" { + return fmt.Errorf("Gatway %s.%s has no %s annotation", gateway.Name, gateway.Namespace, k8s.GatewayIdentity) + } + + probeSpec, err := mc.ExtractProbeSpec(gateway) + if err != nil { + return err + } + + gatewayPort, err := extractGatewayPort(gateway) + if err != nil { + return err + } + + link := mc.Link{ + Name: opts.clusterName, + Namespace: opts.namespace, + TargetClusterName: opts.clusterName, + TargetClusterDomain: configMap.Global.ClusterDomain, + TargetClusterLinkerdNamespace: controlPlaneNamespace, + ClusterCredentialsSecret: fmt.Sprintf("cluster-credentials-%s", opts.clusterName), + GatewayAddress: strings.Join(gatewayAddresses, ","), + GatewayPort: gatewayPort, + GatewayIdentity: gatewayIdentity, + ProbeSpec: probeSpec, + } + + linkOut, err := yaml.Marshal(link.ToUnstructured().Object) if err != nil { return err } - fmt.Println(string(out)) + + values, err := buildServiceMirrorValues(opts) + + if err != nil { + return err + } + + // Render raw values and create chart config + rawValues, err := yaml.Marshal(values) + if err != nil { + return err + } + + files := []*chartutil.BufferedFile{ + {Name: chartutil.ChartfileName}, + {Name: "templates/service-mirror.yaml"}, + {Name: "templates/gateway-mirror.yaml"}, + } + + chart := &charts.Chart{ + Name: helmMulticlusterLinkDefaultChartName, + Dir: helmMulticlusterLinkDefaultChartName, + Namespace: controlPlaneNamespace, + RawValues: rawValues, + Files: files, + } + serviceMirrorOut, err := chart.RenderNoPartials() + if err != nil { + return err + } + + stdout.Write(credsOut) + stdout.Write([]byte("---\n")) + stdout.Write(linkOut) + stdout.Write([]byte("---\n")) + stdout.Write(serviceMirrorOut.Bytes()) + stdout.Write([]byte("---\n")) return nil }, @@ -511,6 +624,12 @@ func newLinkCommand() *cobra.Command { cmd.Flags().StringVar(&opts.clusterName, "cluster-name", "", "Cluster name") 
cmd.Flags().StringVar(&opts.apiServerAddress, "api-server-address", "", "The api server address of the target cluster") cmd.Flags().StringVar(&opts.serviceAccountName, "service-account-name", defaultServiceAccountName, "The name of the service account associated with the credentials") + cmd.Flags().StringVar(&opts.controlPlaneVersion, "control-plane-version", opts.controlPlaneVersion, "(Development) Tag to be used for the service mirror controller image") + cmd.Flags().StringVar(&opts.gatewayName, "gateway-name", defaultGatewayName, "The name of the gateway service") + cmd.Flags().StringVar(&opts.gatewayNamespace, "gateway-namespace", defaultMulticlusterNamespace, "The namespace of the gateway service") + cmd.Flags().Uint32Var(&opts.serviceMirrorRetryLimit, "service-mirror-retry-limit", opts.serviceMirrorRetryLimit, "The number of times a failed update from the target cluster is allowed to be retried") + cmd.Flags().StringVar(&opts.logLevel, "log-level", opts.logLevel, "Log level for the Multicluster components") + cmd.Flags().StringVar(&opts.dockerRegistry, "registry", opts.dockerRegistry, "Docker registry to pull service mirror controller image from") return cmd } @@ -796,14 +915,12 @@ func renderGateways(rows []*pb.GatewaysTable_Row, w io.Writer) { } var ( - gatewayNameHeader = "NAME" - gatewayNamespaceHeader = "NAMESPACE" - clusterNameHeader = "CLUSTER" - aliveHeader = "ALIVE" - pairedServicesHeader = "NUM_SVC" - latencyP50Header = "LATENCY_P50" - latencyP95Header = "LATENCY_P95" - latencyP99Header = "LATENCY_P99" + clusterNameHeader = "CLUSTER" + aliveHeader = "ALIVE" + pairedServicesHeader = "NUM_SVC" + latencyP50Header = "LATENCY_P50" + latencyP95Header = "LATENCY_P95" + latencyP99Header = "LATENCY_P99" ) func buildGatewaysTable() table.Table { @@ -814,18 +931,6 @@ func buildGatewaysTable() table.Table { Flexible: true, LeftAlign: true, }, - table.Column{ - Header: gatewayNamespaceHeader, - Width: 9, - Flexible: true, - LeftAlign: true, - }, - table.Column{ 
- Header: gatewayNameHeader, - Width: 4, - Flexible: true, - LeftAlign: true, - }, table.Column{ Header: aliveHeader, Width: 5, @@ -869,8 +974,6 @@ func gatewaysRowToTableRow(row *pb.GatewaysTable_Row) []string { } return []string{ row.ClusterName, - row.Namespace, - row.Name, alive, fmt.Sprint(row.PairedServices), valueOrPlaceholder(fmt.Sprintf("%dms", row.LatencyMsP50)), @@ -879,3 +982,12 @@ func gatewaysRowToTableRow(row *pb.GatewaysTable_Row) []string { } } + +func extractGatewayPort(gateway *corev1.Service) (uint32, error) { + for _, port := range gateway.Spec.Ports { + if port.Name == k8s.GatewayPortName { + return uint32(port.Port), nil + } + } + return 0, fmt.Errorf("gateway service %s has no gateway port named %s", gateway.Name, k8s.GatewayPortName) +} diff --git a/controller/api/public/gateways.go b/controller/api/public/gateways.go index 564a444b8be76..9d2c8b7d9da8f 100644 --- a/controller/api/public/gateways.go +++ b/controller/api/public/gateways.go @@ -54,7 +54,8 @@ func buildGatewaysRequestLabels(req *pb.GatewaysRequest) (labels model.LabelSet, return labels, groupBy } -// this function returns a map of gateways to the number of services using them +// this function returns a map of target cluster to the number of services mirrored +// from it func (s *grpcServer) getNumServicesMap() (map[string]uint64, error) { results := make(map[string]uint64) @@ -66,11 +67,7 @@ func (s *grpcServer) getNumServicesMap() (map[string]uint64, error) { for _, svc := range services.Items { clusterName := svc.Labels[k8s.RemoteClusterNameLabel] - gatewayName := svc.Labels[k8s.RemoteGatewayNameLabel] - gatewayNs := svc.Labels[k8s.RemoteGatewayNsLabel] - key := fmt.Sprintf("%s-%s-%s", clusterName, gatewayName, gatewayNs) - - results[key]++ + results[clusterName]++ } return results, nil @@ -83,20 +80,14 @@ func processPrometheusResult(results []promResult, numSvcMap map[string]uint64) for _, result := range results { for _, sample := range result.vec { - clusterName := 
sample.Metric[remoteClusterNameLabel] - gatewayName := sample.Metric[gatewayNameLabel] - gatewayNamespace := sample.Metric[gatewayNamespaceLabel] - numPairedSvc := numSvcMap[fmt.Sprintf("%s-%s-%s", clusterName, gatewayName, gatewayNamespace)] - - key := fmt.Sprintf("%s-%s-%s", clusterName, gatewayNamespace, gatewayName) + clusterName := string(sample.Metric[remoteClusterNameLabel]) + numPairedSvc := numSvcMap[clusterName] addRow := func() { - if rows[key] == nil { - rows[key] = &pb.GatewaysTable_Row{} - rows[key].ClusterName = string(clusterName) - rows[key].Name = string(gatewayName) - rows[key].Namespace = string(gatewayNamespace) - rows[key].PairedServices = numPairedSvc + if rows[clusterName] == nil { + rows[clusterName] = &pb.GatewaysTable_Row{} + rows[clusterName].ClusterName = clusterName + rows[clusterName].PairedServices = numPairedSvc } } @@ -105,16 +96,16 @@ func processPrometheusResult(results []promResult, numSvcMap map[string]uint64) switch result.prom { case promGatewayAlive: addRow() - rows[key].Alive = value > 0 + rows[clusterName].Alive = value > 0 case promLatencyP50: addRow() - rows[key].LatencyMsP50 = value + rows[clusterName].LatencyMsP50 = value case promLatencyP95: addRow() - rows[key].LatencyMsP95 = value + rows[clusterName].LatencyMsP95 = value case promLatencyP99: addRow() - rows[key].LatencyMsP99 = value + rows[clusterName].LatencyMsP99 = value } } } @@ -125,13 +116,11 @@ func processPrometheusResult(results []promResult, numSvcMap map[string]uint64) func (s *grpcServer) getGatewaysMetrics(ctx context.Context, req *pb.GatewaysRequest, timeWindow string) (map[string]*pb.GatewaysTable_Row, error) { labels, groupBy := buildGatewaysRequestLabels(req) - reqLabels := generateLabelStringWithExclusion(labels, string(gatewayNameLabel)) - promQueries := map[promType]string{ promGatewayAlive: gatewayAliveQuery, } - metricsResp, err := s.getPrometheusMetrics(ctx, promQueries, gatewayLatencyQuantileQuery, reqLabels, timeWindow, groupBy.String()) + 
metricsResp, err := s.getPrometheusMetrics(ctx, promQueries, gatewayLatencyQuantileQuery, labels.String(), timeWindow, groupBy.String()) if err != nil { return nil, err diff --git a/controller/cmd/service-mirror/cluster_watcher.go b/controller/cmd/service-mirror/cluster_watcher.go index 883ab190e36fe..890b34cfea9e1 100644 --- a/controller/cmd/service-mirror/cluster_watcher.go +++ b/controller/cmd/service-mirror/cluster_watcher.go @@ -1,15 +1,14 @@ package servicemirror import ( - "errors" "fmt" "net" - "strconv" "strings" "time" "github.com/linkerd/linkerd2/controller/k8s" consts "github.com/linkerd/linkerd2/pkg/k8s" + "github.com/linkerd/linkerd2/pkg/multicluster" "github.com/prometheus/client_golang/prometheus" logging "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" @@ -31,8 +30,7 @@ type ( // problems or general glitch in the Matrix. RemoteClusterServiceWatcher struct { serviceMirrorNamespace string - clusterName string - clusterDomain string + link *multicluster.Link remoteAPIClient *k8s.API localAPIClient *k8s.API stopper chan struct{} @@ -42,30 +40,10 @@ type ( repairPeriod time.Duration } - // ProbeConfig describes the configured probe on particular gateway (if presents) - ProbeConfig struct { - path string - port uint32 - periodInSeconds uint32 - } - - // GatewaySpec contains essential data about the gateway - GatewaySpec struct { - gatewayName string - gatewayNamespace string - clusterName string - addresses []corev1.EndpointAddress - incomingPort uint32 - resourceVersion string - identity string - *ProbeConfig - } - // RemoteServiceCreated is generated whenever a remote service is created Observing // this event means that the service in question is not mirrored atm RemoteServiceCreated struct { - service *corev1.Service - gatewayData gatewayMetadata + service *corev1.Service } // RemoteServiceUpdated is generated when we see something about an already @@ -76,7 +54,6 @@ type ( localService *corev1.Service localEndpoints *corev1.Endpoints 
remoteUpdate *corev1.Service - gatewayData gatewayMetadata } // RemoteServiceDeleted when a remote service is going away or it is not @@ -86,24 +63,7 @@ type ( Namespace string } - // RemoteGatewayDeleted is observed when a service that is a gateway is deleted - RemoteGatewayDeleted struct { - gatewayData gatewayMetadata - } - - // RemoteGatewayCreated is observed when a gateway service is created on the remote cluster - RemoteGatewayCreated struct { - gatewaySpec GatewaySpec - } - - // RemoteGatewayUpdated happens when a service that is updated. - RemoteGatewayUpdated struct { - gatewaySpec GatewaySpec - affectedServices []*corev1.Service - } - - // ClusterUnregistered is issued when the secret containing the remote cluster - // access information is deleted + // ClusterUnregistered is issued when this ClusterWatcher is shut down. ClusterUnregistered struct{} // OprhanedServicesGcTriggered is a self-triggered event which aims to delete any @@ -144,11 +104,6 @@ type ( // endpoints should be resolved based on the remote gateway and updated. RepairEndpoints struct{} - gatewayMetadata struct { - Name string - Namespace string - } - // RetryableError is an error that should be retried through requeuing events RetryableError struct{ Inner []error } ) @@ -161,26 +116,15 @@ func (re RetryableError) Error() string { return fmt.Sprintf("Inner errors:\n\t%s", strings.Join(errorStrings, "\n\t")) } -// When the gateway is resolved we need to produce a set of endpoint addresses that that -// contain the external IPs that this gateway exposes. Therefore we return the IP addresses -// as well as a single port on which the gateway is accessible. 
-func (rcsw *RemoteClusterServiceWatcher) resolveGateway(metadata *gatewayMetadata) (*GatewaySpec, error) { - gateway, err := rcsw.remoteAPIClient.Svc().Lister().Services(metadata.Namespace).Get(metadata.Name) - if err != nil { - return nil, err - } - return rcsw.extractGatewaySpec(gateway) -} - // NewRemoteClusterServiceWatcher constructs a new cluster watcher func NewRemoteClusterServiceWatcher( serviceMirrorNamespace string, localAPI *k8s.API, cfg *rest.Config, - clusterName string, + link *multicluster.Link, requeueLimit int, repairPeriod time.Duration, - clusterDomain string, + ) (*RemoteClusterServiceWatcher, error) { remoteAPI, err := k8s.InitializeAPIForConfig(cfg, false, k8s.Svc) if err != nil { @@ -189,8 +133,7 @@ func NewRemoteClusterServiceWatcher( stopper := make(chan struct{}) return &RemoteClusterServiceWatcher{ serviceMirrorNamespace: serviceMirrorNamespace, - clusterName: clusterName, - clusterDomain: clusterDomain, + link: link, remoteAPIClient: remoteAPI, localAPIClient: localAPI, stopper: stopper, @@ -205,26 +148,24 @@ func NewRemoteClusterServiceWatcher( } func (rcsw *RemoteClusterServiceWatcher) mirroredResourceName(remoteName string) string { - return fmt.Sprintf("%s-%s", remoteName, rcsw.clusterName) + return fmt.Sprintf("%s-%s", remoteName, rcsw.link.TargetClusterName) } func (rcsw *RemoteClusterServiceWatcher) originalResourceName(mirroredName string) string { - return strings.TrimSuffix(mirroredName, fmt.Sprintf("-%s", rcsw.clusterName)) + return strings.TrimSuffix(mirroredName, fmt.Sprintf("-%s", rcsw.link.TargetClusterName)) } -func (rcsw *RemoteClusterServiceWatcher) getMirroredServiceLabels(gatewayData *gatewayMetadata) map[string]string { +func (rcsw *RemoteClusterServiceWatcher) getMirroredServiceLabels() map[string]string { return map[string]string{ consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, - consts.RemoteGatewayNameLabel: gatewayData.Name, - consts.RemoteGatewayNsLabel: 
gatewayData.Namespace, + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, } } func (rcsw *RemoteClusterServiceWatcher) getMirroredServiceAnnotations(remoteService *corev1.Service) map[string]string { return map[string]string{ consts.RemoteResourceVersionAnnotation: remoteService.ResourceVersion, // needed to detect real changes - consts.RemoteServiceFqName: fmt.Sprintf("%s.%s.svc.%s", remoteService.Name, remoteService.Namespace, rcsw.clusterDomain), + consts.RemoteServiceFqName: fmt.Sprintf("%s.%s.svc.%s", remoteService.Name, remoteService.Namespace, rcsw.link.TargetClusterDomain), } } @@ -239,7 +180,7 @@ func (rcsw *RemoteClusterServiceWatcher) mirrorNamespaceIfNecessary(namespace st ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, }, Name: namespace, }, @@ -261,13 +202,13 @@ func (rcsw *RemoteClusterServiceWatcher) mirrorNamespaceIfNecessary(namespace st // that we should send traffic to and create endpoint ports that bind to the mirrored service ports // (same name, etc) but send traffic to the gateway port. This way we do not need to do any remapping // on the service side of things. It all happens in the endpoints. 
-func (rcsw *RemoteClusterServiceWatcher) getEndpointsPorts(service *corev1.Service, gatewayPort int32) []corev1.EndpointPort { +func (rcsw *RemoteClusterServiceWatcher) getEndpointsPorts(service *corev1.Service) []corev1.EndpointPort { var endpointsPorts []corev1.EndpointPort for _, remotePort := range service.Spec.Ports { endpointsPorts = append(endpointsPorts, corev1.EndpointPort{ Name: remotePort.Name, Protocol: remotePort.Protocol, - Port: gatewayPort, + Port: int32(rcsw.link.GatewayPort), }) } return endpointsPorts @@ -276,7 +217,7 @@ func (rcsw *RemoteClusterServiceWatcher) getEndpointsPorts(service *corev1.Servi func (rcsw *RemoteClusterServiceWatcher) cleanupOrphanedServices() error { matchLabels := map[string]string{ consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, } servicesOnLocalCluster, err := rcsw.localAPIClient.Svc().Lister().List(labels.Set(matchLabels).AsSelector()) @@ -318,10 +259,7 @@ func (rcsw *RemoteClusterServiceWatcher) cleanupOrphanedServices() error { // created. This piece of code is responsible for doing just that. 
It takes care of // services, endpoints and namespaces (if needed) func (rcsw *RemoteClusterServiceWatcher) cleanupMirroredResources() error { - matchLabels := map[string]string{ - consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, - } + matchLabels := rcsw.getMirroredServiceLabels() services, err := rcsw.localAPIClient.Svc().Lister().List(labels.Set(matchLabels).AsSelector()) if err != nil { @@ -374,7 +312,7 @@ func (rcsw *RemoteClusterServiceWatcher) cleanupMirroredResources() error { // Deletes a locally mirrored service as it is not present on the remote cluster anymore func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceDeleted(ev *RemoteServiceDeleted) error { localServiceName := rcsw.mirroredResourceName(ev.Name) - rcsw.log.Infof("Deleting mirrored service %s/%s and its corresponding Endpoints", ev.Namespace, localServiceName) + rcsw.log.Infof("Deleting mirrored service %s/%s", ev.Namespace, localServiceName) var errors []error if err := rcsw.localAPIClient.Client.CoreV1().Services(ev.Namespace).Delete(localServiceName, &metav1.DeleteOptions{}); err != nil { if !kerrors.IsNotFound(err) { @@ -382,12 +320,6 @@ func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceDeleted(ev *RemoteSe } } - if err := rcsw.localAPIClient.Client.CoreV1().Endpoints(ev.Namespace).Delete(localServiceName, &metav1.DeleteOptions{}); err != nil { - if !kerrors.IsNotFound(err) { - errors = append(errors, fmt.Errorf("could not delete Endpoints: %s/%s: %s", ev.Namespace, localServiceName, err)) - } - } - if len(errors) > 0 { return RetryableError{errors} } @@ -399,45 +331,29 @@ func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceDeleted(ev *RemoteSe // Updates a locally mirrored service. There might have been some pretty fundamental changes such as // new gateway being assigned or additional ports exposed. This method takes care of that. 
func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceUpdated(ev *RemoteServiceUpdated) error { - serviceInfo := fmt.Sprintf("%s/%s", ev.remoteUpdate.Namespace, ev.remoteUpdate.Name) rcsw.log.Infof("Updating mirror service %s/%s", ev.localService.Namespace, ev.localService.Name) - gatewaySpec, err := rcsw.resolveGateway(&ev.gatewayData) copiedEndpoints := ev.localEndpoints.DeepCopy() - if err == nil { - copiedEndpoints.Subsets = []corev1.EndpointSubset{ - { - Addresses: gatewaySpec.addresses, - Ports: rcsw.getEndpointsPorts(ev.remoteUpdate, int32(gatewaySpec.incomingPort)), - }, - } - - if gatewaySpec.identity != "" { - copiedEndpoints.Annotations[consts.RemoteGatewayIdentity] = gatewaySpec.identity - } else { - delete(copiedEndpoints.Annotations, consts.RemoteGatewayIdentity) - } + copiedEndpoints.Subsets = []corev1.EndpointSubset{ + { + Addresses: rcsw.resolveGatewayAddress(), + Ports: rcsw.getEndpointsPorts(ev.remoteUpdate), + }, + } - } else { - rcsw.log.Warnf("Could not resolve gateway for %s: %s, nulling endpoints", serviceInfo, err) - copiedEndpoints.Subsets = nil + if copiedEndpoints.Annotations == nil { + copiedEndpoints.Annotations = make(map[string]string) } - // we need to set the new name and ns data no matter whether they are valid or not - copiedEndpoints.Labels[consts.RemoteGatewayNameLabel] = ev.gatewayData.Name - copiedEndpoints.Labels[consts.RemoteGatewayNsLabel] = ev.gatewayData.Namespace + copiedEndpoints.Annotations[consts.RemoteGatewayIdentity] = rcsw.link.GatewayIdentity if _, err := rcsw.localAPIClient.Client.CoreV1().Endpoints(copiedEndpoints.Namespace).Update(copiedEndpoints); err != nil { return RetryableError{[]error{err}} } - ev.localService.Labels = rcsw.getMirroredServiceLabels(&ev.gatewayData) + ev.localService.Labels = rcsw.getMirroredServiceLabels() ev.localService.Annotations = rcsw.getMirroredServiceAnnotations(ev.remoteUpdate) ev.localService.Spec.Ports = remapRemoteServicePorts(ev.remoteUpdate.Spec.Ports) - if 
gatewaySpec != nil { - ev.localService.Annotations[consts.RemoteGatewayResourceVersionAnnotation] = gatewaySpec.resourceVersion - } - if _, err := rcsw.localAPIClient.Client.CoreV1().Services(ev.localService.Namespace).Update(ev.localService); err != nil { return RetryableError{[]error{err}} } @@ -473,7 +389,7 @@ func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceCreated(ev *RemoteSe Name: localServiceName, Namespace: remoteService.Namespace, Annotations: rcsw.getMirroredServiceAnnotations(remoteService), - Labels: rcsw.getMirroredServiceLabels(&ev.gatewayData), + Labels: rcsw.getMirroredServiceLabels(), }, Spec: corev1.ServiceSpec{ Ports: remapRemoteServicePorts(remoteService.Spec.Ports), @@ -486,40 +402,30 @@ func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceCreated(ev *RemoteSe Namespace: ev.service.Namespace, Labels: map[string]string{ consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, - consts.RemoteGatewayNameLabel: ev.gatewayData.Name, - consts.RemoteGatewayNsLabel: ev.gatewayData.Namespace, + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, }, Annotations: map[string]string{ - consts.RemoteServiceFqName: fmt.Sprintf("%s.%s.svc.%s", remoteService.Name, remoteService.Namespace, rcsw.clusterDomain), + consts.RemoteServiceFqName: fmt.Sprintf("%s.%s.svc.%s", remoteService.Name, remoteService.Namespace, rcsw.link.TargetClusterDomain), }, }, } - // Now we try to resolve the remote gateway - gatewaySpec, err := rcsw.resolveGateway(&ev.gatewayData) - if err == nil { - // only if we resolve it, we are updating the endpoints addresses and ports - rcsw.log.Infof("Resolved gateway [%v:%d] for %s", gatewaySpec.addresses, gatewaySpec.incomingPort, serviceInfo) + gatewayAddress := rcsw.resolveGatewayAddress() + // only if we resolve it, we are updating the endpoints addresses and ports + rcsw.log.Infof("Resolved gateway [%v:%d] for %s", gatewayAddress, rcsw.link.GatewayPort, serviceInfo) - if 
len(gatewaySpec.addresses) > 0 { - endpointsToCreate.Subsets = []corev1.EndpointSubset{ - { - Addresses: gatewaySpec.addresses, - Ports: rcsw.getEndpointsPorts(ev.service, int32(gatewaySpec.incomingPort)), - }, - } - } else { - rcsw.log.Warnf("gateway for %s: %s does not have ready addresses, skipping subsets", serviceInfo, err) - } - serviceToCreate.Annotations[consts.RemoteGatewayResourceVersionAnnotation] = gatewaySpec.resourceVersion - if gatewaySpec.identity != "" { - endpointsToCreate.Annotations[consts.RemoteGatewayIdentity] = gatewaySpec.identity + if len(gatewayAddress) > 0 { + endpointsToCreate.Subsets = []corev1.EndpointSubset{ + { + Addresses: gatewayAddress, + Ports: rcsw.getEndpointsPorts(ev.service), + }, } - } else { - rcsw.log.Infof("Could not resolve gateway for %s: %s, skipping subsets", serviceInfo, err) - endpointsToCreate.Subsets = nil + rcsw.log.Warnf("gateway for %s does not have ready addresses, skipping subsets", serviceInfo) + } + if rcsw.link.GatewayIdentity != "" { + endpointsToCreate.Annotations[consts.RemoteGatewayIdentity] = rcsw.link.GatewayIdentity } rcsw.log.Infof("Creating a new service mirror for %s", serviceInfo) @@ -540,274 +446,7 @@ func (rcsw *RemoteClusterServiceWatcher) handleRemoteServiceCreated(ev *RemoteSe return nil } -func (rcsw *RemoteClusterServiceWatcher) handleRemoteGatewayDeleted(ev *RemoteGatewayDeleted) error { - - if err := rcsw.localAPIClient.Client.CoreV1().Services(rcsw.serviceMirrorNamespace).Delete(rcsw.mirroredResourceName(ev.gatewayData.Name), &metav1.DeleteOptions{}); err != nil { - rcsw.log.Errorf("Could not delete gateway mirror %s", err) - } - - affectedEndpoints, err := rcsw.endpointsForGateway(&ev.gatewayData) - if err != nil { - // if we cannot find the endpoints, we can give up - if kerrors.IsNotFound(err) { - return err - } - // if it is another error, just retry - return RetryableError{[]error{err}} - } - - var errors []error - if len(affectedEndpoints) > 0 { - rcsw.log.Infof("Nulling %d 
endpoints due to gateway [%s/%s] deletion", len(affectedEndpoints), ev.gatewayData.Namespace, ev.gatewayData.Name) - for _, ep := range affectedEndpoints { - updated := ep.DeepCopy() - updated.Subsets = nil - if _, err := rcsw.localAPIClient.Client.CoreV1().Endpoints(ep.Namespace).Update(updated); err != nil { - errors = append(errors, err) - } - } - } - if len(errors) > 0 { - // if we have encountered any errors, we can retry the whole operation - return RetryableError{errors} - } - return nil -} - -// the logic here creates a mirror service for the gateway. The only port exposed there is the -// probes port. This enables us to discover the gateways probe endpoints through the dst service -// and apply proper identity -func (rcsw *RemoteClusterServiceWatcher) handleRemoteGatewayCreated(event *RemoteGatewayCreated) error { - localServiceName := rcsw.mirroredResourceName(event.gatewaySpec.gatewayName) - if event.gatewaySpec.ProbeConfig == nil { - rcsw.log.Infof("Skipping creation of gateway mirror as gateway does not specify probe config") - return nil - } - serviceToCreate := &corev1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: localServiceName, - Namespace: rcsw.serviceMirrorNamespace, - Annotations: map[string]string{ - consts.RemoteGatewayResourceVersionAnnotation: event.gatewaySpec.resourceVersion, - consts.MirroredGatewayRemoteName: event.gatewaySpec.gatewayName, - consts.MirroredGatewayRemoteNameSpace: event.gatewaySpec.gatewayNamespace, - consts.MirroredGatewayProbePath: event.gatewaySpec.ProbeConfig.path, - consts.MirroredGatewayProbePeriod: fmt.Sprint(event.gatewaySpec.ProbeConfig.periodInSeconds), - }, - Labels: map[string]string{ - consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, - consts.MirroredGatewayLabel: "true", - }, - }, - Spec: corev1.ServiceSpec{ - Ports: []corev1.ServicePort{ - { - Name: consts.ProbePortName, - Protocol: "TCP", - Port: int32(event.gatewaySpec.ProbeConfig.port), - }, - }, - }, - } - - 
endpointsToCreate := &corev1.Endpoints{ - ObjectMeta: metav1.ObjectMeta{ - Name: localServiceName, - Namespace: rcsw.serviceMirrorNamespace, - Labels: map[string]string{ - consts.MirroredResourceLabel: "true", - consts.RemoteClusterNameLabel: rcsw.clusterName, - }, - Annotations: map[string]string{ - consts.RemoteGatewayIdentity: event.gatewaySpec.identity, - }, - }, - } - - if len(event.gatewaySpec.addresses) > 0 { - endpointsToCreate.Subsets = []corev1.EndpointSubset{ - { - Addresses: event.gatewaySpec.addresses, - Ports: []corev1.EndpointPort{ - { - Name: consts.ProbePortName, - Protocol: "TCP", - Port: int32(event.gatewaySpec.ProbeConfig.port), - }, - }, - }, - } - } - - rcsw.log.Infof("Creating a new gateway mirror Service for %s", localServiceName) - if _, err := rcsw.localAPIClient.Client.CoreV1().Services(rcsw.serviceMirrorNamespace).Create(serviceToCreate); err != nil { - if !kerrors.IsAlreadyExists(err) { - // we might have created it during earlier attempt, if that is not the case, we retry - return RetryableError{[]error{err}} - } - } - - rcsw.log.Infof("Creating a new gateway mirror Endpoints for %s", localServiceName) - if _, err := rcsw.localAPIClient.Client.CoreV1().Endpoints(rcsw.serviceMirrorNamespace).Create(endpointsToCreate); err != nil { - // we clean up after ourselves - rcsw.localAPIClient.Client.CoreV1().Services(rcsw.serviceMirrorNamespace).Delete(event.gatewaySpec.gatewayName, &metav1.DeleteOptions{}) - // and retry - return RetryableError{[]error{err}} - } - - return nil -} - -func (rcsw *RemoteClusterServiceWatcher) updateAffectedServices(gatewaySpec GatewaySpec, affectedServices []*corev1.Service) error { - rcsw.log.Infof("Updating %d services due to gateway [%s/%s] update", len(affectedServices), gatewaySpec.gatewayNamespace, gatewaySpec.gatewayName) - var errors []error - for _, svc := range affectedServices { - updatedService := svc.DeepCopy() - if updatedService.Annotations != nil { - 
updatedService.Annotations[consts.RemoteGatewayResourceVersionAnnotation] = gatewaySpec.resourceVersion - } - endpoints, err := rcsw.localAPIClient.Endpoint().Lister().Endpoints(svc.Namespace).Get(svc.Name) - if err != nil { - errors = append(errors, fmt.Errorf("Could not get endpoints: %s", err)) - continue - } - - updatedEndpoints := endpoints.DeepCopy() - if len(gatewaySpec.addresses) > 0 { - updatedEndpoints.Subsets = []corev1.EndpointSubset{ - { - Addresses: gatewaySpec.addresses, - Ports: rcsw.getEndpointsPorts(updatedService, int32(gatewaySpec.incomingPort)), - }, - } - } else { - updatedEndpoints.Subsets = nil - } - - if gatewaySpec.identity != "" { - updatedEndpoints.Annotations[consts.RemoteGatewayIdentity] = gatewaySpec.identity - } else { - delete(updatedEndpoints.Annotations, consts.RemoteGatewayIdentity) - } - - _, err = rcsw.localAPIClient.Client.CoreV1().Services(updatedService.Namespace).Update(updatedService) - if err != nil { - errors = append(errors, err) - continue - } - - _, err = rcsw.localAPIClient.Client.CoreV1().Endpoints(updatedService.Namespace).Update(updatedEndpoints) - if err != nil { - errors = append(errors, err) - } - } - - if len(errors) > 0 { - return RetryableError{errors} - } - return nil -} - -func (rcsw *RemoteClusterServiceWatcher) updateGatewayMirrorService(spec *GatewaySpec) error { - localServiceName := rcsw.mirroredResourceName(spec.gatewayName) - service, err := rcsw.localAPIClient.Svc().Lister().Services(rcsw.serviceMirrorNamespace).Get(localServiceName) - if err != nil { - return err - } - - if service.Annotations != nil && service.Annotations[consts.RemoteGatewayResourceVersionAnnotation] != spec.resourceVersion { - updatedService := service.DeepCopy() - if updatedService.Annotations != nil { - updatedService.Annotations[consts.RemoteGatewayResourceVersionAnnotation] = spec.resourceVersion - updatedService.Annotations[consts.MirroredGatewayProbePath] = spec.ProbeConfig.path - 
updatedService.Annotations[consts.MirroredGatewayProbePeriod] = fmt.Sprint(spec.ProbeConfig.periodInSeconds) - } - - updatedService.Spec.Ports = []corev1.ServicePort{ - { - Name: consts.ProbePortName, - Protocol: "TCP", - Port: int32(spec.ProbeConfig.port), - }, - } - - endpoints, err := rcsw.localAPIClient.Endpoint().Lister().Endpoints(rcsw.serviceMirrorNamespace).Get(localServiceName) - if err != nil { - return err - } - - updatedEndpoints := endpoints.DeepCopy() - if spec.addresses == nil { - updatedEndpoints.Subsets = nil - } else { - updatedEndpoints.Subsets = []corev1.EndpointSubset{ - { - Addresses: spec.addresses, - Ports: []corev1.EndpointPort{ - { - Name: consts.ProbePortName, - Protocol: "TCP", - Port: int32(spec.ProbeConfig.port), - }, - }, - }, - } - } - - updatedEndpoints.Annotations[consts.RemoteGatewayIdentity] = spec.identity - - _, err = rcsw.localAPIClient.Client.CoreV1().Services(rcsw.serviceMirrorNamespace).Update(updatedService) - if err != nil { - return err - } - - _, err = rcsw.localAPIClient.Client.CoreV1().Endpoints(rcsw.serviceMirrorNamespace).Update(updatedEndpoints) - if err != nil { - return err - } - rcsw.log.Infof("%s gateway mirror updated", localServiceName) - } - - return nil -} - -func (rcsw *RemoteClusterServiceWatcher) handleRemoteGatewayUpdated(ev *RemoteGatewayUpdated) error { - if err := rcsw.updateAffectedServices(ev.gatewaySpec, ev.affectedServices); err != nil { - return err - } - - if err := rcsw.updateGatewayMirrorService(&ev.gatewaySpec); err != nil { - return err - } - - return nil -} - -// Retrieves the annotations that indicate this service can be mirrored. -// The values of these annotations help us resolve the gateway to which -// traffic should be sent. 
-func getGatewayMetadata(annotations map[string]string) *gatewayMetadata { - remoteGatewayName, hasGtwName := annotations[consts.GatewayNameAnnotation] - remoteGatewayNs, hasGtwNs := annotations[consts.GatewayNsAnnotation] - if hasGtwName && hasGtwNs { - return &gatewayMetadata{ - Name: remoteGatewayName, - Namespace: remoteGatewayNs, - } - } - return nil -} - -func isGateway(annotations map[string]string) bool { - if annotations != nil { - _, hasAnnotation := annotations[consts.MulticlusterGatewayAnnotation] - return hasAnnotation - } - return false -} - -func isMirroredService(annotations map[string]string) bool { +func isExportedService(annotations map[string]string) bool { if annotations != nil { _, hasGtwName := annotations[consts.GatewayNameAnnotation] _, hasGtwNs := annotations[consts.GatewayNsAnnotation] @@ -822,48 +461,12 @@ func isMirroredService(annotations map[string]string) bool { func (rcsw *RemoteClusterServiceWatcher) createOrUpdateService(service *corev1.Service) error { localName := rcsw.mirroredResourceName(service.Name) - if isGateway(service.Annotations) { - gatewaySpec, err := rcsw.extractGatewaySpec(service) - if err != nil { - return RetryableError{[]error{err}} - } - - _, err = rcsw.localAPIClient.Svc().Lister().Services(rcsw.serviceMirrorNamespace).Get(localName) - if err != nil { - if kerrors.IsNotFound(err) { - rcsw.eventsQueue.Add(&RemoteGatewayCreated{ - gatewaySpec: *gatewaySpec, - }) - return nil - } - return RetryableError{[]error{err}} - } - - affectedServices, err := rcsw.affectedMirroredServicesForGatewayUpdate(&gatewayMetadata{ - Name: service.Name, - Namespace: service.Namespace, - }, service.ResourceVersion) - if err != nil { - return RetryableError{[]error{err}} - } - - rcsw.eventsQueue.Add(&RemoteGatewayUpdated{ - affectedServices: affectedServices, - gatewaySpec: *gatewaySpec, - }) - return nil - - } else if isMirroredService(service.Annotations) { - gatewayData := getGatewayMetadata(service.Annotations) - if gatewayData == 
nil { - return fmt.Errorf("got service in invalid state, no gateway metadata %s", service) - } + if isExportedService(service.Annotations) { localService, err := rcsw.localAPIClient.Svc().Lister().Services(service.Namespace).Get(localName) if err != nil { if kerrors.IsNotFound(err) { rcsw.eventsQueue.Add(&RemoteServiceCreated{ - service: service, - gatewayData: *gatewayData, + service: service, }) return nil } @@ -878,53 +481,33 @@ func (rcsw *RemoteClusterServiceWatcher) createOrUpdateService(service *corev1.S localService: localService, localEndpoints: endpoints, remoteUpdate: service, - gatewayData: *gatewayData, }) return nil } return RetryableError{[]error{err}} } return nil - } else { - localSvc, err := rcsw.localAPIClient.Svc().Lister().Services(service.Namespace).Get(localName) - if err == nil { - if localSvc.Labels != nil { - _, isMirroredRes := localSvc.Labels[consts.MirroredResourceLabel] - clusterName := localSvc.Labels[consts.RemoteClusterNameLabel] - if isMirroredRes && (clusterName == rcsw.clusterName) { - rcsw.eventsQueue.Add(&RemoteServiceDeleted{ - Name: service.Name, - Namespace: service.Namespace, - }) - } - } - } - return nil - } -} - -func (rcsw *RemoteClusterServiceWatcher) affectedMirroredServicesForGatewayUpdate(gatewayData *gatewayMetadata, latestResourceVersion string) ([]*corev1.Service, error) { - services, err := rcsw.mirroredServicesForGateway(gatewayData) - if err != nil { - return nil, err } - - affectedServices := []*corev1.Service{} - for _, srv := range services { - ver, ok := srv.Annotations[consts.RemoteGatewayResourceVersionAnnotation] - if ok && ver != latestResourceVersion { - affectedServices = append(affectedServices, srv) + localSvc, err := rcsw.localAPIClient.Svc().Lister().Services(service.Namespace).Get(localName) + if err == nil { + if localSvc.Labels != nil { + _, isMirroredRes := localSvc.Labels[consts.MirroredResourceLabel] + clusterName := localSvc.Labels[consts.RemoteClusterNameLabel] + if isMirroredRes && 
(clusterName == rcsw.link.TargetClusterName) { + rcsw.eventsQueue.Add(&RemoteServiceDeleted{ + Name: service.Name, + Namespace: service.Namespace, + }) + } } } - return affectedServices, nil + return nil } -func (rcsw *RemoteClusterServiceWatcher) mirroredServicesForGateway(gatewayData *gatewayMetadata) ([]*corev1.Service, error) { +func (rcsw *RemoteClusterServiceWatcher) getMirrorServices() ([]*corev1.Service, error) { matchLabels := map[string]string{ consts.MirroredResourceLabel: "true", - consts.RemoteGatewayNameLabel: gatewayData.Name, - consts.RemoteGatewayNsLabel: gatewayData.Namespace, - consts.RemoteClusterNameLabel: rcsw.clusterName, + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, } services, err := rcsw.localAPIClient.Svc().Lister().List(labels.Set(matchLabels).AsSelector()) @@ -934,33 +517,12 @@ func (rcsw *RemoteClusterServiceWatcher) mirroredServicesForGateway(gatewayData return services, nil } -func (rcsw *RemoteClusterServiceWatcher) endpointsForGateway(gatewayData *gatewayMetadata) ([]*corev1.Endpoints, error) { - matchLabels := map[string]string{ - consts.MirroredResourceLabel: "true", - consts.RemoteGatewayNameLabel: gatewayData.Name, - consts.RemoteGatewayNsLabel: gatewayData.Namespace, - consts.RemoteClusterNameLabel: rcsw.clusterName, - } - - endpoints, err := rcsw.localAPIClient.Endpoint().Lister().List(labels.Set(matchLabels).AsSelector()) - if err != nil { - return nil, err - } - return endpoints, nil -} - func (rcsw *RemoteClusterServiceWatcher) handleOnDelete(service *corev1.Service) { - if isMirroredService(service.Annotations) { + if isExportedService(service.Annotations) { rcsw.eventsQueue.Add(&RemoteServiceDeleted{ Name: service.Name, Namespace: service.Namespace, }) - } else if isGateway(service.Annotations) { - rcsw.eventsQueue.Add(&RemoteGatewayDeleted{ - gatewayData: gatewayMetadata{ - Name: service.Name, - Namespace: service.Namespace, - }}) } else { rcsw.log.Infof("Skipping OnDelete for service %s", service) } @@ 
-990,12 +552,6 @@ func (rcsw *RemoteClusterServiceWatcher) processNextEvent() (bool, interface{}, err = rcsw.handleRemoteServiceUpdated(ev) case *RemoteServiceDeleted: err = rcsw.handleRemoteServiceDeleted(ev) - case *RemoteGatewayUpdated: - err = rcsw.handleRemoteGatewayUpdated(ev) - case *RemoteGatewayDeleted: - err = rcsw.handleRemoteGatewayDeleted(ev) - case *RemoteGatewayCreated: - err = rcsw.handleRemoteGatewayCreated(ev) case *ClusterUnregistered: err = rcsw.cleanupMirroredResources() case *OprhanedServicesGcTriggered: @@ -1080,6 +636,11 @@ func (rcsw *RemoteClusterServiceWatcher) Start() error { ) go rcsw.processEvents() + // We need to issue a RepairEndpoints immediately to populate the gateway + // mirror endpoints. + ev := RepairEndpoints{} + rcsw.eventsQueue.Add(&ev) + go func() { ticker := time.NewTicker(rcsw.repairPeriod) for { @@ -1105,126 +666,117 @@ func (rcsw *RemoteClusterServiceWatcher) Stop(cleanupState bool) { rcsw.eventsQueue.ShutDown() } -func extractPort(port []corev1.ServicePort, portName string) (uint32, error) { - for _, p := range port { - if p.Name == portName { - return uint32(p.Port), nil +func (rcsw *RemoteClusterServiceWatcher) resolveGatewayAddress() []corev1.EndpointAddress { + var gatewayEndpoints []corev1.EndpointAddress + for _, addr := range strings.Split(rcsw.link.GatewayAddress, ",") { + resolved := addr + ipAddr, err := net.ResolveIPAddr("ip", addr) + if err == nil { + resolved = ipAddr.String() } + gatewayEndpoints = append(gatewayEndpoints, corev1.EndpointAddress{ + IP: resolved, + }) } - return 0, fmt.Errorf("could not find port with name %s", portName) + return gatewayEndpoints } -func extractProbeConfig(gateway *corev1.Service) (*ProbeConfig, error) { - probePath := gateway.Annotations[consts.GatewayProbePath] +func (rcsw *RemoteClusterServiceWatcher) repairEndpoints() { + endpointRepairCounter.With(prometheus.Labels{ + gatewayClusterName: rcsw.link.TargetClusterName, + }).Inc() - probePort, err := 
extractPort(gateway.Spec.Ports, consts.ProbePortName) + // Create or update gateway mirror endpoints. + gatewayMirrorName := fmt.Sprintf("probe-gateway-%s", rcsw.link.TargetClusterName) - if err != nil { - return nil, err + gatewayMirrorEndpoints := &corev1.Endpoints{ + ObjectMeta: metav1.ObjectMeta{ + Name: gatewayMirrorName, + Namespace: rcsw.serviceMirrorNamespace, + Labels: map[string]string{ + consts.RemoteClusterNameLabel: rcsw.link.TargetClusterName, + }, + Annotations: map[string]string{ + consts.RemoteGatewayIdentity: rcsw.link.GatewayIdentity, + }, + }, + Subsets: []corev1.EndpointSubset{ + { + Addresses: rcsw.resolveGatewayAddress(), + Ports: []corev1.EndpointPort{ + { + Name: "mc-probe", + Port: int32(rcsw.link.ProbeSpec.Port), + Protocol: "TCP", + }, + }, + }, + }, } - probePeriod, err := strconv.ParseUint(gateway.Annotations[consts.GatewayProbePeriod], 10, 32) + err := rcsw.createOrUpdateEndpoints(gatewayMirrorEndpoints) if err != nil { - return nil, err + rcsw.log.Errorf("Failed to create/update gateway mirror endpoints: %s", err) } - if probePath == "" { - return nil, errors.New("probe path is empty") + // Repair mirror service endpoints. 
+ mirrorServices, err := rcsw.getMirrorServices() + if err != nil { + rcsw.log.Errorf("Failed to list mirror services: %s", err) } + for _, svc := range mirrorServices { + updatedService := svc.DeepCopy() - return &ProbeConfig{ - path: probePath, - port: probePort, - periodInSeconds: uint32(probePeriod), - }, nil -} + endpoints, err := rcsw.localAPIClient.Endpoint().Lister().Endpoints(svc.Namespace).Get(svc.Name) + if err != nil { + rcsw.log.Errorf("Could not get endpoints: %s", err) + continue + } -func (rcsw *RemoteClusterServiceWatcher) extractGatewaySpec(gateway *corev1.Service) (*GatewaySpec, error) { - incomingPort, err := extractPort(gateway.Spec.Ports, consts.GatewayPortName) + updatedEndpoints := endpoints.DeepCopy() + updatedEndpoints.Subsets = []corev1.EndpointSubset{ + { + Addresses: rcsw.resolveGatewayAddress(), + Ports: rcsw.getEndpointsPorts(updatedService), + }, + } - if err != nil { - return nil, err - } + if updatedEndpoints.Annotations == nil { + updatedEndpoints.Annotations = make(map[string]string) + } + updatedEndpoints.Annotations[consts.RemoteGatewayIdentity] = rcsw.link.GatewayIdentity - var gatewayEndpoints []corev1.EndpointAddress - for _, ingress := range gateway.Status.LoadBalancer.Ingress { - ip := ingress.IP - if ip == "" { - ipAddr, err := net.ResolveIPAddr("ip", ingress.Hostname) - if err != nil { - return nil, err - } - ip = ipAddr.String() + _, err = rcsw.localAPIClient.Client.CoreV1().Services(updatedService.Namespace).Update(updatedService) + if err != nil { + rcsw.log.Error(err) + continue } - gatewayEndpoints = append(gatewayEndpoints, corev1.EndpointAddress{ - IP: ip, - }) - } - gatewayIdentity := gateway.Annotations[consts.GatewayIdentity] - probeConfig, err := extractProbeConfig(gateway) - if err != nil { - return nil, fmt.Errorf("could not parse probe config for gateway: %s/%s: %s", gateway.Namespace, gateway.Name, err) + _, err = 
rcsw.localAPIClient.Client.CoreV1().Endpoints(updatedService.Namespace).Update(updatedEndpoints) + if err != nil { + rcsw.log.Error(err) + } } - - return &GatewaySpec{ - clusterName: rcsw.clusterName, - gatewayName: gateway.Name, - gatewayNamespace: gateway.Namespace, - addresses: gatewayEndpoints, - incomingPort: incomingPort, - resourceVersion: gateway.ResourceVersion, - identity: gatewayIdentity, - ProbeConfig: probeConfig, - }, nil } -// repairEndpoints will look up all remote gateways and update the endpoints -// of all local mirror services for those gateways. Note that we ignore resource -// version and update ALL affected endpoints objects. This is because the -// remote gateway may be exposed as a DNS hostname and we want to re-resolve -// this DNS name in case its IP address has changed. By invoking repairEndpoints -// frequently, we can pick up any DNS changes fairly quickly. -// TODO: Replace this with a more robust solution that does not rely on -// frequently repairing endpoints to pick up DNS updates. -func (rcsw *RemoteClusterServiceWatcher) repairEndpoints() { - svcs, err := rcsw.remoteAPIClient.Svc().Lister().Services(metav1.NamespaceAll).List(labels.Everything()) +func (rcsw *RemoteClusterServiceWatcher) createOrUpdateEndpoints(ep *corev1.Endpoints) error { + _, err := rcsw.localAPIClient.Client.CoreV1().Endpoints(ep.Namespace).Get(ep.Name, metav1.GetOptions{}) if err != nil { - rcsw.log.Errorf("failed to list remote gateways: %s", err) - return - } - rcsw.log.Errorf("During repair, found %d remote services", len(svcs)) - for _, svc := range svcs { - if isGateway(svc.Annotations) { - - // We omit a resource version here because we want to get ALL mirror - // services for this gateway. 
- affectedServices, err := rcsw.affectedMirroredServicesForGatewayUpdate(&gatewayMetadata{ - Name: svc.Name, - Namespace: svc.Namespace, - }, "") - if err != nil { - rcsw.log.Errorf("failed to determine mirror services for gateway %s.%s: %s", svc.Name, svc.Namespace, err) - continue - } - - spec, err := rcsw.extractGatewaySpec(svc) + if kerrors.IsNotFound(err) { + // Does not exist so we should create it. + _, err = rcsw.localAPIClient.Client.CoreV1().Endpoints(ep.Namespace).Create(ep) if err != nil { - rcsw.log.Errorf("failed to extract spec for gateway %s.%s: %s", svc.Name, svc.Namespace, err) - continue + return err } - - endpointRepairCounter.With(prometheus.Labels{ - gatewayNameLabel: svc.Name, - gatewayNamespaceLabel: svc.Namespace, - gatewayClusterName: rcsw.clusterName, - }).Inc() - - rcsw.log.Errorf("adding gateway update event %s with %d mirrro services", svc.Name, len(affectedServices)) - rcsw.eventsQueue.Add(&RemoteGatewayUpdated{ - gatewaySpec: *spec, - affectedServices: affectedServices, - }) + } else { + return err } } + // Exists so we should update it. 
+ _, err = rcsw.localAPIClient.Client.CoreV1().Endpoints(ep.Namespace).Update(ep) + if err != nil { + return err + } + + return nil } diff --git a/controller/cmd/service-mirror/cluster_watcher_mirroring_test.go b/controller/cmd/service-mirror/cluster_watcher_mirroring_test.go index ce434f8e29fe5..758b7b7bfb1e2 100644 --- a/controller/cmd/service-mirror/cluster_watcher_mirroring_test.go +++ b/controller/cmd/service-mirror/cluster_watcher_mirroring_test.go @@ -2,12 +2,10 @@ package servicemirror import ( "fmt" - "net" "reflect" "testing" corev1 "k8s.io/api/core/v1" - v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/util/workqueue" ) @@ -53,15 +51,8 @@ func (tc *mirroringTestCase) run(t *testing.T) { } if tc.expectedLocalEndpoints == nil { - // ensure the are no local endpoints - endpoints, err := localAPI.Client.CoreV1().Endpoints(corev1.NamespaceAll).List(metav1.ListOptions{}) - if err != nil { - t.Fatal(err) - } - if len(endpoints.Items) > 0 { - t.Fatalf("Was expecting no local endpoints but instead found %d", len(endpoints.Items)) - - } + // In a real Kubernetes cluster, deleting the service is sufficient + // to delete the endpoints. 
} else { for _, expected := range tc.expectedLocalEndpoints { actual, err := localAPI.Client.CoreV1().Endpoints(expected.Namespace).Get(expected.Name, metav1.GetOptions{}) @@ -94,48 +85,25 @@ func (tc *mirroringTestCase) run(t *testing.T) { func TestRemoteServiceCreatedMirroring(t *testing.T) { for _, tt := range []mirroringTestCase{ { - description: "create service and endpoints when gateway cannot be resolved", - environment: serviceCreateWithMissingGateway, - expectedLocalServices: []*corev1.Service{ - mirroredService("service-one-remote", "ns1", "missing-gateway", "missing-namespace", "111", "", nil), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("service-one-remote", "ns1", "missing-gateway", "missing-namespace", "", "", nil), - }, + description: "does not create service and endpoints when gateway address is missing", + environment: serviceCreateWithMissingGateway, + expectedLocalServices: []*corev1.Service{}, + expectedLocalEndpoints: []*corev1.Endpoints{}, }, { - description: "create service and endpoints without subsets when gateway spec is wrong", - environment: createServiceWrongGatewaySpec, - expectedLocalServices: []*corev1.Service{ - mirroredService("service-one-remote", "ns1", "existing-gateway", "existing-namespace", "111", "", - []corev1.ServicePort{ - { - Name: "port1", - Protocol: "TCP", - Port: 555, - }, - { - Name: "port2", - Protocol: "TCP", - Port: 666, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("service-one-remote", "ns1", "existing-gateway", "existing-namespace", "", "", nil), - }, + description: "does not create service and endpoints when gateway spec is wrong", + environment: createServiceWrongGatewaySpec, + expectedLocalServices: []*corev1.Service{}, + expectedLocalEndpoints: []*corev1.Endpoints{}, }, { description: "create service and endpoints when gateway can be resolved", environment: createServiceOkeGatewaySpec, expectedLocalServices: []*corev1.Service{ - mirroredService( + 
mirrorService( "service-one-remote", "ns1", - "existing-gateway", - "existing-namespace", "111", - "222", []corev1.ServicePort{ { Name: "port1", @@ -150,7 +118,7 @@ func TestRemoteServiceCreatedMirroring(t *testing.T) { }), }, expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("service-one-remote", "ns1", "existing-gateway", "existing-namespace", "192.0.2.127", "gateway-identity", []corev1.EndpointPort{ + endpoints("service-one-remote", "ns1", "192.0.2.127", "gateway-identity", []corev1.EndpointPort{ { Name: "port1", Port: 888, @@ -184,50 +152,11 @@ func TestRemoteServiceDeletedMirroring(t *testing.T) { func TestRemoteServiceUpdatedMirroring(t *testing.T) { for _, tt := range []mirroringTestCase{ - { - description: "update to new gateway", - environment: updateServiceToNewGateway, - expectedLocalServices: []*corev1.Service{ - mirroredService( - "test-service-remote", - "test-namespace", - "gateway-new", - "gateway-ns", - "currentServiceResVersion", - "currentGatewayResVersion", - []corev1.ServicePort{ - { - Name: "port1", - Protocol: "TCP", - Port: 111, - }, - { - Name: "port3", - Protocol: "TCP", - Port: 333, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-remote", "test-namespace", "gateway-new", "gateway-ns", "0.0.0.0", "", []corev1.EndpointPort{ - { - Name: "port1", - Port: 999, - Protocol: "TCP", - }, - { - Name: "port2", - Port: 999, - Protocol: "TCP", - }, - }), - }, - }, { description: "updates service ports on both service and endpoints", environment: updateServiceWithChangedPorts, expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "currentServiceResVersion", "currentGatewayResVersion", + mirrorService("test-service-remote", "test-namespace", "currentServiceResVersion", []corev1.ServicePort{ { Name: "port1", @@ -243,7 +172,7 @@ func TestRemoteServiceUpdatedMirroring(t *testing.T) { }, expectedLocalEndpoints: []*corev1.Endpoints{ - 
endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "192.0.2.127", "", []corev1.EndpointPort{ + endpoints("test-service-remote", "test-namespace", "192.0.2.127", "gateway-identity", []corev1.EndpointPort{ { Name: "port1", Port: 888, @@ -263,189 +192,6 @@ func TestRemoteServiceUpdatedMirroring(t *testing.T) { } } -func TestRemoteGatewayUpdatedMirroring(t *testing.T) { - - localhostIP, err := net.ResolveIPAddr("ip", "localhost") - if err != nil { - t.Fatal(err) - } - - for _, tt := range []mirroringTestCase{ - { - description: "endpoints ports are updated on gateway change", - environment: remoteGatewayUpdated, - expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "currentGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "currentGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 999, - Protocol: "TCP", - }}), - endpoints("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 999, - Protocol: "TCP", - }}), - }, - }, - - { - description: "endpoints addresses are updated on gateway change", - environment: gatewayAddressChanged, - expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "currentGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", 
"currentGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.1", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - endpoints("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.1", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), - }, - }, - - { - description: "identity is updated on gateway change", - environment: gatewayIdentityChanged, - expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "currentGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "currentGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "new-identity", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - endpoints("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "new-identity", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), - }, - }, - { - description: "gateway uses hostname address", - environment: remoteGatewayUpdatedWithHostnameAddress, - expectedEventsInQueue: []interface{}{ - &RemoteGatewayUpdated{ - gatewaySpec: GatewaySpec{ - gatewayName: "gateway", - gatewayNamespace: "gateway-ns", - clusterName: "remote", - addresses: []corev1.EndpointAddress{{IP: localhostIP.String()}}, - incomingPort: 999, - resourceVersion: "currentGatewayResVersion", - 
ProbeConfig: &ProbeConfig{ - path: defaultProbePath, - port: defaultProbePort, - periodInSeconds: defaultProbePeriod, - }, - }, - affectedServices: []*v1.Service{}, - }, - }, - }, - } { - tc := tt // pin - tc.run(t) - } -} -func TestRemoteGatewayDeletedMirroring(t *testing.T) { - for _, tt := range []mirroringTestCase{ - { - description: "removes endpoint subsets when gateway is deleted", - environment: gatewayDeleted, - expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "", nil), - endpoints("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "", nil), - }, - }, - } { - tc := tt // pin - tc.run(t) - } -} - func TestClusterUnregisteredMirroring(t *testing.T) { for _, tt := range []mirroringTestCase{ { @@ -464,11 +210,11 @@ func TestGcOrphanedServicesMirroring(t *testing.T) { description: "deletes mirrored resources that are no longer present on the remote cluster", environment: gcTriggered, expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "", nil), + mirrorService("test-service-1-remote", "test-namespace", "", nil), }, expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-1-remote", "test-namespace", "", "", "", "", nil), + endpoints("test-service-1-remote", "test-namespace", "", "", nil), }, }, } { @@ -490,39 +236,31 @@ func onAddOrUpdateTestCases(isAdd bool) []mirroringTestCase { environment: 
onAddOrUpdateExportedSvc(isAdd), expectedEventsInQueue: []interface{}{&RemoteServiceCreated{ service: remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "resVersion", nil), - gatewayData: gatewayMetadata{ - Name: "gateway", - Namespace: "gateway-ns", - }, }}, }, { description: fmt.Sprintf("enqueue a RemoteServiceUpdated event if this is a service that we have already mirrored and its res version is different (%s)", testType), environment: onAddOrUpdateRemoteServiceUpdated(isAdd), expectedEventsInQueue: []interface{}{&RemoteServiceUpdated{ - localService: mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "pastResourceVersion", "gatewayResVersion", nil), - localEndpoints: endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + localService: mirrorService("test-service-remote", "test-namespace", "pastResourceVersion", nil), + localEndpoints: endpoints("test-service-remote", "test-namespace", "0.0.0.0", "", nil), remoteUpdate: remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "currentResVersion", nil), - gatewayData: gatewayMetadata{ - Name: "gateway", - Namespace: "gateway-ns", - }, }}, expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "pastResourceVersion", "gatewayResVersion", nil), + mirrorService("test-service-remote", "test-namespace", "pastResourceVersion", nil), }, expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + endpoints("test-service-remote", "test-namespace", "0.0.0.0", "", nil), }, }, { description: fmt.Sprintf("not enqueue any events as this update does not really tell us anything new (res version is the same...) 
(%s)", testType), environment: onAddOrUpdateSameResVersion(isAdd), expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "currentResVersion", "gatewayResVersion", nil), + mirrorService("test-service-remote", "test-namespace", "currentResVersion", nil), }, expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + endpoints("test-service-remote", "test-namespace", "0.0.0.0", "", nil), }, }, { @@ -534,10 +272,10 @@ func onAddOrUpdateTestCases(isAdd bool) []mirroringTestCase { }}, expectedLocalServices: []*corev1.Service{ - mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "currentResVersion", "gatewayResVersion", nil), + mirrorService("test-service-remote", "test-namespace", "currentResVersion", nil), }, expectedLocalEndpoints: []*corev1.Endpoints{ - endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + endpoints("test-service-remote", "test-namespace", "0.0.0.0", "", nil), }, }, } diff --git a/controller/cmd/service-mirror/cluster_watcher_test_util.go b/controller/cmd/service-mirror/cluster_watcher_test_util.go index b59dc0b61b2ed..a1f9ff24d630e 100644 --- a/controller/cmd/service-mirror/cluster_watcher_test_util.go +++ b/controller/cmd/service-mirror/cluster_watcher_test_util.go @@ -5,10 +5,12 @@ import ( "log" "reflect" "strings" + "time" "github.com/ghodss/yaml" "github.com/linkerd/linkerd2/controller/k8s" consts "github.com/linkerd/linkerd2/pkg/k8s" + "github.com/linkerd/linkerd2/pkg/multicluster" logging "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -23,10 +25,17 @@ const ( defaultProbePeriod = 60 ) +var defaultProbeSpec = multicluster.ProbeSpec{ + Path: defaultProbePath, + Port: defaultProbePort, + Period: time.Duration(defaultProbePeriod) * time.Second, +} + type 
testEnvironment struct { events []interface{} remoteResources []string localResources []string + link multicluster.Link } func (te *testEnvironment) runEnvironment(watcherQueue workqueue.RateLimitingInterface) (*k8s.API, error) { @@ -44,8 +53,7 @@ func (te *testEnvironment) runEnvironment(watcherQueue workqueue.RateLimitingInt localAPI.Sync(nil) watcher := RemoteClusterServiceWatcher{ - clusterName: clusterName, - clusterDomain: clusterDomain, + link: &te.link, remoteAPIClient: remoteAPI, localAPIClient: localAPI, stopper: nil, @@ -72,12 +80,12 @@ var serviceCreateWithMissingGateway = &testEnvironment{ events: []interface{}{ &RemoteServiceCreated{ service: remoteService("service-one", "ns1", "missing-gateway", "missing-namespace", "111", nil), - gatewayData: gatewayMetadata{ - Name: "missing-gateway", - Namespace: "missing-namespace", - }, }, }, + link: multicluster.Link{ + TargetClusterName: clusterName, + GatewayAddress: "", + }, } var createServiceWrongGatewaySpec = &testEnvironment{ @@ -96,15 +104,14 @@ var createServiceWrongGatewaySpec = &testEnvironment{ Port: 666, }, }), - - gatewayData: gatewayMetadata{ - Name: "existing-gateway", - Namespace: "existing-namespace", - }, }, }, remoteResources: []string{ - gatewayAsYaml("existing-gateway", "existing-namespace", "222", "192.0.2.127", "", "mc-wrong", 888, "", 111, "/path", 666), + gatewayAsYaml("existing-gateway", "existing-namespace", "222", "192.0.2.127", "mc-wrong", 888, "", 111, "/path", 666), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, + GatewayAddress: "??????", }, } @@ -123,14 +130,18 @@ var createServiceOkeGatewaySpec = &testEnvironment{ Port: 666, }, }), - gatewayData: gatewayMetadata{ - Name: "existing-gateway", - Namespace: "existing-namespace", - }, }, }, remoteResources: []string{ - gatewayAsYaml("existing-gateway", "existing-namespace", "222", "192.0.2.127", "", "mc-gateway", 888, "gateway-identity", defaultProbePort, defaultProbePath, defaultProbePeriod), + 
gatewayAsYaml("existing-gateway", "existing-namespace", "222", "192.0.2.127", "mc-gateway", 888, "gateway-identity", defaultProbePort, defaultProbePath, defaultProbePeriod), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } @@ -142,84 +153,16 @@ var deleteMirroredService = &testEnvironment{ }, }, localResources: []string{ - mirroredServiceAsYaml("test-service-remote-to-delete-remote", "test-namespace-to-delete", "", "", "", "", nil), - endpointsAsYaml("test-service-remote-to-delete-remote", "test-namespace-to-delete", "", "", "", "gateway-identity", nil), - }, -} - -var updateServiceToNewGateway = &testEnvironment{ - events: []interface{}{ - &RemoteServiceUpdated{ - remoteUpdate: remoteService("test-service", "test-namespace", "gateway-new", "gateway-ns", "currentServiceResVersion", []corev1.ServicePort{ - { - Name: "port1", - Protocol: "TCP", - Port: 111, - }, - { - Name: "port2", - Protocol: "TCP", - Port: 222, - }, - }), - localService: mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "pastServiceResVersion", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "port1", - Protocol: "TCP", - Port: 111, - }, - { - Name: "port2", - Protocol: "TCP", - Port: 222, - }, - }), - localEndpoints: endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "192.0.2.127", "", []corev1.EndpointPort{ - { - Name: "port1", - Port: 888, - Protocol: "TCP", - }, - { - Name: "port2", - Port: 888, - Protocol: "TCP", - }, - }), - gatewayData: gatewayMetadata{ - Name: "gateway-new", - Namespace: "gateway-ns", - }, - }, + mirrorServiceAsYaml("test-service-remote-to-delete-remote", "test-namespace-to-delete", "", nil), + endpointsAsYaml("test-service-remote-to-delete-remote", "test-namespace-to-delete", "", "gateway-identity", nil), }, - remoteResources: 
[]string{ - gatewayAsYaml("gateway-new", "gateway-ns", "currentGatewayResVersion", "0.0.0.0", "", "mc-gateway", 999, "", defaultProbePort, defaultProbePath, defaultProbePeriod), - }, - localResources: []string{ - mirroredServiceAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "past", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "port1", - Protocol: "TCP", - Port: 111, - }, - { - Name: "port2", - Protocol: "TCP", - Port: 222, - }, - }), - endpointsAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "192.0.2.127", "", []corev1.EndpointPort{ - { - Name: "port1", - Port: 888, - Protocol: "TCP", - }, - { - Name: "port2", - Port: 888, - Protocol: "TCP", - }, - }), + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } @@ -238,7 +181,7 @@ var updateServiceWithChangedPorts = &testEnvironment{ Port: 333, }, }), - localService: mirroredService("test-service-remote", "test-namespace", "gateway", "gateway-ns", "pastServiceResVersion", "pastGatewayResVersion", []corev1.ServicePort{ + localService: mirrorService("test-service-remote", "test-namespace", "pastServiceResVersion", []corev1.ServicePort{ { Name: "port1", Protocol: "TCP", @@ -250,7 +193,7 @@ var updateServiceWithChangedPorts = &testEnvironment{ Port: 222, }, }), - localEndpoints: endpoints("test-service-remote", "test-namespace", "gateway", "gateway-ns", "192.0.2.127", "", []corev1.EndpointPort{ + localEndpoints: endpoints("test-service-remote", "test-namespace", "192.0.2.127", "", []corev1.EndpointPort{ { Name: "port1", Port: 888, @@ -262,17 +205,13 @@ var updateServiceWithChangedPorts = &testEnvironment{ Protocol: "TCP", }, }), - gatewayData: gatewayMetadata{ - Name: "gateway", - Namespace: "gateway-ns", - }, }, }, remoteResources: []string{ - gatewayAsYaml("gateway", "gateway-ns", 
"currentGatewayResVersion", "192.0.2.127", "", "mc-gateway", 888, "", defaultProbePort, defaultProbePath, defaultProbePeriod), + gatewayAsYaml("gateway", "gateway-ns", "currentGatewayResVersion", "192.0.2.127", "mc-gateway", 888, "", defaultProbePort, defaultProbePath, defaultProbePeriod), }, localResources: []string{ - mirroredServiceAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "past", "pastGatewayResVersion", []corev1.ServicePort{ + mirrorServiceAsYaml("test-service-remote", "test-namespace", "past", []corev1.ServicePort{ { Name: "port1", Protocol: "TCP", @@ -289,7 +228,7 @@ var updateServiceWithChangedPorts = &testEnvironment{ Port: 333, }, }), - endpointsAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "192.0.2.127", "", []corev1.EndpointPort{ + endpointsAsYaml("test-service-remote", "test-namespace", "192.0.2.127", "", []corev1.EndpointPort{ { Name: "port1", Port: 888, @@ -307,262 +246,13 @@ var updateServiceWithChangedPorts = &testEnvironment{ }, }), }, -} - -var remoteGatewayUpdated = &testEnvironment{ - events: []interface{}{ - &RemoteGatewayUpdated{ - gatewaySpec: GatewaySpec{ - gatewayName: "gateway", - gatewayNamespace: "gateway-ns", - clusterName: "remote", - addresses: []corev1.EndpointAddress{{IP: "0.0.0.0"}}, - incomingPort: 999, - resourceVersion: "currentGatewayResVersion", - ProbeConfig: &ProbeConfig{ - path: defaultProbePath, - port: defaultProbePort, - periodInSeconds: defaultProbePeriod, - }, - }, - affectedServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - }, - }, - localResources: []string{ - 
mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - endpointsAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - endpointsAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), - }, -} - -var remoteGatewayUpdatedWithHostnameAddress = &testEnvironment{ - events: []interface{}{ - &RepairEndpoints{}, - }, - remoteResources: []string{ - gatewayAsYaml("gateway", "gateway-ns", "currentGatewayResVersion", "", "localhost", "mc-gateway", 999, "", defaultProbePort, defaultProbePath, defaultProbePeriod), - }, -} - -var gatewayAddressChanged = &testEnvironment{ - events: []interface{}{ - &RemoteGatewayUpdated{ - gatewaySpec: GatewaySpec{ - gatewayName: "gateway", - gatewayNamespace: "gateway-ns", - clusterName: "some-cluster", - addresses: []corev1.EndpointAddress{{IP: "0.0.0.1"}}, - incomingPort: 888, - resourceVersion: "currentGatewayResVersion", - ProbeConfig: &ProbeConfig{ - path: "/p", - port: 1, - periodInSeconds: 222, - }, - }, - affectedServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - 
Port: 8082, - }, - }), - }, - }, - }, - localResources: []string{ - mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - endpointsAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - endpointsAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), - }, -} - -var gatewayIdentityChanged = &testEnvironment{ - events: []interface{}{ - &RemoteGatewayUpdated{ - gatewaySpec: GatewaySpec{ - gatewayName: "gateway", - gatewayNamespace: "gateway-ns", - clusterName: clusterName, - addresses: []corev1.EndpointAddress{{IP: "0.0.0.0"}}, - incomingPort: 888, - resourceVersion: "currentGatewayResVersion", - identity: "new-identity", - ProbeConfig: &ProbeConfig{ - path: defaultProbePath, - port: defaultProbePort, - periodInSeconds: defaultProbePeriod, - }, - }, - affectedServices: []*corev1.Service{ - mirroredService("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - mirroredService("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - }, - }, - }, - localResources: []string{ - mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", 
"pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - endpointsAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - endpointsAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), - }, -} - -var gatewayDeleted = &testEnvironment{ - events: []interface{}{ - &RemoteGatewayDeleted{ - gatewayData: gatewayMetadata{ - Name: "gateway", - Namespace: "gateway-ns", - }, - }, - }, - localResources: []string{ - mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", - []corev1.ServicePort{ - { - Name: "svc-1-port", - Protocol: "TCP", - Port: 8081, - }, - }), - endpointsAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-1-port", - Port: 888, - Protocol: "TCP", - }}), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "", "pastGatewayResVersion", []corev1.ServicePort{ - { - Name: "svc-2-port", - Protocol: "TCP", - Port: 8082, - }, - }), - endpointsAsYaml("test-service-2-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", - []corev1.EndpointPort{ - { - Name: "svc-2-port", - Port: 888, - Protocol: "TCP", - }}), + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } @@ -571,10 +261,13 @@ var 
clusterUnregistered = &testEnvironment{ &ClusterUnregistered{}, }, localResources: []string{ - mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "", "", "", "", nil), - endpointsAsYaml("test-service-1-remote", "test-namespace", "", "", "", "", nil), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "", "", "", "", nil), - endpointsAsYaml("test-service-2-remote", "test-namespace", "", "", "", "", nil), + mirrorServiceAsYaml("test-service-1-remote", "test-namespace", "", nil), + endpointsAsYaml("test-service-1-remote", "test-namespace", "", "", nil), + mirrorServiceAsYaml("test-service-2-remote", "test-namespace", "", nil), + endpointsAsYaml("test-service-2-remote", "test-namespace", "", "", nil), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, }, } @@ -583,14 +276,17 @@ var gcTriggered = &testEnvironment{ &OprhanedServicesGcTriggered{}, }, localResources: []string{ - mirroredServiceAsYaml("test-service-1-remote", "test-namespace", "gateway", "gateway-ns", "", "", nil), - endpointsAsYaml("test-service-1-remote", "test-namespace", "", "", "", "", nil), - mirroredServiceAsYaml("test-service-2-remote", "test-namespace", "", "", "", "", nil), - endpointsAsYaml("test-service-2-remote", "test-namespace", "", "", "", "", nil), + mirrorServiceAsYaml("test-service-1-remote", "test-namespace", "", nil), + endpointsAsYaml("test-service-1-remote", "test-namespace", "", "", nil), + mirrorServiceAsYaml("test-service-2-remote", "test-namespace", "", nil), + endpointsAsYaml("test-service-2-remote", "test-namespace", "", "", nil), }, remoteResources: []string{ remoteServiceAsYaml("test-service-1", "test-namespace", "gateway", "gateway-ns", "", nil), }, + link: multicluster.Link{ + TargetClusterName: clusterName, + }, } func onAddOrUpdateExportedSvc(isAdd bool) *testEnvironment { @@ -598,6 +294,14 @@ func onAddOrUpdateExportedSvc(isAdd bool) *testEnvironment { events: []interface{}{ onAddOrUpdateEvent(isAdd, 
remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "resVersion", nil)), }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, + }, } } @@ -608,8 +312,16 @@ func onAddOrUpdateRemoteServiceUpdated(isAdd bool) *testEnvironment { onAddOrUpdateEvent(isAdd, remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "currentResVersion", nil)), }, localResources: []string{ - mirroredServiceAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "pastResourceVersion", "gatewayResVersion", nil), - endpointsAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + mirrorServiceAsYaml("test-service-remote", "test-namespace", "pastResourceVersion", nil), + endpointsAsYaml("test-service-remote", "test-namespace", "0.0.0.0", "", nil), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } } @@ -620,8 +332,16 @@ func onAddOrUpdateSameResVersion(isAdd bool) *testEnvironment { onAddOrUpdateEvent(isAdd, remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "currentResVersion", nil)), }, localResources: []string{ - mirroredServiceAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "currentResVersion", "gatewayResVersion", nil), - endpointsAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + mirrorServiceAsYaml("test-service-remote", "test-namespace", "currentResVersion", nil), + endpointsAsYaml("test-service-remote", "test-namespace", "0.0.0.0", "", nil), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + 
GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } } @@ -632,8 +352,16 @@ func serviceNotExportedAnymore(isAdd bool) *testEnvironment { onAddOrUpdateEvent(isAdd, remoteService("test-service", "test-namespace", "", "gateway-ns", "currentResVersion", nil)), }, localResources: []string{ - mirroredServiceAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "currentResVersion", "gatewayResVersion", nil), - endpointsAsYaml("test-service-remote", "test-namespace", "gateway", "gateway-ns", "0.0.0.0", "", nil), + mirrorServiceAsYaml("test-service-remote", "test-namespace", "currentResVersion", nil), + endpointsAsYaml("test-service-remote", "test-namespace", "0.0.0.0", "", nil), + }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, }, } } @@ -644,6 +372,14 @@ var onDeleteWithGatewayMetadata = &testEnvironment{ svc: remoteService("test-service", "test-namespace", "gateway", "gateway-ns", "currentResVersion", nil), }, }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, + }, } var onDeleteNoGatewayMetadata = &testEnvironment{ @@ -652,6 +388,14 @@ var onDeleteNoGatewayMetadata = &testEnvironment{ svc: remoteService("gateway", "test-namespace", "", "", "currentResVersion", nil), }, }, + link: multicluster.Link{ + TargetClusterName: clusterName, + TargetClusterDomain: clusterDomain, + GatewayIdentity: "gateway-identity", + GatewayAddress: "192.0.2.127", + GatewayPort: 888, + ProbeSpec: defaultProbeSpec, + }, } // the following tests ensure that onAdd, onUpdate and onDelete result in @@ -742,15 +486,11 @@ func remoteServiceAsYaml(name, namespace, 
gtwName, gtwNs, resourceVersion string return string(bytes) } -func mirroredService(name, namespace, gtwName, gtwNs, resourceVersion, gatewayResourceVersion string, ports []corev1.ServicePort) *corev1.Service { +func mirrorService(name, namespace, resourceVersion string, ports []corev1.ServicePort) *corev1.Service { annotations := make(map[string]string) annotations[consts.RemoteResourceVersionAnnotation] = resourceVersion annotations[consts.RemoteServiceFqName] = fmt.Sprintf("%s.%s.svc.cluster.local", strings.Replace(name, "-remote", "", 1), namespace) - if gatewayResourceVersion != "" { - annotations[consts.RemoteGatewayResourceVersionAnnotation] = gatewayResourceVersion - - } return &corev1.Service{ TypeMeta: metav1.TypeMeta{ Kind: "Service", @@ -760,10 +500,8 @@ func mirroredService(name, namespace, gtwName, gtwNs, resourceVersion, gatewayRe Name: name, Namespace: namespace, Labels: map[string]string{ - consts.RemoteClusterNameLabel: "remote", + consts.RemoteClusterNameLabel: clusterName, consts.MirroredResourceLabel: "true", - consts.RemoteGatewayNameLabel: gtwName, - consts.RemoteGatewayNsLabel: gtwNs, }, Annotations: annotations, }, @@ -773,8 +511,8 @@ func mirroredService(name, namespace, gtwName, gtwNs, resourceVersion, gatewayRe } } -func mirroredServiceAsYaml(name, namespace, gtwName, gtwNs, resourceVersion, gatewayResourceVersion string, ports []corev1.ServicePort) string { - svc := mirroredService(name, namespace, gtwName, gtwNs, resourceVersion, gatewayResourceVersion, ports) +func mirrorServiceAsYaml(name, namespace, resourceVersion string, ports []corev1.ServicePort) string { + svc := mirrorService(name, namespace, resourceVersion, ports) bytes, err := yaml.Marshal(svc) if err != nil { @@ -825,8 +563,8 @@ func gateway(name, namespace, resourceVersion, ip, hostname, portName string, po return &svc } -func gatewayAsYaml(name, namespace, resourceVersion, ip, hostname, portName string, port int32, identity string, probePort int32, probePath string, 
probePeriod int) string { - gtw := gateway(name, namespace, resourceVersion, ip, hostname, portName, port, identity, probePort, probePath, probePeriod) +func gatewayAsYaml(name, namespace, resourceVersion, ip, portName string, port int32, identity string, probePort int32, probePath string, probePeriod int) string { + gtw := gateway(name, namespace, resourceVersion, ip, "", portName, port, identity, probePort, probePath, probePeriod) bytes, err := yaml.Marshal(gtw) if err != nil { @@ -835,7 +573,7 @@ func gatewayAsYaml(name, namespace, resourceVersion, ip, hostname, portName stri return string(bytes) } -func endpoints(name, namespace, gtwName, gtwNs, gatewayIP string, gatewayIdentity string, ports []corev1.EndpointPort) *corev1.Endpoints { +func endpoints(name, namespace, gatewayIP string, gatewayIdentity string, ports []corev1.EndpointPort) *corev1.Endpoints { var subsets []corev1.EndpointSubset if gatewayIP != "" { subsets = []corev1.EndpointSubset{ @@ -859,10 +597,8 @@ func endpoints(name, namespace, gtwName, gtwNs, gatewayIP string, gatewayIdentit Name: name, Namespace: namespace, Labels: map[string]string{ - consts.RemoteClusterNameLabel: "remote", + consts.RemoteClusterNameLabel: clusterName, consts.MirroredResourceLabel: "true", - consts.RemoteGatewayNameLabel: gtwName, - consts.RemoteGatewayNsLabel: gtwNs, }, Annotations: map[string]string{ consts.RemoteServiceFqName: fmt.Sprintf("%s.%s.svc.cluster.local", strings.Replace(name, "-remote", "", 1), namespace), @@ -878,8 +614,8 @@ func endpoints(name, namespace, gtwName, gtwNs, gatewayIP string, gatewayIdentit return endpoints } -func endpointsAsYaml(name, namespace, gtwName, gtwNs, gatewayIP, gatewayIdentity string, ports []corev1.EndpointPort) string { - ep := endpoints(name, namespace, gtwName, gtwNs, gatewayIP, gatewayIdentity, ports) +func endpointsAsYaml(name, namespace, gatewayIP, gatewayIdentity string, ports []corev1.EndpointPort) string { + ep := endpoints(name, namespace, gatewayIP, gatewayIdentity, 
ports) bytes, err := yaml.Marshal(ep) if err != nil { diff --git a/controller/cmd/service-mirror/config_watcher.go b/controller/cmd/service-mirror/config_watcher.go deleted file mode 100644 index c71543acad976..0000000000000 --- a/controller/cmd/service-mirror/config_watcher.go +++ /dev/null @@ -1,161 +0,0 @@ -package servicemirror - -import ( - "fmt" - "sync" - "time" - - "github.com/linkerd/linkerd2/controller/k8s" - log "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - "k8s.io/client-go/tools/cache" - "k8s.io/client-go/tools/clientcmd" - - consts "github.com/linkerd/linkerd2/pkg/k8s" - sm "github.com/linkerd/linkerd2/pkg/servicemirror" -) - -// RemoteClusterConfigWatcher watches for secrets of type MirrorSecretType -// and upon the detection of such secret created starts a RemoteClusterServiceWatcher -type RemoteClusterConfigWatcher struct { - serviceMirrorNamespace string - k8sAPI *k8s.API - clusterWatchers map[string]*RemoteClusterServiceWatcher - requeueLimit int - repairPeriod time.Duration - sync.RWMutex -} - -// NewRemoteClusterConfigWatcher Creates a new config watcher -func NewRemoteClusterConfigWatcher(serviceMirrorNamespace string, secretsInformer cache.SharedIndexInformer, k8sAPI *k8s.API, requeueLimit int, repairPeriod time.Duration) *RemoteClusterConfigWatcher { - rcw := &RemoteClusterConfigWatcher{ - serviceMirrorNamespace: serviceMirrorNamespace, - k8sAPI: k8sAPI, - clusterWatchers: map[string]*RemoteClusterServiceWatcher{}, - requeueLimit: requeueLimit, - repairPeriod: repairPeriod, - } - secretsInformer.AddEventHandler( - cache.FilteringResourceEventHandler{ - FilterFunc: func(obj interface{}) bool { - switch object := obj.(type) { - case *corev1.Secret: - return object.Type == consts.MirrorSecretType - - case cache.DeletedFinalStateUnknown: - if secret, ok := object.Obj.(*corev1.Secret); ok { - return secret.Type == consts.MirrorSecretType - } - return false - default: - return false - } - }, - - Handler: 
cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - secret := obj.(*corev1.Secret) - if err := rcw.registerRemoteCluster(secret); err != nil { - log.Errorf("Cannot register target cluster: %s", err) - } - }, - DeleteFunc: func(obj interface{}) { - secret, ok := obj.(*corev1.Secret) - if !ok { - tombstone, ok := obj.(cache.DeletedFinalStateUnknown) - if !ok { - log.Errorf("couldn't get object from DeletedFinalStateUnknown %#v", obj) - return - } - secret, ok = tombstone.Obj.(*corev1.Secret) - if !ok { - log.Errorf("DeletedFinalStateUnknown contained object that is not a Secret %#v", obj) - return - } - } - if err := rcw.unregisterRemoteCluster(secret, true); err != nil { - log.Errorf("Cannot unregister target cluster: %s", err) - } - }, - UpdateFunc: func(old, new interface{}) { - oldSecret := old.(*corev1.Secret) - newSecret := new.(*corev1.Secret) - - if oldSecret.ResourceVersion != newSecret.ResourceVersion { - if err := rcw.unregisterRemoteCluster(oldSecret, false); err != nil { - log.Errorf("Cannot unregister target cluster: %s", err) - return - } - - if err := rcw.registerRemoteCluster(newSecret); err != nil { - log.Errorf("Cannot register target cluster: %s", err) - } - - } - - //TODO: Handle update (it might be that the credentials have changed...) 
- }, - }, - }, - ) - return rcw -} - -// Stop Shuts down all created config and cluster watchers -func (rcw *RemoteClusterConfigWatcher) Stop() { - rcw.Lock() - defer rcw.Unlock() - for _, watcher := range rcw.clusterWatchers { - watcher.Stop(false) - } -} - -func (rcw *RemoteClusterConfigWatcher) registerRemoteCluster(secret *corev1.Secret) error { - config, err := sm.ParseRemoteClusterSecret(secret) - - if err != nil { - return err - } - - clientConfig, err := clientcmd.RESTConfigFromKubeConfig(config.APIConfig) - if err != nil { - return fmt.Errorf("unable to parse kube config: %s", err) - } - - rcw.Lock() - defer rcw.Unlock() - - if _, ok := rcw.clusterWatchers[config.ClusterName]; ok { - return fmt.Errorf("there is already a cluster with name %s being watcher. Please delete its config before attempting to register a new one", config.ClusterName) - } - - watcher, err := NewRemoteClusterServiceWatcher(rcw.serviceMirrorNamespace, rcw.k8sAPI, clientConfig, config.ClusterName, rcw.requeueLimit, rcw.repairPeriod, config.ClusterDomain) - if err != nil { - return err - } - - rcw.clusterWatchers[config.ClusterName] = watcher - if err := watcher.Start(); err != nil { - return err - } - return nil - -} - -func (rcw *RemoteClusterConfigWatcher) unregisterRemoteCluster(secret *corev1.Secret, cleanState bool) error { - config, err := sm.ParseRemoteClusterSecret(secret) - - if err != nil { - return err - } - rcw.Lock() - defer rcw.Unlock() - if watcher, ok := rcw.clusterWatchers[config.ClusterName]; ok { - watcher.Stop(cleanState) - } else { - return fmt.Errorf("cannot find watcher for cluser: %s", config.ClusterName) - } - delete(rcw.clusterWatchers, config.ClusterName) - - return nil -} diff --git a/controller/cmd/service-mirror/events_formatting.go b/controller/cmd/service-mirror/events_formatting.go index 02058024c54d3..486b4e30b83e4 100644 --- a/controller/cmd/service-mirror/events_formatting.go +++ b/controller/cmd/service-mirror/events_formatting.go @@ -51,48 +51,19 
@@ func formatEndpoints(endp *corev1.Endpoints) string { return fmt.Sprintf("Endpoints: {name: %s, namespace: %s, annotations: [%s], labels: [%s], subsets: [%s]}", endp.Name, endp.Namespace, formatMetadata(endp.Annotations), formatMetadata(endp.Labels), strings.Join(subsets, ",")) } -func (b ProbeConfig) String() string { - return fmt.Sprintf("ProbeConfig: {path: %s, port: %d, periodInSeconds: %d}", b.path, b.port, b.periodInSeconds) -} - -func (b GatewaySpec) String() string { - return fmt.Sprintf("GatewaySpec: {gatewayName: %s, gatewayNamespace: %s, clusterName: %s, addresses: [%s], incomingPort: %d, resourceVersion: %s, identity: %s, probeConfig: %s}", b.gatewayName, b.gatewayNamespace, b.clusterName, formatAddresses(b.addresses), b.incomingPort, b.resourceVersion, b.identity, b.ProbeConfig) -} - -func (gtm gatewayMetadata) String() string { - return fmt.Sprintf("gatewayMetadata: {name: %s, namespace: %s}", gtm.Name, gtm.Namespace) -} - // Events for cluster watcher func (rsc RemoteServiceCreated) String() string { - return fmt.Sprintf("RemoteServiceCreated: {service: %s, gatewayData: %s}", formatService(rsc.service), rsc.gatewayData) + return fmt.Sprintf("RemoteServiceCreated: {service: %s}", formatService(rsc.service)) } func (rsu RemoteServiceUpdated) String() string { - return fmt.Sprintf("RemoteServiceUpdated: {localService: %s, localEndpoints: %s, remoteUpdate: %s, gatewayData: %s}", formatService(rsu.localService), formatEndpoints(rsu.localEndpoints), formatService(rsu.remoteUpdate), rsu.gatewayData) + return fmt.Sprintf("RemoteServiceUpdated: {localService: %s, localEndpoints: %s, remoteUpdate: %s}", formatService(rsu.localService), formatEndpoints(rsu.localEndpoints), formatService(rsu.remoteUpdate)) } func (rsd RemoteServiceDeleted) String() string { return fmt.Sprintf("RemoteServiceDeleted: {name: %s, namespace: %s }", rsd.Name, rsd.Namespace) } -func (rgd RemoteGatewayDeleted) String() string { - return fmt.Sprintf("RemoteGatewayDeleted: 
{gatewayData: %s}", rgd.gatewayData) -} - -func (rgd *RemoteGatewayCreated) String() string { - return fmt.Sprintf("RemoteGatewayCreated: {gatewaySpec: %s}", rgd.gatewaySpec) -} - -func (rgu RemoteGatewayUpdated) String() string { - var services []string - - for _, s := range rgu.affectedServices { - services = append(services, formatService(s)) - } - return fmt.Sprintf("RemoteGatewayUpdated: {gatewaySpec: %s, affectedServices: [%s]}", rgu.gatewaySpec, strings.Join(services, ",")) -} - func (cgu ClusterUnregistered) String() string { return "ClusterUnregistered: {}" } @@ -116,21 +87,3 @@ func (od OnDeleteCalled) String() string { func (re RepairEndpoints) String() string { return "RepairEndpoints" } - -//Events for probe manager - -func (ps probeSpec) String() string { - return fmt.Sprintf("ProbeSpec: {path: %s, port: %d, period: %d}", ps.path, ps.port, ps.periodInSeconds) -} - -func (gmc GatewayMirrorCreated) String() string { - return fmt.Sprintf("GatewayMirrorCreated: {gatewayName: %s, gatewayNamespace: %s, clusterName: %s, probeSpec: %s}", gmc.gatewayName, gmc.gatewayNamespace, gmc.clusterName, gmc.probeSpec) -} - -func (gmd GatewayMirrorDeleted) String() string { - return fmt.Sprintf("GatewayMirrorDeleted: {gatewayName: %s, gatewayNamespace: %s, clusterName: %s}", gmd.gatewayName, gmd.gatewayNamespace, gmd.clusterName) -} - -func (gmu GatewayMirrorUpdated) String() string { - return fmt.Sprintf("GatewayMirrorUpdated: {gatewayName: %s, gatewayNamespace: %s, clusterName: %s, probeSpec: %s}", gmu.gatewayName, gmu.gatewayNamespace, gmu.clusterName, gmu.probeSpec) -} diff --git a/controller/cmd/service-mirror/main.go b/controller/cmd/service-mirror/main.go index 3fc6476e317a7..378b5918cb3ae 100644 --- a/controller/cmd/service-mirror/main.go +++ b/controller/cmd/service-mirror/main.go @@ -1,7 +1,6 @@ package servicemirror import ( - "context" "flag" "fmt" "os" @@ -9,93 +8,166 @@ import ( "syscall" "time" - "k8s.io/client-go/informers" - 
"k8s.io/client-go/tools/cache" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + dynamic "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/tools/clientcmd" - "github.com/linkerd/linkerd2/controller/k8s" + controllerK8s "github.com/linkerd/linkerd2/controller/k8s" "github.com/linkerd/linkerd2/pkg/admin" "github.com/linkerd/linkerd2/pkg/flags" + "github.com/linkerd/linkerd2/pkg/k8s" + "github.com/linkerd/linkerd2/pkg/multicluster" + "github.com/linkerd/linkerd2/pkg/servicemirror" log "github.com/sirupsen/logrus" - "k8s.io/client-go/kubernetes" ) -func initLocalResourceInformer(api kubernetes.Interface, namespace string, resource k8s.APIResource) (cache.SharedIndexInformer, error) { - sharedInformers := informers.NewSharedInformerFactoryWithOptions(api, 10*time.Minute, informers.WithNamespace(namespace)) - - var informer cache.SharedIndexInformer - - switch resource { - case k8s.Svc: - informer = sharedInformers.Core().V1().Services().Informer() - case k8s.Secret: - informer = sharedInformers.Core().V1().Secrets().Informer() - default: - return nil, fmt.Errorf("cannot instantiate local informer for %v", resource) - - } - - sharedInformers.Start(nil) - - ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) - defer cancel() - - log.Infof("waiting for local namespaced %v informer caches to sync", resource) - if !cache.WaitForCacheSync(ctx.Done(), informer.HasSynced) { - return nil, fmt.Errorf("failed to sync local namespaced %v informer caches", resource) - } - log.Infof("local namespaced %v informer caches synced", resource) - return informer, nil -} +var ( + clusterWatcher *RemoteClusterServiceWatcher + probeWorker *ProbeWorker +) -// Main executes the tap service-mirror +// Main executes the service-mirror controller func Main(args []string) { cmd := flag.NewFlagSet("service-mirror", flag.ExitOnError) kubeConfigPath := cmd.String("kubeconfig", "", "path to the local kube config") requeueLimit 
:= cmd.Int("event-requeue-limit", 3, "requeue limit for events") metricsAddr := cmd.String("metrics-addr", ":9999", "address to serve scrapable metrics on") - namespace := cmd.String("namespace", "", "address to serve scrapable metrics on") + namespace := cmd.String("namespace", "", "namespace containing Link and credentials Secret") repairPeriod := cmd.Duration("endpoint-refresh-period", 1*time.Minute, "frequency to refresh endpoint resolution") flags.ConfigureAndParse(cmd, args) + linkName := cmd.Arg(0) stop := make(chan os.Signal, 1) signal.Notify(stop, os.Interrupt, syscall.SIGTERM) - k8sAPI, err := k8s.InitializeAPI( - *kubeConfigPath, - false, - k8s.Svc, - k8s.NS, - k8s.Endpoint, - ) - + // We create two different kubernetes API clients for the local cluster: + // k8sAPI is used as a dynamic client for unstructured access to Link custom + // resources. + // + // controllerK8sAPI is used by the cluster watcher to manage + // mirror resources such as services, namespaces, and endpoints. 
+ k8sAPI, err := k8s.NewAPI(*kubeConfigPath, "", "", []string{}, 0) //TODO: Use can-i to check for required permissions if err != nil { log.Fatalf("Failed to initialize K8s API: %s", err) } - secretsInformer, err := initLocalResourceInformer(k8sAPI.Client, *namespace, k8s.Secret) + controllerK8sAPI, err := controllerK8s.InitializeAPI(*kubeConfigPath, false, + controllerK8s.NS, + controllerK8s.Svc, + controllerK8s.Endpoint, + ) if err != nil { - log.Fatalf("Failed to initialize secret informer: %s", err) + log.Fatalf("Failed to initialize K8s API: %s", err) + } + + linkClient := k8sAPI.DynamicClient.Resource(multicluster.LinkGVR).Namespace(*namespace) + + metrics := newProbeMetricVecs() + go admin.StartServer(*metricsAddr) + + controllerK8sAPI.Sync(nil) + + for { + // Start link watch + linkWatch, err := linkClient.Watch(metav1.ListOptions{}) + if err != nil { + log.Fatalf("Failed to watch Link %s: %s", linkName, err) + } + results := linkWatch.ResultChan() + + // Each time the link resource is updated, reload the config and restart the + // cluster watcher. + for event := range results { + switch obj := event.Object.(type) { + case *dynamic.Unstructured: + if obj.GetName() == linkName { + switch event.Type { + case watch.Added, watch.Modified: + link, err := multicluster.NewLink(*obj) + if err != nil { + log.Errorf("Failed to parse link %s: %s", linkName, err) + continue + } + log.Infof("Got updated link %s: %+v", linkName, link) + creds, err := loadCredentials(link, *namespace, k8sAPI) + if err != nil { + log.Errorf("Failed to load remote cluster credentials: %s", err) + } + restartClusterWatcher(link, *namespace, creds, controllerK8sAPI, *requeueLimit, *repairPeriod, metrics) + case watch.Deleted: + log.Infof("Link %s deleted", linkName) + // TODO: should we delete all mirror resources? 
+ default: + log.Infof("Ignoring event type %s", event.Type) + } + } + default: + log.Errorf("Unknown object type detected: %+v", obj) + } + } + + log.Info("Link watch terminated; restarting watch") } - svcInformer, err := initLocalResourceInformer(k8sAPI.Client, *namespace, k8s.Svc) +} +func loadCredentials(link multicluster.Link, namespace string, k8sAPI *k8s.KubernetesAPI) (*servicemirror.WatchedClusterConfig, error) { + // Load the credentials secret + secret, err := k8sAPI.Interface.CoreV1().Secrets(namespace).Get(link.ClusterCredentialsSecret, metav1.GetOptions{}) if err != nil { - log.Fatalf("Failed to initialize service informer: %s", err) + return nil, fmt.Errorf("Failed to load credentials secret %s: %s", link.ClusterCredentialsSecret, err) } + return servicemirror.ParseRemoteClusterSecret(secret) +} - probeManager := NewProbeManager(svcInformer) - probeManager.Start() +func restartClusterWatcher( + link multicluster.Link, + namespace string, + creds *servicemirror.WatchedClusterConfig, + controllerK8sAPI *controllerK8s.API, + requeueLimit int, + repairPeriod time.Duration, + metrics probeMetricVecs, +) { + if clusterWatcher != nil { + clusterWatcher.Stop(false) + } + if probeWorker != nil { + probeWorker.Stop() + } - k8sAPI.Sync(nil) - watcher := NewRemoteClusterConfigWatcher(*namespace, secretsInformer, k8sAPI, *requeueLimit, *repairPeriod) - log.Info("Started cluster config watcher") + cfg, err := clientcmd.RESTConfigFromKubeConfig(creds.APIConfig) + if err != nil { + log.Errorf("Unable to parse kube config: %s", err) + return + } - go admin.StartServer(*metricsAddr) + clusterWatcher, err = NewRemoteClusterServiceWatcher( + namespace, + controllerK8sAPI, + cfg, + &link, + requeueLimit, + repairPeriod, + ) + if err != nil { + log.Errorf("Unable to create cluster watcher: %s", err) + return + } + + err = clusterWatcher.Start() + if err != nil { + log.Errorf("Failed to start cluster watcher: %s", err) + return + } - <-stop - log.Info("Stopping cluster 
config watcher") - watcher.Stop() - probeManager.Stop() + workerMetrics, err := metrics.newWorkerMetrics(link.TargetClusterName) + if err != nil { + log.Errorf("Failed to create metrics for cluster watcher: %s", err) + } + probeWorker = NewProbeWorker(fmt.Sprintf("probe-gateway-%s", link.TargetClusterName), &link.ProbeSpec, workerMetrics, link.TargetClusterName) + go probeWorker.run() } diff --git a/controller/cmd/service-mirror/metrics.go b/controller/cmd/service-mirror/metrics.go index 97f027b5acce6..6f6f087aa6083 100644 --- a/controller/cmd/service-mirror/metrics.go +++ b/controller/cmd/service-mirror/metrics.go @@ -7,11 +7,9 @@ import ( ) const ( - gatewayNameLabel = "gateway_name" - gatewayNamespaceLabel = "gateway_namespace" - gatewayClusterName = "target_cluster_name" - eventTypeLabelName = "event_type" - probeSuccessfulLabel = "probe_successful" + gatewayClusterName = "target_cluster_name" + eventTypeLabelName = "event_type" + probeSuccessfulLabel = "probe_successful" ) type probeMetricVecs struct { @@ -37,19 +35,19 @@ func init() { Name: "service_mirror_endpoint_repairs", Help: "Increments when the service mirror controller attempts to repair mirror endpoints", }, - []string{gatewayNameLabel, gatewayNamespaceLabel, gatewayClusterName}, + []string{gatewayClusterName}, ) } func newProbeMetricVecs() probeMetricVecs { - labelNames := []string{gatewayNameLabel, gatewayNamespaceLabel, gatewayClusterName} + labelNames := []string{gatewayClusterName} probes := promauto.NewCounterVec( prometheus.CounterOpts{ Name: "gateway_probes", Help: "A counter for the number of actual performed probes to a gateway", }, - []string{gatewayNameLabel, gatewayNamespaceLabel, gatewayClusterName, probeSuccessfulLabel}, + []string{gatewayClusterName, probeSuccessfulLabel}, ) enqueues := promauto.NewCounterVec( @@ -98,12 +96,10 @@ func newProbeMetricVecs() probeMetricVecs { probes: probes, } } -func (mv probeMetricVecs) newWorkerMetrics(gatewayNamespace, gatewayName, remoteClusterName 
string) (*probeMetrics, error) { +func (mv probeMetricVecs) newWorkerMetrics(remoteClusterName string) (*probeMetrics, error) { labels := prometheus.Labels{ - gatewayNameLabel: gatewayName, - gatewayNamespaceLabel: gatewayNamespace, - gatewayClusterName: remoteClusterName, + gatewayClusterName: remoteClusterName, } curriedProbes, err := mv.probes.CurryWith(labels) @@ -115,16 +111,14 @@ func (mv probeMetricVecs) newWorkerMetrics(gatewayNamespace, gatewayName, remote latencies: mv.latencies.With(labels), probes: curriedProbes, unregister: func() { - mv.unregister(gatewayNamespace, gatewayName, remoteClusterName) + mv.unregister(remoteClusterName) }, }, nil } -func (mv probeMetricVecs) unregister(gatewayNamespace, gatewayName, remoteClusterName string) { +func (mv probeMetricVecs) unregister(remoteClusterName string) { labels := prometheus.Labels{ - gatewayNameLabel: gatewayName, - gatewayNamespaceLabel: gatewayNamespace, - gatewayClusterName: remoteClusterName, + gatewayClusterName: remoteClusterName, } if !mv.alive.Delete(labels) { diff --git a/controller/cmd/service-mirror/probe_manager.go b/controller/cmd/service-mirror/probe_manager.go deleted file mode 100644 index c2b850fedea7a..0000000000000 --- a/controller/cmd/service-mirror/probe_manager.go +++ /dev/null @@ -1,271 +0,0 @@ -package servicemirror - -import ( - "fmt" - "strconv" - - consts "github.com/linkerd/linkerd2/pkg/k8s" - "github.com/prometheus/client_golang/prometheus" - log "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" - "k8s.io/client-go/tools/cache" -) - -const probeChanBufferSize = 500 - -// ProbeManager takes care of managing the lifecycle of probe workers -type ProbeManager struct { - probeWorkers map[string]*ProbeWorker - mirroredGatewayInformer cache.SharedIndexInformer - events chan interface{} - metricVecs *probeMetricVecs - done chan struct{} -} - -// GatewayMirrorCreated is observed when a mirror of a remote gateway is created locally -type GatewayMirrorCreated struct { - 
gatewayName string - gatewayNamespace string - clusterName string - probeSpec -} - -// GatewayMirrorDeleted is emitted when a mirror of a remote gateway is deleted -type GatewayMirrorDeleted struct { - gatewayName string - gatewayNamespace string - clusterName string -} - -// GatewayMirrorUpdated is emitted when the mirror of a remote gateway has changed -type GatewayMirrorUpdated struct { - gatewayName string - gatewayNamespace string - clusterName string - probeSpec -} - -// NewProbeManager creates a new probe manager -func NewProbeManager(mirroredGatewayInformer cache.SharedIndexInformer) *ProbeManager { - metricVecs := newProbeMetricVecs() - return &ProbeManager{ - mirroredGatewayInformer: mirroredGatewayInformer, - probeWorkers: make(map[string]*ProbeWorker), - events: make(chan interface{}, probeChanBufferSize), - metricVecs: &metricVecs, - done: make(chan struct{}), - } -} - -func eventTypeString(ev interface{}) string { - switch ev.(type) { - case *GatewayMirrorCreated: - return "GatewayMirrorCreated" - case *GatewayMirrorDeleted: - return "GatewayMirrorDeleted" - case *GatewayMirrorUpdated: - return "GatewayMirrorUpdated" - default: - return "Unknown" - } -} - -func (m *ProbeManager) enqueueEvent(event interface{}) { - m.metricVecs.enqueues.With(prometheus.Labels{eventTypeLabelName: eventTypeString(event)}).Inc() - m.events <- event -} - -func probeKey(gatewayNamespace string, gatewayName string, clusterName string) string { - return fmt.Sprintf("%s-%s-%s", gatewayNamespace, gatewayName, clusterName) -} - -func (m *ProbeManager) handleEvent(ev interface{}) { - switch ev := ev.(type) { - case *GatewayMirrorCreated: - m.handleGatewayMirrorCreated(ev) - case *GatewayMirrorUpdated: - m.handleGatewayMirrorUpdated(ev) - case *GatewayMirrorDeleted: - m.handleGatewayMirrorDeleted(ev) - default: - log.Errorf("Received unknown event: %v", ev) - } -} - -func (m *ProbeManager) handleGatewayMirrorDeleted(event *GatewayMirrorDeleted) { - probeKey := 
probeKey(event.gatewayNamespace, event.gatewayName, event.clusterName) - m.stopProbe(probeKey) -} - -func (m *ProbeManager) handleGatewayMirrorCreated(event *GatewayMirrorCreated) { - probeKey := probeKey(event.gatewayNamespace, event.gatewayName, event.clusterName) - worker, ok := m.probeWorkers[probeKey] - if ok { - log.Infof("There is already a probe worker for %s. Updating instead of creating", probeKey) - worker.UpdateProbeSpec(&event.probeSpec) - } else { - log.Infof("Creating probe worker %s", probeKey) - probeMetrics, err := m.metricVecs.newWorkerMetrics(event.gatewayNamespace, event.gatewayName, event.clusterName) - if err != nil { - log.Errorf("Could not crete probe metrics: %s", err) - } else { - localGatewayName := fmt.Sprintf("%s-%s", event.gatewayName, event.clusterName) - worker = NewProbeWorker(localGatewayName, &event.probeSpec, probeMetrics, probeKey) - m.probeWorkers[probeKey] = worker - worker.Start() - } - } -} - -func (m *ProbeManager) handleGatewayMirrorUpdated(event *GatewayMirrorUpdated) { - probeKey := probeKey(event.gatewayNamespace, event.gatewayName, event.clusterName) - worker, ok := m.probeWorkers[probeKey] - if ok { - if worker.probeSpec.port != event.port || worker.probeSpec.periodInSeconds != event.periodInSeconds || worker.probeSpec.path != event.path { - worker.UpdateProbeSpec(&event.probeSpec) - } - } else { - log.Infof("Could not find a worker for %s while handling GatewayMirrorUpdated event", probeKey) - } -} - -func (m *ProbeManager) stopProbe(key string) { - if worker, ok := m.probeWorkers[key]; ok { - worker.Stop() - delete(m.probeWorkers, key) - } else { - log.Infof("Could not find probe worker with key %s", key) - } -} - -func (m *ProbeManager) run() { - for { - select { - case event := <-m.events: - log.Infof("Probe Manager: received event: %s", event) - m.metricVecs.dequeues.With(prometheus.Labels{eventTypeLabelName: eventTypeString(event)}).Inc() - m.handleEvent(event) - case <-m.done: - log.Infof("Shutting down 
ProbeManager") - for key := range m.probeWorkers { - m.stopProbe(key) - } - return - } - } -} - -func extractProbeSpec(svc *corev1.Service) (*probeSpec, error) { - path, hasPath := svc.Annotations[consts.MirroredGatewayProbePath] - if !hasPath { - return nil, fmt.Errorf("mirrored Gateway service is missing %s annotation", consts.MirroredGatewayProbePath) - } - - probePort, err := extractPort(svc.Spec.Ports, consts.ProbePortName) - if err != nil { - return nil, fmt.Errorf("%s: %s", svc.Name, err) - } - - period, hasPeriod := svc.Annotations[consts.MirroredGatewayProbePeriod] - if !hasPeriod { - return nil, fmt.Errorf("mirrored Gateway service is missing %s annotation", consts.MirroredGatewayProbePeriod) - } - - probePeriod, err := strconv.ParseUint(period, 10, 32) - if err != nil { - return nil, err - } - - return &probeSpec{ - path: path, - port: probePort, - periodInSeconds: uint32(probePeriod), - }, nil - -} - -// Start starts the probe manager -func (m *ProbeManager) Start() { - m.mirroredGatewayInformer.AddEventHandler( - cache.FilteringResourceEventHandler{ - FilterFunc: func(obj interface{}) bool { - switch object := obj.(type) { - case *corev1.Service: - _, isMirrorGateway := object.Labels[consts.MirroredGatewayLabel] - return isMirrorGateway - - case cache.DeletedFinalStateUnknown: - if svc, ok := object.Obj.(*corev1.Service); ok { - _, isMirrorGateway := svc.Labels[consts.MirroredGatewayLabel] - return isMirrorGateway - } - return false - default: - return false - } - }, - - Handler: cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - service := obj.(*corev1.Service) - spec, err := extractProbeSpec(service) - if err != nil { - log.Errorf("Could not parse probe spec %s", err) - } else { - m.enqueueEvent(&GatewayMirrorCreated{ - gatewayName: service.Annotations[consts.MirroredGatewayRemoteName], - gatewayNamespace: service.Annotations[consts.MirroredGatewayRemoteNameSpace], - clusterName: service.Labels[consts.RemoteClusterNameLabel], - 
probeSpec: *spec, - }) - } - }, - DeleteFunc: func(obj interface{}) { - service, ok := obj.(*corev1.Service) - if !ok { - tombstone, ok := obj.(cache.DeletedFinalStateUnknown) - if !ok { - log.Errorf("couldn't get object from DeletedFinalStateUnknown %#v", obj) - return - } - service, ok = tombstone.Obj.(*corev1.Service) - if !ok { - log.Errorf("DeletedFinalStateUnknown contained object that is not a Secret %#v", obj) - return - } - } - - m.enqueueEvent(&GatewayMirrorDeleted{ - gatewayName: service.Annotations[consts.MirroredGatewayRemoteName], - gatewayNamespace: service.Annotations[consts.MirroredGatewayRemoteNameSpace], - clusterName: service.Labels[consts.RemoteClusterNameLabel], - }) - }, - UpdateFunc: func(old, new interface{}) { - oldService := old.(*corev1.Service) - newService := new.(*corev1.Service) - - if oldService.ResourceVersion != newService.ResourceVersion { - spec, err := extractProbeSpec(newService) - if err != nil { - log.Errorf("Could not parse probe spec %s", err) - } else { - m.enqueueEvent(&GatewayMirrorUpdated{ - gatewayName: newService.Annotations[consts.MirroredGatewayRemoteName], - gatewayNamespace: newService.Annotations[consts.MirroredGatewayRemoteNameSpace], - clusterName: newService.Labels[consts.RemoteClusterNameLabel], - probeSpec: *spec, - }) - } - } - }, - }, - }, - ) - go m.run() -} - -// Stop stops the probe manager -func (m *ProbeManager) Stop() { - close(m.done) -} diff --git a/controller/cmd/service-mirror/probe_worker.go b/controller/cmd/service-mirror/probe_worker.go index 62a4afbb42e35..ed349c434b2b1 100644 --- a/controller/cmd/service-mirror/probe_worker.go +++ b/controller/cmd/service-mirror/probe_worker.go @@ -6,30 +6,25 @@ import ( "sync" "time" + "github.com/linkerd/linkerd2/pkg/multicluster" "github.com/prometheus/client_golang/prometheus" logging "github.com/sirupsen/logrus" ) const httpGatewayTimeoutMillis = 50000 -type probeSpec struct { - path string - port uint32 - periodInSeconds uint32 -} - // ProbeWorker is 
responsible for monitoring gateways using a probe specification type ProbeWorker struct { localGatewayName string *sync.RWMutex - probeSpec *probeSpec + probeSpec *multicluster.ProbeSpec stopCh chan struct{} metrics *probeMetrics log *logging.Entry } // NewProbeWorker creates a new probe worker associated with a particular gateway -func NewProbeWorker(localGatewayName string, spec *probeSpec, metrics *probeMetrics, probekey string) *ProbeWorker { +func NewProbeWorker(localGatewayName string, spec *multicluster.ProbeSpec, metrics *probeMetrics, probekey string) *ProbeWorker { return &ProbeWorker{ localGatewayName: localGatewayName, RWMutex: &sync.RWMutex{}, @@ -43,7 +38,7 @@ func NewProbeWorker(localGatewayName string, spec *probeSpec, metrics *probeMetr } // UpdateProbeSpec is used to update the probe specification when something about the gateway changes -func (pw *ProbeWorker) UpdateProbeSpec(spec *probeSpec) { +func (pw *ProbeWorker) UpdateProbeSpec(spec *multicluster.ProbeSpec) { pw.Lock() pw.probeSpec = spec pw.Unlock() @@ -63,9 +58,8 @@ func (pw *ProbeWorker) Start() { } func (pw *ProbeWorker) run() { - periodInMillis := pw.probeSpec.periodInSeconds * 1000 - probeTickerPeriod := time.Duration(periodInMillis) * time.Millisecond - maxJitter := time.Duration(periodInMillis/10) * time.Millisecond // max jitter is 10% of period + probeTickerPeriod := pw.probeSpec.Period + maxJitter := pw.probeSpec.Period / 10 // max jitter is 10% of period probeTicker := NewTicker(probeTickerPeriod, maxJitter) probeLoop: @@ -90,7 +84,7 @@ func (pw *ProbeWorker) doProbe() { Timeout: httpGatewayTimeoutMillis * time.Millisecond, } - req, err := http.NewRequest("GET", fmt.Sprintf("http://%s:%d/%s", pw.localGatewayName, pw.probeSpec.port, pw.probeSpec.path), nil) + req, err := http.NewRequest("GET", fmt.Sprintf("http://%s:%d/%s", pw.localGatewayName, pw.probeSpec.Port, pw.probeSpec.Path), nil) if err != nil { pw.log.Errorf("Could not create a GET request to gateway: %s", err) return 
diff --git a/pkg/charts/multicluster/values.go b/pkg/charts/multicluster/values.go index 158265a50c2aa..d95aa5d6b431d 100644 --- a/pkg/charts/multicluster/values.go +++ b/pkg/charts/multicluster/values.go @@ -9,7 +9,10 @@ import ( "sigs.k8s.io/yaml" ) -const helmDefaultChartDir = "linkerd2-multicluster" +const ( + helmDefaultChartDir = "linkerd2-multicluster" + helmDefaultLinkChartDir = "linkerd2-multicluster-link" +) // Values contains the top-level elements in the Helm charts type Values struct { @@ -40,10 +43,11 @@ type Values struct { ServiceMirrorUID int64 `json:"serviceMirrorUID"` RemoteMirrorServiceAccount bool `json:"remoteMirrorServiceAccount"` RemoteMirrorServiceAccountName string `json:"remoteMirrorServiceAccountName"` + TargetClusterName string `json:"targetClusterName"` } -// NewValues returns a new instance of the Values type. -func NewValues() (*Values, error) { +// NewInstallValues returns a new instance of the Values type. +func NewInstallValues() (*Values, error) { chartDir := fmt.Sprintf("%s/", helmDefaultChartDir) v, err := readDefaults(chartDir) if err != nil { @@ -54,6 +58,18 @@ func NewValues() (*Values, error) { return v, nil } +// NewLinkValues returns a new instance of the Values type. +func NewLinkValues() (*Values, error) { + chartDir := fmt.Sprintf("%s/", helmDefaultLinkChartDir) + v, err := readDefaults(chartDir) + if err != nil { + return nil, err + } + + v.CliVersion = k8s.CreatedByAnnotationValue() + return v, nil +} + // readDefaults read all the default variables from the values.yaml file. // chartDir is the root directory of the Helm chart where values.yaml is. 
func readDefaults(chartDir string) (*Values, error) { diff --git a/pkg/flags/flags.go b/pkg/flags/flags.go index a7ac7952d6b87..b7f74df2a8939 100644 --- a/pkg/flags/flags.go +++ b/pkg/flags/flags.go @@ -51,7 +51,7 @@ func setLogLevel(logLevel string) { if level == log.DebugLevel { flag.Set("stderrthreshold", "INFO") flag.Set("logtostderr", "true") - flag.Set("v", "6") // At 7 and higher, authorization tokens get logged. + flag.Set("v", "12") // At 7 and higher, authorization tokens get logged. // pipe klog entries to logrus klog.SetOutput(log.StandardLogger().Writer()) } diff --git a/pkg/healthcheck/healthcheck.go b/pkg/healthcheck/healthcheck.go index 876db1c0fd976..067736e111aac 100644 --- a/pkg/healthcheck/healthcheck.go +++ b/pkg/healthcheck/healthcheck.go @@ -19,7 +19,7 @@ import ( "github.com/linkerd/linkerd2/pkg/identity" "github.com/linkerd/linkerd2/pkg/issuercerts" "github.com/linkerd/linkerd2/pkg/k8s" - sm "github.com/linkerd/linkerd2/pkg/servicemirror" + "github.com/linkerd/linkerd2/pkg/multicluster" "github.com/linkerd/linkerd2/pkg/tls" "github.com/linkerd/linkerd2/pkg/version" log "github.com/sirupsen/logrus" @@ -188,11 +188,6 @@ var ExpectedServiceAccountNames = []string{ "linkerd-tap", } -type expectedPolicy struct { - resources []string - verbs []string -} - var ( retryWindow = 5 * time.Second requestTimeout = 30 * time.Second @@ -344,8 +339,6 @@ type Options struct { RetryDeadline time.Time CNIEnabled bool InstallManifest string - SourceCluster bool - TargetCluster bool MultiCluster bool } @@ -356,20 +349,19 @@ type HealthChecker struct { *Options // these fields are set in the process of running checks - kubeAPI *k8s.KubernetesAPI - kubeVersion *k8sVersion.Info - controlPlanePods []corev1.Pod - apiClient public.APIClient - latestVersions version.Channels - serverVersion string - linkerdConfig *configPb.All - uuid string - issuerCert *tls.Cred - trustAnchors []*x509.Certificate - cniDaemonSet *appsv1.DaemonSet - serviceMirrorNs string - 
remoteClusterConfigs []*sm.WatchedClusterConfig - addOns map[string]interface{} + kubeAPI *k8s.KubernetesAPI + kubeVersion *k8sVersion.Info + controlPlanePods []corev1.Pod + apiClient public.APIClient + latestVersions version.Channels + serverVersion string + linkerdConfig *configPb.All + uuid string + issuerCert *tls.Cred + trustAnchors []*x509.Certificate + cniDaemonSet *appsv1.DaemonSet + links []multicluster.Link + addOns map[string]interface{} } // NewHealthChecker returns an initialized HealthChecker diff --git a/pkg/healthcheck/healthcheck_multicluster.go b/pkg/healthcheck/healthcheck_multicluster.go index 26e42e503211a..fc91757a5b5c1 100644 --- a/pkg/healthcheck/healthcheck_multicluster.go +++ b/pkg/healthcheck/healthcheck_multicluster.go @@ -8,50 +8,28 @@ import ( "sort" "strings" - pb "github.com/linkerd/linkerd2/controller/gen/public" - sm "github.com/linkerd/linkerd2/pkg/servicemirror" - tsclient "github.com/servicemeshinterface/smi-sdk-go/pkg/gen/client/split/clientset/versioned" + "github.com/linkerd/linkerd2/controller/gen/public" + "github.com/linkerd/linkerd2/pkg/multicluster" + "github.com/linkerd/linkerd2/pkg/servicemirror" corev1 "k8s.io/api/core/v1" "github.com/linkerd/linkerd2/pkg/k8s" "github.com/linkerd/linkerd2/pkg/tls" - v1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/clientcmd" ) const ( - // LinkerdMulticlusterSourceChecks adds a series of checks to validate - // the source side of a multicluster setup - LinkerdMulticlusterSourceChecks CategoryID = "linkerd-multicluster-source" - - // LinkerdMulticlusterTargetChecks add a series of checks to validate the - // targetside of a multicluster setup - LinkerdMulticlusterTargetChecks CategoryID = "linkerd-multicluster-target" - - linkerdServiceMirrorComponentName = "linkerd-service-mirror" - linkerdServiceMirrorClusterRoleName = "linkerd-service-mirror-access-local-resources" - linkerdServiceMirrorRoleName = 
"linkerd-service-mirror-read-remote-creds" + // LinkerdMulticlusterChecks adds a series of checks to validate a + // multicluster setup. + LinkerdMulticlusterChecks CategoryID = "linkerd-multicluster" + + linkerdServiceMirrorComponentName = "linkerd-service-mirror" + linkerdServiceMirrorSerivceAccountName = "linkerd-service-mirror-%s" + linkerdServiceMirrorClusterRoleName = "linkerd-service-mirror-access-local-resources-%s" + linkerdServiceMirrorRoleName = "linkerd-service-mirror-read-remote-creds-%s" ) -var expectedServiceMirrorClusterRolePolicies = []expectedPolicy{ - { - resources: []string{"endpoints", "services"}, - verbs: []string{"list", "get", "watch", "create", "delete", "update"}, - }, - { - resources: []string{"namespaces"}, - verbs: []string{"create", "list", "get", "watch"}, - }, -} - -var expectedServiceMirrorRolePolicies = []expectedPolicy{ - { - resources: []string{"secrets"}, - verbs: []string{"list", "get", "watch"}, - }, -} - var expectedServiceMirrorRemoteClusterPolicyVerbs = []string{ "get", "list", @@ -61,149 +39,103 @@ var expectedServiceMirrorRemoteClusterPolicyVerbs = []string{ func (hc *HealthChecker) multiClusterCategory() []category { return []category{ { - id: LinkerdMulticlusterSourceChecks, + id: LinkerdMulticlusterChecks, checkers: []checker{ + /* Link checks */ { - description: "service mirror controller is running", - hintAnchor: "l5d-multicluster-service-mirror-running", - retryDeadline: hc.RetryDeadline, - fatal: true, - check: func(context.Context) error { - return hc.checkServiceMirrorController() - }, - }, - { - description: "service mirror controller ClusterRoles exist", - hintAnchor: "l5d-multicluster-cluster-role-exist", + description: "Link CRD exists", + hintAnchor: "l5d-multicluster-link-crd-exists", + fatal: true, check: func(context.Context) error { - if hc.Options.SourceCluster { - return hc.checkClusterRoles(true, []string{linkerdServiceMirrorClusterRoleName}, hc.serviceMirrorComponentsSelector()) - } - return 
&SkipError{Reason: "not checking muticluster"} + return hc.checkLinkCRD() }, }, { - description: "service mirror controller ClusterRoleBindings exist", - hintAnchor: "l5d-multicluster-cluster-role-binding-exist", + description: "Link resources are valid", + hintAnchor: "l5d-multicluster-links-are-valid", + fatal: true, check: func(context.Context) error { - if hc.Options.SourceCluster { - return hc.checkClusterRoleBindings(true, []string{linkerdServiceMirrorClusterRoleName}, hc.serviceMirrorComponentsSelector()) + if hc.Options.MultiCluster { + return hc.checkLinks() } return &SkipError{Reason: "not checking muticluster"} }, }, + /* Serivce mirror controller checks */ { - description: "service mirror controller Roles exist", - hintAnchor: "l5d-multicluster-role-exist", + description: "service mirror controller has required permissions", + hintAnchor: "l5d-multicluster-source-rbac-correct", check: func(context.Context) error { - if hc.Options.SourceCluster { - return hc.checkRoles(true, hc.serviceMirrorNs, []string{linkerdServiceMirrorRoleName}, hc.serviceMirrorComponentsSelector()) + if hc.Options.MultiCluster { + return hc.checkServiceMirrorLocalRBAC() } return &SkipError{Reason: "not checking muticluster"} }, }, { - description: "service mirror controller RoleBindings exist", - hintAnchor: "l5d-multicluster-role-binding-exist", + description: "service mirror controllers are running", + hintAnchor: "l5d-multicluster-service-mirror-running", + retryDeadline: hc.RetryDeadline, + surfaceErrorOnRetry: true, check: func(context.Context) error { - if hc.Options.SourceCluster { - return hc.checkRoleBindings(true, hc.serviceMirrorNs, []string{linkerdServiceMirrorRoleName}, hc.serviceMirrorComponentsSelector()) + if hc.Options.MultiCluster { + return hc.checkServiceMirrorController() } return &SkipError{Reason: "not checking muticluster"} }, }, + /* Target cluster access checks */ { - description: "service mirror controller ServiceAccounts exist", - hintAnchor: 
"l5d-multicluster-service-account-exist", + description: "remote cluster access credentials are valid", + hintAnchor: "l5d-smc-target-clusters-access", check: func(context.Context) error { - if hc.Options.SourceCluster { - return hc.checkServiceAccounts([]string{linkerdServiceMirrorComponentName}, hc.serviceMirrorNs, hc.serviceMirrorComponentsSelector()) + if hc.Options.MultiCluster { + return hc.checkRemoteClusterConnectivity() } return &SkipError{Reason: "not checking muticluster"} }, }, - { - description: "service mirror controller has required permissions", - hintAnchor: "l5d-multicluster-source-rbac-correct", - check: func(context.Context) error { - return hc.checkServiceMirrorLocalRBAC() - }, - }, - { - description: "service mirror controller can access target clusters", - hintAnchor: "l5d-smc-target-clusters-access", - check: func(context.Context) error { - return hc.checkRemoteClusterConnectivity() - }, - }, - { - description: "all target cluster gateways are alive", - hintAnchor: "l5d-multicluster-target-gateways-alive", - check: func(ctx context.Context) error { - return hc.checkRemoteClusterGatewaysHealth(ctx) - }, - }, { description: "clusters share trust anchors", hintAnchor: "l5d-multicluster-clusters-share-anchors", check: func(ctx context.Context) error { - return hc.checkRemoteClusterAnchors() + if hc.Options.MultiCluster { + return hc.checkRemoteClusterAnchors() + } + return &SkipError{Reason: "not checking muticluster"} }, }, + /* Gateway mirror checks */ { - description: "multicluster daisy chaining is avoided", - hintAnchor: "l5d-multicluster-daisy-chaining", - warning: true, + description: "all gateway mirrors are healthy", + hintAnchor: "l5d-multicluster-gateways-endpoints", check: func(ctx context.Context) error { - return hc.checkDaisyChains() + if hc.Options.MultiCluster { + return hc.checkIfGatewayMirrorsHaveEndpoints(ctx) + } + return &SkipError{Reason: "not checking muticluster"} }, }, + /* Mirror service checks */ { description: "all 
mirror services have endpoints", hintAnchor: "l5d-multicluster-services-endpoints", - warning: true, - check: func(ctx context.Context) error { - return hc.checkIfMirrorServicesHaveEndpoints() - }, - }, - { - description: "all gateway mirrors have endpoints", - hintAnchor: "l5d-multicluster-gateways-endpoints", - warning: true, - check: func(ctx context.Context) error { - return hc.checkIfGatewayMirrorsHaveEndpoints() - }, - }, - { - description: "remote: all referenced gateways are valid", - hintAnchor: "l5d-multicluster-gateways-exist", - warning: true, check: func(ctx context.Context) error { - return hc.checkRemoteGateways() + if hc.Options.MultiCluster { + return hc.checkIfMirrorServicesHaveEndpoints() + } + return &SkipError{Reason: "not checking muticluster"} }, }, - }, - }, - { - id: LinkerdMulticlusterTargetChecks, - checkers: []checker{ { - description: "all cluster gateways are valid", - hintAnchor: "l5d-multicluster-gateways-exist", + description: "all mirror services are part of a Link", + hintAnchor: "l5d-multicluster-orphaned-services", warning: true, check: func(ctx context.Context) error { - targetCluster, err := hc.isTargetCluster() - if err != nil { - return err + if hc.Options.MultiCluster { + return hc.checkForOrphanedServices() } - if targetCluster || hc.TargetCluster { - err := hc.checkLocalGateways() - if err != nil { - return err - } - return hc.checkIfGatewaysHaveEndpoints() - } - return &SkipError{Reason: "not checking target cluster"} + return &SkipError{Reason: "not checking muticluster"} }, }, }, @@ -211,148 +143,250 @@ func (hc *HealthChecker) multiClusterCategory() []category { } } -func (hc *HealthChecker) serviceMirrorComponentsSelector() string { - return fmt.Sprintf("%s=%s", k8s.ControllerComponentLabel, linkerdServiceMirrorComponentName) -} +/* Link checks */ -func (hc *HealthChecker) checkServiceMirrorController() error { - options := metav1.ListOptions{ - LabelSelector: hc.serviceMirrorComponentsSelector(), +func (hc 
*HealthChecker) checkLinkCRD() error { + err := k8s.LinkAccess(hc.kubeAPI.Interface) + if err == nil { + hc.Options.MultiCluster = true + return nil } - result, err := hc.kubeAPI.AppsV1().Deployments(corev1.NamespaceAll).List(options) + if !hc.Options.MultiCluster { + return &SkipError{Reason: "not checking muticluster"} + } + return fmt.Errorf("multicluster.linkerd.io/Link CRD is missing: %s", err) +} + +func (hc *HealthChecker) checkLinks() error { + links, err := multicluster.GetLinks(hc.kubeAPI.DynamicClient) if err != nil { return err } - - // if we have explicitly requested for multicluster to be checked, error out - if len(result.Items) == 0 && hc.Options.SourceCluster { - return errors.New("Service mirror controller is not present") + if len(links) == 0 { + return &SkipError{Reason: "no links detected"} } + linkNames := []string{} + for _, l := range links { + linkNames = append(linkNames, fmt.Sprintf("\t* %s", l.TargetClusterName)) + } + hc.links = links + return &VerboseSuccess{Message: strings.Join(linkNames, "\n")} +} - if len(result.Items) > 0 { - hc.Options.SourceCluster = true +/* Serivce mirror controller checks */ - if len(result.Items) > 1 { - var errors []error - for _, smc := range result.Items { - errors = append(errors, fmt.Errorf("%s/%s", smc.Namespace, smc.Name)) - } - return fmt.Errorf("There are more than one service mirror controllers:\n%s", joinErrors(errors, 1)) +func (hc *HealthChecker) checkServiceMirrorLocalRBAC() error { + links := []string{} + errors := []string{} + + for _, link := range hc.links { + + err := hc.checkServiceAccounts( + []string{fmt.Sprintf(linkerdServiceMirrorSerivceAccountName, link.TargetClusterName)}, + link.Namespace, + serviceMirrorComponentsSelector(link.TargetClusterName), + ) + if err != nil { + errors = append(errors, err.Error()) } - controller := result.Items[0] - if controller.Status.AvailableReplicas < 1 { - return fmt.Errorf("Service mirror controller is not available: %s/%s", controller.Namespace, 
controller.Name) + err = hc.checkClusterRoles( + true, + []string{fmt.Sprintf(linkerdServiceMirrorClusterRoleName, link.TargetClusterName)}, + serviceMirrorComponentsSelector(link.TargetClusterName), + ) + if err != nil { + errors = append(errors, err.Error()) } - hc.serviceMirrorNs = controller.Namespace - return nil - } - return &SkipError{Reason: "not checking muticluster"} -} + err = hc.checkClusterRoleBindings( + true, + []string{fmt.Sprintf(linkerdServiceMirrorClusterRoleName, link.TargetClusterName)}, + serviceMirrorComponentsSelector(link.TargetClusterName), + ) + if err != nil { + errors = append(errors, err.Error()) + } -func comparePermissions(expected, actual []string) error { - sort.Strings(expected) - sort.Strings(actual) + err = hc.checkRoles( + true, + link.Namespace, + []string{fmt.Sprintf(linkerdServiceMirrorRoleName, link.TargetClusterName)}, + serviceMirrorComponentsSelector(link.TargetClusterName), + ) + if err != nil { + errors = append(errors, err.Error()) + } - expectedStr := strings.Join(expected, ",") - actualStr := strings.Join(actual, ",") + err = hc.checkRoleBindings( + true, + link.Namespace, + []string{fmt.Sprintf(linkerdServiceMirrorRoleName, link.TargetClusterName)}, + serviceMirrorComponentsSelector(link.TargetClusterName), + ) + if err != nil { + errors = append(errors, err.Error()) + } - if expectedStr != actualStr { - return fmt.Errorf("expected %s, got %s", expectedStr, actualStr) + links = append(links, fmt.Sprintf("\t* %s", link.TargetClusterName)) + } + if len(errors) > 0 { + return fmt.Errorf(strings.Join(errors, "\n")) } - return nil + if len(links) == 0 { + return &SkipError{Reason: "no links"} + } + + return &VerboseSuccess{Message: strings.Join(links, "\n")} } -func verifyRule(expected expectedPolicy, actual []v1.PolicyRule) error { - for _, rule := range actual { - if err := comparePermissions(expected.resources, rule.Resources); err == nil { - if err := comparePermissions(expected.verbs, rule.Verbs); err != nil { - 
return fmt.Errorf("unexpected verbs %s", err) - } - return nil +func (hc *HealthChecker) checkServiceMirrorController() error { + + errors := []error{} + clusterNames := []string{} + + for _, link := range hc.links { + options := metav1.ListOptions{ + LabelSelector: serviceMirrorComponentsSelector(link.TargetClusterName), + } + result, err := hc.kubeAPI.AppsV1().Deployments(corev1.NamespaceAll).List(options) + if err != nil { + return err + } + + if len(result.Items) > 1 { + errors = append(errors, fmt.Errorf("* too many service mirror controller deployments for Link %s", link.TargetClusterName)) + continue + } + if len(result.Items) == 0 { + errors = append(errors, fmt.Errorf("* no service mirror controller deployment for Link %s", link.TargetClusterName)) + continue } + + controller := result.Items[0] + if controller.Status.AvailableReplicas < 1 { + errors = append(errors, fmt.Errorf("* service mirror controller is not available: %s/%s", controller.Namespace, controller.Name)) + continue + } + clusterNames = append(clusterNames, fmt.Sprintf("\t* %s", link.TargetClusterName)) } - return fmt.Errorf("could not fine rule for %s", strings.Join(expected.resources, ",")) + if len(errors) > 0 { + return joinErrors(errors, 2) + } + + if len(clusterNames) == 0 { + return &SkipError{Reason: "no links"} + } + + return &VerboseSuccess{Message: strings.Join(clusterNames, "\n")} } -func (hc *HealthChecker) checkServiceMirrorLocalRBAC() error { - if hc.Options.SourceCluster { - var errors []string +/* Target cluster access checks */ - clusterRole, err := hc.kubeAPI.RbacV1().ClusterRoles().Get(linkerdServiceMirrorClusterRoleName, metav1.GetOptions{}) +func (hc *HealthChecker) checkRemoteClusterConnectivity() error { + errors := []error{} + links := []string{} + for _, link := range hc.links { + // Load the credentials secret + secret, err := hc.kubeAPI.Interface.CoreV1().Secrets(link.Namespace).Get(link.ClusterCredentialsSecret, metav1.GetOptions{}) if err != nil { - return 
fmt.Errorf("Could not obtain service mirror ClusterRole %s: %s", linkerdServiceMirrorClusterRoleName, err) + errors = append(errors, fmt.Errorf("* secret: [%s/%s]: %s", link.Namespace, link.ClusterCredentialsSecret, err)) + continue } - role, err := hc.kubeAPI.RbacV1().Roles(hc.serviceMirrorNs).Get(linkerdServiceMirrorRoleName, metav1.GetOptions{}) + config, err := servicemirror.ParseRemoteClusterSecret(secret) + if err != nil { - return fmt.Errorf("Could not obtain service mirror Role %s : %s", linkerdServiceMirrorRoleName, err) + errors = append(errors, fmt.Errorf("* secret: [%s/%s]: could not parse config secret: %s", secret.Namespace, secret.Name, err)) + continue } - if len(clusterRole.Rules) != len(expectedServiceMirrorClusterRolePolicies) { - return fmt.Errorf("Service mirror ClusterRole %s has %d policy rules, expected %d", clusterRole.Name, len(clusterRole.Rules), len(expectedServiceMirrorClusterRolePolicies)) + clientConfig, err := clientcmd.RESTConfigFromKubeConfig(config.APIConfig) + if err != nil { + errors = append(errors, fmt.Errorf("* secret: [%s/%s] cluster: [%s]: unable to parse api config: %s", secret.Namespace, secret.Name, config.ClusterName, err)) + continue } - for _, rule := range expectedServiceMirrorClusterRolePolicies { - if err := verifyRule(rule, clusterRole.Rules); err != nil { - errors = append(errors, fmt.Sprintf("Service mirror ClusterRole %s: %s", clusterRole.Name, err)) - } + remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, requestTimeout) + if err != nil { + errors = append(errors, fmt.Errorf("* secret: [%s/%s] cluster: [%s]: could not instantiate api for target cluster: %s", secret.Namespace, secret.Name, config.ClusterName, err)) + continue } - if len(role.Rules) != len(expectedServiceMirrorRolePolicies) { - return fmt.Errorf("Service mirror Role %s has %d policy rules, expected %d", role.Name, len(role.Rules), len(expectedServiceMirrorRolePolicies)) + var verbs []string + if err := 
hc.checkCanPerformAction(remoteAPI, "get", corev1.NamespaceAll, "", "v1", "services"); err == nil { + verbs = append(verbs, "get") } - for _, rule := range expectedServiceMirrorRolePolicies { - if err := verifyRule(rule, role.Rules); err != nil { - errors = append(errors, fmt.Sprintf("Service mirror Role %s: %s", role.Name, err)) - } + if err := hc.checkCanPerformAction(remoteAPI, "list", corev1.NamespaceAll, "", "v1", "services"); err == nil { + verbs = append(verbs, "list") } - if len(errors) > 0 { - return fmt.Errorf(strings.Join(errors, "\n")) + if err := hc.checkCanPerformAction(remoteAPI, "watch", corev1.NamespaceAll, "", "v1", "services"); err == nil { + verbs = append(verbs, "watch") } - return nil + if err := comparePermissions(expectedServiceMirrorRemoteClusterPolicyVerbs, verbs); err != nil { + errors = append(errors, fmt.Errorf("* cluster: [%s]: Insufficient Service permissions: %s", config.ClusterName, err)) + } + + links = append(links, fmt.Sprintf("\t* %s", link.TargetClusterName)) } - return &SkipError{Reason: "not checking muticluster"} -} -func (hc *HealthChecker) checkRemoteClusterAnchors() error { - if len(hc.remoteClusterConfigs) == 0 { - return &SkipError{Reason: "no target cluster configs"} + if len(errors) > 0 { + return joinErrors(errors, 2) + } + + if len(links) == 0 { + return &SkipError{Reason: "no links"} } + return &VerboseSuccess{Message: strings.Join(links, "\n")} +} + +func (hc *HealthChecker) checkRemoteClusterAnchors() error { localAnchors, err := tls.DecodePEMCertificates(hc.linkerdConfig.Global.IdentityContext.TrustAnchorsPem) if err != nil { return fmt.Errorf("Cannot parse source trust anchors: %s", err) } + errors := []string{} + links := []string{} + for _, link := range hc.links { + // Load the credentials secret + secret, err := hc.kubeAPI.Interface.CoreV1().Secrets(link.Namespace).Get(link.ClusterCredentialsSecret, metav1.GetOptions{}) + if err != nil { + errors = append(errors, fmt.Sprintf("* secret: [%s/%s]: %s", 
link.Namespace, link.ClusterCredentialsSecret, err)) + continue + } - var offendingClusters []string - for _, cfg := range hc.remoteClusterConfigs { + config, err := servicemirror.ParseRemoteClusterSecret(secret) + if err != nil { + errors = append(errors, fmt.Sprintf("* secret: [%s/%s]: could not parse config secret: %s", secret.Namespace, secret.Name, err)) + continue + } - clientConfig, err := clientcmd.RESTConfigFromKubeConfig(cfg.APIConfig) + clientConfig, err := clientcmd.RESTConfigFromKubeConfig(config.APIConfig) if err != nil { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s: unable to parse api config", cfg.ClusterName)) + errors = append(errors, fmt.Sprintf("* secret: [%s/%s] cluster: [%s]: unable to parse api config: %s", secret.Namespace, secret.Name, config.ClusterName, err)) continue } remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, requestTimeout) if err != nil { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s: unable to instantiate api", cfg.ClusterName)) + errors = append(errors, fmt.Sprintf("* secret: [%s/%s] cluster: [%s]: could not instantiate api for target cluster: %s", secret.Namespace, secret.Name, config.ClusterName, err)) continue } - _, cfMap, err := FetchLinkerdConfigMap(remoteAPI, cfg.LinkerdNamespace) + _, cfMap, err := FetchLinkerdConfigMap(remoteAPI, link.TargetClusterLinkerdNamespace) if err != nil { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s: unable to fetch anchors: %s", cfg.ClusterName, err)) + errors = append(errors, fmt.Sprintf("* %s: unable to fetch anchors: %s", link.TargetClusterName, err)) continue } + remoteAnchors, err := tls.DecodePEMCertificates(cfMap.Global.IdentityContext.TrustAnchorsPem) if err != nil { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s: cannot parse trust anchors", cfg.ClusterName)) + errors = append(errors, fmt.Sprintf("* %s: cannot parse trust anchors", link.TargetClusterName)) continue } @@ -360,7 +394,7 
@@ func (hc *HealthChecker) checkRemoteClusterAnchors() error { // same, we can only compare certs one way and be sure we have // identical anchors if len(remoteAnchors) != len(localAnchors) { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s", cfg.ClusterName)) + errors = append(errors, fmt.Sprintf("* %s", link.TargetClusterName)) continue } @@ -372,363 +406,165 @@ func (hc *HealthChecker) checkRemoteClusterAnchors() error { for _, remote := range remoteAnchors { local, ok := localAnchorsMap[string(remote.Signature)] if !ok || !local.Equal(remote) { - offendingClusters = append(offendingClusters, fmt.Sprintf("* %s", cfg.ClusterName)) + errors = append(errors, fmt.Sprintf("* %s", link.TargetClusterName)) break } } + links = append(links, fmt.Sprintf("\t* %s", link.TargetClusterName)) } - if len(offendingClusters) > 0 { - return fmt.Errorf("Problematic clusters:\n %s", strings.Join(offendingClusters, "\n ")) + if len(errors) > 0 { + return fmt.Errorf("Problematic clusters:\n %s", strings.Join(errors, "\n ")) } - return nil -} + if len(links) == 0 { + return &SkipError{Reason: "no links"} + } -func serviceExported(svc corev1.Service) bool { - _, hasGtwName := svc.Annotations[k8s.GatewayNameAnnotation] - _, hasGtwNs := svc.Annotations[k8s.GatewayNsAnnotation] - return hasGtwName && hasGtwNs + return &VerboseSuccess{Message: strings.Join(links, "\n")} } -func (hc *HealthChecker) checkDaisyChains() error { - if hc.Options.SourceCluster { - errs := []error{} +/* Gateway mirror checks */ - svcs, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{}) - if err != nil { - return err - } - for _, svc := range svcs.Items { - _, isMirror := svc.Labels[k8s.MirroredResourceLabel] - if isMirror && serviceExported(svc) { - errs = append(errs, fmt.Errorf("mirror service %s.%s is exported", svc.Name, svc.Namespace)) - } - } +func (hc *HealthChecker) checkIfGatewayMirrorsHaveEndpoints(ctx context.Context) error { + links := []string{} + 
errors := []error{} - ts, err := tsclient.NewForConfig(hc.kubeAPI.Config) + for _, link := range hc.links { + selector := metav1.ListOptions{LabelSelector: fmt.Sprintf("%s,%s=%s", k8s.MirroredGatewayLabel, k8s.RemoteClusterNameLabel, link.TargetClusterName)} + gatewayMirrors, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(selector) if err != nil { - return err - } - splits, err := ts.SplitV1alpha1().TrafficSplits(metav1.NamespaceAll).List(metav1.ListOptions{}) - if err != nil { - return err - } - for _, split := range splits.Items { - apex, err := hc.kubeAPI.CoreV1().Services(split.Namespace).Get(split.Spec.Service, metav1.GetOptions{}) - if err != nil { - return err - } - if serviceExported(*apex) { - for _, backend := range split.Spec.Backends { - if backend.Weight.IsZero() { - continue - } - leaf, err := hc.kubeAPI.CoreV1().Services(split.Namespace).Get(backend.Service, metav1.GetOptions{}) - if err != nil { - return err - } - _, isMirror := leaf.Labels[k8s.MirroredResourceLabel] - if isMirror { - errs = append(errs, fmt.Errorf("exported service %s.%s routes to mirror service %s.%s via traffic split %s.%s", - apex.Name, apex.Namespace, leaf.Name, leaf.Namespace, split.Name, split.Namespace, - )) - } - } - } - } - if len(errs) > 0 { - messages := []string{} - for _, err := range errs { - messages = append(messages, fmt.Sprintf("* %s", err.Error())) - } - return errors.New(strings.Join(messages, "\n")) + errors = append(errors, err) + continue } - return nil - } - return &SkipError{Reason: "not checking muticluster"} -} - -func (hc *HealthChecker) checkLocalGateways() error { - - errs := checkGateways(hc.kubeAPI) - if len(errs) > 0 { - return joinErrors(errs, 1) - } - return nil -} -func (hc *HealthChecker) checkRemoteGateways() error { + if len(gatewayMirrors.Items) != 1 { + errors = append(errors, fmt.Errorf("wrong number (%d) of probe gateways for target cluster %s", len(gatewayMirrors.Items), link.TargetClusterName)) + continue + } - if 
len(hc.remoteClusterConfigs) == 0 { - return &SkipError{Reason: "no target cluster configs"} - } + svc := gatewayMirrors.Items[0] - var offendingClusters []error - for _, cfg := range hc.remoteClusterConfigs { - clientConfig, err := clientcmd.RESTConfigFromKubeConfig(cfg.APIConfig) - if err != nil { - offendingClusters = append(offendingClusters, fmt.Errorf("* %s: unable to parse api config", cfg.ClusterName)) + // Check if there is a relevant end-point + endpoints, err := hc.kubeAPI.CoreV1().Endpoints(svc.Namespace).Get(svc.Name, metav1.GetOptions{}) + if err != nil || len(endpoints.Subsets) == 0 { + errors = append(errors, fmt.Errorf("%s.%s mirrored from cluster [%s] has no endpoints", svc.Name, svc.Namespace, svc.Labels[k8s.RemoteClusterNameLabel])) continue } - remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, requestTimeout) + // Check gateway liveness according to probes + req := public.GatewaysRequest{ + TimeWindow: "1m", + RemoteClusterName: link.TargetClusterName, + } + rsp, err := hc.apiClient.Gateways(ctx, &req) if err != nil { - offendingClusters = append(offendingClusters, fmt.Errorf("* %s: unable to instantiate api", cfg.ClusterName)) + errors = append(errors, fmt.Errorf("failed to fetch gateway metrics for %s.%s: %s", svc.Name, svc.Namespace, err)) continue } - - errs := checkGateways(remoteAPI) - if len(errs) > 0 { - offendingClusters = append(offendingClusters, fmt.Errorf("* %s: remote cluster has invalid gateways:\n%s", cfg.ClusterName, joinErrors(errs, 2).Error())) + table := rsp.GetOk().GetGatewaysTable() + if table == nil { + errors = append(errors, fmt.Errorf("failed to fetch gateway metrics for %s.%s: %s", svc.Name, svc.Namespace, rsp.GetError().GetError())) continue } - } - if len(offendingClusters) > 0 { - return joinErrors(offendingClusters, 1) - } - return nil -} - -func checkGateways(api *k8s.KubernetesAPI) []error { - errs := []error{} - services, err := 
api.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{}) - if err != nil { - return []error{err} - } - - for _, svc := range services.Items { - if serviceExported(svc) { - // Check if there is a relevant gateway - gatewayName := svc.Annotations[k8s.GatewayNameAnnotation] - gatewayNamespace := svc.Annotations[k8s.GatewayNsAnnotation] - gateway, err := api.CoreV1().Services(gatewayNamespace).Get(gatewayName, metav1.GetOptions{}) - if err != nil { - errs = append(errs, fmt.Errorf("Exported service %s.%s references a gateway that does not exist: %s.%s", svc.Name, svc.Namespace, gatewayName, gatewayNamespace)) - continue - } - - // check if there is an external IP for the gateway service - if len(gateway.Status.LoadBalancer.Ingress) <= 0 { - errs = append(errs, fmt.Errorf("Exported service %s.%s references a gateway with no external IP: %s.%s", svc.Name, svc.Namespace, gatewayName, gatewayNamespace)) - } - - // check if the gateway service has relevant ports - portNames := []string{k8s.GatewayPortName, k8s.ProbePortName} - for _, portName := range portNames { - if !ifPortExists(gateway.Spec.Ports, portName) { - errs = append(errs, fmt.Errorf("Exported service %s.%s references a gateway that is missing port %s: %s.%s", svc.Name, svc.Namespace, portName, gatewayName, gatewayNamespace)) - } - } - + if len(table.Rows) != 1 { + errors = append(errors, fmt.Errorf("wrong number of (%d) gateway metrics entries for %s.%s", len(table.Rows), svc.Name, svc.Namespace)) + continue } - } - return errs -} -func ifPortExists(ports []corev1.ServicePort, portName string) bool { - for _, port := range ports { - if port.Name == portName { - return true + row := table.Rows[0] + if !row.Alive { + errors = append(errors, fmt.Errorf("liveness checks failed for %s", link.TargetClusterName)) + continue } - } - return false -} - -func (hc *HealthChecker) isTargetCluster() (bool, error) { - services, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{}) - 
if err != nil { - return false, err + links = append(links, fmt.Sprintf("\t* %s", link.TargetClusterName)) + } + if len(errors) > 0 { + return joinErrors(errors, 1) } - for _, service := range services.Items { - if serviceExported(service) { - return true, nil - } + if len(links) == 0 { + return &SkipError{Reason: "no links"} } - return false, nil + return &VerboseSuccess{Message: strings.Join(links, "\n")} } -func (hc *HealthChecker) checkRemoteClusterConnectivity() error { - if hc.Options.SourceCluster { - options := metav1.ListOptions{ - FieldSelector: fmt.Sprintf("%s=%s", "type", k8s.MirrorSecretType), - } - secrets, err := hc.kubeAPI.CoreV1().Secrets(corev1.NamespaceAll).List(options) - if err != nil { - return err - } - - if len(secrets.Items) == 0 { - return &SkipError{Reason: "no target cluster configs"} - } +/* Mirror service checks */ - var errors []error - for _, s := range secrets.Items { - secret := s - config, err := sm.ParseRemoteClusterSecret(&secret) - if err != nil { - errors = append(errors, fmt.Errorf("* secret: [%s/%s]: could not parse config secret: %s", secret.Namespace, secret.Name, err)) - continue - } - - clientConfig, err := clientcmd.RESTConfigFromKubeConfig(config.APIConfig) - if err != nil { - errors = append(errors, fmt.Errorf("* secret: [%s/%s] cluster: [%s]: unable to parse api config: %s", secret.Namespace, secret.Name, config.ClusterName, err)) - continue - } - - remoteAPI, err := k8s.NewAPIForConfig(clientConfig, "", []string{}, requestTimeout) - if err != nil { - errors = append(errors, fmt.Errorf("* secret: [%s/%s] cluster: [%s]: could not instantiate api for target cluster: %s", secret.Namespace, secret.Name, config.ClusterName, err)) - continue - } - - var verbs []string - if err := hc.checkCanPerformAction(remoteAPI, "get", corev1.NamespaceAll, "", "v1", "services"); err == nil { - verbs = append(verbs, "get") - } - - if err := hc.checkCanPerformAction(remoteAPI, "list", corev1.NamespaceAll, "", "v1", "services"); err == nil 
{ - verbs = append(verbs, "list") - } - - if err := hc.checkCanPerformAction(remoteAPI, "watch", corev1.NamespaceAll, "", "v1", "services"); err == nil { - verbs = append(verbs, "watch") - } - - if err := comparePermissions(expectedServiceMirrorRemoteClusterPolicyVerbs, verbs); err != nil { - errors = append(errors, fmt.Errorf("* cluster: [%s]: Insufficient Service permissions: %s", config.ClusterName, err)) - } - - hc.remoteClusterConfigs = append(hc.remoteClusterConfigs, config) - - } +func (hc *HealthChecker) checkIfMirrorServicesHaveEndpoints() error { - if len(errors) > 0 { - return joinErrors(errors, 2) - } - return nil + var servicesWithNoEndpoints []string + selector := fmt.Sprintf("%s, !%s", k8s.MirroredResourceLabel, k8s.MirroredGatewayLabel) + mirrorServices, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{LabelSelector: selector}) + if err != nil { + return err } - return &SkipError{Reason: "not checking muticluster"} -} - -func (hc *HealthChecker) checkRemoteClusterGatewaysHealth(ctx context.Context) error { - if hc.Options.SourceCluster { - if hc.apiClient == nil { - return errors.New("public api client uninitialized") - } - req := &pb.GatewaysRequest{ - TimeWindow: "1m", - } - rsp, err := hc.apiClient.Gateways(ctx, req) - if err != nil { - return err - } - var deadGateways []string - var aliveGateways []string - if len(rsp.GetOk().GatewaysTable.Rows) == 0 { - return &SkipError{Reason: "no target gateways"} - } - for _, gtw := range rsp.GetOk().GatewaysTable.Rows { - if gtw.Alive { - aliveGateways = append(aliveGateways, fmt.Sprintf(" * cluster: [%s], gateway: [%s/%s]", gtw.ClusterName, gtw.Namespace, gtw.Name)) - } else { - deadGateways = append(deadGateways, fmt.Sprintf("* cluster: [%s], gateway: [%s/%s]", gtw.ClusterName, gtw.Namespace, gtw.Name)) - } + for _, svc := range mirrorServices.Items { + // Check if there is a relevant end-point + endpoint, err := hc.kubeAPI.CoreV1().Endpoints(svc.Namespace).Get(svc.Name, 
metav1.GetOptions{}) + if err != nil || len(endpoint.Subsets) == 0 { + servicesWithNoEndpoints = append(servicesWithNoEndpoints, fmt.Sprintf("%s.%s mirrored from cluster [%s] (gateway: [%s/%s])", svc.Name, svc.Namespace, svc.Labels[k8s.RemoteClusterNameLabel], svc.Labels[k8s.RemoteGatewayNsLabel], svc.Labels[k8s.RemoteGatewayNameLabel])) } - - if len(deadGateways) > 0 { - return fmt.Errorf("Some gateways are not alive:\n %s", strings.Join(deadGateways, "\n ")) - } - return &VerboseSuccess{Message: strings.Join(aliveGateways, "\n")} } - return &SkipError{Reason: "not checking muticluster"} -} - -func (hc *HealthChecker) checkIfMirrorServicesHaveEndpoints() error { - if hc.Options.SourceCluster { - - var servicesWithNoEndpoints []string - selector := fmt.Sprintf("%s, !%s", k8s.MirroredResourceLabel, k8s.MirroredGatewayLabel) - mirrorServices, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{LabelSelector: selector}) - if err != nil { - return err - } - for _, svc := range mirrorServices.Items { - // Check if there is a relevant end-point - endpoint, err := hc.kubeAPI.CoreV1().Endpoints(svc.Namespace).Get(svc.Name, metav1.GetOptions{}) - if err != nil || len(endpoint.Subsets) == 0 { - servicesWithNoEndpoints = append(servicesWithNoEndpoints, fmt.Sprintf("%s.%s mirrored from cluster [%s] (gateway: [%s/%s])", svc.Name, svc.Namespace, svc.Labels[k8s.RemoteClusterNameLabel], svc.Labels[k8s.RemoteGatewayNsLabel], svc.Labels[k8s.RemoteGatewayNameLabel])) - } - } - - if len(servicesWithNoEndpoints) > 0 { - return fmt.Errorf("Some mirror services do not have endpoints:\n %s", strings.Join(servicesWithNoEndpoints, "\n ")) - } - return nil + if len(servicesWithNoEndpoints) > 0 { + return fmt.Errorf("Some mirror services do not have endpoints:\n %s", strings.Join(servicesWithNoEndpoints, "\n ")) } - return &SkipError{Reason: "not checking muticluster"} -} -func (hc *HealthChecker) checkIfGatewayMirrorsHaveEndpoints() error { - if 
hc.Options.SourceCluster { + if len(mirrorServices.Items) == 0 { + return &SkipError{Reason: "no mirror services"} + } - var gatewayMirrorsWithNoEndpoints []string - gatewayServices, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{LabelSelector: k8s.MirroredGatewayLabel}) - if err != nil { - return err - } + return nil +} - for _, svc := range gatewayServices.Items { - // Check if there is a relevant end-point - endpoints, err := hc.kubeAPI.CoreV1().Endpoints(svc.Namespace).Get(svc.Name, metav1.GetOptions{}) - if err != nil || len(endpoints.Subsets) == 0 { - gatewayMirrorsWithNoEndpoints = append(gatewayMirrorsWithNoEndpoints, fmt.Sprintf("%s.%s mirrored from cluster [%s]", svc.Name, svc.Namespace, svc.Labels[k8s.RemoteClusterNameLabel])) - } - } +func (hc *HealthChecker) checkForOrphanedServices() error { + errors := []error{} - if len(gatewayMirrorsWithNoEndpoints) > 0 { - return fmt.Errorf("Some gateway mirrors do not have endpoints:\n %s", strings.Join(gatewayMirrorsWithNoEndpoints, "\n ")) - } - return nil + selector := fmt.Sprintf("%s, !%s", k8s.MirroredResourceLabel, k8s.MirroredGatewayLabel) + mirrorServices, err := hc.kubeAPI.CoreV1().Services(metav1.NamespaceAll).List(metav1.ListOptions{LabelSelector: selector}) + if err != nil { + return err } - return &SkipError{Reason: "not checking muticluster"} -} - -func (hc *HealthChecker) checkIfGatewaysHaveEndpoints() error { - var gatewaysWithNoEndpoints []string - services, err := hc.kubeAPI.CoreV1().Services(corev1.NamespaceAll).List(metav1.ListOptions{}) + links, err := multicluster.GetLinks(hc.kubeAPI.DynamicClient) if err != nil { return err } - for _, service := range services.Items { - if gatewayService(service) { - // Check if there is a relevant end-point - endpoints, err := hc.kubeAPI.CoreV1().Endpoints(service.Namespace).Get(service.Name, metav1.GetOptions{}) - if err != nil || len(endpoints.Subsets) == 0 { - gatewaysWithNoEndpoints = append(gatewaysWithNoEndpoints, 
fmt.Sprintf("%s.%s", service.Name, service.Namespace)) + for _, svc := range mirrorServices.Items { + targetCluster := svc.Labels[k8s.RemoteClusterNameLabel] + hasLink := false + for _, link := range links { + if link.TargetClusterName == targetCluster { + hasLink = true + break } } + if !hasLink { + errors = append(errors, fmt.Errorf("mirror service %s.%s is not part of any Link", svc.Name, svc.Namespace)) + } + } + if len(mirrorServices.Items) == 0 { + return &SkipError{Reason: "no mirror services"} } - if len(gatewaysWithNoEndpoints) > 0 { - return fmt.Errorf("Some gateway services do not have endpoints:\n %s", strings.Join(gatewaysWithNoEndpoints, "\n ")) + if len(errors) > 0 { + return joinErrors(errors, 1) } return nil - } -func gatewayService(svc corev1.Service) bool { - _, isGtw := svc.Annotations[k8s.MulticlusterGatewayAnnotation] - return isGtw +/* util */ + +func serviceMirrorComponentsSelector(targetCluster string) string { + return fmt.Sprintf("%s=%s,%s=%s", + k8s.ControllerComponentLabel, linkerdServiceMirrorComponentName, + k8s.RemoteClusterNameLabel, targetCluster) } func joinErrors(errs []error, tabDepth int) error { @@ -739,3 +575,17 @@ func joinErrors(errs []error, tabDepth int) error { } return errors.New(strings.Join(errStrings, "\n")) } + +func comparePermissions(expected, actual []string) error { + sort.Strings(expected) + sort.Strings(actual) + + expectedStr := strings.Join(expected, ",") + actualStr := strings.Join(actual, ",") + + if expectedStr != actualStr { + return fmt.Errorf("expected %s, got %s", expectedStr, actualStr) + } + + return nil +} diff --git a/pkg/k8s/authz.go b/pkg/k8s/authz.go index f8f7cf4d76bdb..09eeb63426f47 100644 --- a/pkg/k8s/authz.go +++ b/pkg/k8s/authz.go @@ -140,6 +140,25 @@ func checkEndpointSlicesExist(k8sClient kubernetes.Interface) error { return errors.New("no EndpointSlice resources exist in the cluster") } +// LinkAccess checks whether the Link CRD is installed on the cluster and the +// client is 
authorized to access Links. +func LinkAccess(k8sClient kubernetes.Interface) error { + res, err := k8sClient.Discovery().ServerResourcesForGroupVersion(LinkAPIGroupVersion) + if err != nil { + return err + } + + if res.GroupVersion == LinkAPIGroupVersion { + for _, apiRes := range res.APIResources { + if apiRes.Kind == LinkKind { + return ResourceAuthz(k8sClient, "", "list", LinkAPIGroup, LinkAPIVersion, "links", "") + } + } + } + + return errors.New("Link CRD not found") +} + // ClusterAccess verifies whether k8sClient is authorized to access all pods in // all namespaces in the cluster. func ClusterAccess(k8sClient kubernetes.Interface) error { diff --git a/pkg/k8s/k8s.go b/pkg/k8s/k8s.go index 892a9e888f03e..e480d3375b8c1 100644 --- a/pkg/k8s/k8s.go +++ b/pkg/k8s/k8s.go @@ -28,6 +28,11 @@ const ( ServiceProfileAPIVersion = "linkerd.io/v1alpha2" ServiceProfileKind = "ServiceProfile" + LinkAPIGroup = "multicluster.linkerd.io" + LinkAPIVersion = "v1alpha1" + LinkAPIGroupVersion = "multicluster.linkerd.io/v1alpha1" + LinkKind = "Link" + // special case k8s job label, to not conflict with Prometheus' job label l5dJob = "k8s_job" ) diff --git a/pkg/multicluster/link.go b/pkg/multicluster/link.go new file mode 100644 index 0000000000000..2503301a23880 --- /dev/null +++ b/pkg/multicluster/link.go @@ -0,0 +1,273 @@ +package multicluster + +import ( + "errors" + "fmt" + "strconv" + "strings" + "time" + + "github.com/linkerd/linkerd2/pkg/k8s" + consts "github.com/linkerd/linkerd2/pkg/k8s" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" +) + +type ( + // ProbeSpec defines how a gateway should be queried for health. Once per + // period, the probe workers will send an HTTP request to the remote gateway + // on the given port with the given path and expect a HTTP 200 response. 
+ ProbeSpec struct { + Path string + Port uint32 + Period time.Duration + } + + // Link is an internal representation of the link.multicluster.linkerd.io + // custom resource. It defines a multicluster link to a gateway in a + // target cluster and is configures the behavior of a service mirror + // controller. + Link struct { + Name string + Namespace string + TargetClusterName string + TargetClusterDomain string + TargetClusterLinkerdNamespace string + ClusterCredentialsSecret string + GatewayAddress string + GatewayPort uint32 + GatewayIdentity string + ProbeSpec ProbeSpec + } +) + +// LinkGVR is the Group Version and Resource of the Link custom resource. +var LinkGVR = schema.GroupVersionResource{ + Group: k8s.LinkAPIGroup, + Version: k8s.LinkAPIVersion, + Resource: "links", +} + +func (ps ProbeSpec) String() string { + return fmt.Sprintf("ProbeSpec: {path: %s, port: %d, period: %s}", ps.Path, ps.Port, ps.Period) +} + +// NewLink parses an unstructured link.multicluster.linkerd.io resource and +// converts it to a structured internal representation. 
+func NewLink(u unstructured.Unstructured) (Link, error) { + + spec, ok := u.Object["spec"] + if !ok { + return Link{}, errors.New("Field 'spec' is missing") + } + specObj, ok := spec.(map[string]interface{}) + if !ok { + return Link{}, errors.New("Field 'spec' is not an object") + } + + ps, ok := specObj["probeSpec"] + if !ok { + return Link{}, errors.New("Field 'probeSpec' is missing") + } + psObj, ok := ps.(map[string]interface{}) + if !ok { + return Link{}, errors.New("Field 'probeSpec' it not an object") + } + + probeSpec, err := newProbeSpec(psObj) + if err != nil { + return Link{}, err + } + + targetClusterName, err := stringField(specObj, "targetClusterName") + if err != nil { + return Link{}, err + } + + targetClusterDomain, err := stringField(specObj, "targetClusterDomain") + if err != nil { + return Link{}, err + } + + targetClusterLinkerdNamespace, err := stringField(specObj, "targetClusterLinkerdNamespace") + if err != nil { + return Link{}, err + } + + clusterCredentialsSecret, err := stringField(specObj, "clusterCredentialsSecret") + if err != nil { + return Link{}, err + } + + gatewayAddress, err := stringField(specObj, "gatewayAddress") + if err != nil { + return Link{}, err + } + + portStr, err := stringField(specObj, "gatewayPort") + if err != nil { + return Link{}, err + } + gatewayPort, err := strconv.ParseUint(portStr, 10, 32) + if err != nil { + return Link{}, err + } + + gatewayIdentity, err := stringField(specObj, "gatewayIdentity") + if err != nil { + return Link{}, err + } + + return Link{ + Name: u.GetName(), + Namespace: u.GetNamespace(), + TargetClusterName: targetClusterName, + TargetClusterDomain: targetClusterDomain, + TargetClusterLinkerdNamespace: targetClusterLinkerdNamespace, + ClusterCredentialsSecret: clusterCredentialsSecret, + GatewayAddress: gatewayAddress, + GatewayPort: uint32(gatewayPort), + GatewayIdentity: gatewayIdentity, + ProbeSpec: probeSpec, + }, nil +} + +// ToUnstructured converts a Link struct into an 
unstructured resource that can +// be used by a kubernetes dynamic client. +func (l Link) ToUnstructured() unstructured.Unstructured { + return unstructured.Unstructured{ + + Object: map[string]interface{}{ + "apiVersion": k8s.LinkAPIGroupVersion, + "kind": k8s.LinkKind, + "metadata": map[string]interface{}{ + "name": l.Name, + "namespace": l.Namespace, + }, + "spec": map[string]interface{}{ + "targetClusterName": l.TargetClusterName, + "targetClusterDomain": l.TargetClusterDomain, + "targetClusterLinkerdNamespace": l.TargetClusterLinkerdNamespace, + "clusterCredentialsSecret": l.ClusterCredentialsSecret, + "gatewayAddress": l.GatewayAddress, + "gatewayPort": fmt.Sprintf("%d", l.GatewayPort), + "gatewayIdentity": l.GatewayIdentity, + "probeSpec": map[string]interface{}{ + "path": l.ProbeSpec.Path, + "port": fmt.Sprintf("%d", l.ProbeSpec.Port), + "period": l.ProbeSpec.Period.String(), + }, + }, + }, + } +} + +// ExtractProbeSpec parses the ProbSpec from a gateway service's annotations. +func ExtractProbeSpec(gateway *corev1.Service) (ProbeSpec, error) { + path := gateway.Annotations[consts.GatewayProbePath] + if path == "" { + return ProbeSpec{}, errors.New("probe path is empty") + } + + port, err := extractPort(gateway.Spec.Ports, consts.ProbePortName) + if err != nil { + return ProbeSpec{}, err + } + + period, err := strconv.ParseUint(gateway.Annotations[consts.GatewayProbePeriod], 10, 32) + if err != nil { + return ProbeSpec{}, err + } + + return ProbeSpec{ + Path: path, + Port: port, + Period: time.Duration(period) * time.Second, + }, nil +} + +// GetLinks fetchs a list of all Link objects in the cluster. 
+func GetLinks(client dynamic.Interface) ([]Link, error) { + list, err := client.Resource(LinkGVR).List(metav1.ListOptions{}) + if err != nil { + return nil, err + } + links := []Link{} + errs := []string{} + for _, u := range list.Items { + link, err := NewLink(u) + if err != nil { + errs = append(errs, fmt.Sprintf("failed to parse Link %s: %s", u.GetName(), err)) + } else { + links = append(links, link) + } + } + if len(errs) > 0 { + return nil, errors.New(strings.Join(errs, "\n")) + } + return links, nil +} + +// GetLink fetches a Link object from Kubernetes by name/namespace. +func GetLink(client dynamic.Interface, namespace, name string) (Link, error) { + unstructured, err := client.Resource(LinkGVR).Namespace(namespace).Get(name, metav1.GetOptions{}) + if err != nil { + return Link{}, err + } + return NewLink(*unstructured) +} + +func extractPort(port []corev1.ServicePort, portName string) (uint32, error) { + for _, p := range port { + if p.Name == portName { + return uint32(p.Port), nil + } + } + return 0, fmt.Errorf("could not find port with name %s", portName) +} + +func newProbeSpec(obj map[string]interface{}) (ProbeSpec, error) { + periodStr, err := stringField(obj, "period") + if err != nil { + return ProbeSpec{}, err + } + period, err := time.ParseDuration(periodStr) + if err != nil { + return ProbeSpec{}, err + } + + path, err := stringField(obj, "path") + if err != nil { + return ProbeSpec{}, err + } + + portStr, err := stringField(obj, "port") + if err != nil { + return ProbeSpec{}, err + } + port, err := strconv.ParseUint(portStr, 10, 32) + if err != nil { + return ProbeSpec{}, err + } + + return ProbeSpec{ + Path: path, + Port: uint32(port), + Period: period, + }, nil +} + +func stringField(obj map[string]interface{}, key string) (string, error) { + value, ok := obj[key] + if !ok { + return "", fmt.Errorf("Field '%s' is missing", key) + } + str, ok := value.(string) + if !ok { + return "", fmt.Errorf("Field '%s' is not a string", key) + } + return 
str, nil +} diff --git a/test/integration/install_test.go b/test/integration/install_test.go index f2cb969b08c5e..8745348cb7b04 100644 --- a/test/integration/install_test.go +++ b/test/integration/install_test.go @@ -868,7 +868,6 @@ func TestUninstallMulticluster(t *testing.T) { } else { exec := append([]string{"multicluster"}, []string{ "install", - "--log-level", "debug", "--namespace", TestHelper.GetMulticlusterNamespace(), }...) out, stderr, err := TestHelper.LinkerdRun(exec...) diff --git a/test/integration/testdata/check.multicluster.golden b/test/integration/testdata/check.multicluster.golden index ae5ff3c339a01..05a884f5eb75a 100644 --- a/test/integration/testdata/check.multicluster.golden +++ b/test/integration/testdata/check.multicluster.golden @@ -74,17 +74,8 @@ linkerd-grafana √ grafana add-on config map exists √ grafana pod is running -linkerd-multicluster-source ---------------------------- -√ service mirror controller is running -√ service mirror controller ClusterRoles exist -√ service mirror controller ClusterRoleBindings exist -√ service mirror controller Roles exist -√ service mirror controller RoleBindings exist -√ service mirror controller ServiceAccounts exist -√ service mirror controller has required permissions -√ multicluster daisy chaining is avoided -√ all mirror services have endpoints -√ all gateway mirrors have endpoints +linkerd-multicluster +-------------------- +√ Link CRD exists Status check results are √ diff --git a/test/integration/testdata/check.multicluster.proxy.golden b/test/integration/testdata/check.multicluster.proxy.golden index a1e816899c12f..f3050fdc9d84a 100644 --- a/test/integration/testdata/check.multicluster.proxy.golden +++ b/test/integration/testdata/check.multicluster.proxy.golden @@ -81,17 +81,8 @@ linkerd-grafana √ grafana add-on config map exists √ grafana pod is running -linkerd-multicluster-source ---------------------------- -√ service mirror controller is running -√ service mirror controller ClusterRoles 
exist -√ service mirror controller ClusterRoleBindings exist -√ service mirror controller Roles exist -√ service mirror controller RoleBindings exist -√ service mirror controller ServiceAccounts exist -√ service mirror controller has required permissions -√ multicluster daisy chaining is avoided -√ all mirror services have endpoints -√ all gateway mirrors have endpoints +linkerd-multicluster +-------------------- +√ Link CRD exists Status check results are √ diff --git a/testutil/test_helper.go b/testutil/test_helper.go index 4c9501b76eb3c..3c9356947ba3b 100644 --- a/testutil/test_helper.go +++ b/testutil/test_helper.go @@ -113,8 +113,7 @@ func NewGenericTestHelper( // MulticlusterDeployReplicas is a map containing the number of replicas for each Deployment and the main // container name for multicluster components var MulticlusterDeployReplicas = map[string]DeploySpec{ - "linkerd-gateway": {1, []string{"nginx"}}, - "linkerd-service-mirror": {1, []string{"service-mirror"}}, + "linkerd-gateway": {1, []string{"nginx"}}, } // NewTestHelper creates a new instance of TestHelper for the current test run.