From 50a60b6809671266bdea1541b1012fd1eeba4774 Mon Sep 17 00:00:00 2001 From: Ashwin Venkatesh Date: Thu, 19 Jan 2023 15:49:14 -0500 Subject: [PATCH] Use Proxy Healthchecks when configured. - When a service is configured with the correct annotation, a readiness endpoint with be configured in Consul dataplane, and the readiness probe of the sidecar will be configured to use that endpoint to determine the health of the system. Additionally, when t-proxy is enabled, that port shall be in the ExcludeList for inbound connections. --- CHANGELOG.md | 2 +- .../connect-inject/container_init.go | 15 ++++ .../connect-inject/container_init_test.go | 88 ++++++++++++++++++- .../connect-inject/endpoints_controller.go | 35 ++++++-- .../endpoints_controller_test.go | 82 +++++++++++++++++ control-plane/connect-inject/envoy_sidecar.go | 23 ++++- .../connect-inject/envoy_sidecar_test.go | 50 ++++++++--- .../connect-inject/redirect_traffic.go | 6 ++ .../connect-inject/redirect_traffic_test.go | 33 +++++++ 9 files changed, 315 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08aaf16bbc..d9cc5e4aa7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ IMPROVEMENTS: * Helm: * Add a `global.extraLabels` stanza to allow setting global Kubernetes labels for all components deployed by the `consul-k8s` Helm chart. [[GH-1778](https://github.com/hashicorp/consul-k8s/pull/1778)] * Control-Plane - * Add support for the annotation `consul.hashicorp.com/use-proxy-health-check`. [[GH-1824](https://github.com/hashicorp/consul-k8s/pull/1824)] + * Add support for the annotation `consul.hashicorp.com/use-proxy-health-check`. When this annotation is used by a service, it configures a readiness endpoint on Consul Dataplane and queries it instead of the proxy's inbound port which forwards requests to the application. [[GH-1824](https://github.com/hashicorp/consul-k8s/pull/1824)], [[GH-1843](https://github.com/hashicorp/consul-k8s/pull/1843)] * Add health check for synced services based on the status of the Kubernetes readiness probe on synced pod. [[GH-1821](https://github.com/hashicorp/consul-k8s/pull/1821)] BUG FIXES: diff --git a/control-plane/connect-inject/container_init.go b/control-plane/connect-inject/container_init.go index e0ef413011..47dabbbe0b 100644 --- a/control-plane/connect-inject/container_init.go +++ b/control-plane/connect-inject/container_init.go @@ -56,6 +56,12 @@ type initContainerCommandData struct { // EnvoyUID is the Linux user id that will be used when tproxy is enabled. EnvoyUID int + // EnableProxyHealthChecks configures a readiness endpoint on the envoy sidecar. + EnableProxyHealthChecks bool + // EnableProxyHealthChecks is the port on which the readiness endpoint is configured + // on the envoy sidecar. + EnvoyHealthCheckPort int + // EnableTransparentProxy configures this init container to run in transparent proxy mode, // i.e. run consul connect redirect-traffic command and add the required privileges to the // container to do that. @@ -166,6 +172,8 @@ func (w *MeshWebhook) containerInit(namespace corev1.Namespace, pod corev1.Pod, ConsulCACert: w.ConsulCACert, EnableTransparentProxy: tproxyEnabled, EnableCNI: w.EnableCNI, + EnableProxyHealthChecks: useProxyHealthCheck(pod), + EnvoyHealthCheckPort: proxyDefaultHealthPort + mpi.serviceIndex, TProxyExcludeInboundPorts: splitCommaSeparatedItemsFromAnnotation(annotationTProxyExcludeInboundPorts, pod), TProxyExcludeOutboundPorts: splitCommaSeparatedItemsFromAnnotation(annotationTProxyExcludeOutboundPorts, pod), TProxyExcludeOutboundCIDRs: splitCommaSeparatedItemsFromAnnotation(annotationTProxyExcludeOutboundCIDRs, pod), @@ -436,6 +444,10 @@ consul-k8s-control-plane connect-init -pod-name=${POD_NAME} -pod-namespace=${POD {{- else }} -proxy-id="$(cat /consul/connect-inject/proxyid)" \ {{- end }} + {{- if .EnableProxyHealthChecks }} + -envoy-ready-bind-address="${POD_IP}" \ + -envoy-ready-bind-port={{ .EnvoyHealthCheckPort }} \ + {{- end }} {{- if .PrometheusScrapePath }} -prometheus-scrape-path="{{ .PrometheusScrapePath }}" \ {{- end }} @@ -498,6 +510,9 @@ consul-k8s-control-plane connect-init -pod-name=${POD_NAME} -pod-namespace=${POD {{- range .TProxyExcludeOutboundPorts }} -exclude-outbound-port="{{ . }}" \ {{- end }} + {{- if .EnableProxyHealthChecks }} + -exclude-inbound-port={{ .EnvoyHealthCheckPort }} \ + {{- end }} {{- range .TProxyExcludeOutboundCIDRs }} -exclude-outbound-cidr="{{ . }}" \ {{- end }} diff --git a/control-plane/connect-inject/container_init_test.go b/control-plane/connect-inject/container_init_test.go index 22f14c1f73..f4ab3eed47 100644 --- a/control-plane/connect-inject/container_init_test.go +++ b/control-plane/connect-inject/container_init_test.go @@ -74,7 +74,29 @@ consul-k8s-control-plane connect-init -pod-name=${POD_NAME} -pod-namespace=${POD "", "", }, + { + "Proxy Health Checks", + func(pod *corev1.Pod) *corev1.Pod { + pod.Annotations[annotationService] = "web" + pod.Annotations[annotationUseProxyHealthCheck] = "true" + return pod + }, + MeshWebhook{}, + `/bin/sh -ec +export CONSUL_HTTP_ADDR="${HOST_IP}:8500" +export CONSUL_GRPC_ADDR="${HOST_IP}:8502" +consul-k8s-control-plane connect-init -pod-name=${POD_NAME} -pod-namespace=${POD_NAMESPACE} \ + -consul-api-timeout=0s \ +# Generate the envoy bootstrap code +/consul/connect-inject/consul connect envoy \ + -proxy-id="$(cat /consul/connect-inject/proxyid)" \ + -envoy-ready-bind-address="${POD_IP}" \ + -envoy-ready-bind-port=21000 \ + -bootstrap > /consul/connect-inject/envoy-bootstrap.yaml`, + "", + "", + }, { "When auth method is set -service-account-name and -service-name are passed in", func(pod *corev1.Pod) *corev1.Pod { @@ -384,7 +406,6 @@ func TestHandlerContainerInit_transparentProxy(t *testing.T) { -proxy-uid=5995`, map[string]string{keyTransparentProxy: "true"}, }, - "enabled globally, ns not set, annotation not set, cni enabled": { true, true, @@ -395,6 +416,17 @@ func TestHandlerContainerInit_transparentProxy(t *testing.T) { -proxy-uid=5995`, nil, }, + "enabled globally, ns not set, annotation is true, cni disabled, proxy health checks": { + true, + false, + map[string]string{keyTransparentProxy: "true", annotationUseProxyHealthCheck: "true"}, + `/consul/connect-inject/consul connect redirect-traffic \ + -exclude-inbound-port=21000 \ + -proxy-id="$(cat /consul/connect-inject/proxyid)" \ + -proxy-uid=5995`, + "", + nil, + }, } for name, c := range cases { t.Run(name, func(t *testing.T) { @@ -995,6 +1027,60 @@ consul-k8s-control-plane connect-init -pod-name=${POD_NAME} -pod-namespace=${POD /consul/connect-inject/consul connect envoy \ -proxy-id="$(cat /consul/connect-inject/proxyid-web-admin)" \ -admin-bind=127.0.0.1:19001 \ + -bootstrap > /consul/connect-inject/envoy-bootstrap-web-admin.yaml`, + }, + }, + { + "Whole template, multiport, proxy health check", + func(pod *corev1.Pod) *corev1.Pod { + pod.Annotations[annotationUseProxyHealthCheck] = "true" + return pod + }, + MeshWebhook{ConsulAPITimeout: 5 * time.Second}, + 2, + []multiPortInfo{ + { + serviceIndex: 0, + serviceName: "web", + }, + { + serviceIndex: 1, + serviceName: "web-admin", + }, + }, + []string{ + `/bin/sh -ec +export CONSUL_HTTP_ADDR="${HOST_IP}:8500" +export CONSUL_GRPC_ADDR="${HOST_IP}:8502" +consul-k8s-control-plane connect-init -pod-name=${POD_NAME} -pod-namespace=${POD_NAMESPACE} \ + -consul-api-timeout=5s \ + -multiport=true \ + -proxy-id-file=/consul/connect-inject/proxyid-web \ + -service-name="web" \ + +# Generate the envoy bootstrap code +/consul/connect-inject/consul connect envoy \ + -proxy-id="$(cat /consul/connect-inject/proxyid-web)" \ + -envoy-ready-bind-address="${POD_IP}" \ + -envoy-ready-bind-port=21000 \ + -admin-bind=127.0.0.1:19000 \ + -bootstrap > /consul/connect-inject/envoy-bootstrap-web.yaml`, + + `/bin/sh -ec +export CONSUL_HTTP_ADDR="${HOST_IP}:8500" +export CONSUL_GRPC_ADDR="${HOST_IP}:8502" +consul-k8s-control-plane connect-init -pod-name=${POD_NAME} -pod-namespace=${POD_NAMESPACE} \ + -consul-api-timeout=5s \ + -multiport=true \ + -proxy-id-file=/consul/connect-inject/proxyid-web-admin \ + -service-name="web-admin" \ + +# Generate the envoy bootstrap code +/consul/connect-inject/consul connect envoy \ + -proxy-id="$(cat /consul/connect-inject/proxyid-web-admin)" \ + -envoy-ready-bind-address="${POD_IP}" \ + -envoy-ready-bind-port=21001 \ + -admin-bind=127.0.0.1:19001 \ -bootstrap > /consul/connect-inject/envoy-bootstrap-web-admin.yaml`, }, }, diff --git a/control-plane/connect-inject/endpoints_controller.go b/control-plane/connect-inject/endpoints_controller.go index 9b37ca3bcf..f9759b4fe7 100644 --- a/control-plane/connect-inject/endpoints_controller.go +++ b/control-plane/connect-inject/endpoints_controller.go @@ -61,6 +61,9 @@ const ( // proxyDefaultInboundPort is the default inbound port for the proxy. proxyDefaultInboundPort = 20000 + + // proxyDefaultHealthPort is the default health check port for the proxy. + proxyDefaultHealthPort = 21000 ) type EndpointsController struct { @@ -501,6 +504,31 @@ func (r *EndpointsController) createServiceRegistrations(pod corev1.Pod, service if idx := getMultiPortIdx(pod, serviceEndpoints); idx >= 0 { proxyPort += idx } + var publicListenerCheck api.AgentServiceCheck + if useProxyHealthCheck(pod) { + // When using the proxy's health check, create an HTTP check on the ready endpoint + // that will be configured on the proxy sidecar container. + healthCheckPort := proxyDefaultHealthPort + if idx := getMultiPortIdx(pod, serviceEndpoints); idx >= 0 { + healthCheckPort += idx + } + publicListenerCheck = api.AgentServiceCheck{ + Name: "Proxy Public Listener", + HTTP: fmt.Sprintf("http://%s:%d/ready", pod.Status.PodIP, healthCheckPort), + TLSSkipVerify: true, + Interval: "10s", + DeregisterCriticalServiceAfter: "10m", + } + } else { + // Configure the default application health check. + publicListenerCheck = api.AgentServiceCheck{ + Name: "Proxy Public Listener", + TCP: fmt.Sprintf("%s:%d", pod.Status.PodIP, proxyPort), + Interval: "10s", + DeregisterCriticalServiceAfter: "10m", + } + } + proxyService := &api.AgentServiceRegistration{ Kind: api.ServiceKindConnectProxy, ID: proxyServiceID, @@ -511,12 +539,7 @@ func (r *EndpointsController) createServiceRegistrations(pod corev1.Pod, service Namespace: r.consulNamespace(pod.Namespace), Proxy: proxyConfig, Checks: api.AgentServiceChecks{ - { - Name: "Proxy Public Listener", - TCP: fmt.Sprintf("%s:%d", pod.Status.PodIP, proxyPort), - Interval: "10s", - DeregisterCriticalServiceAfter: "10m", - }, + &publicListenerCheck, { Name: "Destination Alias", AliasService: serviceID, diff --git a/control-plane/connect-inject/endpoints_controller_test.go b/control-plane/connect-inject/endpoints_controller_test.go index 49ee67a91d..ce76d1fa49 100644 --- a/control-plane/connect-inject/endpoints_controller_test.go +++ b/control-plane/connect-inject/endpoints_controller_test.go @@ -1187,6 +1187,7 @@ func TestReconcileCreateEndpoint(t *testing.T) { expectedProxySvcInstances []*api.CatalogService expectedAgentHealthChecks []*api.AgentCheck expErr string + useProxyHealthChecks bool }{ { name: "Empty endpoints", @@ -1279,6 +1280,76 @@ func TestReconcileCreateEndpoint(t *testing.T) { }, }, }, + { + name: "Basic endpoints with proxy healthchecks", + useProxyHealthChecks: true, + consulSvcName: "service-created", + k8sObjects: func() []runtime.Object { + pod1 := createPod("pod1", "1.2.3.4", true, true) + pod1.Annotations[annotationUseProxyHealthCheck] = "true" + endpoint := &corev1.Endpoints{ + ObjectMeta: metav1.ObjectMeta{ + Name: "service-created", + Namespace: "default", + }, + Subsets: []corev1.EndpointSubset{ + { + Addresses: []corev1.EndpointAddress{ + { + IP: "1.2.3.4", + NodeName: &nodeName, + TargetRef: &corev1.ObjectReference{ + Kind: "Pod", + Name: "pod1", + Namespace: "default", + }, + }, + }, + }, + }, + } + return []runtime.Object{pod1, endpoint} + }, + initialConsulSvcs: []*api.AgentServiceRegistration{}, + expectedNumSvcInstances: 1, + expectedConsulSvcInstances: []*api.CatalogService{ + { + ServiceID: "pod1-service-created", + ServiceName: "service-created", + ServiceAddress: "1.2.3.4", + ServicePort: 0, + ServiceMeta: map[string]string{MetaKeyPodName: "pod1", MetaKeyKubeServiceName: "service-created", MetaKeyKubeNS: "default", MetaKeyManagedBy: managedByValue}, + ServiceTags: []string{}, + }, + }, + expectedProxySvcInstances: []*api.CatalogService{ + { + ServiceID: "pod1-service-created-sidecar-proxy", + ServiceName: "service-created-sidecar-proxy", + ServiceAddress: "1.2.3.4", + ServicePort: 20000, + ServiceProxy: &api.AgentServiceConnectProxyConfig{ + DestinationServiceName: "service-created", + DestinationServiceID: "pod1-service-created", + LocalServiceAddress: "", + LocalServicePort: 0, + }, + ServiceMeta: map[string]string{MetaKeyPodName: "pod1", MetaKeyKubeServiceName: "service-created", MetaKeyKubeNS: "default", MetaKeyManagedBy: managedByValue}, + ServiceTags: []string{}, + }, + }, + expectedAgentHealthChecks: []*api.AgentCheck{ + { + CheckID: "default/pod1-service-created/kubernetes-health-check", + ServiceName: "service-created", + ServiceID: "pod1-service-created", + Name: "Kubernetes Health Check", + Status: api.HealthPassing, + Output: kubernetesSuccessReasonMsg, + Type: ttl, + }, + }, + }, { name: "Endpoints with multiple addresses", consulSvcName: "service-created", @@ -1811,6 +1882,17 @@ func TestReconcileCreateEndpoint(t *testing.T) { require.Contains(t, expectedChecks, checks[0].Name) require.Contains(t, expectedChecks, checks[1].Name) } + agentChecks, err := consulClient.Agent().Checks() + require.NoError(t, err) + for _, check := range agentChecks { + if check.Name == "Proxy Public Listener" { + if tt.useProxyHealthChecks { + require.Equal(t, "http", check.Type) + } else { + require.Equal(t, "tcp", check.Type) + } + } + } // Check that the Consul health check was created for the k8s pod. if tt.expectedAgentHealthChecks != nil { diff --git a/control-plane/connect-inject/envoy_sidecar.go b/control-plane/connect-inject/envoy_sidecar.go index 7bf55afe60..53b3fcac55 100644 --- a/control-plane/connect-inject/envoy_sidecar.go +++ b/control-plane/connect-inject/envoy_sidecar.go @@ -50,6 +50,14 @@ func (w *MeshWebhook) envoySidecar(namespace corev1.Namespace, pod corev1.Pod, m Command: cmd, } + if useProxyHealthCheck(pod) { + // Add a port on the sidecar where the sidecar proxy will be queried for its health. + container.Ports = append(container.Ports, corev1.ContainerPort{ + Name: fmt.Sprintf("%s-%d", "proxy-health", mpi.serviceIndex), + ContainerPort: int32(proxyDefaultHealthPort + mpi.serviceIndex), + }) + } + // Add any extra Envoy VolumeMounts. if _, ok := pod.Annotations[annotationConsulSidecarUserVolumeMount]; ok { var volumeMount []corev1.VolumeMount @@ -105,7 +113,7 @@ func (w *MeshWebhook) getContainerSidecarCommand(pod corev1.Pod, multiPortSvcNam } if multiPortSvcName != "" { // --base-id is needed so multiple Envoy proxies can run on the same host. - cmd = append(cmd, "--base-id", fmt.Sprintf("%d", multiPortSvcIdx)) + cmd = append(cmd, "--base-id", strconv.Itoa(multiPortSvcIdx)) } // Check to see if the user has overriden concurrency via an annotation. @@ -215,3 +223,16 @@ func (w *MeshWebhook) envoySidecarResources(pod corev1.Pod) (corev1.ResourceRequ return resources, nil } + +// useProxyHealthCheck returns true if the pod has the annotation 'consul.hashicorp.com/use-proxy-health-check' +// set to truthy values. +func useProxyHealthCheck(pod corev1.Pod) bool { + if v, ok := pod.Annotations[annotationUseProxyHealthCheck]; ok { + useProxyHealthCheck, err := strconv.ParseBool(v) + if err != nil { + return false + } + return useProxyHealthCheck + } + return false +} diff --git a/control-plane/connect-inject/envoy_sidecar_test.go b/control-plane/connect-inject/envoy_sidecar_test.go index cd83d74244..43c464ed7f 100644 --- a/control-plane/connect-inject/envoy_sidecar_test.go +++ b/control-plane/connect-inject/envoy_sidecar_test.go @@ -12,10 +12,10 @@ import ( ) func TestHandlerEnvoySidecar(t *testing.T) { - require := require.New(t) cases := map[string]struct { annotations map[string]string expCommand []string + expPort *corev1.ContainerPort expErr string }{ "default settings, no annotations": { @@ -39,6 +39,21 @@ func TestHandlerEnvoySidecar(t *testing.T) { "--concurrency", "42", }, }, + "default settings, proxy health check annotations": { + annotations: map[string]string{ + annotationService: "foo", + annotationUseProxyHealthCheck: "true", + }, + expCommand: []string{ + "envoy", + "--config-path", "/consul/connect-inject/envoy-bootstrap.yaml", + "--concurrency", "0", + }, + expPort: &corev1.ContainerPort{ + Name: "proxy-health-0", + ContainerPort: int32(proxyDefaultHealthPort), + }, + }, "default settings, invalid concurrency annotation negative number": { annotations: map[string]string{ annotationService: "foo", @@ -64,28 +79,31 @@ func TestHandlerEnvoySidecar(t *testing.T) { } container, err := h.envoySidecar(testNS, pod, multiPortInfo{}) if c.expErr != "" { - require.Contains(err.Error(), c.expErr) + require.Contains(t, err.Error(), c.expErr) } else { - require.NoError(err) - require.Equal(c.expCommand, container.Command) - require.Equal(container.VolumeMounts, []corev1.VolumeMount{ + require.NoError(t, err) + require.Equal(t, c.expCommand, container.Command) + require.Equal(t, container.VolumeMounts, []corev1.VolumeMount{ { Name: volumeName, MountPath: "/consul/connect-inject", }, }) + if c.expPort != nil { + require.Contains(t, container.Ports, *c.expPort) + } } }) } } func TestHandlerEnvoySidecar_Multiport(t *testing.T) { - require := require.New(t) w := MeshWebhook{} pod := corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Annotations: map[string]string{ - annotationService: "web,web-admin", + annotationService: "web,web-admin", + annotationUseProxyHealthCheck: "true", }, }, @@ -114,17 +132,29 @@ func TestHandlerEnvoySidecar_Multiport(t *testing.T) { 0: {"envoy", "--config-path", "/consul/connect-inject/envoy-bootstrap-web.yaml", "--base-id", "0", "--concurrency", "0"}, 1: {"envoy", "--config-path", "/consul/connect-inject/envoy-bootstrap-web-admin.yaml", "--base-id", "1", "--concurrency", "0"}, } + expPorts := map[int]corev1.ContainerPort{ + 0: { + Name: "proxy-health-0", + ContainerPort: int32(proxyDefaultHealthPort), + }, + 1: { + Name: "proxy-health-1", + ContainerPort: int32(proxyDefaultHealthPort + 1), + }, + } for i := 0; i < 2; i++ { container, err := w.envoySidecar(testNS, pod, multiPortInfos[i]) - require.NoError(err) - require.Equal(expCommand[i], container.Command) + require.NoError(t, err) + require.Equal(t, expCommand[i], container.Command) - require.Equal(container.VolumeMounts, []corev1.VolumeMount{ + require.Equal(t, container.VolumeMounts, []corev1.VolumeMount{ { Name: volumeName, MountPath: "/consul/connect-inject", }, }) + + require.Contains(t, container.Ports, expPorts[i]) } } diff --git a/control-plane/connect-inject/redirect_traffic.go b/control-plane/connect-inject/redirect_traffic.go index 895b5befbe..71aebf54e7 100644 --- a/control-plane/connect-inject/redirect_traffic.go +++ b/control-plane/connect-inject/redirect_traffic.go @@ -44,6 +44,12 @@ func (w *MeshWebhook) addRedirectTrafficConfigAnnotation(pod *corev1.Pod, ns cor cfg.ExcludeInboundPorts = append(cfg.ExcludeInboundPorts, prometheusScrapePort) } + // Exclude the port on which the proxy health check port will be configured if + // using the proxy health check for a service. + if useProxyHealthCheck(*pod) { + cfg.ExcludeInboundPorts = append(cfg.ExcludeInboundPorts, strconv.Itoa(proxyDefaultHealthPort)) + } + // Exclude any overwritten liveness/readiness/startup ports from redirection. overwriteProbes, err := shouldOverwriteProbes(*pod, w.TProxyOverwriteProbes) if err != nil { diff --git a/control-plane/connect-inject/redirect_traffic_test.go b/control-plane/connect-inject/redirect_traffic_test.go index 838f1c0c25..1478786be5 100644 --- a/control-plane/connect-inject/redirect_traffic_test.go +++ b/control-plane/connect-inject/redirect_traffic_test.go @@ -74,6 +74,39 @@ func TestAddRedirectTrafficConfig(t *testing.T) { ExcludeUIDs: []string{"5996"}, }, }, + { + name: "proxy health checks enabled", + webhook: MeshWebhook{ + Log: logrtest.TestLogger{T: t}, + AllowK8sNamespacesSet: mapset.NewSetWith("*"), + DenyK8sNamespacesSet: mapset.NewSet(), + decoder: decoder, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: defaultNamespace, + Name: defaultPodName, + Annotations: map[string]string{ + annotationUseProxyHealthCheck: "true", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test", + }, + }, + }, + }, + expCfg: iptables.Config{ + ConsulDNSIP: "", + ProxyUserID: strconv.Itoa(envoyUserAndGroupID), + ProxyInboundPort: proxyDefaultInboundPort, + ProxyOutboundPort: iptables.DefaultTProxyOutboundPort, + ExcludeUIDs: []string{"5996"}, + ExcludeInboundPorts: []string{"21000"}, + }, + }, { name: "metrics enabled", webhook: MeshWebhook{