From 1821f08477724394b9819de04b5a43ade5b996b7 Mon Sep 17 00:00:00 2001 From: Ashwin Venkatesh Date: Thu, 19 Jan 2023 11:21:44 -0500 Subject: [PATCH] When configured, use the health check of the proxy (#1841) * When a service is configured with the correct annotation, a readiness endpoint with be configured in Consul dataplane and the readiness probe of the sidecar will be configured to use that endpoint to determine the health of the system. Additionally, when t-proxy is enabled, that port shall be in the ExcludeList for inbound connections. --- CHANGELOG.md | 2 +- .../connect-inject/constants/constants.go | 3 + .../webhook/consul_dataplane_sidecar.go | 61 ++++++- .../webhook/consul_dataplane_sidecar_test.go | 154 +++++++++++++++++- .../webhook/redirect_traffic.go | 6 + .../webhook/redirect_traffic_test.go | 33 ++++ 6 files changed, 249 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b7c7dd852..d4cf05b4f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ IMPROVEMENTS: * Add the `envoyExtensions` field to the `ProxyDefaults` and `ServiceDefaults` CRD. [[GH-1823]](https://github.com/hashicorp/consul-k8s/pull/1823) * Add the `balanceInboundConnections` field to the `ServiceDefaults` CRD. [[GH-1823]](https://github.com/hashicorp/consul-k8s/pull/1823) * Control-Plane - * Add support for the annotation `consul.hashicorp.com/use-proxy-health-check`. [[GH-1824](https://github.com/hashicorp/consul-k8s/pull/1824)] + * Add support for the annotation `consul.hashicorp.com/use-proxy-health-check`. When this annotation is used by a service, it configures a readiness endpoint on Consul Dataplane and queries it instead of the proxy's inbound port which forwards requests to the application. [[GH-1824](https://github.com/hashicorp/consul-k8s/pull/1824)], [[GH-1841](https://github.com/hashicorp/consul-k8s/pull/1824)] * Add health check for synced services based on the status of the Kubernetes readiness probe on synced pod. [[GH-1821](https://github.com/hashicorp/consul-k8s/pull/1821)] BUG FIXES: diff --git a/control-plane/connect-inject/constants/constants.go b/control-plane/connect-inject/constants/constants.go index 62f21740c4..e371677629 100644 --- a/control-plane/connect-inject/constants/constants.go +++ b/control-plane/connect-inject/constants/constants.go @@ -7,6 +7,9 @@ const ( // ProxyDefaultInboundPort is the default inbound port for the proxy. ProxyDefaultInboundPort = 20000 + // ProxyDefaultHealthPort is the default HTTP health check port for the proxy. + ProxyDefaultHealthPort = 21000 + // MetaKeyKubeNS is the meta key name for Kubernetes namespace used for the Consul services. MetaKeyKubeNS = "k8s-namespace" diff --git a/control-plane/connect-inject/webhook/consul_dataplane_sidecar.go b/control-plane/connect-inject/webhook/consul_dataplane_sidecar.go index d0cd232c84..ad3333ba1b 100644 --- a/control-plane/connect-inject/webhook/consul_dataplane_sidecar.go +++ b/control-plane/connect-inject/webhook/consul_dataplane_sidecar.go @@ -47,13 +47,28 @@ func (w *MeshWebhook) consulDataplaneSidecar(namespace corev1.Namespace, pod cor containerName = fmt.Sprintf("%s-%s", sidecarContainer, mpi.serviceName) } - probe := &corev1.Probe{ - Handler: corev1.Handler{ - TCPSocket: &corev1.TCPSocketAction{ - Port: intstr.FromInt(constants.ProxyDefaultInboundPort + mpi.serviceIndex), + var probe *corev1.Probe + if useProxyHealthCheck(pod) { + // If using the proxy health check for a service, configure an HTTP handler + // that queries the '/ready' endpoint of the proxy. + probe = &corev1.Probe{ + Handler: corev1.Handler{ + HTTPGet: &corev1.HTTPGetAction{ + Port: intstr.FromInt(constants.ProxyDefaultHealthPort + mpi.serviceIndex), + Path: "/ready", + }, }, - }, - InitialDelaySeconds: 1, + InitialDelaySeconds: 1, + } + } else { + probe = &corev1.Probe{ + Handler: corev1.Handler{ + TCPSocket: &corev1.TCPSocketAction{ + Port: intstr.FromInt(constants.ProxyDefaultInboundPort + mpi.serviceIndex), + }, + }, + InitialDelaySeconds: 1, + } } container := corev1.Container{ @@ -89,13 +104,27 @@ func (w *MeshWebhook) consulDataplaneSidecar(namespace corev1.Namespace, pod cor }, Args: args, ReadinessProbe: probe, - LivenessProbe: probe, } if w.AuthMethod != "" { container.VolumeMounts = append(container.VolumeMounts, saTokenVolumeMount) } + if useProxyHealthCheck(pod) { + // Configure the Readiness Address for the proxy's health check to be the Pod IP. + container.Env = append(container.Env, corev1.EnvVar{ + Name: "DP_ENVOY_READY_BIND_ADDRESS", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{FieldPath: "status.podIP"}, + }, + }) + // Configure the port on which the readiness probe will query the proxy for its health. + container.Ports = append(container.Ports, corev1.ContainerPort{ + Name: fmt.Sprintf("%s-%d", "proxy-health", mpi.serviceIndex), + ContainerPort: int32(constants.ProxyDefaultHealthPort + mpi.serviceIndex), + }) + } + // Add any extra VolumeMounts. if userVolMount, ok := pod.Annotations[constants.AnnotationConsulSidecarUserVolumeMount]; ok { var volumeMounts []corev1.VolumeMount @@ -206,6 +235,11 @@ func (w *MeshWebhook) getContainerSidecarArgs(namespace corev1.Namespace, mpi mu args = append(args, "-tls-disabled") } + // Configure the readiness port on the dataplane sidecar if proxy health checks are enabled. + if useProxyHealthCheck(pod) { + args = append(args, fmt.Sprintf("%s=%d", "-envoy-ready-bind-port", constants.ProxyDefaultHealthPort+mpi.serviceIndex)) + } + if mpi.serviceName != "" { args = append(args, fmt.Sprintf("-envoy-admin-bind-port=%d", 19000+mpi.serviceIndex)) } @@ -383,3 +417,16 @@ func (w *MeshWebhook) sidecarResources(pod corev1.Pod) (corev1.ResourceRequireme return resources, nil } + +// useProxyHealthCheck returns true if the pod has the annotation 'consul.hashicorp.com/use-proxy-health-check' +// set to truthy values. +func useProxyHealthCheck(pod corev1.Pod) bool { + if v, ok := pod.Annotations[constants.AnnotationUseProxyHealthCheck]; ok { + useProxyHealthCheck, err := strconv.ParseBool(v) + if err != nil { + return false + } + return useProxyHealthCheck + } + return false +} diff --git a/control-plane/connect-inject/webhook/consul_dataplane_sidecar_test.go b/control-plane/connect-inject/webhook/consul_dataplane_sidecar_test.go index f6916bc3e2..37aa1619bf 100644 --- a/control-plane/connect-inject/webhook/consul_dataplane_sidecar_test.go +++ b/control-plane/connect-inject/webhook/consul_dataplane_sidecar_test.go @@ -214,7 +214,6 @@ func TestHandlerConsulDataplaneSidecar(t *testing.T) { InitialDelaySeconds: 1, } require.Equal(t, expectedProbe, container.ReadinessProbe) - require.Equal(t, expectedProbe, container.LivenessProbe) require.Nil(t, container.StartupProbe) require.Len(t, container.Env, 3) require.Equal(t, container.Env[0].Name, "TMPDIR") @@ -308,6 +307,158 @@ func TestHandlerConsulDataplaneSidecar_DNSProxy(t *testing.T) { require.Contains(t, container.Args, "-consul-dns-bind-port=8600") } +func TestHandlerConsulDataplaneSidecar_ProxyHealthCheck(t *testing.T) { + h := MeshWebhook{ + ConsulConfig: &consul.Config{HTTPPort: 8500, GRPCPort: 8502}, + ConsulAddress: "1.1.1.1", + LogLevel: "info", + } + pod := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + constants.AnnotationUseProxyHealthCheck: "true", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "web", + }, + }, + }, + } + container, err := h.consulDataplaneSidecar(testNS, pod, multiPortInfo{}) + expectedProbe := &corev1.Probe{ + Handler: corev1.Handler{ + HTTPGet: &corev1.HTTPGetAction{ + Port: intstr.FromInt(21000), + Path: "/ready", + }, + }, + InitialDelaySeconds: 1, + } + require.NoError(t, err) + require.Contains(t, container.Args, "-envoy-ready-bind-port=21000") + require.Equal(t, expectedProbe, container.ReadinessProbe) + require.Contains(t, container.Env, corev1.EnvVar{ + Name: "DP_ENVOY_READY_BIND_ADDRESS", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{FieldPath: "status.podIP"}, + }, + }) + require.Contains(t, container.Ports, corev1.ContainerPort{ + Name: "proxy-health-0", + ContainerPort: 21000, + }) +} + +func TestHandlerConsulDataplaneSidecar_ProxyHealthCheck_Multiport(t *testing.T) { + h := MeshWebhook{ + ConsulConfig: &consul.Config{HTTPPort: 8500, GRPCPort: 8502}, + ConsulAddress: "1.1.1.1", + LogLevel: "info", + } + pod := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Annotations: map[string]string{ + constants.AnnotationService: "web,web-admin", + constants.AnnotationUseProxyHealthCheck: "true", + }, + }, + + Spec: corev1.PodSpec{ + Volumes: []corev1.Volume{ + { + Name: "web-admin-service-account", + }, + }, + Containers: []corev1.Container{ + { + Name: "web", + }, + { + Name: "web-side", + }, + { + Name: "web-admin", + }, + { + Name: "web-admin-side", + }, + { + Name: "auth-method-secret", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "service-account-secret", + MountPath: "/var/run/secrets/kubernetes.io/serviceaccount", + }, + }, + }, + }, + ServiceAccountName: "web", + }, + } + multiPortInfos := []multiPortInfo{ + { + serviceIndex: 0, + serviceName: "web", + }, + { + serviceIndex: 1, + serviceName: "web-admin", + }, + } + expectedArgs := []string{ + "-envoy-ready-bind-port=21000", + "-envoy-ready-bind-port=21001", + } + expectedProbe := []*corev1.Probe{ + { + Handler: corev1.Handler{ + HTTPGet: &corev1.HTTPGetAction{ + Port: intstr.FromInt(21000), + Path: "/ready", + }, + }, + InitialDelaySeconds: 1, + }, + { + Handler: corev1.Handler{ + HTTPGet: &corev1.HTTPGetAction{ + Port: intstr.FromInt(21001), + Path: "/ready", + }, + }, + InitialDelaySeconds: 1, + }, + } + expectedPort := []corev1.ContainerPort{ + { + Name: "proxy-health-0", + ContainerPort: 21000, + }, + { + Name: "proxy-health-1", + ContainerPort: 21001, + }, + } + expectedEnvVar := corev1.EnvVar{ + Name: "DP_ENVOY_READY_BIND_ADDRESS", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{FieldPath: "status.podIP"}, + }, + } + for i, info := range multiPortInfos { + container, err := h.consulDataplaneSidecar(testNS, pod, info) + require.NoError(t, err) + require.Contains(t, container.Args, expectedArgs[i]) + require.Equal(t, expectedProbe[i], container.ReadinessProbe) + require.Contains(t, container.Ports, expectedPort[i]) + require.Contains(t, container.Env, expectedEnvVar) + } +} + func TestHandlerConsulDataplaneSidecar_Multiport(t *testing.T) { for _, aclsEnabled := range []bool{false, true} { name := fmt.Sprintf("acls enabled: %t", aclsEnabled) @@ -430,7 +581,6 @@ func TestHandlerConsulDataplaneSidecar_Multiport(t *testing.T) { InitialDelaySeconds: 1, } require.Equal(t, expectedProbe, container.ReadinessProbe) - require.Equal(t, expectedProbe, container.LivenessProbe) require.Nil(t, container.StartupProbe) } }) diff --git a/control-plane/connect-inject/webhook/redirect_traffic.go b/control-plane/connect-inject/webhook/redirect_traffic.go index 73040b39e7..eab23a2b91 100644 --- a/control-plane/connect-inject/webhook/redirect_traffic.go +++ b/control-plane/connect-inject/webhook/redirect_traffic.go @@ -52,6 +52,12 @@ func (w *MeshWebhook) iptablesConfigJSON(pod corev1.Pod, ns corev1.Namespace) (s return "", err } + // Exclude the port on which the proxy health check port will be configured if + // using the proxy health check for a service. + if useProxyHealthCheck(pod) { + cfg.ExcludeInboundPorts = append(cfg.ExcludeInboundPorts, strconv.Itoa(constants.ProxyDefaultHealthPort)) + } + if overwriteProbes { for i, container := range pod.Spec.Containers { // skip the "envoy-sidecar" container from having its probes overridden diff --git a/control-plane/connect-inject/webhook/redirect_traffic_test.go b/control-plane/connect-inject/webhook/redirect_traffic_test.go index 5ed660d96d..2ad9940fbe 100644 --- a/control-plane/connect-inject/webhook/redirect_traffic_test.go +++ b/control-plane/connect-inject/webhook/redirect_traffic_test.go @@ -72,6 +72,39 @@ func TestAddRedirectTrafficConfig(t *testing.T) { ExcludeUIDs: []string{"5996"}, }, }, + { + name: "proxy health checks enabled", + webhook: MeshWebhook{ + Log: logrtest.TestLogger{T: t}, + AllowK8sNamespacesSet: mapset.NewSetWith("*"), + DenyK8sNamespacesSet: mapset.NewSet(), + decoder: decoder, + }, + pod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: defaultNamespace, + Name: defaultPodName, + Annotations: map[string]string{ + constants.AnnotationUseProxyHealthCheck: "true", + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "test", + }, + }, + }, + }, + expCfg: iptables.Config{ + ConsulDNSIP: "", + ProxyUserID: strconv.Itoa(sidecarUserAndGroupID), + ProxyInboundPort: constants.ProxyDefaultInboundPort, + ProxyOutboundPort: iptables.DefaultTProxyOutboundPort, + ExcludeUIDs: []string{"5996"}, + ExcludeInboundPorts: []string{"21000"}, + }, + }, { name: "metrics enabled", webhook: MeshWebhook{