From 32780125d7e8d18650e7d871c4ac4f6772ba22fa Mon Sep 17 00:00:00 2001
From: Brandon McNama
Date: Thu, 15 Sep 2022 21:32:59 -0400
Subject: [PATCH] feat: Add LB exclusion label when deleting node

Currently, when Karpenter drains and then deletes a Node from the
cluster, if that node is registered in a Target Group for an ALB/NLB,
the corresponding EC2 instance is not removed from the Target Group.
This can lead to increased request errors when deleting nodes via
Karpenter.

To help resolve this issue, this change adds the well-known
`node.kubernetes.io/exclude-from-external-balancers` label, which
causes the AWS Load Balancer Controller to remove the node from the
Target Group while Karpenter is draining it. This is similar to how
the AWS Node Termination Handler works (see
aws/aws-node-termination-handler#316).

In the future, Karpenter could be enhanced to wait for a configurable
period before deleting the Node and terminating the associated
instance, since there is currently a race condition between Pods being
drained off the Node and the EC2 instance being deregistered from the
Target Group.
---
 pkg/controllers/termination/suite_test.go | 19 +++++++++++++++++++
 pkg/controllers/termination/terminate.go  |  5 +++++
 2 files changed, 24 insertions(+)

diff --git a/pkg/controllers/termination/suite_test.go b/pkg/controllers/termination/suite_test.go
index beec62c8a280..9ecd8b9ffcac 100644
--- a/pkg/controllers/termination/suite_test.go
+++ b/pkg/controllers/termination/suite_test.go
@@ -103,6 +103,25 @@ var _ = Describe("Termination", func() {
 			ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node))
 			ExpectNotFound(ctx, env.Client, node)
 		})
+		It("should exclude nodes from load balancers when terminating", func() {
+			// This is a kludge to prevent the node from being deleted before we can
+			// inspect its labels
+			podNoEvict := test.Pod(test.PodOptions{
+				NodeName: node.Name,
+				ObjectMeta: metav1.ObjectMeta{
+					Annotations:     map[string]string{v1alpha5.DoNotEvictPodAnnotationKey: "true"},
+					OwnerReferences: defaultOwnerRefs,
+				},
+			})
+
+			ExpectApplied(ctx, env.Client, node, podNoEvict)
+
+			Expect(env.Client.Delete(ctx, node)).To(Succeed())
+			node = ExpectNodeExists(ctx, env.Client, node.Name)
+			ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(node))
+			node = ExpectNodeExists(ctx, env.Client, node.Name)
+			Expect(node.Labels[v1.LabelNodeExcludeBalancers]).Should(Equal("karpenter"))
+		})
 		It("should not evict pods that tolerate unschedulable taint", func() {
 			podEvict := test.Pod(test.PodOptions{NodeName: node.Name, ObjectMeta: metav1.ObjectMeta{OwnerReferences: defaultOwnerRefs}})
 			podSkip := test.Pod(test.PodOptions{
diff --git a/pkg/controllers/termination/terminate.go b/pkg/controllers/termination/terminate.go
index 43ec6bcb9ac0..b31bfb289033 100644
--- a/pkg/controllers/termination/terminate.go
+++ b/pkg/controllers/termination/terminate.go
@@ -61,6 +61,11 @@ func (t *Terminator) cordon(ctx context.Context, node *v1.Node) error {
 	// 2. Cordon node
 	persisted := node.DeepCopy()
 	node.Spec.Unschedulable = true
+	// Handle nil map
+	if node.Labels == nil {
+		node.Labels = map[string]string{}
+	}
+	node.Labels[v1.LabelNodeExcludeBalancers] = "karpenter"
 	if err := t.KubeClient.Patch(ctx, node, client.MergeFrom(persisted)); err != nil {
 		return fmt.Errorf("patching node %s, %w", node.Name, err)
 	}
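
Note (not part of the patch): the exclusion label set above is consumed
by load-balancer controllers when they compute target-group membership.
The sketch below is a rough illustration of that consuming side, not
the AWS Load Balancer Controller's actual code; the package and
eligibleTargets names are hypothetical, while the label constant is the
real well-known label from k8s.io/api/core/v1 that this patch uses.

// Hypothetical sketch: filter out nodes carrying the exclusion label
// when selecting target-group members.
package lbfilter

import (
	v1 "k8s.io/api/core/v1"
)

// eligibleTargets returns the nodes that should remain registered in a
// target group, skipping any node labeled with
// node.kubernetes.io/exclude-from-external-balancers.
func eligibleTargets(nodes []v1.Node) []v1.Node {
	var eligible []v1.Node
	for _, n := range nodes {
		// A node being drained by Karpenter now carries this label, so
		// it drops out of the target set and stops receiving traffic.
		if _, excluded := n.Labels[v1.LabelNodeExcludeBalancers]; excluded {
			continue
		}
		eligible = append(eligible, n)
	}
	return eligible
}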