From e3712522b6337cd9e1c2b3a87b9129afae2bd73f Mon Sep 17 00:00:00 2001 From: Florian Fl Bauer Date: Thu, 13 Jul 2023 09:29:53 +0200 Subject: [PATCH 1/3] add label to exclude from external loadbalancers --- k8s/util.go | 20 +++++++++++++++++++- main.go | 2 ++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/k8s/util.go b/k8s/util.go index a42bc597..e693da41 100644 --- a/k8s/util.go +++ b/k8s/util.go @@ -4,7 +4,7 @@ import ( "log" "github.com/aws/aws-sdk-go/service/autoscaling" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" ) // CheckIfNodeHasEnoughResourcesToTransferAllPodsInNodes calculates the resources available in the target nodes @@ -103,3 +103,21 @@ func AnnotateNodeByAutoScalingInstance(client ClientAPI, instance *autoscaling.I } return nil } + +// Label Node adds an Label to the Kubernetes node represented by a given AWS instance +func LabelNodeByAutoScalingInstance(client ClientAPI, instance *autoscaling.Instance, key, value string) error { + node, err := client.GetNodeByAutoScalingInstance(instance) + if err != nil { + return err + } + labels := node.GetLabels() + if currentValue := labels[key]; currentValue != value { + labels[key] = value + node.SetLabels(labels) + err = client.UpdateNode(node) + if err != nil { + return err + } + } + return nil +} diff --git a/main.go b/main.go index 17c08b57..d1c572c3 100644 --- a/main.go +++ b/main.go @@ -187,6 +187,8 @@ func DoHandleRollingUpgrade(client k8s.ClientAPI, ec2Service ec2iface.EC2API, au if hasEnoughResources { log.Printf("[%s][%s] Updated nodes have enough resources available", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId)) if minutesSinceDrained == -1 { + log.Printf("[%s][%s] Label node to exlude from external load balancers", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId)) + k8s.LabelNodeByAutoScalingInstance(client, outdatedInstance, "node.kubernetes.io/exclude-from-external-load-balancers", "twin") log.Printf("[%s][%s] Draining node", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId)) err := client.Drain(node.Name, config.Get().IgnoreDaemonSets, config.Get().DeleteEmptyDirData, config.Get().PodTerminationGracePeriod) if err != nil { From a7b9decd70dd5b2d76ba90b5d4646b7fd3357754 Mon Sep 17 00:00:00 2001 From: Florian Fl Bauer Date: Wed, 26 Jul 2023 13:50:28 +0200 Subject: [PATCH 2/3] Make Exclude From External Load Balancers optional --- README.md | 1 + config/config.go | 71 +++++++++++++++++++++++++----------------------- main.go | 6 ++-- 3 files changed, 42 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index d80f1757..d3db1d0c 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,7 @@ Therefore, this application will not run into any issues if it is restarted, res | METRICS | Expose metrics in Prometheus format at `:${METRICS_PORT}/metrics` | no | `""` | | SLOW_MODE | If enabled, every time a node is terminated during an execution, the current execution will stop rather than continuing to the next ASG | no | `false` | | EAGER_CORDONING | If enabled, all outdated nodes will get cordoned before any rolling update action. The default mode is to cordon a node just before draining it. See [#41](https://github.com/TwiN/aws-eks-asg-rolling-update-handler/issues/41) for possible consequences of enabling this. | no | `false` | +| EXCLUDE_FROM_EXTERNAL_LOAD_BALANCERS | If enabled, node label `node.kubernetes.io/exclude-from-external-load-balancers=true` will be added to nodes before draining. See [#131](https://github.com/TwiN/aws-eks-asg-rolling-update-handler/pull/131) for more information | no | `false` | **NOTE:** Only one of `CLUSTER_NAME`, `AUTODISCOVERY_TAGS` or `AUTO_SCALING_GROUP_NAMES` must be set. diff --git a/config/config.go b/config/config.go index 2a2b3d07..1b940f41 100644 --- a/config/config.go +++ b/config/config.go @@ -12,48 +12,51 @@ import ( var cfg *config const ( - EnvEnvironment = "ENVIRONMENT" - EnvDebug = "DEBUG" - EnvIgnoreDaemonSets = "IGNORE_DAEMON_SETS" - EnvDeleteLocalData = "DELETE_LOCAL_DATA" // Deprecated: in favor of DeleteEmptyDirData (DELETE_EMPTY_DIR_DATA) - EnvDeleteEmptyDirData = "DELETE_EMPTY_DIR_DATA" - EnvClusterName = "CLUSTER_NAME" - EnvAutodiscoveryTags = "AUTODISCOVERY_TAGS" - EnvAutoScalingGroupNames = "AUTO_SCALING_GROUP_NAMES" - EnvAwsRegion = "AWS_REGION" - EnvExecutionInterval = "EXECUTION_INTERVAL" - EnvExecutionTimeout = "EXECUTION_TIMEOUT" - EnvPodTerminationGracePeriod = "POD_TERMINATION_GRACE_PERIOD" - EnvMetrics = "METRICS" - EnvMetricsPort = "METRICS_PORT" - EnvSlowMode = "SLOW_MODE" - EnvEagerCordoning = "EAGER_CORDONING" + EnvEnvironment = "ENVIRONMENT" + EnvDebug = "DEBUG" + EnvIgnoreDaemonSets = "IGNORE_DAEMON_SETS" + EnvDeleteLocalData = "DELETE_LOCAL_DATA" // Deprecated: in favor of DeleteEmptyDirData (DELETE_EMPTY_DIR_DATA) + EnvDeleteEmptyDirData = "DELETE_EMPTY_DIR_DATA" + EnvClusterName = "CLUSTER_NAME" + EnvAutodiscoveryTags = "AUTODISCOVERY_TAGS" + EnvAutoScalingGroupNames = "AUTO_SCALING_GROUP_NAMES" + EnvAwsRegion = "AWS_REGION" + EnvExecutionInterval = "EXECUTION_INTERVAL" + EnvExecutionTimeout = "EXECUTION_TIMEOUT" + EnvPodTerminationGracePeriod = "POD_TERMINATION_GRACE_PERIOD" + EnvMetrics = "METRICS" + EnvMetricsPort = "METRICS_PORT" + EnvSlowMode = "SLOW_MODE" + EnvEagerCordoning = "EAGER_CORDONING" + EnvExcludeFromExternalLoadBalancers = "EXLUDE_FROM_EXTERNAL_LOAD_BALANCERS" ) type config struct { - Environment string // Optional - Debug bool // Defaults to false - AutoScalingGroupNames []string // Required if AutodiscoveryTags not provided - AutodiscoveryTags string // Required if AutoScalingGroupNames not provided - AwsRegion string // Defaults to us-west-2 - IgnoreDaemonSets bool // Defaults to true - DeleteEmptyDirData bool // Defaults to true - ExecutionInterval time.Duration // Defaults to 20s - ExecutionTimeout time.Duration // Defaults to 900s - PodTerminationGracePeriod int // Defaults to -1 - Metrics bool // Defaults to false - MetricsPort int // Defaults to 8080 - SlowMode bool // Defaults to false - EagerCordoning bool // Defaults to false + Environment string // Optional + Debug bool // Defaults to false + AutoScalingGroupNames []string // Required if AutodiscoveryTags not provided + AutodiscoveryTags string // Required if AutoScalingGroupNames not provided + AwsRegion string // Defaults to us-west-2 + IgnoreDaemonSets bool // Defaults to true + DeleteEmptyDirData bool // Defaults to true + ExecutionInterval time.Duration // Defaults to 20s + ExecutionTimeout time.Duration // Defaults to 900s + PodTerminationGracePeriod int // Defaults to -1 + Metrics bool // Defaults to false + MetricsPort int // Defaults to 8080 + SlowMode bool // Defaults to false + EagerCordoning bool // Defaults to false + ExcludeFromExternalLoadBalancers bool // defaults to false } // Initialize is used to initialize the application's configuration func Initialize() error { cfg = &config{ - Environment: strings.ToLower(os.Getenv(EnvEnvironment)), - Debug: strings.ToLower(os.Getenv(EnvDebug)) == "true", - SlowMode: strings.ToLower(os.Getenv(EnvSlowMode)) == "true", - EagerCordoning: strings.ToLower(os.Getenv(EnvEagerCordoning)) == "true", + Environment: strings.ToLower(os.Getenv(EnvEnvironment)), + Debug: strings.ToLower(os.Getenv(EnvDebug)) == "true", + SlowMode: strings.ToLower(os.Getenv(EnvSlowMode)) == "true", + EagerCordoning: strings.ToLower(os.Getenv(EnvEagerCordoning)) == "true", + ExcludeFromExternalLoadBalancers: strings.ToLower(os.Getenv(EnvExcludeFromExternalLoadBalancers)) == "true", } if clusterName := os.Getenv(EnvClusterName); len(clusterName) > 0 { // See "Prerequisites" in https://docs.aws.amazon.com/eks/latest/userguide/autoscaling.html diff --git a/main.go b/main.go index d1c572c3..160d755f 100644 --- a/main.go +++ b/main.go @@ -187,8 +187,10 @@ func DoHandleRollingUpgrade(client k8s.ClientAPI, ec2Service ec2iface.EC2API, au if hasEnoughResources { log.Printf("[%s][%s] Updated nodes have enough resources available", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId)) if minutesSinceDrained == -1 { - log.Printf("[%s][%s] Label node to exlude from external load balancers", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId)) - k8s.LabelNodeByAutoScalingInstance(client, outdatedInstance, "node.kubernetes.io/exclude-from-external-load-balancers", "twin") + if config.Get().ExcludeFromExternalLoadBalancers { + log.Printf("[%s][%s] Label node to exlude from external load balancers", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId)) + k8s.LabelNodeByAutoScalingInstance(client, outdatedInstance, "node.kubernetes.io/exclude-from-external-load-balancers", "true") + } log.Printf("[%s][%s] Draining node", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId)) err := client.Drain(node.Name, config.Get().IgnoreDaemonSets, config.Get().DeleteEmptyDirData, config.Get().PodTerminationGracePeriod) if err != nil { From c92e36892bef986ec312a8a074346e3e130adc37 Mon Sep 17 00:00:00 2001 From: Florian Fl Bauer Date: Fri, 28 Jul 2023 13:09:10 +0200 Subject: [PATCH 3/3] Adding Tests for ExcludeFromExternalLoadBalancers --- config/config.go | 19 +++++---- config/config_test.go | 2 +- k8s/client.go | 4 +- k8stest/k8stest.go | 3 +- main.go | 2 +- main_test.go | 97 +++++++++++++++++++++++++++++++++++++++++-- 6 files changed, 111 insertions(+), 16 deletions(-) diff --git a/config/config.go b/config/config.go index 1b940f41..b58957cd 100644 --- a/config/config.go +++ b/config/config.go @@ -46,7 +46,7 @@ type config struct { MetricsPort int // Defaults to 8080 SlowMode bool // Defaults to false EagerCordoning bool // Defaults to false - ExcludeFromExternalLoadBalancers bool // defaults to false + ExcludeFromExternalLoadBalancers bool // Defaults to false } // Initialize is used to initialize the application's configuration @@ -138,21 +138,22 @@ func Initialize() error { // Set sets the application's configuration and is intended to be used for testing purposes. // See Initialize() for production -func Set(autoScalingGroupNames []string, ignoreDaemonSets, deleteEmptyDirData, eagerCordoning bool) { +func Set(autoScalingGroupNames []string, ignoreDaemonSets, deleteEmptyDirData, eagerCordoning bool, excludeFromExternalLoadBalancers bool) { cfg = &config{ - AutoScalingGroupNames: autoScalingGroupNames, - IgnoreDaemonSets: ignoreDaemonSets, - DeleteEmptyDirData: deleteEmptyDirData, - EagerCordoning: eagerCordoning, - ExecutionInterval: time.Second * 20, - ExecutionTimeout: time.Second * 900, + AutoScalingGroupNames: autoScalingGroupNames, + IgnoreDaemonSets: ignoreDaemonSets, + DeleteEmptyDirData: deleteEmptyDirData, + EagerCordoning: eagerCordoning, + ExcludeFromExternalLoadBalancers: excludeFromExternalLoadBalancers, + ExecutionInterval: time.Second * 20, + ExecutionTimeout: time.Second * 900, } } func Get() *config { if cfg == nil { log.Println("Config wasn't initialized prior to being called. Assuming this is a test.") - Set(nil, true, true, false) + Set(nil, true, true, false, false) } return cfg } diff --git a/config/config_test.go b/config/config_test.go index d2148eb1..7a564217 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -54,7 +54,7 @@ func TestInitialize_withMissingRequiredValues(t *testing.T) { } func TestSet(t *testing.T) { - Set([]string{"asg-a", "asg-b", "asg-c"}, true, true, false) + Set([]string{"asg-a", "asg-b", "asg-c"}, true, true, false, false) config := Get() if len(config.AutoScalingGroupNames) != 3 { t.Error() diff --git a/k8s/client.go b/k8s/client.go index 46703eeb..f2260a6f 100644 --- a/k8s/client.go +++ b/k8s/client.go @@ -9,7 +9,7 @@ import ( "github.com/TwiN/gocache/v2" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/autoscaling" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/kubectl/pkg/drain" @@ -20,6 +20,8 @@ const ( AnnotationRollingUpdateDrainedTimestamp = "aws-eks-asg-rolling-update-handler.twin.sh/drained-at" AnnotationRollingUpdateTerminatedTimestamp = "aws-eks-asg-rolling-update-handler.twin.sh/terminated-at" + LabelExcludeFromExternalLoadBalancers = "node.kubernetes.io/exclude-from-external-load-balancers" + nodesCacheKey = "nodes" ) diff --git a/k8stest/k8stest.go b/k8stest/k8stest.go index 2a2dd336..2727cd36 100644 --- a/k8stest/k8stest.go +++ b/k8stest/k8stest.go @@ -6,7 +6,7 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/autoscaling" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -100,6 +100,7 @@ func CreateTestNode(name, availabilityZone, instanceId, allocatableCpu, allocata } node.SetName(name) node.SetAnnotations(make(map[string]string)) + node.SetLabels(make(map[string]string)) return node } diff --git a/main.go b/main.go index 160d755f..d05bd5bd 100644 --- a/main.go +++ b/main.go @@ -189,7 +189,7 @@ func DoHandleRollingUpgrade(client k8s.ClientAPI, ec2Service ec2iface.EC2API, au if minutesSinceDrained == -1 { if config.Get().ExcludeFromExternalLoadBalancers { log.Printf("[%s][%s] Label node to exlude from external load balancers", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId)) - k8s.LabelNodeByAutoScalingInstance(client, outdatedInstance, "node.kubernetes.io/exclude-from-external-load-balancers", "true") + k8s.LabelNodeByAutoScalingInstance(client, outdatedInstance, k8s.LabelExcludeFromExternalLoadBalancers, "true") } log.Printf("[%s][%s] Draining node", aws.StringValue(autoScalingGroup.AutoScalingGroupName), aws.StringValue(outdatedInstance.InstanceId)) err := client.Drain(node.Name, config.Get().IgnoreDaemonSets, config.Get().DeleteEmptyDirData, config.Get().PodTerminationGracePeriod) diff --git a/main_test.go b/main_test.go index 1ae30384..93f1031a 100644 --- a/main_test.go +++ b/main_test.go @@ -683,8 +683,8 @@ func TestHasAcceptableNumberOfUpdatedNonReadyNodes(t *testing.T) { } func TestHandleRollingUpgrade_withEagerCordoning(t *testing.T) { - config.Set(nil, true, true, true) - defer config.Set(nil, true, true, false) + config.Set(nil, true, true, true, false) + defer config.Set(nil, true, true, false, false) oldInstance1 := cloudtest.CreateTestAutoScalingInstance("old-1", "v1", nil, "InService") oldInstance2 := cloudtest.CreateTestAutoScalingInstance("old-2", "v1", nil, "InService") @@ -718,7 +718,8 @@ func TestHandleRollingUpgrade_withEagerCordoning(t *testing.T) { func TestHandleRollingUpgrade_withEagerCordoningDisabled(t *testing.T) { // explicitly setting this, but eager cordoning is disabled by default anyways - config.Set(nil, true, true, false) + config.Set(nil, true, true, false, true) + defer config.Set(nil, true, true, true, false) oldInstance1 := cloudtest.CreateTestAutoScalingInstance("old-1", "v1", nil, "InService") oldInstance2 := cloudtest.CreateTestAutoScalingInstance("old-2", "v1", nil, "InService") @@ -746,3 +747,93 @@ func TestHandleRollingUpgrade_withEagerCordoningDisabled(t *testing.T) { t.Error("Eager cordoning is not enabled, so no node should have been cordoned on the first execution") } } + +func TestHandleRollingUpgrade_withExcludeFromExternalLoadBalancers(t *testing.T) { + config.Set(nil, true, true, false, true) + defer config.Set(nil, true, true, false, false) + + oldInstance := cloudtest.CreateTestAutoScalingInstance("old-1", "v1", nil, "InService") + asg := cloudtest.CreateTestAutoScalingGroup("asg", "v2", nil, []*autoscaling.Instance{oldInstance}, false) + + oldNode := k8stest.CreateTestNode("old-node-1", aws.StringValue(oldInstance.AvailabilityZone), aws.StringValue(oldInstance.InstanceId), "1000m", "1000Mi") + oldNodePod := k8stest.CreateTestPod("old-pod-1", oldNode.Name, "100m", "100Mi", false, v1.PodRunning) + + mockClient := k8stest.NewMockClient([]v1.Node{oldNode}, []v1.Pod{oldNodePod}) + mockEc2Service := cloudtest.NewMockEC2Service(nil) + mockAutoScalingService := cloudtest.NewMockAutoScalingService([]*autoscaling.Group{asg}) + + // First run (Node rollout process gets marked as started) + err := HandleRollingUpgrade(mockClient, mockEc2Service, mockAutoScalingService, []*autoscaling.Group{asg}) + if err != nil { + t.Error("unexpected error:", err) + } + if mockClient.Counter["UpdateNode"] != 1 { + t.Error("Node should've been annotated, meaning that UpdateNode should've been called once") + } + + // Second run (ASG's desired capacity gets increased) + err = HandleRollingUpgrade(mockClient, mockEc2Service, mockAutoScalingService, []*autoscaling.Group{asg}) + if err != nil { + t.Error("unexpected error:", err) + } + if mockAutoScalingService.Counter["SetDesiredCapacity"] != 1 { + t.Error("ASG should've been increased because there's no updated nodes yet") + } + asg = mockAutoScalingService.AutoScalingGroups[aws.StringValue(asg.AutoScalingGroupName)] + if aws.Int64Value(asg.DesiredCapacity) != 2 { + t.Error("The desired capacity of the ASG should've been increased to 2") + } + + // Third run (Nothing changed) + err = HandleRollingUpgrade(mockClient, mockEc2Service, mockAutoScalingService, []*autoscaling.Group{asg}) + if err != nil { + t.Error("unexpected error:", err) + } + if mockAutoScalingService.Counter["SetDesiredCapacity"] != 1 { + t.Error("Desired capacity shouldn't have been updated") + } + asg = mockAutoScalingService.AutoScalingGroups[aws.StringValue(asg.AutoScalingGroupName)] + if aws.Int64Value(asg.DesiredCapacity) != 2 { + t.Error("The desired capacity of the ASG should've stayed at 2") + } + + // Fourth run (new instance has been registered to ASG, but is pending) + newInstance := cloudtest.CreateTestAutoScalingInstance("new-1", "v2", nil, "Pending") + asg.Instances = append(asg.Instances, newInstance) + err = HandleRollingUpgrade(mockClient, mockEc2Service, mockAutoScalingService, []*autoscaling.Group{asg}) + if err != nil { + t.Error("unexpected error:", err) + } + if mockAutoScalingService.Counter["SetDesiredCapacity"] != 1 { + t.Error("Desired capacity shouldn't have been updated") + } + + // Fifth run (new instance is now InService, but node has still not joined cluster (GetNodeByAutoScalingInstance should return not found)) + newInstance.SetLifecycleState("InService") + err = HandleRollingUpgrade(mockClient, mockEc2Service, mockAutoScalingService, []*autoscaling.Group{asg}) + if err != nil { + t.Error("unexpected error:", err) + } + + // Sixth run (new instance has joined the cluster, but Kubelet isn't ready to accept pods yet) + newNode := k8stest.CreateTestNode("new-node-1", aws.StringValue(newInstance.AvailabilityZone), aws.StringValue(newInstance.InstanceId), "1000m", "1000Mi") + newNode.Status.Conditions = []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}} + mockClient.Nodes[newNode.Name] = newNode + err = HandleRollingUpgrade(mockClient, mockEc2Service, mockAutoScalingService, []*autoscaling.Group{asg}) + if err != nil { + t.Error("unexpected error:", err) + } + + // Seventh run (Kubelet is ready to accept new pods. Old node gets drained and terminated) + newNode = mockClient.Nodes[newNode.Name] + newNode.Status.Conditions = []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}} + mockClient.Nodes[newNode.Name] = newNode + err = HandleRollingUpgrade(mockClient, mockEc2Service, mockAutoScalingService, []*autoscaling.Group{asg}) + if err != nil { + t.Error("unexpected error:", err) + } + oldNode = mockClient.Nodes[oldNode.Name] + if _, ok := oldNode.GetLabels()[k8s.LabelExcludeFromExternalLoadBalancers]; !ok { + t.Error("Node should've been labeled") + } +}