From 5fa4dc44925e8028260437bb37e85f75599c7c23 Mon Sep 17 00:00:00 2001 From: Austin Siu Date: Thu, 18 Aug 2022 08:25:49 -0500 Subject: [PATCH] Filter managed non-ASG nodes by tag (#669) * Remove unused ASG calls since tags should propagate to instance * Replace and deprecate ASG-specific tags * Fix unit tests * Clean up comments and document deprecated chart values * Fix tests * Add managed tag to e2e tests * Fix managed tag in e2e tests * Update test/e2e/asg-lifecycle-sqs-test * Update test/e2e/ec2-state-change-sqs-test * Update test/e2e/rebalance-recommendation-sqs-test * Update test/e2e/scheduled-change-event-sqs-test * Update test/e2e/spot-interruption-sqs-test * Remove extraneous comments Co-authored-by: Brandon Wagner Co-authored-by: Steve Nay <265958+snay2@users.noreply.github.com> --- README.md | 15 +- cmd/node-termination-handler.go | 4 +- .../aws-node-termination-handler/README.md | 8 +- .../templates/deployment.yaml | 8 +- .../aws-node-termination-handler/values.yaml | 7 +- pkg/config/config.go | 40 +++-- pkg/monitor/sqsevent/sqs-monitor.go | 73 +-------- .../sqsevent/sqs-monitor_internal_test.go | 146 ++---------------- pkg/monitor/sqsevent/sqs-monitor_test.go | 67 ++------ test/e2e/asg-lifecycle-sqs-test | 2 +- test/e2e/ec2-state-change-sqs-test | 2 +- test/e2e/rebalance-recommendation-sqs-test | 2 +- test/e2e/scheduled-change-event-sqs-test | 2 +- test/e2e/spot-interruption-sqs-test | 2 +- 14 files changed, 85 insertions(+), 293 deletions(-) diff --git a/README.md b/README.md index 8a2700ae..6583e82d 100644 --- a/README.md +++ b/README.md @@ -269,19 +269,26 @@ $ aws autoscaling put-lifecycle-hook \ --role-arn ``` -#### 3. Tag the ASGs: +#### 3. Tag the Instances: -By default the aws-node-termination-handler will only manage terminations for ASGs tagged w/ `key=aws-node-termination-handler/managed` +By default the aws-node-termination-handler will only manage terminations for instances tagged with `key=aws-node-termination-handler/managed`. +The value of the key does not matter. +To tag ASGs and propagate the tags to your instances (recommended): ``` $ aws autoscaling create-or-update-tags \ --tags ResourceId=my-auto-scaling-group,ResourceType=auto-scaling-group,Key=aws-node-termination-handler/managed,Value=,PropagateAtLaunch=true ``` -The value of the key does not matter. +To tag an EC2 instance: +``` +aws ec2 create-tags \ + --resources i-1234567890abcdef0 \ + --tags 'Key="aws-node-termination-handler/managed",Value=' +``` This functionality is helpful in accounts where there are ASGs that do not run kubernetes nodes or you do not want aws-node-termination-handler to manage their termination lifecycle. -However, if your account is dedicated to ASGs for your kubernetes cluster, then you can turn off the ASG tag check by setting the flag `--check-asg-tag-before-draining=false` or environment variable `CHECK_ASG_TAG_BEFORE_DRAINING=false`. +However, if your account is dedicated to ASGs for your kubernetes cluster, then you can turn off the ASG tag check by setting the flag `--check-tag-before-draining=false` or environment variable `CHECK_TAG_BEFORE_DRAINING=false`. You can also control what resources NTH manages by adding the resource ARNs to your Amazon EventBridge rules. diff --git a/cmd/node-termination-handler.go b/cmd/node-termination-handler.go index 76cad7bc..21a45b50 100644 --- a/cmd/node-termination-handler.go +++ b/cmd/node-termination-handler.go @@ -176,8 +176,8 @@ func main() { log.Debug().Msgf("AWS Credentials retrieved from provider: %s", creds.ProviderName) sqsMonitor := sqsevent.SQSMonitor{ - CheckIfManaged: nthConfig.CheckASGTagBeforeDraining, - ManagedAsgTag: nthConfig.ManagedAsgTag, + CheckIfManaged: nthConfig.CheckTagBeforeDraining, + ManagedTag: nthConfig.ManagedTag, QueueURL: nthConfig.QueueURL, InterruptionChan: interruptionChan, CancelChan: cancelChan, diff --git a/config/helm/aws-node-termination-handler/README.md b/config/helm/aws-node-termination-handler/README.md index 2a1e7749..acd4c308 100644 --- a/config/helm/aws-node-termination-handler/README.md +++ b/config/helm/aws-node-termination-handler/README.md @@ -110,9 +110,11 @@ The configuration in this table applies to AWS Node Termination Handler in queue | `awsRegion` | If specified, use the AWS region for AWS API calls, else NTH will try to find the region through the `AWS_REGION` environment variable, IMDS, or the specified queue URL. | `""` | | `queueURL` | Listens for messages on the specified SQS queue URL. | `""` | | `workers` | The maximum amount of parallel event processors to handle concurrent events. | `10` | -| `checkASGTagBeforeDraining` | If `true`, check that the instance is tagged with the `managedAsgTag` before draining the node. If `false`, disables calls ASG API. | `true` | -| `managedAsgTag` | The node tag to check if `checkASGTagBeforeDraining` is `true`. | `aws-node-termination-handler/managed` | -| `useProviderId` | If `true`, fetch node name through Kubernetes node spec ProviderID instead of AWS event PrivateDnsHostname. | `false` | +| `checkTagBeforeDraining` | If `true`, check that the instance is tagged with the `managedTag` before draining the node. | `true` | +| `managedTag` | The node tag to check if `checkTagBeforeDraining` is `true`. | `aws-node-termination-handler/managed` | +| `checkASGTagBeforeDraining` | [DEPRECATED](Use `checkTagBeforeDraining` instead) If `true`, check that the instance is tagged with the `managedAsgTag` before draining the node. If `false`, disables calls ASG API. | `true` | +| `managedAsgTag` | [DEPRECATED](Use `managedTag` instead) The node tag to check if `checkASGTagBeforeDraining` is `true`. +| `useProviderId` | If `true`, fetch node name through Kubernetes node spec ProviderID instead of AWS event PrivateDnsHostname. | `false` | ### IMDS Mode Configuration diff --git a/config/helm/aws-node-termination-handler/templates/deployment.yaml b/config/helm/aws-node-termination-handler/templates/deployment.yaml index 1ebbd27c..f6031945 100644 --- a/config/helm/aws-node-termination-handler/templates/deployment.yaml +++ b/config/helm/aws-node-termination-handler/templates/deployment.yaml @@ -82,10 +82,10 @@ spec: value: {{ .Values.enablePrometheusServer | quote }} - name: PROMETHEUS_SERVER_PORT value: {{ .Values.prometheusServerPort | quote }} - - name: CHECK_ASG_TAG_BEFORE_DRAINING - value: {{ .Values.checkASGTagBeforeDraining | quote }} - - name: MANAGED_ASG_TAG - value: {{ .Values.managedAsgTag | quote }} + - name: CHECK_TAG_BEFORE_DRAINING + value: {{ .Values.checkTagBeforeDraining | quote }} + - name: MANAGED_TAG + value: {{ .Values.managedTag | quote }} - name: USE_PROVIDER_ID value: {{ .Values.useProviderId | quote }} - name: DRY_RUN diff --git a/config/helm/aws-node-termination-handler/values.yaml b/config/helm/aws-node-termination-handler/values.yaml index 9f19efd3..ca5db321 100644 --- a/config/helm/aws-node-termination-handler/values.yaml +++ b/config/helm/aws-node-termination-handler/values.yaml @@ -171,11 +171,10 @@ queueURL: "" workers: 10 # If true, check that the instance is tagged with "aws-node-termination-handler/managed" as the key before draining the node -# If false, disables calls to ASG API. -checkASGTagBeforeDraining: true +checkTagBeforeDraining: true -# The tag to ensure is on a node if checkASGTagBeforeDraining is true -managedAsgTag: "aws-node-termination-handler/managed" +# The tag to ensure is on a node if checkTagBeforeDraining is true +managedTag: "aws-node-termination-handler/managed" # If true, fetch node name through Kubernetes node spec ProviderID instead of AWS event PrivateDnsHostname. useProviderId: false diff --git a/pkg/config/config.go b/pkg/config/config.go index 3f7d35b0..8d86e36c 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -60,8 +60,12 @@ const ( enableRebalanceDrainingDefault = false checkASGTagBeforeDrainingConfigKey = "CHECK_ASG_TAG_BEFORE_DRAINING" checkASGTagBeforeDrainingDefault = true + checkTagBeforeDrainingConfigKey = "CHECK_TAG_BEFORE_DRAINING" + checkTagBeforeDrainingDefault = true managedAsgTagConfigKey = "MANAGED_ASG_TAG" + managedTagConfigKey = "MANAGED_TAG" managedAsgTagDefault = "aws-node-termination-handler/managed" + managedTagDefault = "aws-node-termination-handler/managed" useProviderIdConfigKey = "USE_PROVIDER_ID" useProviderIdDefault = false metadataTriesConfigKey = "METADATA_TRIES" @@ -123,7 +127,9 @@ type Config struct { EnableRebalanceMonitoring bool EnableRebalanceDraining bool CheckASGTagBeforeDraining bool + CheckTagBeforeDraining bool ManagedAsgTag string + ManagedTag string MetadataTries int CordonOnly bool TaintNode bool @@ -178,8 +184,10 @@ func ParseCliArgs() (config Config, err error) { flag.BoolVar(&config.EnableSQSTerminationDraining, "enable-sqs-termination-draining", getBoolEnv(enableSQSTerminationDrainingConfigKey, enableSQSTerminationDrainingDefault), "If true, drain nodes when an SQS termination event is received") flag.BoolVar(&config.EnableRebalanceMonitoring, "enable-rebalance-monitoring", getBoolEnv(enableRebalanceMonitoringConfigKey, enableRebalanceMonitoringDefault), "If true, cordon nodes when the rebalance recommendation notice is received. If you'd like to drain the node in addition to cordoning, then also set \"enableRebalanceDraining\".") flag.BoolVar(&config.EnableRebalanceDraining, "enable-rebalance-draining", getBoolEnv(enableRebalanceDrainingConfigKey, enableRebalanceDrainingDefault), "If true, drain nodes when the rebalance recommendation notice is received") - flag.BoolVar(&config.CheckASGTagBeforeDraining, "check-asg-tag-before-draining", getBoolEnv(checkASGTagBeforeDrainingConfigKey, checkASGTagBeforeDrainingDefault), "If true, check that the instance is tagged with \"aws-node-termination-handler/managed\" as the key before draining the node. If false, disables calls to ASG API.") - flag.StringVar(&config.ManagedAsgTag, "managed-asg-tag", getEnv(managedAsgTagConfigKey, managedAsgTagDefault), "Sets the tag to check for on instances that is propogated from the ASG before taking action, default to aws-node-termination-handler/managed") + flag.BoolVar(&config.CheckASGTagBeforeDraining, "check-asg-tag-before-draining", getBoolEnv(checkASGTagBeforeDrainingConfigKey, checkASGTagBeforeDrainingDefault), "[DEPRECATED] * Use check-tag-before-draining instead * If true, check that the instance is tagged with \"aws-node-termination-handler/managed\" as the key before draining the node. If false, disables calls to ASG API.") + flag.BoolVar(&config.CheckTagBeforeDraining, "check-tag-before-draining", getBoolEnv(checkTagBeforeDrainingConfigKey, checkTagBeforeDrainingDefault), "If true, check that the instance is tagged with \"aws-node-termination-handler/managed\" as the key before draining the node.") + flag.StringVar(&config.ManagedAsgTag, "managed-asg-tag", getEnv(managedAsgTagConfigKey, managedAsgTagDefault), "[DEPRECATED] * Use managed-tag instead * Sets the tag to check instances for that is propogated from the ASG before taking action, default to aws-node-termination-handler/managed") + flag.StringVar(&config.ManagedTag, "managed-tag", getEnv(managedTagConfigKey, managedTagDefault), "Sets the tag to check instances for before taking action, default to aws-node-termination-handler/managed") flag.IntVar(&config.MetadataTries, "metadata-tries", getIntEnv(metadataTriesConfigKey, metadataTriesDefault), "The number of times to try requesting metadata. If you would like 2 retries, set metadata-tries to 3.") flag.BoolVar(&config.CordonOnly, "cordon-only", getBoolEnv(cordonOnly, false), "If true, nodes will be cordoned but not drained when an interruption event occurs.") flag.BoolVar(&config.TaintNode, "taint-node", getBoolEnv(taintNode, false), "If true, nodes will be tainted when an interruption event occurs.") @@ -209,12 +217,26 @@ func ParseCliArgs() (config Config, err error) { config.PodTerminationGracePeriod = gracePeriod } + if isConfigProvided("managed-asg-tag", managedAsgTagConfigKey) && isConfigProvided("managed-tag", managedTagConfigKey) { + log.Warn().Msg("Deprecated argument \"managed-asg-tag\" and the replacement argument \"managed-tag\" was provided. Using the newer argument \"managed-tag\"") + } else if isConfigProvided("managed-asg-tag", managedAsgTagConfigKey) { + log.Warn().Msg("Deprecated argument \"managed-asg-tag\" was provided. This argument will eventually be removed. Please switch to \"managed-tag\" instead.") + config.ManagedTag = config.ManagedAsgTag + } + + if isConfigProvided("check-asg-tag-before-draining", checkASGTagBeforeDrainingConfigKey) && isConfigProvided("check-tag-before-draining", checkTagBeforeDrainingConfigKey) { + log.Warn().Msg("Deprecated argument \"check-asg-tag-before-draining\" and the replacement argument \"check-tag-before-draining\" was provided. Using the newer argument \"check-tag-before-draining\"") + } else if isConfigProvided("check-asg-tag-before-draining", checkASGTagBeforeDrainingConfigKey) { + log.Warn().Msg("Deprecated argument \"check-asg-tag-before-draining\" was provided. This argument will eventually be removed. Please switch to \"check-tag-before-draining\" instead.") + config.CheckTagBeforeDraining = config.CheckASGTagBeforeDraining + } + switch strings.ToLower(config.LogLevel) { case "info": case "debug": case "error": default: - return config, fmt.Errorf("Invalid log-level passed: %s Should be one of: info, debug, error", config.LogLevel) + return config, fmt.Errorf("invalid log-level passed: %s Should be one of: info, debug, error", config.LogLevel) } if config.NodeName == "" { @@ -273,8 +295,8 @@ func (c Config) PrintJsonConfigArgs() { Str("aws_region", c.AWSRegion). Str("aws_endpoint", c.AWSEndpoint). Str("queue_url", c.QueueURL). - Bool("check_asg_tag_before_draining", c.CheckASGTagBeforeDraining). - Str("ManagedAsgTag", c.ManagedAsgTag). + Bool("check_tag_before_draining", c.CheckTagBeforeDraining). + Str("ManagedTag", c.ManagedTag). Bool("use_provider_id", c.UseProviderId). Msg("aws-node-termination-handler arguments") } @@ -321,8 +343,8 @@ func (c Config) PrintHumanConfigArgs() { "\tkubernetes-events-extra-annotations: %s,\n"+ "\taws-region: %s,\n"+ "\tqueue-url: %s,\n"+ - "\tcheck-asg-tag-before-draining: %t,\n"+ - "\tmanaged-asg-tag: %s,\n"+ + "\tcheck-tag-before-draining: %t,\n"+ + "\tmanaged-tag: %s,\n"+ "\tuse-provider-id: %t,\n"+ "\taws-endpoint: %s,\n", c.DryRun, @@ -358,8 +380,8 @@ func (c Config) PrintHumanConfigArgs() { c.KubernetesEventsExtraAnnotations, c.AWSRegion, c.QueueURL, - c.CheckASGTagBeforeDraining, - c.ManagedAsgTag, + c.CheckTagBeforeDraining, + c.ManagedTag, c.UseProviderId, c.AWSEndpoint, ) diff --git a/pkg/monitor/sqsevent/sqs-monitor.go b/pkg/monitor/sqsevent/sqs-monitor.go index 1c2510a0..61e757b8 100644 --- a/pkg/monitor/sqsevent/sqs-monitor.go +++ b/pkg/monitor/sqsevent/sqs-monitor.go @@ -21,7 +21,6 @@ import ( "github.com/aws/aws-node-termination-handler/pkg/monitor" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/awserr" - "github.com/aws/aws-sdk-go/service/autoscaling" "github.com/aws/aws-sdk-go/service/autoscaling/autoscalingiface" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/ec2/ec2iface" @@ -49,7 +48,7 @@ type SQSMonitor struct { ASG autoscalingiface.AutoScalingAPI EC2 ec2iface.EC2API CheckIfManaged bool - ManagedAsgTag string + ManagedTag string } // InterruptionEventWrapper is a convenience wrapper for associating an interruption event with its error, if any @@ -214,7 +213,7 @@ func (m SQSMonitor) processInterruptionEvents(interruptionEventWrappers []Interr dropMessageSuggestionCount++ case m.CheckIfManaged && !eventWrapper.InterruptionEvent.IsManaged: - // This event isn't for an instance that is managed by this process + // This event is for an instance that is not managed by this process log.Debug().Str("instance-id", eventWrapper.InterruptionEvent.InstanceID).Msg("dropping interruption event for unmanaged node") dropMessageSuggestionCount++ @@ -354,20 +353,8 @@ func (m SQSMonitor) getNodeInfo(instanceID string) (*NodeInfo, error) { } if m.CheckIfManaged { - if nodeInfo.AsgName == "" { - // If ASG tags are not propagated we might need to use the API - // to retrieve the ASG name - nodeInfo.AsgName, err = m.retrieveAutoScalingGroupName(nodeInfo.InstanceID) - if err != nil { - return nil, fmt.Errorf("unable to retrieve AutoScaling group: %w", err) - } - } - if nodeInfo.Tags[m.ManagedAsgTag] == "" { - // if ASG tags are not propagated we might have to check the ASG directly - nodeInfo.IsManaged, err = m.isASGManaged(nodeInfo.AsgName, nodeInfo.InstanceID) - if err != nil { - return nil, err - } + if _, ok := nodeInfo.Tags[m.ManagedTag]; !ok { + nodeInfo.IsManaged = false } } @@ -376,55 +363,3 @@ func (m SQSMonitor) getNodeInfo(instanceID string) (*NodeInfo, error) { return nodeInfo, nil } - -// isASGManaged returns whether the autoscaling group should be managed by node termination handler -func (m SQSMonitor) isASGManaged(asgName string, instanceID string) (bool, error) { - if asgName == "" { - return false, nil - } - asgFilter := autoscaling.Filter{Name: aws.String("auto-scaling-group"), Values: []*string{aws.String(asgName)}} - asgDescribeTagsInput := autoscaling.DescribeTagsInput{ - Filters: []*autoscaling.Filter{&asgFilter}, - } - isManaged := false - err := m.ASG.DescribeTagsPages(&asgDescribeTagsInput, func(resp *autoscaling.DescribeTagsOutput, next bool) bool { - for _, tag := range resp.Tags { - if *tag.Key == m.ManagedAsgTag { - isManaged = true - // breaks paging loop - return false - } - } - // continue paging loop - return true - }) - - log.Debug(). - Str("instance_id", instanceID). - Str("tag_key", m.ManagedAsgTag). - Bool("is_managed", isManaged). - Msg("directly checked if instance's Auto Scaling Group is managed") - return isManaged, err -} - -// retrieveAutoScalingGroupName returns the autoscaling group name for a given instanceID -func (m SQSMonitor) retrieveAutoScalingGroupName(instanceID string) (string, error) { - asgDescribeInstanceInput := autoscaling.DescribeAutoScalingInstancesInput{ - InstanceIds: []*string{&instanceID}, - MaxRecords: aws.Int64(50), - } - asgs, err := m.ASG.DescribeAutoScalingInstances(&asgDescribeInstanceInput) - if err != nil { - return "", err - } - if len(asgs.AutoScalingInstances) == 0 { - log.Debug().Str("instance_id", instanceID).Msg("Did not find an Auto Scaling Group for the given instance id") - return "", nil - } - asgName := asgs.AutoScalingInstances[0].AutoScalingGroupName - log.Debug(). - Str("instance_id", instanceID). - Str("asg_name", *asgName). - Msg("performed API lookup of instance ASG") - return *asgName, nil -} diff --git a/pkg/monitor/sqsevent/sqs-monitor_internal_test.go b/pkg/monitor/sqsevent/sqs-monitor_internal_test.go index db8f2db6..128be769 100644 --- a/pkg/monitor/sqsevent/sqs-monitor_internal_test.go +++ b/pkg/monitor/sqsevent/sqs-monitor_internal_test.go @@ -76,79 +76,19 @@ func TestGetNodeInfo_BothTags_Managed(t *testing.T) { EC2: ec2Mock, ASG: h.MockedASG{}, CheckIfManaged: true, - ManagedAsgTag: "aws-nth/managed", + ManagedTag: "aws-nth/managed", } nodeInfo, err := monitor.getNodeInfo("i-0123456789") h.Ok(t, err) h.Equals(t, true, nodeInfo.IsManaged) } -func TestGetNodeInfo_ASGTag_ASGNotManaged(t *testing.T) { - ec2Mock := h.MockedEC2{ - DescribeInstancesResp: getDescribeInstancesResp( - "i-beebeebe", - "mydns.example.com", - map[string]string{ - ASGTagName: "test-asg", - }), - } - asgMock := h.MockedASG{ - DescribeTagsPagesResp: autoscaling.DescribeTagsOutput{ - Tags: []*autoscaling.TagDescription{}, - }, - } - monitor := SQSMonitor{ - EC2: ec2Mock, - ASG: asgMock, - CheckIfManaged: true, - ManagedAsgTag: "aws-nth/managed", - } - nodeInfo, err := monitor.getNodeInfo("i-0123456789") - h.Ok(t, err) - h.Equals(t, "test-asg", nodeInfo.AsgName) - h.Equals(t, false, nodeInfo.IsManaged) -} - -func TestGetNodeInfo_ASGTag_ASGManaged(t *testing.T) { - ec2Mock := h.MockedEC2{ - DescribeInstancesResp: getDescribeInstancesResp( - "i-beebeebe", - "mydns.example.com", - map[string]string{ - ASGTagName: "test-asg", - }), - } - asgMock := h.MockedASG{ - DescribeTagsPagesResp: autoscaling.DescribeTagsOutput{ - Tags: []*autoscaling.TagDescription{ - {Key: aws.String("aws-nth/managed")}, - }, - }, - } - monitor := SQSMonitor{ - EC2: ec2Mock, - ASG: asgMock, - CheckIfManaged: true, - ManagedAsgTag: "aws-nth/managed", - } - nodeInfo, err := monitor.getNodeInfo("i-0123456789") - h.Ok(t, err) - h.Equals(t, "test-asg", nodeInfo.AsgName) - h.Equals(t, true, nodeInfo.IsManaged) -} - -func TestGetNodeInfo_NoASG(t *testing.T) { +func TestGetNodeInfo_NoASG_Managed(t *testing.T) { ec2Mock := h.MockedEC2{ DescribeInstancesResp: getDescribeInstancesResp("i-beebeebe", "mydns.example.com", map[string]string{}), } - asgMock := h.MockedASG{ - DescribeAutoScalingInstancesResp: autoscaling.DescribeAutoScalingInstancesOutput{ - AutoScalingInstances: []*autoscaling.InstanceDetails{}, - }, - } monitor := SQSMonitor{ EC2: ec2Mock, - ASG: asgMock, } nodeInfo, err := monitor.getNodeInfo("i-0123456789") h.Ok(t, err) @@ -160,16 +100,10 @@ func TestGetNodeInfo_NoASG_NotManaged(t *testing.T) { ec2Mock := h.MockedEC2{ DescribeInstancesResp: getDescribeInstancesResp("i-beebeebe", "mydns.example.com", map[string]string{}), } - asgMock := h.MockedASG{ - DescribeAutoScalingInstancesResp: autoscaling.DescribeAutoScalingInstancesOutput{ - AutoScalingInstances: []*autoscaling.InstanceDetails{}, - }, - } monitor := SQSMonitor{ EC2: ec2Mock, - ASG: asgMock, CheckIfManaged: true, - ManagedAsgTag: "aws-nth/managed", + ManagedTag: "aws-nth/managed", } nodeInfo, err := monitor.getNodeInfo("i-0123456789") h.Ok(t, err) @@ -202,26 +136,15 @@ func TestGetNodeInfo_ASG(t *testing.T) { func TestGetNodeInfo_ASG_ASGManaged(t *testing.T) { asgName := "test-asg" + managedTag := "aws-nth/managed" + tags := map[string]string{managedTag: "", "aws:autoscaling:groupName": asgName} ec2Mock := h.MockedEC2{ - DescribeInstancesResp: getDescribeInstancesResp("i-beebeebe", "mydns.example.com", map[string]string{}), - } - asgMock := h.MockedASG{ - DescribeAutoScalingInstancesResp: autoscaling.DescribeAutoScalingInstancesOutput{ - AutoScalingInstances: []*autoscaling.InstanceDetails{ - {AutoScalingGroupName: aws.String(asgName)}, - }, - }, - DescribeTagsPagesResp: autoscaling.DescribeTagsOutput{ - Tags: []*autoscaling.TagDescription{ - {Key: aws.String("aws-nth/managed")}, - }, - }, + DescribeInstancesResp: getDescribeInstancesResp("i-beebeebe", "mydns.example.com", tags), } monitor := SQSMonitor{ EC2: ec2Mock, - ASG: asgMock, CheckIfManaged: true, - ManagedAsgTag: "aws-nth/managed", + ManagedTag: managedTag, } nodeInfo, err := monitor.getNodeInfo("i-0123456789") h.Ok(t, err) @@ -231,24 +154,14 @@ func TestGetNodeInfo_ASG_ASGManaged(t *testing.T) { func TestGetNodeInfo_ASG_ASGNotManaged(t *testing.T) { asgName := "test-asg" + tags := map[string]string{"aws:autoscaling:groupName": asgName} ec2Mock := h.MockedEC2{ - DescribeInstancesResp: getDescribeInstancesResp("i-beebeebe", "mydns.example.com", map[string]string{}), - } - asgMock := h.MockedASG{ - DescribeAutoScalingInstancesResp: autoscaling.DescribeAutoScalingInstancesOutput{ - AutoScalingInstances: []*autoscaling.InstanceDetails{ - {AutoScalingGroupName: aws.String(asgName)}, - }, - }, - DescribeTagsPagesResp: autoscaling.DescribeTagsOutput{ - Tags: []*autoscaling.TagDescription{}, - }, + DescribeInstancesResp: getDescribeInstancesResp("i-beebeebe", "mydns.example.com", tags), } monitor := SQSMonitor{ EC2: ec2Mock, - ASG: asgMock, CheckIfManaged: true, - ManagedAsgTag: "aws-nth/managed", + ManagedTag: "aws-nth/managed", } nodeInfo, err := monitor.getNodeInfo("i-0123456789") h.Ok(t, err) @@ -267,45 +180,6 @@ func TestGetNodeInfo_Err(t *testing.T) { h.Nok(t, err) } -func TestGetNodeInfo_ASGError(t *testing.T) { - ec2Mock := h.MockedEC2{ - DescribeInstancesResp: getDescribeInstancesResp("i-beebeebe", "mydns.example.com", map[string]string{}), - } - asgMock := h.MockedASG{ - DescribeAutoScalingInstancesErr: fmt.Errorf("error"), - } - monitor := SQSMonitor{ - EC2: ec2Mock, - ASG: asgMock, - CheckIfManaged: true, //enables calling ASG API - } - _, err := monitor.getNodeInfo("i-0123456789") - h.Nok(t, err) -} - -func TestGetNodeInfo_ASGTagErr(t *testing.T) { - asgName := "test-asg" - ec2Mock := h.MockedEC2{ - DescribeInstancesResp: getDescribeInstancesResp("i-beebeebe", "mydns.example.com", map[string]string{}), - } - asgMock := h.MockedASG{ - DescribeAutoScalingInstancesResp: autoscaling.DescribeAutoScalingInstancesOutput{ - AutoScalingInstances: []*autoscaling.InstanceDetails{ - {AutoScalingGroupName: aws.String(asgName)}, - }, - }, - DescribeTagsPagesErr: fmt.Errorf("error"), - } - monitor := SQSMonitor{ - EC2: ec2Mock, - ASG: asgMock, - CheckIfManaged: true, - ManagedAsgTag: "aws-nth/managed", - } - _, err := monitor.getNodeInfo("i-0123456789") - h.Nok(t, err) -} - // AWS Mock helpers specific to sqs-monitor internal tests func getDescribeInstancesResp(instanceID string, privateDNSName string, tags map[string]string) ec2.DescribeInstancesOutput { diff --git a/pkg/monitor/sqsevent/sqs-monitor_test.go b/pkg/monitor/sqsevent/sqs-monitor_test.go index 0d5747f2..f08b8125 100644 --- a/pkg/monitor/sqsevent/sqs-monitor_test.go +++ b/pkg/monitor/sqsevent/sqs-monitor_test.go @@ -147,7 +147,7 @@ func TestMonitor_EventBridgeSuccess(t *testing.T) { sqsMonitor := sqsevent.SQSMonitor{ SQS: sqsMock, EC2: ec2Mock, - ManagedAsgTag: "aws-node-termination-handler/managed", + ManagedTag: "aws-node-termination-handler/managed", ASG: mockIsManagedTrue(nil), CheckIfManaged: true, QueueURL: "https://test-queue", @@ -191,7 +191,7 @@ func TestMonitor_EventBridgeTestNotification(t *testing.T) { sqsMonitor := sqsevent.SQSMonitor{ SQS: sqsMock, EC2: ec2Mock, - ManagedAsgTag: "aws-node-termination-handler/managed", + ManagedTag: "aws-node-termination-handler/managed", ASG: mockIsManagedFalse(nil), CheckIfManaged: true, QueueURL: "https://test-queue", @@ -232,7 +232,7 @@ func TestMonitor_AsgDirectToSqsSuccess(t *testing.T) { sqsMonitor := sqsevent.SQSMonitor{ SQS: sqsMock, EC2: ec2Mock, - ManagedAsgTag: "aws-node-termination-handler/managed", + ManagedTag: "aws-node-termination-handler/managed", ASG: mockIsManagedTrue(nil), CheckIfManaged: true, QueueURL: "https://test-queue", @@ -278,7 +278,7 @@ func TestMonitor_AsgDirectToSqsTestNotification(t *testing.T) { sqsMonitor := sqsevent.SQSMonitor{ SQS: sqsMock, EC2: ec2Mock, - ManagedAsgTag: "aws-node-termination-handler/managed", + ManagedTag: "aws-node-termination-handler/managed", ASG: mockIsManagedFalse(nil), CheckIfManaged: true, QueueURL: "https://test-queue", @@ -322,7 +322,7 @@ func TestMonitor_DrainTasks(t *testing.T) { sqsMonitor := sqsevent.SQSMonitor{ SQS: sqsMock, EC2: ec2Mock, - ManagedAsgTag: "aws-node-termination-handler/managed", + ManagedTag: "aws-node-termination-handler/managed", ASG: mockIsManagedTrue(&asgMock), CheckIfManaged: true, QueueURL: "https://test-queue", @@ -371,7 +371,7 @@ func TestMonitor_DrainTasks_Errors(t *testing.T) { sqsMonitor := sqsevent.SQSMonitor{ SQS: sqsMock, EC2: ec2Mock, - ManagedAsgTag: "aws-node-termination-handler/managed", + ManagedTag: "aws-node-termination-handler/managed", ASG: mockIsManagedTrue(&asgMock), CheckIfManaged: true, QueueURL: "https://test-queue", @@ -424,7 +424,7 @@ func TestMonitor_DrainTasksASGFailure(t *testing.T) { sqsMonitor := sqsevent.SQSMonitor{ SQS: sqsMock, EC2: ec2Mock, - ManagedAsgTag: "aws-node-termination-handler/managed", + ManagedTag: "aws-node-termination-handler/managed", ASG: mockIsManagedTrue(&asgMock), CheckIfManaged: true, QueueURL: "https://test-queue", @@ -702,7 +702,7 @@ func TestMonitor_EC2NoDNSName(t *testing.T) { sqsMonitor := sqsevent.SQSMonitor{ SQS: sqsMock, EC2: ec2Mock, - ManagedAsgTag: "aws-node-termination-handler/managed", + ManagedTag: "aws-node-termination-handler/managed", ASG: mockIsManagedTrue(nil), CheckIfManaged: true, QueueURL: "https://test-queue", @@ -742,7 +742,7 @@ func TestMonitor_EC2NoDNSNameOnTerminatedInstance(t *testing.T) { sqsMonitor := sqsevent.SQSMonitor{ SQS: sqsMock, EC2: ec2Mock, - ManagedAsgTag: "aws-node-termination-handler/managed", + ManagedTag: "aws-node-termination-handler/managed", ASG: mockIsManagedTrue(nil), CheckIfManaged: true, QueueURL: "https://test-queue", @@ -780,7 +780,7 @@ func TestMonitor_SQSDeleteFailure(t *testing.T) { sqsMonitor := sqsevent.SQSMonitor{ SQS: sqsMock, EC2: ec2Mock, - ManagedAsgTag: "aws-node-termination-handler/managed", + ManagedTag: "aws-node-termination-handler/managed", ASG: mockIsManagedTrue(nil), CheckIfManaged: true, QueueURL: "https://test-queue", @@ -837,45 +837,6 @@ func TestMonitor_InstanceNotManaged(t *testing.T) { } } -func TestMonitor_InstanceManagedErr(t *testing.T) { - for _, event := range []sqsevent.EventBridgeEvent{spotItnEvent, asgLifecycleEvent} { - msg, err := getSQSMessageFromEvent(event) - h.Ok(t, err) - messages := []*sqs.Message{ - &msg, - } - sqsMock := h.MockedSQS{ - ReceiveMessageResp: sqs.ReceiveMessageOutput{Messages: messages}, - ReceiveMessageErr: nil, - } - dnsNodeName := "ip-10-0-0-157.us-east-2.compute.internal" - ec2Mock := h.MockedEC2{ - DescribeInstancesResp: getDescribeInstancesResp(dnsNodeName, false, false), - } - - drainChan := make(chan monitor.InterruptionEvent, 1) - - sqsMonitor := sqsevent.SQSMonitor{ - SQS: sqsMock, - EC2: ec2Mock, - ASG: mockIsManagedErr(nil), - CheckIfManaged: true, - QueueURL: "https://test-queue", - InterruptionChan: drainChan, - } - - err = sqsMonitor.Monitor() - h.Nok(t, err) - - select { - case <-drainChan: - h.Ok(t, fmt.Errorf("Expected no events")) - default: - h.Ok(t, nil) - } - } -} - // AWS Mock Helpers specific to sqs-monitor tests func getDescribeInstancesResp(privateDNSName string, withASGTag bool, withManagedTag bool) ec2.DescribeInstancesOutput { @@ -941,11 +902,3 @@ func mockIsManagedFalse(asg *h.MockedASG) h.MockedASG { } return *asg } - -func mockIsManagedErr(asg *h.MockedASG) h.MockedASG { - if asg == nil { - asg = &h.MockedASG{} - } - asg.DescribeAutoScalingInstancesErr = fmt.Errorf("error") - return *asg -} diff --git a/test/e2e/asg-lifecycle-sqs-test b/test/e2e/asg-lifecycle-sqs-test index b05db052..eb80774b 100755 --- a/test/e2e/asg-lifecycle-sqs-test +++ b/test/e2e/asg-lifecycle-sqs-test @@ -42,7 +42,7 @@ set +x sleep 10 -RUN_INSTANCE_CMD="awslocal ec2 run-instances --private-ip-address ${WORKER_IP} --region ${AWS_REGION} --tag-specifications 'ResourceType=instance,Tags=[{Key=aws:autoscaling:groupName,Value=nth-integ-test}]'" +RUN_INSTANCE_CMD="awslocal ec2 run-instances --private-ip-address ${WORKER_IP} --region ${AWS_REGION} --tag-specifications 'ResourceType=instance,Tags=[{Key=aws:autoscaling:groupName,Value=nth-integ-test},{Key=aws-node-termination-handler/managed,Value=blah}]'" localstack_pod=$(kubectl get pods --selector app=localstack --field-selector="status.phase=Running" \ -o go-template --template '{{range .items}}{{.metadata.name}} {{.metadata.creationTimestamp}}{{"\n"}}{{end}}' \ | awk '$2 >= "'"${START_TIME//+0000/Z}"'" { print $1 }') diff --git a/test/e2e/ec2-state-change-sqs-test b/test/e2e/ec2-state-change-sqs-test index 464d0232..56a5d799 100755 --- a/test/e2e/ec2-state-change-sqs-test +++ b/test/e2e/ec2-state-change-sqs-test @@ -42,7 +42,7 @@ set +x sleep 10 -RUN_INSTANCE_CMD="awslocal ec2 run-instances --private-ip-address ${WORKER_IP} --region ${AWS_REGION} --tag-specifications 'ResourceType=instance,Tags=[{Key=aws:autoscaling:groupName,Value=nth-integ-test}]'" +RUN_INSTANCE_CMD="awslocal ec2 run-instances --private-ip-address ${WORKER_IP} --region ${AWS_REGION} --tag-specifications 'ResourceType=instance,Tags=[{Key=aws:autoscaling:groupName,Value=nth-integ-test},{Key=aws-node-termination-handler/managed,Value=blah}]'" set -x localstack_pod=$(kubectl get pods --selector app=localstack --field-selector="status.phase=Running" \ -o go-template --template '{{range .items}}{{.metadata.name}} {{.metadata.creationTimestamp}}{{"\n"}}{{end}}' \ diff --git a/test/e2e/rebalance-recommendation-sqs-test b/test/e2e/rebalance-recommendation-sqs-test index 8c8d4774..760c269a 100755 --- a/test/e2e/rebalance-recommendation-sqs-test +++ b/test/e2e/rebalance-recommendation-sqs-test @@ -42,7 +42,7 @@ set +x sleep 10 -RUN_INSTANCE_CMD="awslocal ec2 run-instances --private-ip-address ${WORKER_IP} --region ${AWS_REGION} --tag-specifications 'ResourceType=instance,Tags=[{Key=aws:autoscaling:groupName,Value=nth-integ-test}]'" +RUN_INSTANCE_CMD="awslocal ec2 run-instances --private-ip-address ${WORKER_IP} --region ${AWS_REGION} --tag-specifications 'ResourceType=instance,Tags=[{Key=aws:autoscaling:groupName,Value=nth-integ-test},{Key=aws-node-termination-handler/managed,Value=blah}]'" localstack_pod=$(kubectl get pods --selector app=localstack --field-selector="status.phase=Running" \ -o go-template --template '{{range .items}}{{.metadata.name}} {{.metadata.creationTimestamp}}{{"\n"}}{{end}}' \ | awk '$2 >= "'"${START_TIME//+0000/Z}"'" { print $1 }') diff --git a/test/e2e/scheduled-change-event-sqs-test b/test/e2e/scheduled-change-event-sqs-test index e63597a9..1b43a707 100755 --- a/test/e2e/scheduled-change-event-sqs-test +++ b/test/e2e/scheduled-change-event-sqs-test @@ -42,7 +42,7 @@ set +x sleep 10 -RUN_INSTANCE_CMD="awslocal ec2 run-instances --private-ip-address ${WORKER_IP} --region ${AWS_REGION} --tag-specifications 'ResourceType=instance,Tags=[{Key=aws:autoscaling:groupName,Value=nth-integ-test}]'" +RUN_INSTANCE_CMD="awslocal ec2 run-instances --private-ip-address ${WORKER_IP} --region ${AWS_REGION} --tag-specifications 'ResourceType=instance,Tags=[{Key=aws:autoscaling:groupName,Value=nth-integ-test},{Key=aws-node-termination-handler/managed,Value=blah}]'" localstack_pod=$(kubectl get pods --selector app=localstack --field-selector="status.phase=Running" \ -o go-template --template '{{range .items}}{{.metadata.name}} {{.metadata.creationTimestamp}}{{"\n"}}{{end}}' \ | awk '$2 >= "'"${START_TIME//+0000/Z}"'" { print $1 }') diff --git a/test/e2e/spot-interruption-sqs-test b/test/e2e/spot-interruption-sqs-test index 32498dd5..62f70230 100755 --- a/test/e2e/spot-interruption-sqs-test +++ b/test/e2e/spot-interruption-sqs-test @@ -42,7 +42,7 @@ set +x sleep 10 -RUN_INSTANCE_CMD="awslocal ec2 run-instances --private-ip-address ${WORKER_IP} --region ${AWS_REGION} --tag-specifications 'ResourceType=instance,Tags=[{Key=aws:autoscaling:groupName,Value=nth-integ-test}]'" +RUN_INSTANCE_CMD="awslocal ec2 run-instances --private-ip-address ${WORKER_IP} --region ${AWS_REGION} --tag-specifications 'ResourceType=instance,Tags=[{Key=aws:autoscaling:groupName,Value=nth-integ-test},{Key=aws-node-termination-handler/managed,Value=blah}]'" localstack_pod=$(kubectl get pods --selector app=localstack --field-selector="status.phase=Running" \ -o go-template --template '{{range .items}}{{.metadata.name}} {{.metadata.creationTimestamp}}{{"\n"}}{{end}}' \ | awk '$2 >= "'"${START_TIME//+0000/Z}"'" { print $1 }')