nginxinc · lucacome · May 27, 2020 · May 19, 2020 · May 27, 2020 · pleshakov
diff --git a/.travis.yml b/.travis.yml
@@ -10,6 +10,6 @@ script:
 before_install:
 - echo "PR Slug:${TRAVIS_PULL_REQUEST_SLUG}"
 - if [[ "${TRAVIS_PULL_REQUEST_SLUG}" == "nginxinc/nginx-asg-sync" || "${TRAVIS_PULL_REQUEST}" == "false" ]]; then
-  wget https://github.com/fossas/fossa-cli/releases/download/v1.0.27/fossa-cli_1.0.27_linux_amd64.tar.gz && tar xzf fossa-cli_1.0.27_linux_amd64.tar.gz
+  wget https://github.com/fossas/fossa-cli/releases/download/v1.0.30/fossa-cli_1.0.30_linux_amd64.tar.gz && tar xzf fossa-cli_1.0.30_linux_amd64.tar.gz
   && ./fossa init
-  && FOSSA_API_KEY=${fossapush} ./fossa analyze -t nginx-asg-sync -b ${TRAVIS_BRANCH} ; fi
+  && GO111MODULE=on FOSSA_API_KEY=${fossapush} ./fossa analyze -t nginx-asg-sync -b ${TRAVIS_BRANCH} ; fi
diff --git a/Makefile b/Makefile
@@ -1,14 +1,15 @@
 GO_DOCKER_RUN = docker run --rm -v $(shell pwd):/go/src/github.com/nginxinc/nginx-asg-sync -v $(shell pwd)/build_output:/build_output -w /go/src/github.com/nginxinc/nginx-asg-sync/cmd/sync
 GOLANG_CONTAINER = golang:1.14
 BUILD_IN_CONTAINER = 1
+GOFLAGS ?= -mod=vendor
 
 all: amazon centos7 ubuntu-xenial amazon2 ubuntu-bionic
 
 test:
 ifeq ($(BUILD_IN_CONTAINER),1)
 	$(GO_DOCKER_RUN) $(GOLANG_CONTAINER) go test
 else
-	go test ./...
+	GO111MODULE=on GOFLAGS='$(GOFLAGS)' go test ./...
 endif
 
 lint:
@@ -18,7 +19,7 @@ compile: test
 ifeq ($(BUILD_IN_CONTAINER),1)
 	$(GO_DOCKER_RUN) $(GOLANG_CONTAINER) go build -o /build_output/nginx-asg-sync
 else
-	go build -o ./build_output/nginx-asg-sync github.com/nginxinc/nginx-asg-sync/cmd/sync
+	GO111MODULE=on GOFLAGS='$(GOFLAGS)' go build -o ./build_output/nginx-asg-sync github.com/nginxinc/nginx-asg-sync/cmd/sync
 endif
 
 amazon: compile

diff --git a/README.md b/README.md
@@ -136,6 +136,10 @@ Because cloud provider APIs return the instances IP addresses before the instanc
 
 Small timeouts ensure that a health check will fail fast if the backend instance is not healthy. Also, the mandatory parameter ensures NGINX Plus won't consider a newly added instance healthy until a health check passes.
 
+When using AWS it's possible to filter out the instances that are not in the `InService` state of the [Lifecycle](https://docs.aws.amazon.com/autoscaling/ec2/userguide/AutoScalingGroupLifecycle.html) by setting the parameter `in_service` to `true`. This ensures that the IP won't be added until the instance is ready to accept requests.
+This also works when an instance is being terminated: the asg-sync will remove the IP of an instance that went from the `InService` state to one of the terminating states.
+**Note**: because the asg-sync works on a polling-based model, there will be a delay between the instance going to a terminating state and the asg-sync removing its IP from NGINX Plus. To guarantee that NGINX Plus doesn't send any requests to a terminated instance, make sure the instance stays in the `Terminating:Wait` state for a period greater than the interval `sync_interval_in_seconds`.
+
 ### Configuration for Cloud Providers
 
 See the example for your cloud provider: [AWS](examples/aws.md), [Azure](examples/azure.md).
@@ -144,7 +148,7 @@ See the example for your cloud provider: [AWS](examples/aws.md), [Azure](example
 
 nginx-asg-sync runs as a system service and supports the start/stop/restart commands.
 
-For Ubuntu 14.04 and Amazon Linux 1, run: `$ sudo start|stop|restart nginx-asg-sync`
+For Amazon Linux 1, run: `$ sudo start|stop|restart nginx-asg-sync`
 
 For Ubuntu 16.04 and 18.04, CentOS7/RHEL7 and Amazon Linux 2, run: `$ sudo service nginx-asg-sync start|stop|restart`
 

diff --git a/cmd/sync/aws.go b/cmd/sync/aws.go
@@ -3,20 +3,24 @@ package main
 import (
 	"fmt"
 	"net/http"
+	"reflect"
 	"time"
 
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/aws/ec2metadata"
 	"github.com/aws/aws-sdk-go/aws/session"
+	"github.com/aws/aws-sdk-go/service/autoscaling"
+	"github.com/aws/aws-sdk-go/service/autoscaling/autoscalingiface"
 	"github.com/aws/aws-sdk-go/service/ec2"
 	"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
 	yaml "gopkg.in/yaml.v2"
 )
 
-// AWSClient allows you to get the list of IP addresses of instanes of an Auto Scaling group. It implements the CloudProvider interface
+// AWSClient allows you to get the list of IP addresses of instances of an Auto Scaling group. It implements the CloudProvider interface
 type AWSClient struct {
-	svcEC2 ec2iface.EC2API
-	config *awsConfig
+	svcEC2         ec2iface.EC2API
+	svcAutoscaling autoscalingiface.AutoScalingAPI
+	config         *awsConfig
 }
 
 // NewAWSClient creates and configures an AWSClient
@@ -71,6 +75,7 @@ func (client *AWSClient) GetUpstreams() []Upstream {
 			MaxFails:     &client.config.Upstreams[i].MaxFails,
 			FailTimeout:  getFailTimeoutOrDefault(client.config.Upstreams[i].FailTimeout),
 			SlowStart:    getSlowStartOrDefault(client.config.Upstreams[i].SlowStart),
+			InService:    client.config.Upstreams[i].InService,
 		}
 		upstreams = append(upstreams, u)
 	}
@@ -87,8 +92,9 @@ func (client *AWSClient) configure() error {
 		return err
 	}
 
-	svcEC2 := ec2.New(session)
-	client.svcEC2 = svcEC2
+	client.svcEC2 = ec2.New(session)
+	client.svcAutoscaling = autoscaling.New(session)
+
 	return nil
 }
 
@@ -131,6 +137,13 @@ func (client *AWSClient) CheckIfScalingGroupExists(name string) (bool, error) {
 
 // GetPrivateIPsForScalingGroup returns the list of IP addresses of instances of the Auto Scaling group
 func (client *AWSClient) GetPrivateIPsForScalingGroup(name string) ([]string, error) {
+	var onlyInService bool
+	for _, u := range client.GetUpstreams() {
+		if u.ScalingGroup == name && u.InService {
+			onlyInService = true
+			break
+		}
+	}
 	params := &ec2.DescribeInstancesInput{
 		Filters: []*ec2.Filter{
 			{
@@ -152,17 +165,76 @@ func (client *AWSClient) GetPrivateIPsForScalingGroup(name string) ([]string, er
 	}
 
 	var result []string
+	insIDtoIP := make(map[string]string)
+
 	for _, res := range response.Reservations {
 		for _, ins := range res.Instances {
 			if len(ins.NetworkInterfaces) > 0 && ins.NetworkInterfaces[0].PrivateIpAddress != nil {
-				result = append(result, *ins.NetworkInterfaces[0].PrivateIpAddress)
+				if onlyInService {
+					insIDtoIP[*ins.InstanceId] = *ins.NetworkInterfaces[0].PrivateIpAddress
+				} else {
+					result = append(result, *ins.NetworkInterfaces[0].PrivateIpAddress)
+				}
 			}
 		}
 	}
+	if onlyInService {
+		result, err = client.getInstancesInService(insIDtoIP)
+		if err != nil {
+			return nil, err
+		}
+	}
 
 	return result, nil
 }
 
+// getInstancesInService returns the IPs of the given instances whose LifecycleState is InService
+func (client *AWSClient) getInstancesInService(insIDtoIP map[string]string) ([]string, error) {
+	const maxItems = 50 // DescribeAutoScalingInstances accepts at most 50 instance IDs per request
+	var result []string
+	keys := reflect.ValueOf(insIDtoIP).MapKeys()
+	instanceIds := make([]*string, len(keys))
+
+	for i := 0; i < len(keys); i++ {
+		instanceIds[i] = aws.String(keys[i].String())
+	}
+
+	batches := prepareBatches(maxItems, instanceIds)
+	for _, batch := range batches {
+		params := &autoscaling.DescribeAutoScalingInstancesInput{
+			InstanceIds: batch,
+		}
+		response, err := client.svcAutoscaling.DescribeAutoScalingInstances(params)
+		if err != nil {
+			return nil, err
+		}
+		// aws.StringValue maps a nil pointer to "", so a missing ID or state is skipped instead of panicking
+		for _, ins := range response.AutoScalingInstances {
+			if ip, ok := insIDtoIP[aws.StringValue(ins.InstanceId)]; ok && aws.StringValue(ins.LifecycleState) == "InService" {
+				result = append(result, ip)
+			}
+		}
+	}
+
+	return result, nil
+}
+
+// prepareBatches splits items into consecutive batches of at most maxItems elements each
+func prepareBatches(maxItems int, items []*string) [][]*string {
+	var batches [][]*string
+	if maxItems <= 0 {
+		maxItems = len(items) // a non-positive step would loop forever or panic; use one batch instead
+	}
+	for i := 0; i < len(items); i += maxItems {
+		end := i + maxItems
+		if end > len(items) {
+			end = len(items)
+		}
+		batches = append(batches, items[i:end])
+	}
+	return batches
+}
+
 // Configuration for AWS Cloud Provider
 
 type awsConfig struct {
@@ -179,6 +251,7 @@ type awsUpstream struct {
 	MaxFails         int    `yaml:"max_fails"`
 	FailTimeout      string `yaml:"fail_timeout"`
 	SlowStart        string `yaml:"slow_start"`
+	InService        bool   `yaml:"in_service"`
 }
 
 func validateAWSConfig(cfg *awsConfig) error {

diff --git a/cmd/sync/aws_test.go b/cmd/sync/aws_test.go
@@ -1,6 +1,8 @@
 package main
 
-import "testing"
+import (
+	"testing"
+)
 
 type testInputAWS struct {
 	cfg *awsConfig
@@ -14,6 +16,7 @@ func getValidAWSConfig() *awsConfig {
 			AutoscalingGroup: "backend-group",
 			Port:             80,
 			Kind:             "http",
+			InService:        false,
 		},
 	}
 	cfg := awsConfig{
@@ -100,6 +103,7 @@ func TestGetUpstreamsAWS(t *testing.T) {
 			MaxConns:    2,
 			SlowStart:   "5s",
 			FailTimeout: "10s",
+			InService:   false,
 		},
 		{
 			Name:        "127.0.0.2",
@@ -108,6 +112,7 @@ func TestGetUpstreamsAWS(t *testing.T) {
 			MaxConns:    3,
 			SlowStart:   "6s",
 			FailTimeout: "11s",
+			InService:   true,
 		},
 	}
 	cfg.Upstreams = upstreams
@@ -152,5 +157,32 @@ func areEqualUpstreamsAWS(u1 awsUpstream, u2 Upstream) bool {
 		return false
 	}
 
+	if u1.InService != u2.InService {
+		return false
+	}
+
 	return true
 }
+
+func TestPrepareBatches(t *testing.T) {
+	const maxItems = 3
+	ids := []string{"i-394ujfs", "i-dfdinf", "i-fsfsf", "i-8hr83hfwif", "i-nsnsnan"}
+	instanceIds := make([]*string, len(ids))
+	for i := range ids {
+		instanceIds[i] = &ids[i]
+	}
+	batches := prepareBatches(maxItems, instanceIds)
+	if want := (len(ids) + maxItems - 1) / maxItems; len(batches) != want { // ceiling division
+		t.Errorf("prepareBatches() returned %v batches, expected %v", len(batches), want)
+	}
+
+	total := 0 // items seen across all batches; must equal the input size so nothing is dropped
+	for _, batch := range batches {
+		if len(batch) > maxItems {
+			t.Errorf("prepareBatches() returned a batch with len > %v", maxItems)
+		}
+		total += len(batch)
+	}
+	if total != len(ids) {
+		t.Errorf("prepareBatches() returned %v items in total, expected %v", total, len(ids))
+	}
+}
diff --git a/cmd/sync/config.go b/cmd/sync/config.go
@@ -59,4 +59,5 @@ type Upstream struct {
 	MaxFails     *int
 	FailTimeout  string
 	SlowStart    string
+	InService    bool
 }
diff --git a/examples/aws.md b/examples/aws.md
@@ -34,6 +34,7 @@ upstreams:
    max_fails: 1
    fail_timeout: 10s
    slow_start: 0s
+   in_service: true
 ```
 
 * The `api_endpoint` key defines the NGINX Plus API endpoint.
@@ -48,4 +49,5 @@ upstreams:
   * `max_conns` – The maximum number of simultaneous active connections to an upstream server. Default value is 0, meaning there is no limit.
   * `max_fails` – The number of unsuccessful attempts to communicate with an upstream server that should happen in the duration set by the `fail-timeout` to consider the server unavailable. Default value is 1. The zero value disables the accounting of attempts.
   * `fail_timeout` – The time during which the specified number of unsuccessful attempts to communicate with an upstream server should happen to consider the server unavailable. Default value is 10s.
-  * `slow_start` – The slow start allows an upstream server to gradually recover its weight from 0 to its nominal value after it has been recovered or became available or when the server becomes available after a period of time it was considered unavailable. By default, the slow start is disabled.
+  * `slow_start` – The slow start allows an upstream server to gradually recover its weight from 0 to its nominal value after it has been recovered or became available or when the server becomes available after a period of time it was considered unavailable. By default, the slow start is disabled.
+  * `in_service` – Use only instances that are in the `InService` state of the [Lifecycle](https://docs.aws.amazon.com/autoscaling/ec2/userguide/AutoScalingGroupLifecycle.html). Default value is false.
diff --git a/go.mod b/go.mod
@@ -3,7 +3,7 @@ module github.com/nginxinc/nginx-asg-sync
 go 1.14
 
 require (
-	github.com/Azure/azure-sdk-for-go v42.2.0+incompatible
+	github.com/Azure/azure-sdk-for-go v42.3.0+incompatible
 	github.com/Azure/go-autorest/autorest v0.10.1 // indirect
 	github.com/Azure/go-autorest/autorest/adal v0.8.3 // indirect
 	github.com/Azure/go-autorest/autorest/azure/auth v0.4.2

diff --git a/go.sum b/go.sum
@@ -1,5 +1,5 @@
-github.com/Azure/azure-sdk-for-go v42.2.0+incompatible h1:ezf8BQIvXYn+LSf+rDqOVyRG3bWkf/SXKYFz4zIBX1Q=
-github.com/Azure/azure-sdk-for-go v42.2.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
+github.com/Azure/azure-sdk-for-go v42.3.0+incompatible h1:PAHkmPqd/vQV4LJcqzEUM1elCyTMWjbrO8oFMl0dvBE=
+github.com/Azure/azure-sdk-for-go v42.3.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
 github.com/Azure/go-autorest/autorest v0.9.0/go.mod h1:xyHB1BMZT0cuDHU7I0+g046+BFDTQ8rEZB0s4Yfa6bI=
 github.com/Azure/go-autorest/autorest v0.9.3/go.mod h1:GsRuLYvwzLjjjRoWEIyMUaYq8GNUx2nRB378IPt/1p0=
 github.com/Azure/go-autorest/autorest v0.10.1 h1:uaB8A32IZU9YKs9v50+/LWIWTDHJk2vlGzbfd7FfESI=

diff --git a/vendor/github.com/Azure/azure-sdk-for-go/version/version.go b/vendor/github.com/Azure/azure-sdk-for-go/version/version.go