From 811462e9695dd160eb2eb4b2738632670c0332f4 Mon Sep 17 00:00:00 2001 From: Rico Huijbers Date: Mon, 19 Nov 2018 11:37:42 +0100 Subject: [PATCH] feat(aws-ecs): instance autoscaling and drain hook (#1192) Make it easy to configure EC2 instance autoscaling for your cluster, and automatically add a Lifecycle Hook Lambda that will delay instance termination until all ECS tasks have drained from the instance. Fixes #1162. --- .../aws-autoscaling/lib/auto-scaling-group.ts | 65 ++++-- .../aws-autoscaling/lib/lifecycle-hook.ts | 4 +- .../test/test.auto-scaling-group.ts | 72 +++++++ packages/@aws-cdk/aws-ecs/README.md | 26 ++- packages/@aws-cdk/aws-ecs/lib/cluster.ts | 53 ++++- .../lib/drain-hook/instance-drain-hook.ts | 99 +++++++++ .../lib/drain-hook/lambda-source/index.py | 70 +++++++ packages/@aws-cdk/aws-ecs/package.json | 1 + .../test/ec2/integ.lb-awsvpc-nw.expected.json | 192 +++++++++++++++++- .../test/ec2/integ.lb-bridge-nw.expected.json | 192 +++++++++++++++++- .../@aws-cdk/aws-ecs/test/test.ecs-cluster.ts | 28 ++- 11 files changed, 777 insertions(+), 25 deletions(-) create mode 100644 packages/@aws-cdk/aws-ecs/lib/drain-hook/instance-drain-hook.ts create mode 100644 packages/@aws-cdk/aws-ecs/lib/drain-hook/lambda-source/index.py diff --git a/packages/@aws-cdk/aws-autoscaling/lib/auto-scaling-group.ts b/packages/@aws-cdk/aws-autoscaling/lib/auto-scaling-group.ts index 85aa0cf347f33..ea9fd6b91ef49 100644 --- a/packages/@aws-cdk/aws-autoscaling/lib/auto-scaling-group.ts +++ b/packages/@aws-cdk/aws-autoscaling/lib/auto-scaling-group.ts @@ -28,13 +28,15 @@ export interface AutoScalingGroupProps { /** * Minimum number of instances in the fleet + * * @default 1 */ minSize?: number; /** * Maximum number of instances in the fleet - * @default 1 + * + * @default desiredCapacity */ maxSize?: number; @@ -234,9 +236,12 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup launchConfig.addDependency(this.role); + const desiredCapacity =
(props.desiredCapacity !== undefined ? props.desiredCapacity : + (props.minSize !== undefined ? props.minSize : + (props.maxSize !== undefined ? props.maxSize : 1))); const minSize = props.minSize !== undefined ? props.minSize : 1; - const maxSize = props.maxSize !== undefined ? props.maxSize : 1; - const desiredCapacity = props.desiredCapacity !== undefined ? props.desiredCapacity : 1; + const maxSize = props.maxSize !== undefined ? props.maxSize : desiredCapacity; if (desiredCapacity < minSize || desiredCapacity > maxSize) { throw new Error(`Should have minSize (${minSize}) <= desiredCapacity (${desiredCapacity}) <= maxSize (${maxSize})`); @@ -322,8 +327,8 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup /** * Scale out or in based on time */ - public scaleOnSchedule(id: string, props: BasicScheduledActionProps) { - new ScheduledAction(this, `ScheduledAction${id}`, { + public scaleOnSchedule(id: string, props: BasicScheduledActionProps): ScheduledAction { + return new ScheduledAction(this, `ScheduledAction${id}`, { autoScalingGroup: this, ...props, }); @@ -332,7 +337,7 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup /** * Scale out or in to achieve a target CPU utilization */ - public scaleOnCpuUtilization(id: string, props: CpuUtilizationScalingProps) { + public scaleOnCpuUtilization(id: string, props: CpuUtilizationScalingProps): TargetTrackingScalingPolicy { return new TargetTrackingScalingPolicy(this, `ScalingPolicy${id}`, { autoScalingGroup: this, predefinedMetric: PredefinedMetric.ASGAverageCPUUtilization, @@ -344,7 +349,7 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup /** * Scale out or in to achieve a target network ingress rate */ - public scaleOnIncomingBytes(id: string, props: NetworkUtilizationScalingProps) { + public scaleOnIncomingBytes(id: string, props: NetworkUtilizationScalingProps): TargetTrackingScalingPolicy { return new 
TargetTrackingScalingPolicy(this, `ScalingPolicy${id}`, { autoScalingGroup: this, predefinedMetric: PredefinedMetric.ASGAverageNetworkIn, @@ -356,7 +361,7 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup /** * Scale out or in to achieve a target network egress rate */ - public scaleOnOutgoingBytes(id: string, props: NetworkUtilizationScalingProps) { + public scaleOnOutgoingBytes(id: string, props: NetworkUtilizationScalingProps): TargetTrackingScalingPolicy { return new TargetTrackingScalingPolicy(this, `ScalingPolicy${id}`, { autoScalingGroup: this, predefinedMetric: PredefinedMetric.ASGAverageNetworkOut, @@ -371,7 +376,7 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup * The AutoScalingGroup must have been attached to an Application Load Balancer * in order to be able to call this. */ - public scaleOnRequestCount(id: string, props: RequestCountScalingProps) { + public scaleOnRequestCount(id: string, props: RequestCountScalingProps): TargetTrackingScalingPolicy { if (this.albTargetGroup === undefined) { throw new Error('Attach the AutoScalingGroup to an Application Load Balancer before calling scaleOnRequestCount()'); } @@ -389,13 +394,14 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup // Target tracking policy can only be created after the load balancer has been // attached to the targetgroup (because we need its ARN). 
policy.addDependency(this.albTargetGroup.loadBalancerDependency()); + return policy; } /** * Scale out or in in order to keep a metric around a target value */ - public scaleToTrackMetric(id: string, props: MetricTargetTrackingProps) { - new TargetTrackingScalingPolicy(this, `ScalingPolicy${id}`, { + public scaleToTrackMetric(id: string, props: MetricTargetTrackingProps): TargetTrackingScalingPolicy { + return new TargetTrackingScalingPolicy(this, `ScalingPolicy${id}`, { autoScalingGroup: this, customMetric: props.metric, ...props @@ -405,7 +411,7 @@ export class AutoScalingGroup extends cdk.Construct implements IAutoScalingGroup /** * Scale out or in, in response to a metric */ - public scaleOnMetric(id: string, props: BasicStepScalingPolicyProps) { + public scaleOnMetric(id: string, props: BasicStepScalingPolicyProps): StepScalingPolicy { return new StepScalingPolicy(this, id, { ...props, autoScalingGroup: this }); } @@ -658,6 +664,41 @@ export interface IAutoScalingGroup { * The name of the AutoScalingGroup */ readonly autoScalingGroupName: string; + + /** + * Send a message to either an SQS queue or SNS topic when instances launch or terminate + */ + onLifecycleTransition(id: string, props: BasicLifecycleHookProps): LifecycleHook; + + /** + * Scale out or in based on time + */ + scaleOnSchedule(id: string, props: BasicScheduledActionProps): ScheduledAction; + + /** + * Scale out or in to achieve a target CPU utilization + */ + scaleOnCpuUtilization(id: string, props: CpuUtilizationScalingProps): TargetTrackingScalingPolicy; + + /** + * Scale out or in to achieve a target network ingress rate + */ + scaleOnIncomingBytes(id: string, props: NetworkUtilizationScalingProps): TargetTrackingScalingPolicy; + + /** + * Scale out or in to achieve a target network egress rate + */ + scaleOnOutgoingBytes(id: string, props: NetworkUtilizationScalingProps): TargetTrackingScalingPolicy; + + /** + * Scale out or in in order to keep a metric around a target value + */ + 
scaleToTrackMetric(id: string, props: MetricTargetTrackingProps): TargetTrackingScalingPolicy; + + /** + * Scale out or in, in response to a metric + */ + scaleOnMetric(id: string, props: BasicStepScalingPolicyProps): StepScalingPolicy; } /** diff --git a/packages/@aws-cdk/aws-autoscaling/lib/lifecycle-hook.ts b/packages/@aws-cdk/aws-autoscaling/lib/lifecycle-hook.ts index 1a96355838cac..8ad24858ddaee 100644 --- a/packages/@aws-cdk/aws-autoscaling/lib/lifecycle-hook.ts +++ b/packages/@aws-cdk/aws-autoscaling/lib/lifecycle-hook.ts @@ -27,7 +27,7 @@ export interface BasicLifecycleHookProps { * * If the lifecycle hook times out, perform the action in DefaultResult. */ - heartbeatTimeout?: number; + heartbeatTimeoutSec?: number; /** * The state of the Amazon EC2 instance to which you want to attach the lifecycle hook. @@ -87,7 +87,7 @@ export class LifecycleHook extends cdk.Construct implements api.ILifecycleHook { const resource = new cloudformation.LifecycleHookResource(this, 'Resource', { autoScalingGroupName: props.autoScalingGroup.autoScalingGroupName, defaultResult: props.defaultResult, - heartbeatTimeout: props.heartbeatTimeout, + heartbeatTimeout: props.heartbeatTimeoutSec, lifecycleHookName: props.lifecycleHookName, lifecycleTransition: props.lifecycleTransition, notificationMetadata: props.notificationMetadata, diff --git a/packages/@aws-cdk/aws-autoscaling/test/test.auto-scaling-group.ts b/packages/@aws-cdk/aws-autoscaling/test/test.auto-scaling-group.ts index 587ef5f91b22b..cf786197d5b0f 100644 --- a/packages/@aws-cdk/aws-autoscaling/test/test.auto-scaling-group.ts +++ b/packages/@aws-cdk/aws-autoscaling/test/test.auto-scaling-group.ts @@ -149,6 +149,78 @@ export = { test.done(); }, + 'can specify only min capacity'(test: Test) { + // GIVEN + const stack = new cdk.Stack(); + const vpc = mockVpc(stack); + + // WHEN + new autoscaling.AutoScalingGroup(stack, 'MyFleet', { + instanceType: new ec2.InstanceTypePair(ec2.InstanceClass.M4, ec2.InstanceSize.Micro), + 
machineImage: new ec2.AmazonLinuxImage(), + vpc, + minSize: 10 + }); + + // THEN + expect(stack).to(haveResource("AWS::AutoScaling::AutoScalingGroup", { + MinSize: "10", + MaxSize: "10", + DesiredCapacity: "10", + } + )); + + test.done(); + }, + + 'can specify only max capacity'(test: Test) { + // GIVEN + const stack = new cdk.Stack(); + const vpc = mockVpc(stack); + + // WHEN + new autoscaling.AutoScalingGroup(stack, 'MyFleet', { + instanceType: new ec2.InstanceTypePair(ec2.InstanceClass.M4, ec2.InstanceSize.Micro), + machineImage: new ec2.AmazonLinuxImage(), + vpc, + maxSize: 10 + }); + + // THEN + expect(stack).to(haveResource("AWS::AutoScaling::AutoScalingGroup", { + MinSize: "1", + MaxSize: "10", + DesiredCapacity: "10", + } + )); + + test.done(); + }, + + 'can specify only desiredCount'(test: Test) { + // GIVEN + const stack = new cdk.Stack(); + const vpc = mockVpc(stack); + + // WHEN + new autoscaling.AutoScalingGroup(stack, 'MyFleet', { + instanceType: new ec2.InstanceTypePair(ec2.InstanceClass.M4, ec2.InstanceSize.Micro), + machineImage: new ec2.AmazonLinuxImage(), + vpc, + desiredCapacity: 10 + }); + + // THEN + expect(stack).to(haveResource("AWS::AutoScaling::AutoScalingGroup", { + MinSize: "1", + MaxSize: "10", + DesiredCapacity: "10", + } + )); + + test.done(); + }, + 'addToRolePolicy can be used to add statements to the role policy'(test: Test) { const stack = new cdk.Stack(undefined, 'MyStack', { env: { region: 'us-east-1', account: '1234' }}); const vpc = mockVpc(stack); diff --git a/packages/@aws-cdk/aws-ecs/README.md b/packages/@aws-cdk/aws-ecs/README.md index a85586ce10d66..01c170e6d7086 100644 --- a/packages/@aws-cdk/aws-ecs/README.md +++ b/packages/@aws-cdk/aws-ecs/README.md @@ -236,10 +236,32 @@ containers are running on for you. If you're running an ECS cluster however, your EC2 instances might fill up as your number of Tasks goes up. 
To avoid placement errors, you will want to configure AutoScaling for your -EC2 instance group so that your instance count scales with demand. +EC2 instance group so that your instance count scales with demand. To keep +your EC2 instances halfway loaded, scaling up to a maximum of 30 instances +if required: + +```ts +const autoScalingGroup = cluster.addDefaultAutoScalingGroupCapacity({ + instanceType: new ec2.InstanceType("t2.xlarge"), + minCapacity: 3, + maxCapacity: 30, + instanceCount: 3, + + // Give instances 5 minutes to drain running tasks when an instance is + // terminated. This is the default, turn this off by specifying 0 or + // change the timeout up to 900 seconds. + taskDrainTimeSeconds: 300, +}); + +autoScalingGroup.scaleOnCpuUtilization('KeepCpuHalfwayLoaded', { + targetUtilizationPercent: 50 +}); +``` + +See the `@aws-cdk/aws-autoscaling` library for more autoscaling options +you can configure on your instances. ### Roadmap -- [ ] Instance AutoScaling - [ ] Service Discovery Integration - [ ] Private registry authentication diff --git a/packages/@aws-cdk/aws-ecs/lib/cluster.ts b/packages/@aws-cdk/aws-ecs/lib/cluster.ts index 8e2c8466e20f7..37fedc3503a66 100644 --- a/packages/@aws-cdk/aws-ecs/lib/cluster.ts +++ b/packages/@aws-cdk/aws-ecs/lib/cluster.ts @@ -3,6 +3,7 @@ import cloudwatch = require ('@aws-cdk/aws-cloudwatch'); import ec2 = require('@aws-cdk/aws-ec2'); import iam = require('@aws-cdk/aws-iam'); import cdk = require('@aws-cdk/cdk'); +import { InstanceDrainHook } from './drain-hook/instance-drain-hook'; import { cloudformation } from './ecs.generated'; /** @@ -70,19 +71,23 @@ export class Cluster extends cdk.Construct implements ICluster { /** * Add a default-configured AutoScalingGroup running the ECS-optimized AMI to this Cluster + * + * Returns the AutoScalingGroup so you can add autoscaling settings to it.
*/ - public addDefaultAutoScalingGroupCapacity(options: AddDefaultAutoScalingGroupOptions) { + public addDefaultAutoScalingGroupCapacity(options: AddDefaultAutoScalingGroupOptions): autoscaling.AutoScalingGroup { const autoScalingGroup = new autoscaling.AutoScalingGroup(this, 'DefaultAutoScalingGroup', { vpc: this.vpc, instanceType: options.instanceType, machineImage: new EcsOptimizedAmi(), updateType: autoscaling.UpdateType.ReplacingUpdate, - minSize: 0, - maxSize: options.instanceCount || 1, - desiredCapacity: options.instanceCount || 1 + minSize: options.minCapacity, + maxSize: options.maxCapacity, + desiredCapacity: options.instanceCount, }); - this.addAutoScalingGroupCapacity(autoScalingGroup); + this.addAutoScalingGroupCapacity(autoScalingGroup, options); + + return autoScalingGroup; } /** @@ -118,6 +123,15 @@ export class Cluster extends cdk.Construct implements ICluster { "logs:CreateLogStream", "logs:PutLogEvents" ).addAllResources()); + + // 0 disables, otherwise forward to underlying implementation which picks the sane default + if (options.taskDrainTimeSeconds !== 0) { + new InstanceDrainHook(autoScalingGroup, 'DrainECSHook', { + autoScalingGroup, + cluster: this, + drainTimeSec: options.taskDrainTimeSeconds + }); + } } /** @@ -291,12 +305,25 @@ export interface AddAutoScalingGroupCapacityOptions { * @default false */ containersAccessInstanceRole?: boolean; + + /** + * Give tasks this many seconds to complete when instances are being scaled in. + * + * Task draining adds a Lambda and a Lifecycle hook to your AutoScalingGroup + * that will delay instance termination until all ECS tasks have drained from + * the instance. + * + * Set to 0 to disable task draining. 
+ * + * @default 300 + */ + taskDrainTimeSeconds?: number; } /** * Properties for adding autoScalingGroup */ -export interface AddDefaultAutoScalingGroupOptions { +export interface AddDefaultAutoScalingGroupOptions extends AddAutoScalingGroupCapacityOptions { /** * The type of EC2 instance to launch into your Autoscaling Group @@ -309,4 +336,18 @@ export interface AddDefaultAutoScalingGroupOptions { * @default 1 */ instanceCount?: number; + + /** + * Maximum number of instances + * + * @default Same as instanceCount + */ + maxCapacity?: number; + + /** + * Minimum number of instances + * + * @default 1 + */ + minCapacity?: number; } diff --git a/packages/@aws-cdk/aws-ecs/lib/drain-hook/instance-drain-hook.ts b/packages/@aws-cdk/aws-ecs/lib/drain-hook/instance-drain-hook.ts new file mode 100644 index 0000000000000..763ede6430c71 --- /dev/null +++ b/packages/@aws-cdk/aws-ecs/lib/drain-hook/instance-drain-hook.ts @@ -0,0 +1,99 @@ +import autoscaling = require('@aws-cdk/aws-autoscaling'); +import iam = require('@aws-cdk/aws-iam'); +import lambda = require('@aws-cdk/aws-lambda'); +import sns = require('@aws-cdk/aws-sns'); +import cdk = require('@aws-cdk/cdk'); +import fs = require('fs'); +import path = require('path'); +import { ICluster } from '../cluster'; + +// Reference for the source in this package: +// +// https://github.com/aws-samples/ecs-refarch-cloudformation/blob/master/infrastructure/lifecyclehook.yaml + +/** + * Properties for instance draining hook + */ +export interface InstanceDrainHookProps { + /** + * The AutoScalingGroup to install the instance draining hook for + */ + autoScalingGroup: autoscaling.IAutoScalingGroup; + + /** + * The cluster on which tasks have been scheduled + */ + cluster: ICluster; + + /** + * How many seconds to give tasks to drain before the instance is terminated anyway + * + * Must be between 0 and 900.
+ * + * @default 300 + */ + drainTimeSec?: number; +} + +/** + * A hook to drain instances from ECS traffic before they're terminated + */ +export class InstanceDrainHook extends cdk.Construct { + constructor(parent: cdk.Construct, id: string, props: InstanceDrainHookProps) { + super(parent, id); + + const drainTimeSeconds = props.drainTimeSec !== undefined ? props.drainTimeSec : 300; + + if (drainTimeSeconds < 0 || drainTimeSeconds > 900) { + throw new Error(`Drain time must be between 0 and 900 seconds, got: ${drainTimeSeconds}`); + } + + // Invoke Lambda via SNS Topic + const topic = new sns.Topic(this, 'Topic'); + const fn = new lambda.Function(this, 'Function', { + code: lambda.Code.inline(fs.readFileSync(path.join(__dirname, 'lambda-source', 'index.py'), { encoding: 'utf-8' })), + handler: 'index.lambda_handler', + runtime: lambda.Runtime.Python36, + // Timeout: some extra margin for additional API calls made by the Lambda, + // up to a maximum of 15 minutes. + timeout: Math.min(drainTimeSeconds + 10, 900), + environment: { + CLUSTER: props.cluster.clusterName + } + }); + + // Hook everything up: ASG -> Topic, Topic -> Lambda + props.autoScalingGroup.onLifecycleTransition('DrainHook', { + lifecycleTransition: autoscaling.LifecycleTransition.InstanceTerminating, + defaultResult: autoscaling.DefaultResult.Continue, + notificationTarget: topic, + heartbeatTimeoutSec: drainTimeSeconds, + }); + topic.subscribeLambda(fn); + + // FIXME: These should probably be restricted usefully in some way, but I don't exactly + // know how. + fn.addToRolePolicy(new iam.PolicyStatement() + .addActions( + 'autoscaling:CompleteLifecycleAction', + 'ec2:DescribeInstances', + 'ec2:DescribeInstanceAttribute', + 'ec2:DescribeInstanceStatus', + 'ec2:DescribeHosts', + ) + .addAllResources()); + + // FIXME: These should be restricted to the ECS cluster probably, but I don't exactly + // know how.
+ fn.addToRolePolicy(new iam.PolicyStatement() + .addActions( + 'ecs:ListContainerInstances', + 'ecs:SubmitContainerStateChange', + 'ecs:SubmitTaskStateChange', + 'ecs:DescribeContainerInstances', + 'ecs:UpdateContainerInstancesState', + 'ecs:ListTasks', + 'ecs:DescribeTasks') + .addAllResources()); + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-ecs/lib/drain-hook/lambda-source/index.py b/packages/@aws-cdk/aws-ecs/lib/drain-hook/lambda-source/index.py new file mode 100644 index 0000000000000..e17ac68681300 --- /dev/null +++ b/packages/@aws-cdk/aws-ecs/lib/drain-hook/lambda-source/index.py @@ -0,0 +1,70 @@ +import boto3, json, os, time + +ecs = boto3.client('ecs') +autoscaling = boto3.client('autoscaling') + + +def lambda_handler(event, context): + print(json.dumps(event)) + cluster = os.environ['CLUSTER'] + snsTopicArn = event['Records'][0]['Sns']['TopicArn'] + lifecycle_event = json.loads(event['Records'][0]['Sns']['Message']) + instance_id = lifecycle_event.get('EC2InstanceId') + if not instance_id: + print('Got event without EC2InstanceId: %s', json.dumps(event)) + return + + instance_arn = container_instance_arn(cluster, instance_id) + print('Instance %s has container instance ARN %s' % (lifecycle_event['EC2InstanceId'], instance_arn)) + + if not instance_arn: + return + + while has_tasks(cluster, instance_arn): + time.sleep(10) + + try: + print('Terminating instance %s' % instance_id) + autoscaling.complete_lifecycle_action( + LifecycleActionResult='CONTINUE', + **pick(lifecycle_event, 'LifecycleHookName', 'LifecycleActionToken', 'AutoScalingGroupName')) + except Exception as e: + # Lifecycle action may have already completed. 
+ print(str(e)) + + +def container_instance_arn(cluster, instance_id): + """Turn an instance ID into a container instance ARN.""" + arns = ecs.list_container_instances(cluster=cluster, filter='ec2InstanceId==' + instance_id)['containerInstanceArns'] + if not arns: + return None + return arns[0] + + +def has_tasks(cluster, instance_arn): + """Return True if the instance is running tasks for the given cluster.""" + instances = ecs.describe_container_instances(cluster=cluster, containerInstances=[instance_arn])['containerInstances'] + if not instances: + return False + instance = instances[0] + + if instance['status'] == 'ACTIVE': + # Start draining, then try again later + set_container_instance_to_draining(cluster, instance_arn) + return True + + tasks = instance['runningTasksCount'] + instance['pendingTasksCount'] + print('Instance %s has %s tasks' % (instance_arn, tasks)) + + return tasks > 0 + + +def set_container_instance_to_draining(cluster, instance_arn): + ecs.update_container_instances_state( + cluster=cluster, + containerInstances=[instance_arn], status='DRAINING') + + +def pick(dct, *keys): + """Pick a subset of a dict.""" + return {k: v for k, v in dct.items() if k in keys} diff --git a/packages/@aws-cdk/aws-ecs/package.json b/packages/@aws-cdk/aws-ecs/package.json index 92ee16c61b6a2..c9dc25b0dfd92 100644 --- a/packages/@aws-cdk/aws-ecs/package.json +++ b/packages/@aws-cdk/aws-ecs/package.json @@ -74,6 +74,7 @@ "@aws-cdk/aws-lambda": "^0.17.0", "@aws-cdk/aws-logs": "^0.17.0", "@aws-cdk/aws-route53": "^0.17.0", + "@aws-cdk/aws-sns": "^0.17.0", "@aws-cdk/cdk": "^0.17.0", "@aws-cdk/cx-api": "^0.17.0" }, diff --git a/packages/@aws-cdk/aws-ecs/test/ec2/integ.lb-awsvpc-nw.expected.json b/packages/@aws-cdk/aws-ecs/test/ec2/integ.lb-awsvpc-nw.expected.json index 4b4f44e30af90..59c11214bb09f 100644 --- a/packages/@aws-cdk/aws-ecs/test/ec2/integ.lb-awsvpc-nw.expected.json +++ b/packages/@aws-cdk/aws-ecs/test/ec2/integ.lb-awsvpc-nw.expected.json @@ -468,7 +468,7 @@ 
"Type": "AWS::AutoScaling::AutoScalingGroup", "Properties": { "MaxSize": "1", - "MinSize": "0", + "MinSize": "1", "DesiredCapacity": "1", "LaunchConfigurationName": { "Ref": "EcsClusterDefaultAutoScalingGroupLaunchConfigB7E376C1" @@ -498,6 +498,196 @@ } } }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicC705BD25": { + "Type": "AWS::SNS::Topic" + }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicFunctionSubscription4313BD38": { + "Type": "AWS::SNS::Subscription", + "Properties": { + "Endpoint": { + "Fn::GetAtt": [ + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionE17A5F5E", + "Arn" + ] + }, + "Protocol": "lambda", + "TopicArn": { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicC705BD25" + } + } + }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRole94543EDA": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "lambda.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + ] + ] + } + ] + } + }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRoleDefaultPolicyA45BF396": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": [ + "autoscaling:CompleteLifecycleAction", + "ec2:DescribeInstances", + "ec2:DescribeInstanceAttribute", + "ec2:DescribeInstanceStatus", + "ec2:DescribeHosts" + ], + "Effect": "Allow", + "Resource": "*" + }, + { + "Action": [ + "ecs:ListContainerInstances", + "ecs:SubmitContainerStateChange", + "ecs:SubmitTaskStateChange", + "ecs:DescribeContainerInstances", + "ecs:UpdateContainerInstancesState", + "ecs:ListTasks", + "ecs:DescribeTasks" + ], + "Effect": "Allow", + "Resource": "*" + } + ], + "Version": "2012-10-17" + }, + "PolicyName": 
"EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRoleDefaultPolicyA45BF396", + "Roles": [ + { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRole94543EDA" + } + ] + } + }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionE17A5F5E": { + "Type": "AWS::Lambda::Function", + "Properties": { + "Code": { + "ZipFile": "import boto3, json, os, time\n\necs = boto3.client('ecs')\nautoscaling = boto3.client('autoscaling')\n\n\ndef lambda_handler(event, context):\n print(json.dumps(event))\n cluster = os.environ['CLUSTER']\n snsTopicArn = event['Records'][0]['Sns']['TopicArn']\n lifecycle_event = json.loads(event['Records'][0]['Sns']['Message'])\n instance_id = lifecycle_event.get('EC2InstanceId')\n if not instance_id:\n print('Got event without EC2InstanceId: %s', json.dumps(event))\n return\n\n instance_arn = container_instance_arn(cluster, instance_id)\n print('Instance %s has container instance ARN %s' % (lifecycle_event['EC2InstanceId'], instance_arn))\n\n if not instance_arn:\n return\n\n while has_tasks(cluster, instance_arn):\n time.sleep(10)\n\n try:\n print('Terminating instance %s' % instance_id)\n autoscaling.complete_lifecycle_action(\n LifecycleActionResult='CONTINUE',\n **pick(lifecycle_event, 'LifecycleHookName', 'LifecycleActionToken', 'AutoScalingGroupName'))\n except Exception as e:\n # Lifecycle action may have already completed.\n print(str(e))\n\n\ndef container_instance_arn(cluster, instance_id):\n \"\"\"Turn an instance ID into a container instance ARN.\"\"\"\n arns = ecs.list_container_instances(cluster=cluster, filter='ec2InstanceId==' + instance_id)['containerInstanceArns']\n if not arns:\n return None\n return arns[0]\n\n\ndef has_tasks(cluster, instance_arn):\n \"\"\"Return True if the instance is running tasks for the given cluster.\"\"\"\n instances = ecs.describe_container_instances(cluster=cluster, containerInstances=[instance_arn])['containerInstances']\n if not instances:\n return False\n instance = 
instances[0]\n\n if instance['status'] == 'ACTIVE':\n # Start draining, then try again later\n set_container_instance_to_draining(cluster, instance_arn)\n return True\n\n tasks = instance['runningTasksCount'] + instance['pendingTasksCount']\n print('Instance %s has %s tasks' % (instance_arn, tasks))\n\n return tasks > 0\n\n\ndef set_container_instance_to_draining(cluster, instance_arn):\n ecs.update_container_instances_state(\n cluster=cluster,\n containerInstances=[instance_arn], status='DRAINING')\n\n\ndef pick(dct, *keys):\n \"\"\"Pick a subset of a dict.\"\"\"\n return {k: v for k, v in dct.items() if k in keys}\n" + }, + "Handler": "index.lambda_handler", + "Role": { + "Fn::GetAtt": [ + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRole94543EDA", + "Arn" + ] + }, + "Runtime": "python3.6", + "Environment": { + "Variables": { + "CLUSTER": { + "Ref": "EcsCluster97242B84" + } + } + }, + "Timeout": 310 + }, + "DependsOn": [ + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRole94543EDA", + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRoleDefaultPolicyA45BF396" + ] + }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionTopicE6B1EBA6": { + "Type": "AWS::Lambda::Permission", + "Properties": { + "Action": "lambda:InvokeFunction", + "FunctionName": { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionE17A5F5E" + }, + "Principal": "sns.amazonaws.com", + "SourceArn": { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicC705BD25" + } + } + }, + "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookRoleA38EC83B": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "autoscaling.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + } + } + }, + "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookRoleDefaultPolicy75002F88": { + "Type": "AWS::IAM::Policy", + "Properties": 
{ + "PolicyDocument": { + "Statement": [ + { + "Action": "sns:Publish", + "Effect": "Allow", + "Resource": { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicC705BD25" + } + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookRoleDefaultPolicy75002F88", + "Roles": [ + { + "Ref": "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookRoleA38EC83B" + } + ] + } + }, + "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookFFA63029": { + "Type": "AWS::AutoScaling::LifecycleHook", + "Properties": { + "AutoScalingGroupName": { + "Ref": "EcsClusterDefaultAutoScalingGroupASGC1A785DB" + }, + "LifecycleTransition": "autoscaling:EC2_INSTANCE_TERMINATING", + "DefaultResult": "CONTINUE", + "HeartbeatTimeout": 300, + "NotificationTargetARN": { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicC705BD25" + }, + "RoleARN": { + "Fn::GetAtt": [ + "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookRoleA38EC83B", + "Arn" + ] + } + } + }, "TaskDefTaskRole1EDB4A67": { "Type": "AWS::IAM::Role", "Properties": { diff --git a/packages/@aws-cdk/aws-ecs/test/ec2/integ.lb-bridge-nw.expected.json b/packages/@aws-cdk/aws-ecs/test/ec2/integ.lb-bridge-nw.expected.json index 9ba0140e66a09..142dcdb1f2580 100644 --- a/packages/@aws-cdk/aws-ecs/test/ec2/integ.lb-bridge-nw.expected.json +++ b/packages/@aws-cdk/aws-ecs/test/ec2/integ.lb-bridge-nw.expected.json @@ -489,7 +489,7 @@ "Type": "AWS::AutoScaling::AutoScalingGroup", "Properties": { "MaxSize": "1", - "MinSize": "0", + "MinSize": "1", "DesiredCapacity": "1", "LaunchConfigurationName": { "Ref": "EcsClusterDefaultAutoScalingGroupLaunchConfigB7E376C1" @@ -519,6 +519,196 @@ } } }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicC705BD25": { + "Type": "AWS::SNS::Topic" + }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicFunctionSubscription4313BD38": { + "Type": "AWS::SNS::Subscription", + "Properties": { + "Endpoint": { + "Fn::GetAtt": [ + 
"EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionE17A5F5E", + "Arn" + ] + }, + "Protocol": "lambda", + "TopicArn": { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicC705BD25" + } + } + }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRole94543EDA": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "lambda.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + ] + ] + } + ] + } + }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRoleDefaultPolicyA45BF396": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": [ + "autoscaling:CompleteLifecycleAction", + "ec2:DescribeInstances", + "ec2:DescribeInstanceAttribute", + "ec2:DescribeInstanceStatus", + "ec2:DescribeHosts" + ], + "Effect": "Allow", + "Resource": "*" + }, + { + "Action": [ + "ecs:ListContainerInstances", + "ecs:SubmitContainerStateChange", + "ecs:SubmitTaskStateChange", + "ecs:DescribeContainerInstances", + "ecs:UpdateContainerInstancesState", + "ecs:ListTasks", + "ecs:DescribeTasks" + ], + "Effect": "Allow", + "Resource": "*" + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRoleDefaultPolicyA45BF396", + "Roles": [ + { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRole94543EDA" + } + ] + } + }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionE17A5F5E": { + "Type": "AWS::Lambda::Function", + "Properties": { + "Code": { + "ZipFile": "import boto3, json, os, time\n\necs = boto3.client('ecs')\nautoscaling = boto3.client('autoscaling')\n\n\ndef lambda_handler(event, context):\n print(json.dumps(event))\n cluster = 
os.environ['CLUSTER']\n snsTopicArn = event['Records'][0]['Sns']['TopicArn']\n lifecycle_event = json.loads(event['Records'][0]['Sns']['Message'])\n instance_id = lifecycle_event.get('EC2InstanceId')\n if not instance_id:\n print('Got event without EC2InstanceId: %s', json.dumps(event))\n return\n\n instance_arn = container_instance_arn(cluster, instance_id)\n print('Instance %s has container instance ARN %s' % (lifecycle_event['EC2InstanceId'], instance_arn))\n\n if not instance_arn:\n return\n\n while has_tasks(cluster, instance_arn):\n time.sleep(10)\n\n try:\n print('Terminating instance %s' % instance_id)\n autoscaling.complete_lifecycle_action(\n LifecycleActionResult='CONTINUE',\n **pick(lifecycle_event, 'LifecycleHookName', 'LifecycleActionToken', 'AutoScalingGroupName'))\n except Exception as e:\n # Lifecycle action may have already completed.\n print(str(e))\n\n\ndef container_instance_arn(cluster, instance_id):\n \"\"\"Turn an instance ID into a container instance ARN.\"\"\"\n arns = ecs.list_container_instances(cluster=cluster, filter='ec2InstanceId==' + instance_id)['containerInstanceArns']\n if not arns:\n return None\n return arns[0]\n\n\ndef has_tasks(cluster, instance_arn):\n \"\"\"Return True if the instance is running tasks for the given cluster.\"\"\"\n instances = ecs.describe_container_instances(cluster=cluster, containerInstances=[instance_arn])['containerInstances']\n if not instances:\n return False\n instance = instances[0]\n\n if instance['status'] == 'ACTIVE':\n # Start draining, then try again later\n set_container_instance_to_draining(cluster, instance_arn)\n return True\n\n tasks = instance['runningTasksCount'] + instance['pendingTasksCount']\n print('Instance %s has %s tasks' % (instance_arn, tasks))\n\n return tasks > 0\n\n\ndef set_container_instance_to_draining(cluster, instance_arn):\n ecs.update_container_instances_state(\n cluster=cluster,\n containerInstances=[instance_arn], status='DRAINING')\n\n\ndef pick(dct, *keys):\n 
\"\"\"Pick a subset of a dict.\"\"\"\n return {k: v for k, v in dct.items() if k in keys}\n" + }, + "Handler": "index.lambda_handler", + "Role": { + "Fn::GetAtt": [ + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRole94543EDA", + "Arn" + ] + }, + "Runtime": "python3.6", + "Environment": { + "Variables": { + "CLUSTER": { + "Ref": "EcsCluster97242B84" + } + } + }, + "Timeout": 310 + }, + "DependsOn": [ + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRole94543EDA", + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionServiceRoleDefaultPolicyA45BF396" + ] + }, + "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionTopicE6B1EBA6": { + "Type": "AWS::Lambda::Permission", + "Properties": { + "Action": "lambda:InvokeFunction", + "FunctionName": { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookFunctionE17A5F5E" + }, + "Principal": "sns.amazonaws.com", + "SourceArn": { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicC705BD25" + } + } + }, + "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookRoleA38EC83B": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "autoscaling.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + } + } + }, + "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookRoleDefaultPolicy75002F88": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": "sns:Publish", + "Effect": "Allow", + "Resource": { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicC705BD25" + } + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookRoleDefaultPolicy75002F88", + "Roles": [ + { + "Ref": "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookRoleA38EC83B" + } + ] + } + }, + "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookFFA63029": { + "Type": 
"AWS::AutoScaling::LifecycleHook", + "Properties": { + "AutoScalingGroupName": { + "Ref": "EcsClusterDefaultAutoScalingGroupASGC1A785DB" + }, + "LifecycleTransition": "autoscaling:EC2_INSTANCE_TERMINATING", + "DefaultResult": "CONTINUE", + "HeartbeatTimeout": 300, + "NotificationTargetARN": { + "Ref": "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicC705BD25" + }, + "RoleARN": { + "Fn::GetAtt": [ + "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookRoleA38EC83B", + "Arn" + ] + } + } + }, "TaskDefTaskRole1EDB4A67": { "Type": "AWS::IAM::Role", "Properties": { diff --git a/packages/@aws-cdk/aws-ecs/test/test.ecs-cluster.ts b/packages/@aws-cdk/aws-ecs/test/test.ecs-cluster.ts index be055197775cc..8748b7cadbc87 100644 --- a/packages/@aws-cdk/aws-ecs/test/test.ecs-cluster.ts +++ b/packages/@aws-cdk/aws-ecs/test/test.ecs-cluster.ts @@ -67,7 +67,7 @@ export = { expect(stack).to(haveResource("AWS::AutoScaling::AutoScalingGroup", { MaxSize: "1", - MinSize: "0", + MinSize: "1", DesiredCapacity: "1", LaunchConfigurationName: { Ref: "EcsClusterDefaultAutoScalingGroupLaunchConfigB7E376C1" @@ -154,6 +154,32 @@ export = { test.done(); }, + + 'lifecycle hook is automatically added'(test: Test) { + // GIVEN + const stack = new cdk.Stack(); + const vpc = new ec2.VpcNetwork(stack, 'MyVpc', {}); + const cluster = new ecs.Cluster(stack, 'EcsCluster', { + vpc, + }); + + // WHEN + cluster.addDefaultAutoScalingGroupCapacity({ + instanceType: new ec2.InstanceType('t2.micro') + }); + + // THEN + expect(stack).to(haveResource('AWS::AutoScaling::LifecycleHook', { + AutoScalingGroupName: { Ref: "EcsClusterDefaultAutoScalingGroupASGC1A785DB" }, + LifecycleTransition: "autoscaling:EC2_INSTANCE_TERMINATING", + DefaultResult: "CONTINUE", + HeartbeatTimeout: 300, + NotificationTargetARN: { Ref: "EcsClusterDefaultAutoScalingGroupDrainECSHookTopicC705BD25" }, + RoleARN: { "Fn::GetAtt": [ "EcsClusterDefaultAutoScalingGroupLifecycleHookDrainHookRoleA38EC83B", "Arn" ] } + })); + + test.done(); + 
}, }, "allows specifying instance type"(test: Test) {