Skip to content

Commit

Permalink
Created TF module to perform AMI backups using Lamda Functions (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
const-bon authored Aug 11, 2017
1 parent a6ba2db commit 7f8101d
Show file tree
Hide file tree
Showing 10 changed files with 444 additions and 270 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Compiled files
*.tfstate
*.tfstate.backup

# Module directory
.terraform/
*.zip
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
all: ebs-snapshot-janitor.zip schedule-ebs-snapshot-backups.zip
all: lambda_ami_backups.zip lambda_ami_cleanups.zip

%.zip: %.py
zip $@ $<
Expand Down
42 changes: 32 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Terraform config for automatic EBS snapshots
# Terraform module for automatic AMI creation

This repo contains a terraform configuration that creates two lambda functions
that will take automatic EBS snapshots at regular intervals. It is based on
**WARNING!** AMI cleanup does not work yet.

This repo contains a terraform module that creates two lambda functions
that will create AMIs automatically at regular intervals. It is based on
the code at
<https://serverlesscode.com/post/lambda-schedule-ebs-snapshot-backups/> and
<https://serverlesscode.com/post/lambda-schedule-ebs-snapshot-backups-2/>.
Expand All @@ -10,18 +12,38 @@ the code at

Include this repository as a module in your existing terraform code:

Notes:
* `ami_owner` is an AWS account id.

```
module "lambda_ebs_snapshot" {
source = "github.com/chef/lambda_ebs_snapshot"
# Setting these variables is optional
# ebs_snapshot_backups_schedule = "cron(00 19 * * ? *)"
# ebs_snapshot_janitor_schedule = "cron(05 19 * * ? *)"
module "lambda_ami_backup" {
source = "git::https://github.com/cloudposse/tf_lambda_ami_backup.git?ref=master"
name = "${var.name}"
stage = "${var.stage}"
namespace = "${var.namespace}"
region = "${var.region}"
ami_owner = "${var.ami_owner}"
}
```

### Configuring your instances to be backed up

Tag any instances you want to be backed up with `Backup = true`.
## Variables

| Name | Default | Description | Required |
|:----------------------------:|:--------------:|:--------------------------------------------------------:|:--------:|
| namespace | `` | Namespace (e.g. `cp` or `cloudposse`) | Yes |
| stage | `` | Stage (e.g. `prod`, `dev`, `staging`) | Yes |
| name | `` | Name (e.g. `bastion` or `db`) | Yes |
| region | `` | AWS Region where module should operate (e.g. `us-east-1`)| Yes |
| ami_owner | `` | AWS Account ID which is used as a filter for AMI list (e.g. `123456789012`)| Yes |
| backup_schedule | `cron(00 19 * * ? *)` | The scheduling expression for AMI backups (e.g. `cron(0 20 * * ? *)` or `rate(5 minutes)`) | No |
| cleanup_schedule | `cron(05 19 * * ? *)` | The scheduling expression for AMI cleanup (e.g. `cron(0 20 * * ? *)` or `rate(5 minutes)`) | No |


## Configuring your instances to be backed up

Tag any instances you want to be backed up with `Snapshot = true`.

By default, old backups will be removed after 7 days. To keep them longer, set
another tag: `Retention = 14`, where 14 is the number of days you want to keep
Expand Down
99 changes: 99 additions & 0 deletions ami_backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Automated AMI Backups
#
# @author Robert Kozora <bobby@kozora.me>
#
# This script will search for all instances having a tag whose key is "Backup",
# "backup" or "Snapshot". As soon as we have the instances list, we loop through each instance
# and create an AMI of it. Also, it will look for a "Retention" tag key which
# will be used as a retention policy number in days. If there is no tag with
# that name, it will use a 7 days default value for each AMI.
#
# After creating the AMI it adds a "DeleteOn" tag to the AMI, computed from the
# Retention value, indicating when another Lambda function should delete it.

import boto3
import collections
import datetime
import sys
import pprint

# Module-level EC2 client: created once when the module is imported and used
# by lambda_handler for every EC2 API call in this script.
ec = boto3.client('ec2')
#image = ec.Image('id')

def lambda_handler(event, context):
    """Create an AMI of every tagged instance and stamp it with a DeleteOn tag.

    Instances are selected by the presence of a tag whose key is ``backup``,
    ``Backup`` or ``Snapshot``. Each matching instance is imaged without a
    reboot; the resulting AMI is named
    ``Lambda - <instance-id> from <YYYY-MM-DD-HH-MM-SS>``. The AMIs are then
    grouped by retention period and tagged with ``DeleteOn`` (``MM-DD-YYYY``),
    which the cleanup function reads to decide when to deregister them.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).
    """
    filters = [
        {'Name': 'tag-key', 'Values': ['backup', 'Backup', 'Snapshot']},
    ]

    # describe_instances is a paginated API; walk every page so instances
    # beyond the first page are not silently skipped.
    instances = []
    paginator = ec.get_paginator('describe_instances')
    for page in paginator.paginate(Filters=filters):
        for reservation in page.get('Reservations', []):
            instances.extend(reservation.get('Instances', []))

    print("Found %d instances that need backing up" % len(instances))

    # retention in days -> list of AMI ids that share that retention
    to_tag = collections.defaultdict(list)

    for instance in instances:
        instance_id = instance['InstanceId']
        retention_days = _retention_days(instance)

        create_fmt = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')

        image = ec.create_image(
            InstanceId=instance_id,
            Name="Lambda - " + instance_id + " from " + create_fmt,
            Description="Lambda created AMI of instance " + instance_id + " from " + create_fmt,
            NoReboot=True,
            DryRun=False,
        )

        to_tag[retention_days].append(image['ImageId'])

        print("Retaining AMI %s of instance %s for %d days" % (
            image['ImageId'],
            instance_id,
            retention_days,
        ))

    print(list(to_tag))

    # One create_tags call per distinct retention period.
    for retention_days, image_ids in to_tag.items():
        delete_date = datetime.date.today() + datetime.timedelta(days=retention_days)
        delete_fmt = delete_date.strftime('%m-%d-%Y')
        print("Will delete %d AMIs on %s" % (len(image_ids), delete_fmt))

        ec.create_tags(
            Resources=image_ids,
            Tags=[
                {'Key': 'DeleteOn', 'Value': delete_fmt},
            ]
        )


def _retention_days(instance, default=7):
    """Return the instance's ``Retention`` tag as an int, or ``default``.

    A missing tag, or a tag value that is not an integer, yields ``default``
    so that one badly tagged instance cannot abort the whole backup run.
    """
    for tag in instance.get('Tags', []):
        if tag.get('Key') != 'Retention':
            continue
        try:
            return int(tag.get('Value'))
        except (TypeError, ValueError):
            print("Ignoring invalid Retention tag %r on instance %s; using %d days" % (
                tag.get('Value'),
                instance.get('InstanceId'),
                default,
            ))
            return default
    return default

118 changes: 118 additions & 0 deletions ami_cleanup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Automated AMI and Snapshot Deletion
#
# @author Robert Kozora <bobby@kozora.me>
#
# This script will search for all instances having a tag whose key is "Backup",
# "backup" or "Snapshot". As soon as we have the instances list, we loop through each instance
# and reference the AMIs of that instance. We check that the latest daily backup
# succeeded then we store every image that's reached its DeleteOn tag's date for
# deletion. We then loop through the AMIs, deregister them and remove all the
# snapshots associated with that AMI.

import boto3
import collections
import datetime
import time
import os
import sys

# Region and account are injected through the Lambda environment; a missing
# variable raises KeyError as soon as the module is imported.
_region = os.environ['region']
_ami_owner = os.environ['ami_owner']

# Low-level client for describe/deregister/delete calls.
ec = boto3.client('ec2', _region)
# Resource API handle, used only to build the image collection below.
ec2 = boto3.resource('ec2', _region)
# AMIs owned by the configured account.
images = ec2.images.filter(Owners=[_ami_owner])

def lambda_handler(event, context):
    """Deregister expired Lambda-created AMIs and delete their snapshots.

    Instances are selected by the presence of a tag whose key is ``backup``,
    ``Backup`` or ``Snapshot``. For each one, the AMIs named
    ``Lambda - <instance-id> from <timestamp>`` are inspected: an AMI whose
    ``DeleteOn`` tag (``MM-DD-YYYY``) is today or earlier is queued for
    removal. AMIs without a parseable ``DeleteOn`` tag are never queued.

    Nothing is removed unless at least one inspected AMI was created today,
    which is taken as proof that the backup function is still working.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda runtime context (unused).
    """
    filters = [
        {'Name': 'tag-key', 'Values': ['backup', 'Backup', 'Snapshot']},
    ]

    # describe_instances is a paginated API; collect every page.
    instances = []
    for page in ec.get_paginator('describe_instances').paginate(Filters=filters):
        for reservation in page.get('Reservations', []):
            instances.extend(reservation.get('Instances', []))

    print("Found %d instances that need evaluated" % len(instances))

    today = datetime.date.today()
    date_fmt = today.strftime('%Y-%m-%d')

    # Materialise the image collection once instead of re-iterating it for
    # every instance.
    all_images = list(images)

    images_to_delete = []

    # Set to True once we confirm we have a backup taken today.
    # NOTE(review): this flag is global, so one instance's fresh backup
    # unlocks cleanup for every instance - confirm that is intended.
    backup_success = False

    for instance in instances:
        instance_id = instance['InstanceId']

        # The backup function names its AMIs
        # "Lambda - <instance-id> from <YYYY-MM-DD-HH-MM-SS>".
        name_prefix = 'Lambda - ' + instance_id + ' from '
        instance_images = [
            image for image in all_images
            if (image.name or '').startswith(name_prefix)
        ]

        for image in instance_images:
            delete_on = _delete_on_date(image)
            if delete_on is None:
                # No usable DeleteOn tag: leave the AMI alone rather than guess.
                print("Skipping image %s: no valid DeleteOn tag" % image.id)
            elif delete_on <= today:
                images_to_delete.append(image.id)

            # The timestamp in the name carries a time-of-day suffix, so match
            # on the date prefix of the timestamp, not the end of the name.
            if image.name[len(name_prefix):].startswith(date_fmt):
                backup_success = True
                print("Latest backup from " + date_fmt + " was a success")

        print("instance " + instance_id + " has " + str(len(instance_images)) + " AMIs")

    print("=============")

    print("About to process the following AMIs:")
    print(images_to_delete)

    if not backup_success:
        print("No current backup found. Termination suspended.")
        return

    if not images_to_delete:
        return

    # Fetch every snapshot owned by the account, following pagination.
    snapshots = []
    snapshot_pages = ec.get_paginator('describe_snapshots').paginate(
        OwnerIds=[os.environ['ami_owner']],
    )
    for page in snapshot_pages:
        snapshots.extend(page.get('Snapshots', []))

    for image_id in images_to_delete:
        print("deregistering image %s" % image_id)
        # The AMI must be deregistered before its backing snapshots can go.
        ec.deregister_image(
            DryRun=False,
            ImageId=image_id,
        )

        for snapshot in snapshots:
            # Snapshots are matched to the AMI by its id appearing anywhere
            # in the snapshot description.
            if image_id in (snapshot.get('Description') or ''):
                print("Deleting snapshot " + snapshot['SnapshotId'])
                ec.delete_snapshot(SnapshotId=snapshot['SnapshotId'])
        print("-------------")


def _delete_on_date(image):
    """Return the date in the image's ``DeleteOn`` tag, or None.

    None is returned when the image has no tags, has no ``DeleteOn`` tag, or
    the tag value is not in ``MM-DD-YYYY`` form.
    """
    for tag in image.tags or []:
        if tag.get('Key') != 'DeleteOn':
            continue
        try:
            return datetime.datetime.strptime(tag.get('Value'), '%m-%d-%Y').date()
        except (TypeError, ValueError):
            return None
    return None
Loading

0 comments on commit 7f8101d

Please sign in to comment.