Skip to content

Commit

Permalink
Merge pull request #1 from mgoodfellow/hash-previous-backup-output
Browse files Browse the repository at this point in the history
Write a hash file and handle updates etc.
  • Loading branch information
mgoodfellow committed Jun 30, 2015
2 parents a0ee263 + d8daa64 commit 38f6c43
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 26 deletions.
27 changes: 26 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ It supports the following features:
- Plan based backups
- Custom command run pre-backup
- Storing to S3
- Calculating MD5 hashes of the backup set to avoid uploading duplicate backup sets
- Emailing the result of the backup plans
- Python standard logging framework

Expand Down Expand Up @@ -54,6 +55,7 @@ file
"AWS_REGION": "this is a region",
"EMAIL_FROM": "source@address.com",
"EMAIL_TO": "recipient@address.com",
"HASH_CHECK_FILE": "plan_hashes.txt",
"Plans": [
{
"Name": "MySQL Backup",
Expand Down Expand Up @@ -108,11 +110,34 @@ Run the backup tool using the following method:
s3backup.run_plans()
See ``test.py`` in the ``src`` folder for an example.
See ``test.py`` for an example.

File Hashing
------------

After a backup set is created, an MD5 hash is calculated for it. This is then compared against the previously calculated
hash stored for that particular plan name.

**NOTE:** Do not change the generated HASH_CHECK_FILE!

Finally, be aware of a "gotcha" - the hashes are keyed on the *plan name* - therefore changing the plan name will
cause the backup script to think it needs to upload a new backup set.

Emails
------

An email will be sent after each plan runs, reporting either a success or a failure. In the event of a success,
the email states whether a new backup set was uploaded (including the uploaded file name), or that no changes
were detected and no upload was made.

If there was a failure while running the backup, the exception message will be emailed, and the logs can be
referred to for further information.

Future Improvements
-------------------

These are some of the planned future improvements:

- Run multiple pre-backup commands (by providing an array)
- Allow custom format strings for the output files (instead of the default date/time format)
- Modification of the glob2 library to allow hidden files to be included
53 changes: 33 additions & 20 deletions S3Backup/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

class S3BackupTool:

def __init__(self, config_file="config.json", log_file="s3backup.log"):
def __init__(self, config_file="config.json"):
logger.info('Initialising...')

try:
Expand All @@ -52,17 +52,42 @@ def run_plans(self):
logger.info('Executing plan %d of %d', counter, len(self.PLANS))

try:
plan.run()
self.__send_status_email(plan, True)
updated, output_file = plan.run()
self.__send_success_email(plan, updated, output_file)
except Exception, e:
logger.error('Failed to run plan: %s', e)
self.__send_status_email(plan, False, e)
self.__send_failure_email(plan, e)

counter += 1

logger.info('Finished running backup plans')

def __send_status_email(self, plan, success, exception=None):
def __send_success_email(self, plan, updated, output_file):
    """Email a SUCCESS report for a completed plan run.

    The body states whether a new backup set was uploaded (and its file
    name) or whether the set was unchanged and no upload took place.
    """
    run_time = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())

    if updated:
        detail = 'The backup set had changed, so a new backup was uploaded: %s' % output_file
    else:
        detail = 'The backup set had not changed. No new backup uploaded'

    subject = '[S3-Backup] [SUCCESS] - Plan: %s' % plan.name
    body = 'The backup plan, %s, run at %s was SUCCESSFUL\n\n' % (plan.name, run_time) + detail

    self.__send_status_email(subject, body)

def __send_failure_email(self, plan, exception):
    """Email a FAILURE report for a plan run, including the exception text."""
    run_time = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())

    subject = '[S3-Backup] [FAILURE] - Plan: %s' % plan.name
    body = ('The backup plan, %s, run at %s was a FAILURE\n\n' % (plan.name, run_time)
            + '\n\nDetailed failure information:\n\n%s' % exception)

    self.__send_status_email(subject, body)

def __send_status_email(self, subject, body):
if self.CONFIGURATION['EMAIL_FROM'] is None or self.CONFIGURATION['EMAIL_TO'] is None:
logger.debug('Email not provided, so status update not sent')
return
Expand All @@ -72,25 +97,13 @@ def __send_status_email(self, plan, success, exception=None):
aws_access_key_id=self.CONFIGURATION['AWS_KEY'],
aws_secret_access_key=self.CONFIGURATION['AWS_SECRET'])

result = 'SUCCESS'
if not success:
result = 'FAILURE'

body = 'The backup plan, %s, run at %s was %s' % (
plan.name,
strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()),
result)

if exception is not None:
body += '\n\nDetailed failure information:\n\n%s' % exception

try:
conn.send_email(
self.CONFIGURATION['EMAIL_FROM'],
'[S3-Backup] [%s] - Plan: %s' % (result, plan.name),
subject,
body,
[self.CONFIGURATION['EMAIL_TO']])
except Exception, e:
logger.error('Failed to send email to {0:s} regarding plan: {1:s}'.format(self.CONFIGURATION['EMAIL_TO'],
plan.name),
logger.error('Failed to send email to {0:s} with subject {1:s}'.format(self.CONFIGURATION['EMAIL_TO'],
subject),
e)
3 changes: 2 additions & 1 deletion S3Backup/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

logger = logging.getLogger(name='config_loader')

required_root_values = ['AWS_KEY', 'AWS_SECRET', 'AWS_BUCKET', 'AWS_REGION', 'Plans']
required_root_values = ['AWS_KEY', 'AWS_SECRET', 'AWS_BUCKET', 'AWS_REGION', 'HASH_CHECK_FILE', 'Plans']
optional_root_values = ['EMAIL_FROM', 'EMAIL_TO']

def config_setup(config_file):
Expand All @@ -39,6 +39,7 @@ def config_setup(config_file):
'AWS_SECRET': '',
'AWS_BUCKET': '',
'AWS_REGION': '',
'HASH_CHECK_FILE': '',
'EMAIL_FROM': None,
'EMAIL_TO': None
}
Expand Down
64 changes: 64 additions & 0 deletions S3Backup/hash_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import hashlib
import os
from shutil import move
from tempfile import mkstemp

BLOCKSIZE=65535

def find_hash(hash_file, plan_name):
    """Return the stored hash for *plan_name* from the hash file.

    The hash file holds one ``plan_name=hash`` entry per line.

    :param hash_file: path to the hash check file
    :param plan_name: name of the plan to look up
    :return: the hash string, or None when the file or the entry does not exist
    """
    filename = os.path.normpath(hash_file)
    if not os.path.isfile(filename):
        return None

    # Use a context manager so the handle is closed deterministically
    # (previously the file object was left for the GC to reclaim).
    with open(filename, 'r') as plan_hashes:
        for line in plan_hashes:
            parts = line.strip().split('=')
            # Only well-formed "name=hash" lines are considered
            if len(parts) == 2 and parts[0] == plan_name:
                return parts[1]

    return None

def update_hash(hash_file, plan_name, hash_value):
    """Create or update the ``plan_name=hash_value`` entry in the hash file.

    If the hash file does not exist it is created with the single entry.
    Otherwise the file is rebuilt into a temporary file with the entry
    replaced (or appended when absent), which is then moved into place.

    :param hash_file: path to the hash check file
    :param plan_name: name of the plan whose hash is being recorded
    :param hash_value: the new hash string to store
    """
    filename = os.path.normpath(hash_file)

    # No existing file: shortcut by writing the single entry.
    if not os.path.isfile(filename):
        with open(filename, 'w') as new_file:
            new_file.write('%s=%s\n' % (plan_name, hash_value))
        return

    # Otherwise rebuild the file line by line into a temp file.
    fh, abs_path = mkstemp()
    is_written = False

    # os.fdopen wraps the descriptor returned by mkstemp directly, so the
    # with-block closes it even on error (previously the path was opened a
    # second time and the raw fd closed separately, leaking it on failure).
    with os.fdopen(fh, 'w') as new_file:
        with open(filename, 'r') as old_file:
            for line in old_file:
                parts = line.strip().split('=')
                # Match find_hash's parsing: only well-formed "name=hash"
                # lines are candidates for replacement.
                if len(parts) == 2 and parts[0] == plan_name:
                    is_written = True
                    new_file.write('%s=%s\n' % (plan_name, hash_value))
                else:
                    new_file.write(line)

        # The plan had no entry yet - append it.
        if not is_written:
            new_file.write('%s=%s\n' % (plan_name, hash_value))

    # Swap the rebuilt file into place of the original.
    os.remove(filename)
    move(abs_path, filename)

def calc_hash(filename, blocksize=65535):
    """Calculate the MD5 hex digest of a file's contents.

    The file is read in chunks so arbitrarily large backup sets can be
    hashed without loading them fully into memory.  The chunk size is now
    a parameter (defaulting to the previous hard-coded BLOCKSIZE value);
    the digest is identical regardless of chunk size.

    :param filename: path of the file to hash
    :param blocksize: read chunk size in bytes
    :return: the MD5 digest as a hex string
    """
    hasher = hashlib.md5()
    with open(filename, 'rb') as afile:
        buf = afile.read(blocksize)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(blocksize)
    return hasher.hexdigest()
44 changes: 41 additions & 3 deletions S3Backup/plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from zipfile import ZipFile
import time
import boto.ses
from S3Backup import hash_file

required_plan_values = ['Name', 'Src', 'OutputPrefix']
optional_plan_values = ['Command']
Expand All @@ -52,6 +53,8 @@ def __init__(self, raw_plan, configuration):
self.command = None
self.output_file = '%s_%s.zip' % (raw_plan['OutputPrefix'], time.strftime("%Y-%m-%d_%H-%M-%S"))

self.new_hash = None

if 'Command' in raw_plan:
self.command = raw_plan['Command']

Expand All @@ -63,7 +66,9 @@ def run(self):
The plan is run in the following order:
1) (if applicable) Run the external command provided
2) Zip source file(s) to destination file
3) Upload destination file to S3 bucket
3) Perform hash check to see if there are any changes (which would require an upload)
4) Upload destination file to S3 bucket
5) Update hash file with new hash
"""
logger.info('Running plan "%s"', self.name)

Expand All @@ -74,12 +79,24 @@ def run(self):
# 2) Zip the source file to the destination file
self.__zip_files()

# 3) Upload destination file to S3 bucket
updated = False

try:
self.__upload()
# 3) Perform hash check to see if there are any changes (which would require an upload)
if not self.__hash_check():
# 4) Upload destination file to S3 bucket
self.__upload()

# 5) Update hash file with new hash
self.__update_hash()

updated = True

finally:
self.__cleanup()

return updated, self.output_file

def __run_command(self):
logger.info('Executing custom command...')

Expand Down Expand Up @@ -137,6 +154,27 @@ def __upload(self):
logger.error('Failed to upload backup file to S3: %s', e)
raise

def __hash_check(self):
    """Compare the freshly built backup set against the stored hash.

    Calculates the MD5 of the new output file, storing it in
    ``self.new_hash`` as a side effect, and returns True when it matches
    the hash previously recorded for this plan (i.e. no upload needed).
    """
    stored = hash_file.find_hash(self.CONFIGURATION['HASH_CHECK_FILE'], self.name)

    if stored is None:
        logger.debug('No previous hash found for plan %s', self.name)
    else:
        logger.debug('Got a previous hash for plan %s of %s', self.name, stored)

    self.new_hash = hash_file.calc_hash(self.output_file)
    logger.debug('New hash for plan %s of %s', self.name, self.new_hash)

    return self.new_hash == stored

def __update_hash(self):
    """Persist the newly calculated hash for this plan to the hash check file."""
    new_hash = self.new_hash
    if new_hash is None:
        # run() should have called __hash_check before reaching this point
        logger.error('Could not update hash as no hash was found')
        return

    hash_file.update_hash(self.CONFIGURATION['HASH_CHECK_FILE'], self.name, new_hash)

def __cleanup(self):
logger.info('Cleaning up temporary file: %s', self.output_file)
try:
Expand Down
3 changes: 2 additions & 1 deletion TODO.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Features to be added

1. Ability to choose custom format strings on output file (rather than automatically appending date/time)
2. Ability to run multiple commands
2. Ability to run multiple commands
3. Modify the glob2 library to support hidden files
1 change: 1 addition & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"AWS_REGION": "this is a region",
"EMAIL_FROM": "source@address.com",
"EMAIL_TO": "recipient@address.com",
"HASH_CHECK_FILE": "plan_hashes.txt",
"Plans": [
{
"Name": "MySQL Backup",
Expand Down

0 comments on commit 38f6c43

Please sign in to comment.