cmd-buildprep, cmd-buildupload: retry on s3 errors
Use boto3 instead of CLI calls. Add a TransferConfig to download_file to
automatically retry on connection errors. Use a retry decorator to log and
retry errors on S3 head and upload requests.
vrutkovs authored and jlebon committed Sep 20, 2019
1 parent 8494839 commit 11c11da
Showing 2 changed files with 75 additions and 46 deletions.
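For context: the hunks below import retry_stop, retry_requests_exception, retry_s3_exception, and retry_callback from cmdlib, but their definitions are not part of this commit. A minimal sketch of what such tenacity-based helpers could look like (the names are taken from the diff; the bodies are assumptions and may differ from the real src/cmdlib.py):

import requests
import botocore.exceptions
from tenacity import retry_if_exception_type, stop_after_attempt, stop_after_delay

# hypothetical sketch: stop after 5 attempts or 10 seconds, whichever comes first
retry_stop = (stop_after_delay(10) | stop_after_attempt(5))

# retry plain HTTP fetches on any requests-level failure
retry_requests_exception = retry_if_exception_type(requests.exceptions.RequestException)

# retry S3 head/upload calls on transient connection problems
retry_s3_exception = (retry_if_exception_type(botocore.exceptions.ConnectionClosedError) |
                      retry_if_exception_type(botocore.exceptions.ConnectTimeoutError) |
                      retry_if_exception_type(botocore.exceptions.EndpointConnectionError))


def retry_callback(retry_state):
    # tenacity passes a RetryCallState; log the failure before the next attempt
    print(f"Retrying after {retry_state.outcome.exception()}")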
23 changes: 15 additions & 8 deletions src/cmd-buildprep
@@ -120,7 +120,7 @@ class HTTPFetcher(Fetcher):
    def __init__(self, url_base):
        super().__init__(url_base)

-    @retry(stop=retry_stop, retry=retry_requests_exception)
+    @retry(stop=retry_stop, retry=retry_requests_exception, before_sleep=retry_callback)
    def fetch_impl(self, url, dest):
        # notice we don't use `stream=True` here; the stuff we're fetching for
        # now is super small
@@ -129,7 +129,7 @@ class HTTPFetcher(Fetcher):
        with open(dest, mode='wb') as f:
            f.write(r.content)

-    @retry(stop=retry_stop, retry=retry_requests_exception)
+    @retry(stop=retry_stop, retry=retry_requests_exception, before_sleep=retry_callback)
    def exists_impl(self, url):
        with requests.head(url) as r:
            if r.status_code == 200:
@@ -143,20 +143,27 @@ class S3Fetcher(Fetcher):

    def __init__(self, url_base):
        super().__init__(url_base)
+        self.s3 = boto3.client('s3')
+        self.s3config = boto3.s3.transfer.TransferConfig(
+            num_download_attempts=5
+        )

    def fetch_impl(self, url, dest):
-        subprocess.check_call(['aws', 's3', 'cp', url, dest],
-                              stdout=subprocess.DEVNULL)
+        assert url.startswith("s3://")
+        bucket, key = url[len("s3://"):].split('/', 1)
+        # this function does not need to be retried with the decorator as download_file would
+        # retry automatically based on s3config settings
+        self.s3.download_file(bucket, key, dest, Config=self.s3config)

+    @retry(stop=retry_stop, retry=retry_s3_exception, before_sleep=retry_callback)
    def exists_impl(self, url):
        assert url.startswith("s3://")
        bucket, key = url[len("s3://"):].split('/', 1)
-        s3 = boto3.client('s3')
        # sanity check that the bucket exists and we have access to it
-        s3.head_bucket(Bucket=bucket)
+        self.s3.head_bucket(Bucket=bucket)
        try:
-            s3.head_object(Bucket=bucket, Key=key)
-        except botocore.exceptions.ClientError as e:
+            self.s3.head_object(Bucket=bucket, Key=key)
+        except ClientError as e:
            if e.response['Error']['Code'] == '404':
                return False
            raise e
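A standalone illustration of the TransferConfig behaviour S3Fetcher now relies on (the bucket, key, and local path here are invented for the example): boto3's managed download_file retries dropped connections internally, up to num_download_attempts times, which is why fetch_impl needs no retry decorator of its own.

import boto3
from boto3.s3.transfer import TransferConfig

s3 = boto3.client('s3')
config = TransferConfig(num_download_attempts=5)
# the managed transfer retries connection errors by itself; no decorator needed
s3.download_file('example-bucket', 'builds/builds.json', '/tmp/builds.json',
                 Config=config)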
98 changes: 60 additions & 38 deletions src/cmd-buildupload
@@ -6,12 +6,14 @@
import argparse
import json
import os
-import subprocess
import sys
import tempfile
+import boto3
+from botocore.exceptions import ClientError
+from tenacity import retry

sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-from cmdlib import load_json, Builds  # noqa: E402
+from cmdlib import load_json, Builds, retry_stop, retry_s3_exception, retry_callback  # noqa: E402

# set image artifact caching at 1y; it'll probably get evicted before that...
# see also: https://stackoverflow.com/questions/2970938
@@ -53,32 +55,33 @@ def parse_args():


def cmd_upload_s3(args):
+    bucket, prefix = args.url.split('/', 1)
    if not args.freshen:
        builds = Builds()
        if args.build == 'latest':
            args.build = builds.get_latest()
        print(f"Targeting build: {args.build}")
        if builds.is_legacy():
-            s3_upload_build(args, builds.get_build_dir(args.build), args.build)
+            s3_upload_build(args, builds.get_build_dir(args.build), bucket, f'{prefix}/{args.build}')
        else:
            for arch in builds.get_build_arches(args.build):
                s3_upload_build(args, builds.get_build_dir(args.build, arch),
-                                f'{args.build}/{arch}')
+                                bucket, f'{prefix}/{args.build}/{arch}')
        # if there's anything else in the build dir, just upload it too,
        # e.g. pipelines might inject additional metadata
        for f in os.listdir(f'builds/{args.build}'):
            # arches already uploaded higher up
            if f in builds.get_build_arches(args.build):
                continue
            # assume it's metadata
-            s3_cp(args, CACHE_MAX_AGE_METADATA,
-                  f'builds/{args.build}/{f}', f'{args.build}/{f}')
+            s3_copy(f'builds/{args.build}/{f}', bucket, f'{prefix}/{args.build}/{f}',
+                    CACHE_MAX_AGE_METADATA, args.acl)
    if not args.skip_builds_json:
-        s3_cp(args, CACHE_MAX_AGE_METADATA,
-              'builds/builds.json', 'builds.json')
+        s3_copy('builds/builds.json', bucket, f'{prefix}/builds.json',
+                CACHE_MAX_AGE_METADATA, args.acl, extra_args={}, dry_run=args.dry_run)


-def s3_upload_build(args, builddir, dest):
+def s3_upload_build(args, builddir, bucket, prefix):
    build = load_json(f'{builddir}/meta.json')

    # Upload images with special handling for gzipped data.
@@ -87,7 +90,7 @@ def s3_upload_build(args, builddir, dest):
        img = build['images'][imgname]
        bn = img['path']
        path = os.path.join(builddir, bn)
-        s3_path = f'{dest}/{bn}'
+        s3_path = f'{prefix}/{bn}'
        set_content_disposition = False

        # Don't use the Content-Disposition trick with bare-metal images since
@@ -98,56 +101,75 @@ def s3_upload_build(args, builddir, dest):
                args.enable_gz_peel):
            nogz = bn[:-3]
            img['path'] = nogz
-            s3_path = f'{dest}/{nogz}'
+            s3_path = f'{prefix}/{nogz}'
            set_content_disposition = True

        if not os.path.exists(path):
-            if s3_check_exists(args, s3_path):
+            if s3_check_exists(bucket, s3_path):
                continue
            else:
                raise Exception(f"{path} not found locally or in the s3 destination!")

+        extra_args = {}
        if set_content_disposition:
-            s3_cp(args, CACHE_MAX_AGE_ARTIFACT, path, s3_path,
-                  '--content-encoding=gzip',
-                  f'--content-disposition=inline; filename={img["path"]}')
-        else:
-            s3_cp(args, CACHE_MAX_AGE_ARTIFACT, path, s3_path)
+            extra_args = {
+                'ContentEncoding': 'gzip',
+                'ContentDisposition': f'inline; filename={img["path"]}'
+            }
+        s3_copy(path, bucket, s3_path,
+                CACHE_MAX_AGE_ARTIFACT, args.acl,
+                extra_args=extra_args,
+                dry_run=args.dry_run)
        uploaded.add(bn)

    for f in os.listdir(builddir):
        # we do meta.json right after
        if f in uploaded or f == 'meta.json':
            continue
        path = os.path.join(builddir, f)
-        s3_cp(args, CACHE_MAX_AGE_ARTIFACT, path, f'{dest}/{f}')
+        s3_copy(path, bucket, f'{prefix}/{f}',
+                CACHE_MAX_AGE_ARTIFACT, args.acl,
+                extra_args={},
+                dry_run=args.dry_run)

    # Now upload a modified version of the meta.json which has the fixed
    # filenames without the .gz suffixes. We don't want to modify the local
    # build dir.
    with tempfile.NamedTemporaryFile('w') as f:
        json.dump(build, f, indent=4)
        f.flush()
-        s3_cp(args, CACHE_MAX_AGE_METADATA, f.name, f'{dest}/meta.json',
-              '--content-type=application/json')
-
-
-def s3_check_exists(args, path):
-    path = f'{args.url}/{path}'
-    bucket, key = path.split("/", 1)
-    s3_args = ['aws', 's3api', 'head-object', '--bucket', bucket, '--key', key]
-    return subprocess.call(s3_args, stdout=subprocess.DEVNULL) == 0
-
-
-def s3_cp(args, max_age, src, dest, *s3_args):
-    acl = f'--acl={args.acl}'
-    max_age = f'--cache-control=max-age={max_age}'
-    dest = f's3://{args.url}/{dest}'
-    s3_args = ['aws', 's3', 'cp', acl, src, dest, max_age, *s3_args]
-    print("%s: %s" % ("Would run" if args.dry_run else "Running",
-                      subprocess.list2cmdline(s3_args)))
-    if not args.dry_run:
-        subprocess.check_call(s3_args, stdout=subprocess.DEVNULL)
+        s3_copy(f.name, bucket, f'{prefix}/meta.json',
+                CACHE_MAX_AGE_METADATA, args.acl,
+                extra_args={
+                    'ContentType': 'application/json'
+                },
+                dry_run=args.dry_run)
+
+
+@retry(stop=retry_stop, retry=retry_s3_exception, before_sleep=retry_callback)
+def s3_check_exists(bucket, key):
+    print(f"Checking if bucket '{bucket}' has key '{key}'")
+    s3 = boto3.client('s3')
+    try:
+        s3.head_object(Bucket=bucket, Key=key)
+    except ClientError as e:
+        if e.response['Error']['Code'] == '404':
+            return False
+        raise e
+    return True
+
+
+@retry(stop=retry_stop, retry=retry_s3_exception, retry_error_callback=retry_callback)
+def s3_copy(src, bucket, key, max_age, acl, extra_args={}, dry_run=False):
+    upload_args = {
+        'CacheControl': f'max-age={max_age}',
+        'ACL': acl
+    }
+    upload_args.update(extra_args)
+    s3 = boto3.client('s3')
+    print(f"{'Would upload' if dry_run else 'Uploading'} {src} to s3://{bucket}/{key} {extra_args if len(extra_args) else ''}")
+    if not dry_run:
+        s3.upload_file(Filename=src, Bucket=bucket, Key=key, ExtraArgs=upload_args)


if __name__ == '__main__':
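As a usage note on the new s3_copy helper above: the cache, ACL, and content headers are passed to boto3's upload_file via ExtraArgs. A standalone sketch with made-up bucket, key, and file names:

import boto3

s3 = boto3.client('s3')
# ExtraArgs entries become the uploaded object's headers/metadata
s3.upload_file(
    Filename='builds/builds.json',
    Bucket='example-bucket',
    Key='prod/builds.json',
    ExtraArgs={
        'CacheControl': 'max-age=300',
        'ACL': 'public-read',
        'ContentType': 'application/json',
    })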
