From 577162d8ddf8f04154a3a02399466db1d5bfbf76 Mon Sep 17 00:00:00 2001
From: cmadjar
Date: Mon, 15 Jan 2024 16:59:50 -0500
Subject: [PATCH 1/3] get Kim's code on 24.1-release branch

---
 python/lib/aws_s3.py | 56 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 3 deletions(-)

diff --git a/python/lib/aws_s3.py b/python/lib/aws_s3.py
index 8df31c83e..0f4303f6f 100755
--- a/python/lib/aws_s3.py
+++ b/python/lib/aws_s3.py
@@ -1,6 +1,7 @@
 """This class interacts with S3 Buckets"""
 
 import boto3
+import lib.utilities
 import os
 from botocore.exceptions import ClientError, EndpointConnectionError
 
@@ -16,6 +17,7 @@ def __init__(self, aws_access_key_id, aws_secret_access_key, aws_endpoint_url, b
         self.aws_endpoint_url = aws_endpoint_url
         self.bucket_name = bucket_name
         self.s3 = self.connect_to_s3_bucket()
+        self.s3_client = self.connect_to_s3_client()
 
         if self.s3:
             self.s3_bucket_obj = self.s3.Bucket(self.bucket_name)
@@ -45,6 +47,49 @@ def connect_to_s3_bucket(self):
 
         return s3
 
+    def connect_to_s3_client(self):
+        """
+        """
+
+        # connect to S3 client
+        try:
+            session = boto3.session.Session()
+            s3_client = session.client(
+                service_name="s3",
+                aws_access_key_id=self.aws_access_key_id,
+                aws_secret_access_key=self.aws_secret_access_key,
+                endpoint_url=self.aws_endpoint_url
+            )
+        except ClientError as err:
+            print(f'\n[ERROR ] S3 connection failure: {format(err)}\n')
+            return
+        except EndpointConnectionError as err:
+            print(f'[ERROR ] {format(err)}\n')
+            return
+
+        return s3_client
+
+    def check_object_content_exists(self, file_path, key):
+        """
+        Check if file content already exists
+        :param file_path: Full path to the file to check hash
+        :type file_path: str
+        :param key: S3 object key. It should be identical to the S3 object key. (It will not include `s3://BUCKET_NAME/`)
+        :type key: str
+        """
+        try:
+            etag = lib.utilities.compute_md5_hash(file_path)
+            self.s3_client.head_object(Bucket=self.bucket_name, Key=key, IfMatch=etag)
+        except ClientError as e:
+            """
+            Per Boto3 documentation for S3.Client.head_object IfMatch will:
+            Return the object only if its entity tag (ETag) is the same as the one specified;
+            otherwise, return a 412 (precondition failed) error.
+            """
+            return False
+        else:
+            return True
+
     def upload_file(self, file_name, s3_object_name):
         """
         Upload a file to an S3 bucket
@@ -59,12 +104,17 @@ def upload_file(self, file_name, s3_object_name):
 
         # Upload the file
         try:
-            print(f"Uploading {s3_file_name} to {self.aws_endpoint_url}/{s3_bucket_name}")
-            s3_bucket.upload_file(file_name, s3_file_name)
+            object_exists = self.check_object_content_exists(file_name, s3_file_name)
+            if not object_exists:
+                print(f"Uploading {s3_file_name} to {self.aws_endpoint_url}/{s3_bucket_name}")
+                s3_bucket.upload_file(file_name, s3_file_name)
+            elif object_exists:
+                print(
+                    f"Skipping! Key Content for {s3_file_name} matches key at {self.aws_endpoint_url}/{s3_bucket_name}")
         except ClientError as err:
             raise Exception(f"{file_name} upload failure - {format(err)}")
 
-    def upload_dir(self, dir_name, s3_object_name, force = False):
+    def upload_dir(self, dir_name, s3_object_name, force=False):
         """
         Upload a directory to an S3 bucket
 

From ab0a10e86afda485da01be6b9a1cddda84b06793 Mon Sep 17 00:00:00 2001
From: cmadjar
Date: Mon, 15 Jan 2024 17:04:47 -0500
Subject: [PATCH 2/3] fix flake8

---
 python/lib/aws_s3.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/lib/aws_s3.py b/python/lib/aws_s3.py
index 0f4303f6f..be5d6e859 100755
--- a/python/lib/aws_s3.py
+++ b/python/lib/aws_s3.py
@@ -74,7 +74,8 @@ def check_object_content_exists(self, file_path, key):
         Check if file content already exists
         :param file_path: Full path to the file to check hash
         :type file_path: str
-        :param key: S3 object key. It should be identical to the S3 object key. (It will not include `s3://BUCKET_NAME/`)
+        :param key: S3 object key. It should be identical to the S3 object key.
+                    (It will not include `s3://BUCKET_NAME/`)
         :type key: str
         """
         try:

From 900fec6e2326e9234eafe3e4ad240d80a6efb264 Mon Sep 17 00:00:00 2001
From: cmadjar
Date: Mon, 15 Jan 2024 17:07:46 -0500
Subject: [PATCH 3/3] fix flake8

---
 python/lib/aws_s3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/lib/aws_s3.py b/python/lib/aws_s3.py
index be5d6e859..ea3e597c7 100755
--- a/python/lib/aws_s3.py
+++ b/python/lib/aws_s3.py
@@ -81,7 +81,7 @@ def check_object_content_exists(self, file_path, key):
         try:
             etag = lib.utilities.compute_md5_hash(file_path)
             self.s3_client.head_object(Bucket=self.bucket_name, Key=key, IfMatch=etag)
-        except ClientError as e:
+        except ClientError:
             """
             Per Boto3 documentation for S3.Client.head_object IfMatch will:
             Return the object only if its entity tag (ETag) is the same as the one specified;