From f31576b51976d9ea4dafaa895deeebb2daa9a05b Mon Sep 17 00:00:00 2001 From: Bala FA Date: Mon, 19 Oct 2020 17:44:30 +0000 Subject: [PATCH] Refactor XML handling of remove_objects() API (#995) --- docs/API.md | 30 +++--- examples/remove_objects.py | 18 ++-- minio/api.py | 96 ++++++++++------- minio/deleteobjects.py | 167 ++++++++++++++++++++++++++++++ minio/error.py | 14 --- minio/parsers.py | 21 +--- minio/xml_marshal.py | 29 ------ tests/functional/tests.py | 15 ++- tests/unit/remove_objects_test.py | 50 +++------ 9 files changed, 278 insertions(+), 162 deletions(-) create mode 100644 minio/deleteobjects.py diff --git a/docs/API.md b/docs/API.md index e0a427fd2..781595993 100644 --- a/docs/API.md +++ b/docs/API.md @@ -1147,34 +1147,40 @@ minio.remove_object( -### remove_objects(bucket_name, objects_iter) +### remove_objects(bucket_name, delete_object_list, bypass_governance_mode=False) Remove multiple objects. __Parameters__ -| Param | Type | Description | -|:---------------|:-------|:--------------------------------------------------------------------| -| `bucket_name` | _str_ | Name of the bucket. | -| `objects_iter` | _list_ | An iterable type python object providing object names for deletion. | +| Param | Type | Description | +|:-------------------------|:-----------|:--------------------------------------------------------------------| +| `bucket_name` | _str_ | Name of the bucket. | +| `delete_object_list` | _iterable_ | An iterable containing :class:`DeleteObject ` object. | +| `bypass_governance_mode` | _bool_ | Bypass Governance retention mode. | __Return Value__ -| Return | -|:----------------------------------------| -| An iterator contains _MultiDeleteError_ | +| Return | +|:-----------------------------------------------------------------| +| An iterator containing :class:`DeleteError ` object | __Example__ ```py -minio.remove_objects( +errors = minio.remove_objects( "my-bucketname", [ - "my-objectname1", - "my-objectname2", - ("my-objectname3", "13f88b18-8dcd-4c83-88f2-8631fdb6250c"), + DeleteObject("my-objectname1"), + DeleteObject("my-objectname2"), + DeleteObject( + "my-objectname3", + "13f88b18-8dcd-4c83-88f2-8631fdb6250c", + ), ], ) +for error in errors: + print("error occured when deleting object", error) ``` diff --git a/examples/remove_objects.py b/examples/remove_objects.py index aa47a68a2..749e3677c 100644 --- a/examples/remove_objects.py +++ b/examples/remove_objects.py @@ -18,19 +18,17 @@ # are dummy values, please replace them with original values. from minio import Minio -from minio.error import ResponseError +from minio.deleteobjects import DeleteObject client = Minio('s3.amazonaws.com', access_key='YOUR-ACCESSKEYID', secret_key='YOUR-SECRETACCESSKEY') # Remove a prefix recursively. -try: - names = map( - lambda x: x.object_name, - client.list_objects('my-bucketname', 'my-prefix', recursive=True) - ) - for err in client.remove_objects('my-bucketname', names): - print("Deletion Error: {}".format(err)) -except ResponseError as err: - print(err) +delete_object_list = map( + lambda x: DeleteObject(x.object_name), + client.list_objects("my-bucketname", "my-prefix", recursive=True), +) +errors = client.remove_objects("my-bucketname", delete_object_list) +for error in errors: + print("error occured when deleting object", error) diff --git a/minio/api.py b/minio/api.py index 65ecbeb71..228e87e11 100644 --- a/minio/api.py +++ b/minio/api.py @@ -47,6 +47,7 @@ from .credentials import StaticProvider from .datatypes import ListAllMyBucketsResult, Object, parse_list_objects from .definitions import BaseURL, ObjectWriteResult, Part +from .deleteobjects import DeleteError, DeleteRequest, DeleteResult from .error import InvalidResponseError, S3Error, ServerError from .helpers import (amzprefix_user_metadata, check_bucket_name, check_non_empty_string, check_sse, check_ssec, @@ -57,7 +58,7 @@ from .lifecycleconfig import LifecycleConfig from .parsers import (parse_error_response, parse_get_bucket_notification, parse_list_multipart_uploads, - parse_list_parts, parse_multi_delete_response, + parse_list_parts, parse_multipart_upload_result, parse_new_multipart_upload) from .replicationconfig import ReplicationConfig @@ -71,8 +72,7 @@ from .versioningconfig import VersioningConfig from .xml import Element, SubElement, findtext, getbytes, marshal, unmarshal from .xml_marshal import (marshal_bucket_notifications, - xml_marshal_bucket_encryption, - xml_marshal_delete_objects, xml_to_dict) + xml_marshal_bucket_encryption, xml_to_dict) try: from json.decoder import JSONDecodeError @@ -1495,73 +1495,89 @@ def remove_object(self, bucket_name, object_name, version_id=None): query_params={"versionId": version_id} if version_id else None, ) - def _process_remove_objects_batch(self, bucket_name, objects_batch): + def _delete_objects(self, bucket_name, delete_object_list, + quiet=False, bypass_governance_mode=False): """ - Requester and response parser for remove_objects - """ - body = xml_marshal_delete_objects(objects_batch) + Delete multiple objects. + + :param bucket_name: Name of the bucket. + :param delete_object_list: List of maximum 1000 + :class:`DeleteObject ` object. + :param quiet: quiet flag. + :param bypass_governance_mode: Bypass Governance retention mode. + :return: :class:`DeleteResult ` object. + """ + body = marshal(DeleteRequest(delete_object_list, quiet=quiet)) + headers = {"Content-MD5": md5sum_hash(body)} + if bypass_governance_mode: + headers["x-amz-bypass-governance-retention"] = "true" response = self._execute( "POST", bucket_name, body=body, - headers={"Content-MD5": md5sum_hash(body)}, - query_params={'delete': ''}, + headers=headers, + query_params={"delete": ""}, ) - return parse_multi_delete_response(response.data) - def remove_objects(self, bucket_name, objects_iter): + element = ET.fromstring(response.data.decode()) + return ( + DeleteResult([], [DeleteError.fromxml(element)]) + if element.tag.endswith("Error") + else unmarshal(DeleteResult, response.data.decode()) + ) + + def remove_objects(self, bucket_name, delete_object_list, + bypass_governance_mode=False): """ Remove multiple objects. :param bucket_name: Name of the bucket. - :param objects_iter: An iterable type python object providing object - names for deletion. - :return: An iterator contains - :class:`MultiDeleteError `. + :param delete_object_list: An iterable containing + :class:`DeleteObject ` object. + :param bypass_governance_mode: Bypass Governance retention mode. + :return: An iterator containing :class:`DeleteError ` + object. Example:: - minio.remove_objects( + errors = minio.remove_objects( "my-bucketname", [ - "my-objectname1", - "my-objectname2", - ("my-objectname3", "13f88b18-8dcd-4c83-88f2-8631fdb6250c"), + DeleteObject("my-objectname1"), + DeleteObject("my-objectname2"), + DeleteObject( + "my-objectname3", + "13f88b18-8dcd-4c83-88f2-8631fdb6250c", + ), ], ) + for error in errors: + print("error occured when deleting object", error) """ check_bucket_name(bucket_name) - if isinstance(objects_iter, (str, bytes)): - raise TypeError( - 'objects_iter cannot be `str` or `bytes` instance. It must be ' - 'a list, tuple or iterator of object names' - ) # turn list like objects into an iterator. - objects_iter = itertools.chain(objects_iter) - - def check_name(name): - if not isinstance(name, (str, bytes)): - name = name[0] - check_non_empty_string(name) - return True + delete_object_list = itertools.chain(delete_object_list) while True: # get 1000 entries or whatever available. - obj_batch = [ - name for _, name in zip(range(1000), objects_iter) - if check_name(name) + objects = [ + delete_object for _, delete_object in zip( + range(1000), delete_object_list, + ) ] - if not obj_batch: + if not objects: break - errs_result = self._process_remove_objects_batch( - bucket_name, obj_batch, + result = self._delete_objects( + bucket_name, + objects, + quiet=True, + bypass_governance_mode=bypass_governance_mode, ) - # return the delete errors. - for err_result in errs_result: - yield err_result + for error in result.error_list: + yield error def presigned_url(self, method, bucket_name, diff --git a/minio/deleteobjects.py b/minio/deleteobjects.py new file mode 100644 index 000000000..d4959dfe4 --- /dev/null +++ b/minio/deleteobjects.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- +# MinIO Python Library for Amazon S3 Compatible Cloud Storage, (C) +# 2020 MinIO, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Request/response of DeleteObjects API.""" + +from __future__ import absolute_import + +from .xml import Element, SubElement, findall, findtext + + +class DeleteObject: + """Delete object request information.""" + + def __init__(self, name, version_id=None): + self._name = name + self._version_id = version_id + + def toxml(self, element): + """Convert to XML.""" + element = SubElement(element, "Object") + SubElement(element, "Key", self._name) + if self._version_id is not None: + SubElement(element, "VersionId", self._version_id) + return element + + +class DeleteRequest: + """Delete object request.""" + + def __init__(self, object_list, quiet=False): + self._object_list = object_list + self._quiet = quiet + + def toxml(self, element): + """Convert to XML.""" + element = Element("Delete") + if self._quiet: + SubElement(element, "Quiet", str(self._quiet)) + for obj in self._object_list: + obj.toxml(element) + return element + + +class DeletedObject: + """Deleted object information.""" + + def __init__(self, name, version_id, delete_marker, + delete_marker_version_id): + self._name = name + self._version_id = version_id + self._delete_marker = delete_marker + self._delete_marker_version_id = delete_marker_version_id + + @property + def name(self): + """Get name.""" + return self._name + + @property + def version_id(self): + """Get version ID.""" + return self._version_id + + @property + def delete_marker(self): + """Get delete marker.""" + return self._delete_marker + + @property + def delete_marker_version_id(self): + """Get delete marker version ID.""" + return self._delete_marker_version_id + + @classmethod + def fromxml(cls, element): + """Create new object with values from XML element.""" + name = findtext(element, "Key", True) + version_id = findtext(element, "VersionId") + delete_marker = findtext(element, "DeleteMarker") + delete_marker = ( + delete_marker is not None and delete_marker.title() == "True" + ) + delete_marker_version_id = findtext(element, "DeleteMarkerVersionId") + return cls(name, version_id, delete_marker, delete_marker_version_id) + + +class DeleteError: + """Delete error information.""" + + def __init__(self, code, message, name, version_id): + self._code = code + self._message = message + self._name = name + self._version_id = version_id + + @property + def code(self): + """Get error code.""" + return self._code + + @property + def message(self): + """Get error message.""" + return self._message + + @property + def name(self): + """Get name.""" + return self._name + + @property + def version_id(self): + """Get version ID.""" + return self._version_id + + @classmethod + def fromxml(cls, element): + """Create new object with values from XML element.""" + code = findtext(element, "Code", True) + message = findtext(element, "Message") + name = findtext(element, "Key") + version_id = findtext(element, "VersionId") + return cls(code, message, name, version_id) + + +class DeleteResult: + """Delete object result.""" + + def __init__(self, object_list, error_list): + self._object_list = object_list + self._error_list = error_list + + @property + def object_list(self): + """Get object list.""" + return self._object_list + + @property + def error_list(self): + """Get error list.""" + return self._error_list + + @classmethod + def fromxml(cls, element): + """Create new object with values from XML element.""" + elements = findall(element, "Deleted") + object_list = [] + for tag in elements: + object_list.append(DeletedObject.fromxml(tag)) + elements = findall(element, "Error") + error_list = [] + for tag in elements: + error_list.append(DeleteError.fromxml(tag)) + return cls(object_list, error_list) diff --git a/minio/error.py b/minio/error.py index 67f53c5b5..f4a40386c 100644 --- a/minio/error.py +++ b/minio/error.py @@ -116,17 +116,3 @@ def copy(self, code, message): self._bucket_name, self._object_name, ) - - -class MultiDeleteError(MinioException): - """Represents an error message in RemoveObjects S3 API.""" - - def __init__(self, object_name, code, message): - self._object_name = object_name - self._code = code - self._message = message - super().__init__( - "unable to remove object {0}; code: {1}, message: {2}".format( - object_name, code, message, - ), - ) diff --git a/minio/parsers.py b/minio/parsers.py index cda09e421..5bfd5bed0 100644 --- a/minio/parsers.py +++ b/minio/parsers.py @@ -32,8 +32,7 @@ from .definitions import (ListMultipartUploadsResult, ListPartsResult, MultipartUploadResult) -# minio specific. -from .error import MultiDeleteError, S3Error +from .error import S3Error from .helpers import strptime_rfc3339 from .xml_marshal import NOTIFICATIONS_ARN_FIELDNAME_MAP @@ -261,24 +260,6 @@ def _add_notifying_service_config(data, notifications, service_key, return notifications -def parse_multi_delete_response(data): - """Parser for Multi-Object Delete API response. - - :param data: XML response body content from service. - - :return: Returns list of error objects for each delete object that - had an error. - - """ - root = S3Element.fromstring('MultiObjectDeleteResult', data) - return [ - MultiDeleteError(errtag.get_child_text('Key'), - errtag.get_child_text('Code'), - errtag.get_child_text('Message')) - for errtag in root.findall('Error') - ] - - def parse_list_multipart_uploads(data): """Parse ListMultipartUploads API resppnse XML.""" return ListMultipartUploadsResult( diff --git a/minio/xml_marshal.py b/minio/xml_marshal.py index 7623f1196..71f31b426 100644 --- a/minio/xml_marshal.py +++ b/minio/xml_marshal.py @@ -248,32 +248,3 @@ def _add_notification_config_to_xml(node, element_name, configs): SubElement(filter_rule_node, 'Name', filter_rule['Name']) SubElement(filter_rule_node, 'Value', filter_rule['Value']) return node - - -def xml_marshal_delete_objects(keys): - """ - Marshal Multi-Object Delete request body from object names. - - :param object_names: List of object keys to be deleted. - :return: Serialized XML string for multi-object delete request body. - """ - root = Element('Delete') - - # use quiet mode in the request - this causes the S3 Server to - # limit its response to only object keys that had errors during - # the delete operation. - SubElement(root, 'Quiet', "true") - - # add each object to the request. - for key in keys: - version_id = None - if not isinstance(key, (str, bytes)): - version_id = key[1] - key = key[0] - - element = SubElement(root, "Object") - SubElement(element, "Key", key) - if version_id: - SubElement(element, "VersionId", version_id) - - return _get_xml_data(root) diff --git a/tests/functional/tests.py b/tests/functional/tests.py index d8cb820ce..a3b2f1304 100644 --- a/tests/functional/tests.py +++ b/tests/functional/tests.py @@ -40,6 +40,7 @@ from minio import CopyConditions, Minio, PostPolicy from minio.commonconfig import ENABLED +from minio.deleteobjects import DeleteObject from minio.error import S3Error from minio.select.helpers import calculate_crc from minio.selectrequest import (CSVInputSerialization, CSVOutputSerialization, @@ -1843,14 +1844,22 @@ def _test_remove_objects(log_entry, version_check=False): object_names.append( (object_name, version_id) if version_check else object_name, ) - log_entry["args"]["objects_iter"] = object_names + log_entry["args"]["delete_object_list"] = object_names + delete_object_list = [] + for args in object_names: + delete_object_list.append( + DeleteObject(args) if isinstance(args, str) + else DeleteObject(args[0], args[1]) + ) # delete the objects in a single library call. - for err in _CLIENT.remove_objects(bucket_name, object_names): + errs = _CLIENT.remove_objects(bucket_name, delete_object_list) + for err in errs: raise ValueError("Remove objects err: {}".format(err)) finally: # Try to clean everything to keep our server intact - for err in _CLIENT.remove_objects(bucket_name, object_names): + errs = _CLIENT.remove_objects(bucket_name, delete_object_list) + for err in errs: raise ValueError("Remove objects err: {}".format(err)) _CLIENT.remove_bucket(bucket_name) diff --git a/tests/unit/remove_objects_test.py b/tests/unit/remove_objects_test.py index af2dbc26d..0eba1382a 100644 --- a/tests/unit/remove_objects_test.py +++ b/tests/unit/remove_objects_test.py @@ -18,39 +18,15 @@ from unittest import TestCase import mock -from nose.tools import raises from minio import Minio from minio.api import _DEFAULT_USER_AGENT +from minio.deleteobjects import DeleteObject from .minio_mocks import MockConnection, MockResponse class RemoveObjectsTest(TestCase): - @raises(TypeError) - def test_object_is_non_string_iterable_1(self): - client = Minio('localhost:9000') - for err in client.remove_objects('hello', 1234): - print(err) - - @raises(TypeError) - def test_object_is_non_string_iterable_2(self): - client = Minio('localhost:9000') - for err in client.remove_objects('hello', u'abc'): - print(err) - - @raises(TypeError) - def test_object_is_non_string_iterable_3(self): - client = Minio('localhost:9000') - for err in client.remove_objects('hello', b'abc'): - print(err) - - @raises(ValueError) - def test_bucket_invalid_name(self): - client = Minio('localhost:9000') - for err in client.remove_objects('AB&CD', 'world'): - print(err) - @mock.patch('urllib3.PoolManager') def test_object_is_list(self, mock_connection): mock_server = MockConnection() @@ -59,11 +35,14 @@ def test_object_is_list(self, mock_connection): MockResponse('POST', 'https://localhost:9000/hello?delete=', {'User-Agent': _DEFAULT_USER_AGENT, - 'Content-Md5': u'5Tg5SmU9Or43L4+iIyfPrQ=='}, 200, - content='') + 'Content-Md5': u'YcTFWle4oiLJ6sT95FwpdA=='}, 200, + content=b'') ) client = Minio('localhost:9000') - for err in client.remove_objects('hello', ["Ab", "c"]): + for err in client.remove_objects( + "hello", + [DeleteObject("Ab"), DeleteObject("c")], + ): print(err) @mock.patch('urllib3.PoolManager') @@ -74,11 +53,14 @@ def test_object_is_tuple(self, mock_connection): MockResponse('POST', 'https://localhost:9000/hello?delete=', {'User-Agent': _DEFAULT_USER_AGENT, - 'Content-Md5': u'5Tg5SmU9Or43L4+iIyfPrQ=='}, 200, - content='') + 'Content-Md5': u'YcTFWle4oiLJ6sT95FwpdA=='}, 200, + content=b'') ) client = Minio('localhost:9000') - for err in client.remove_objects('hello', ('Ab', 'c')): + for err in client.remove_objects( + "hello", + (DeleteObject("Ab"), DeleteObject("c")), + ): print(err) @mock.patch('urllib3.PoolManager') @@ -89,10 +71,10 @@ def test_object_is_iterator(self, mock_connection): MockResponse('POST', 'https://localhost:9000/hello?delete=', {'User-Agent': _DEFAULT_USER_AGENT, - 'Content-Md5': u'5Tg5SmU9Or43L4+iIyfPrQ=='}, 200, - content='') + 'Content-Md5': u'YcTFWle4oiLJ6sT95FwpdA=='}, 200, + content=b'') ) client = Minio('localhost:9000') - it = itertools.chain(('Ab', 'c')) + it = itertools.chain((DeleteObject("Ab"), DeleteObject("c"))) for err in client.remove_objects('hello', it): print(err)