From 545e39d48fcb650d34044e565b16ea375a06476a Mon Sep 17 00:00:00 2001 From: davidlm Date: Tue, 6 Jun 2023 17:25:09 -0400 Subject: [PATCH] wip --- botocore/awsrequest.py | 141 ++++++++++++++++------------------ botocore/config.py | 5 +- botocore/configprovider.py | 4 +- botocore/endpoint.py | 17 ++-- botocore/exceptions.py | 15 ---- botocore/utils.py | 28 ------- tests/unit/test_awsrequest.py | 103 ++++++------------------- 7 files changed, 102 insertions(+), 211 deletions(-) diff --git a/botocore/awsrequest.py b/botocore/awsrequest.py index 9d040fc9cf..918fe60126 100644 --- a/botocore/awsrequest.py +++ b/botocore/awsrequest.py @@ -13,6 +13,7 @@ # language governing permissions and limitations under the License. import functools import gzip +import io import logging from collections.abc import Mapping @@ -34,8 +35,6 @@ logger = logging.getLogger(__name__) -SUPPORTED_COMPRESSION_ENCODINGS = {"gzip": gzip.compress} - class AWSHTTPResponse(HTTPResponse): # The *args, **kwargs is used because the args are slightly @@ -340,26 +339,6 @@ def _urljoin(endpoint_url, url_path, host_prefix): return reconstructed -def compress_aws_request(config, request, operation_model): - botocore.utils.validate_compression_config(config) - compressor = AWSRequestCompressor() - # ``AWSRequest.body`` is a property that will compute a URL encoded value - # if its a dictionary. So we shouild call it once hear to minimize recalculation. - body = request.body - if compressor.should_compress_request( - config=config, - operation_model=operation_model, - body=body, - ): - # AWSRequest.data is the raw input for the request body. This is the parameter - # that should be updated to properly construct an AWSPreparedRequest object. - request.data = compressor.compress( - body=body, - headers=request.headers, - encodings=operation_model.request_compression['encodings'], - ) - - class AWSRequestPreparer: """ This class performs preparation on AWSRequest objects similar to that of @@ -560,68 +539,82 @@ def reset_stream(self): class AWSRequestCompressor: """A class that can compress the body of an ``AWSRequest``.""" - def compress(self, body, encodings, headers): - """Compresses the reqeust body using the specified encodings. + def compress(self, config, request, operation_model): + """Compresses the request body using the specified encodings if conditions + are met. - Set or append the `Content-Encoding` header with the matched encoding - if not present. + Check if the request should be compressed based on the contents of its + body and config settings. Set or append the `Content-Encoding` header + with the matched encoding if not present. """ - for encoding in encodings: - encoder = SUPPORTED_COMPRESSION_ENCODINGS.get(encoding) - if encoder is not None: - if 'Content-Encoding' not in headers: - headers['Content-Encoding'] = encoding - elif encoding not in headers['Content-Encoding']: - headers.replace_header( - 'Content-Encoding', - f'{headers["Content-Encoding"]},{encoding}', + # ``AWSRequest.body`` is a computed property that computes a URL-encoded + # value if it's a dictionary. So we should call it once here to minimize + # recalculation. + body = request.body + if self._should_compress_request(config, body, operation_model): + encodings = operation_model.request_compression['encodings'] + headers = request.headers + for encoding in encodings: + encoder = getattr(self, f'_{encoding}_compress_body', None) + if encoder is not None: + if 'Content-Encoding' not in headers: + headers['Content-Encoding'] = encoding + elif encoding not in headers['Content-Encoding'].split( + ',' + ): + headers.replace_header( + 'Content-Encoding', + f'{headers["Content-Encoding"]},{encoding}', + ) + # AWSRequest.data is the raw input for the request body. + # This is the parameter that should be updated to properly + # construct an AWSPreparedRequest object. + request.data = encoder(body) + else: + logger.debug( + 'Unsupported compression encoding: %s' % encoding ) - return self._compress_body(body, encoder) - else: - logger.debug('Unsupported compression encoding: %s' % encoding) - return body - def _compress_body(self, body, encoder): - if isinstance(body, str): - return encoder(body.encode('utf-8')) - elif isinstance(body, (bytes, bytearray)): - return encoder(body) + def _gzip_compress_body(self, body): + if isinstance(body, (bytes, bytearray)): + return gzip.compress(body) elif hasattr(body, 'read'): - return encoder(b''.join(self._read_body(body))) - raise TypeError('Unable to compress body of type: %s' % type(body)) - - def _read_body(self, body): - while True: - # urllib3 and by extension the http stdlib uses a chunksize of 8192 - # so we do the same here. - chunk = body.read(8192) - if not chunk: - break - if isinstance(chunk, str): - chunk = chunk.encode('utf-8') - yield chunk - if hasattr(body, 'seek'): + if hasattr(body, 'seek') and hasattr(body, 'tell'): + current_position = body.tell() + compressed_obj = self._gzip_compress_fileobj(body) + body.seek(current_position) + return compressed_obj + else: + return self._gzip_compress_fileobj(body) + + def _gzip_compress_fileobj(self, body): + compressed_obj = io.BytesIO() + with gzip.GzipFile(fileobj=compressed_obj, mode='wb') as gz: + while True: + chunk = body.read(8192) + if not chunk: + break + if isinstance(chunk, str): + chunk = chunk.encode('utf-8') + gz.write(chunk) + compressed_obj.seek(0) + if hasattr(body, 'seek') and hasattr(body, 'tell'): body.seek(0) + return compressed_obj - def should_compress_request(self, config, body, operation_model): - """Check if an AWSRequest object's body should be compressed.""" - # compression must be explicitly disabled - if ( - botocore.utils.ensure_boolean(config.disable_request_compression) - is not True - ): - min_size = config.request_min_compression_size_bytes - if min_size is None: - min_size = 10240 - else: - min_size = int(min_size) + def _should_compress_request(self, config, body, operation_model): + if config.disable_request_compression is not True: # Request is compressed no matter the content length if it has a streaming input. # However, if the stream has the `requiresLength` trait it is NOT compressed. + if operation_model.has_streaming_input: + return ( + 'requiresLength' + not in operation_model.get_streaming_input().metadata + ) return ( - operation_model.has_streaming_input - and 'requiresLength' - not in operation_model.get_streaming_input().metadata - ) or min_size <= self._get_body_size(body) + config.request_min_compression_size_bytes + <= self._get_body_size(body) + ) return False def _get_body_size(self, body): diff --git a/botocore/config.py b/botocore/config.py index 38081dd174..11b8bba955 100644 --- a/botocore/config.py +++ b/botocore/config.py @@ -196,7 +196,10 @@ class Config: :type request_min_compression_size_bytes: int :param request_min_compression_bytes: The minimum size in bytes that a - request body should be to trigger compression. + request body should be to trigger compression. A request is compressed + regardless of the size of the body if the operation has streaming input, + but if the operation also contains the `requiresLength` trait, the request + is not compressed. Defaults to None. diff --git a/botocore/configprovider.py b/botocore/configprovider.py index a9e2d0ccfa..0df6692c4e 100644 --- a/botocore/configprovider.py +++ b/botocore/configprovider.py @@ -143,13 +143,13 @@ 'request_min_compression_size_bytes', 'AWS_REQUEST_MIN_COMPRESSION_SIZE_BYTES', 10240, - None, + int, ), 'disable_request_compression': ( 'disable_request_compression', 'AWS_DISABLE_REQUEST_COMPRESSION', False, - None, + utils.ensure_boolean, ), } # A mapping for the s3 specific configuration vars. These are the configuration diff --git a/botocore/endpoint.py b/botocore/endpoint.py index c5c53b799d..2230254bed 100644 --- a/botocore/endpoint.py +++ b/botocore/endpoint.py @@ -20,7 +20,7 @@ import uuid from botocore import parsers -from botocore.awsrequest import compress_aws_request, create_request_object +from botocore.awsrequest import AWSRequestCompressor, create_request_object from botocore.exceptions import HTTPClientError from botocore.history import get_global_history_recorder from botocore.hooks import first_non_none_response @@ -75,14 +75,8 @@ def convert_to_response_dict(http_response, operation_model): class Endpoint: - """ - Represents an endpoint for a particular service in a specific - region. Only an endpoint can make requests. - - :ivar service: The Service object that describes this endpoints - service. - :ivar host: The fully qualified endpoint hostname. - :ivar session: The session object. + """Represents an endpoint for a particular service in a specific region. + Only an endpoint can make requests. """ def __init__( @@ -103,6 +97,7 @@ def __init__( self.http_session = http_session if self.http_session is None: self.http_session = URLLib3Session() + self._request_compressor = AWSRequestCompressor() def __repr__(self): return f'{self._endpoint_prefix}({self.host})' @@ -121,8 +116,8 @@ def make_request(self, operation_model, request_dict): def create_request(self, params, operation_model=None): request = create_request_object(params) if operation_model: - if operation_model.request_compression is not None: - compress_aws_request( + if operation_model.request_compression: + self._request_compressor.compress( params['context']['client_config'], request, operation_model, diff --git a/botocore/exceptions.py b/botocore/exceptions.py index b59160a8c1..1c480abbf8 100644 --- a/botocore/exceptions.py +++ b/botocore/exceptions.py @@ -814,18 +814,3 @@ class EndpointResolutionError(EndpointProviderError): class UnknownEndpointResolutionBuiltInName(EndpointProviderError): fmt = 'Unknown builtin variable name: {name}' - - -class InvalidRequestMinCompressionSizeError(BotoCoreError): - fmt = ( - 'Invalid value provided to "request_min_compression_size_bytes": ' - '{input_value} must be an integer greater than or equal to 0 and ' - 'less than or equal to 10485760.' - ) - - -class InvalidDisableRequestCompressionError(BotoCoreError): - fmt = ( - 'Invalid value provided to "disable_request_compression": ' - '{input_value} must be a boolean.' - ) diff --git a/botocore/utils.py b/botocore/utils.py index e80289483f..484dd0f8f6 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -67,7 +67,6 @@ ConnectTimeoutError, EndpointConnectionError, HTTPClientError, - InvalidDisableRequestCompressionError, InvalidDNSNameError, InvalidEndpointConfigurationError, InvalidExpressionError, @@ -75,7 +74,6 @@ InvalidIMDSEndpointError, InvalidIMDSEndpointModeError, InvalidRegionError, - InvalidRequestMinCompressionSizeError, MetadataRetrievalError, MissingDependencyException, ReadTimeoutError, @@ -3317,29 +3315,3 @@ def _serialize_if_needed(self, value, iso=False): return value.isoformat() return value.strftime('%Y-%m-%dT%H:%M:%S%Z') return value - - -def validate_compression_config(config): - disable_request_compression = config.disable_request_compression - if ( - disable_request_compression is not None - and not isinstance(disable_request_compression, bool) - and not ( - isinstance(disable_request_compression, str) - and disable_request_compression.lower() in ('true', 'false') - ) - ): - raise InvalidDisableRequestCompressionError( - input_value=disable_request_compression - ) - min_size = config.request_min_compression_size_bytes - if min_size is not None: - try: - min_size = int(min_size) - except (TypeError, ValueError): - raise InvalidRequestMinCompressionSizeError(input_value=min_size) - else: - if not 0 <= min_size <= 1024**2: - raise InvalidRequestMinCompressionSizeError( - input_value=min_size - ) diff --git a/tests/unit/test_awsrequest.py b/tests/unit/test_awsrequest.py index 6c6b8979cb..3acabcda55 100644 --- a/tests/unit/test_awsrequest.py +++ b/tests/unit/test_awsrequest.py @@ -28,17 +28,12 @@ AWSRequestCompressor, AWSResponse, HeadersDict, - compress_aws_request, create_request_object, prepare_request_dict, ) from botocore.compat import HTTPHeaders, file_type from botocore.config import Config -from botocore.exceptions import ( - InvalidDisableRequestCompressionError, - InvalidRequestMinCompressionSizeError, - UnseekableStreamError, -) +from botocore.exceptions import UnseekableStreamError from tests import mock, unittest @@ -169,6 +164,10 @@ def _streaming_op_with_compression_requires_length(): disable_request_compression=False, request_min_compression_size_bytes=128, ) +COMPRESSION_CONFIG_0_BYTES = Config( + disable_request_compression=False, + request_min_compression_size_bytes=0, +) GZIP_ENCODINGS = ['gzip'] @@ -1031,43 +1030,17 @@ def test_iteration(self): True, "gzip", ), - ( - Config( - disable_request_compression='false', - request_min_compression_size_bytes='128', - ), - aws_request(), - OP_WITH_COMPRESSION, - True, - 'gzip', - ), - ( - Config( - disable_request_compression='fALse', - request_min_compression_size_bytes='128', - ), - aws_request(), - OP_WITH_COMPRESSION, - True, - 'gzip', - ), - ( - Config(), - aws_request(), - OP_WITH_COMPRESSION, - False, - None, - ), ], ) -def test_compress_aws_request( +def test_compress( + aws_request_compressor, config, aws_request, operation_model, is_compressed, encoding, ): - compress_aws_request(config, aws_request, operation_model) + aws_request_compressor.compress(config, aws_request, operation_model) _assert_compression(is_compressed, aws_request.data) assert ( 'Content-Encoding' in aws_request.headers @@ -1075,54 +1048,24 @@ def test_compress_aws_request( ) == is_compressed -@pytest.mark.parametrize( - 'config, error', - [ - ( - Config(disable_request_compression='always'), - InvalidDisableRequestCompressionError, - ), - ( - Config(disable_request_compression='never'), - InvalidDisableRequestCompressionError, - ), - ( - Config(request_min_compression_size_bytes=-1), - InvalidRequestMinCompressionSizeError, - ), - ( - Config(request_min_compression_size_bytes='zero'), - InvalidRequestMinCompressionSizeError, - ), - ( - Config(request_min_compression_size_bytes=9999999), - InvalidRequestMinCompressionSizeError, - ), - ], -) -def test_bad_request_compression_config_raises(config, error): - with pytest.raises(error): - compress_aws_request( - config, - aws_request(), - OP_WITH_COMPRESSION, - ) - - -@pytest.mark.parametrize('body', [1, False, None, object(), 1.0, set('foo')]) -def test_bad_request_compression_body_raises( - aws_request_compressor, headers, body -): - with pytest.raises(TypeError): - aws_request_compressor.compress( - body=body, encodings=GZIP_ENCODINGS, headers=headers - ) +@pytest.mark.parametrize('body', [1, object(), None, True, 1.0]) +def test_compress_bad_types(aws_request_compressor, body): + aws_request = AWSRequest(data=body) + aws_request_compressor.compress( + COMPRESSION_CONFIG_0_BYTES, aws_request, OP_WITH_COMPRESSION + ) + # no compression will happen on request bodies that do not have a length + assert aws_request.data == body -@pytest.mark.parametrize('body', [io.StringIO("foo"), io.BytesIO(b"foo")]) -def test_body_streams_position_reset(aws_request_compressor, headers, body): +@pytest.mark.parametrize( + 'body', + [io.StringIO("foo"), io.BytesIO(b"foo")], +) +def test_body_streams_position_reset(aws_request_compressor, body): + aws_request = AWSRequest(data=body) aws_request_compressor.compress( - body=body, encodings=GZIP_ENCODINGS, headers=headers + COMPRESSION_CONFIG_0_BYTES, aws_request, OP_WITH_COMPRESSION ) assert body.tell() == 0