Skip to content

Commit

Permalink
request compression (#2959)
Browse files Browse the repository at this point in the history
* request compression

* refactor

* add handler test

* Another round of refactoring. Move compression to a separate file and query body serialization to a utility

* cleanup

* pr feedback

* removed enumerate and set size=-1 instead of returning -1

* change to functional implementation

* simplify assert compression method

* pr feedback

* update wording regarding compression for requests with streaming input

* divert byte encoding

* move min and max min compression size to function

* fixed test

* fixed max allowed min compression size, added test for it and moved compression config tests to test_args

* cleanup tests and test bad min and bad max separately

* cleanup

* pr feedback. clean up tests and move urlencoding dicts into compression

* cleaned up tests

* added TypeError to min compression validation and a bunch of formatting cleanup

* extract dict type normalization into separate function

* refactor unit tests

* adjust compression assertion method and move dict compression into parametrized test

* formatting cleanup

* actually convert request_compression_min_size_bytes to int and some more formatting fixes

* add test case for coercible boolean and small fix to no compression test

* fixed test

* actually fixed test

* assert_compression method

* add test cases for non-seekable streams

* pr feedback

* put private note in wrong file. Also removed `classes` since there aren't any

* remove duplicate test case

* Refactor unit tests

* Fix incorrect operation model

* use compression assertion function in stream test and only use single quotes

* small fix to functional test and changelog

---------

Co-authored-by: Nate Prewitt <nate.prewitt@gmail.com>
  • Loading branch information
dlm6693 and nateprewitt authored Jul 27, 2023
1 parent 10f938d commit bf00aa7
Show file tree
Hide file tree
Showing 11 changed files with 771 additions and 2 deletions.
5 changes: 5 additions & 0 deletions .changes/next-release/enhancement-compression-36791.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "enhancement",
"category": "compression",
"description": "Adds support for the ``requestcompression`` operation trait."
}
54 changes: 54 additions & 0 deletions botocore/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,17 @@ def compute_client_args(
tcp_keepalive=client_config.tcp_keepalive,
user_agent_extra=client_config.user_agent_extra,
user_agent_appid=client_config.user_agent_appid,
request_min_compression_size_bytes=(
client_config.request_min_compression_size_bytes
),
disable_request_compression=(
client_config.disable_request_compression
),
)
self._compute_retry_config(config_kwargs)
self._compute_connect_timeout(config_kwargs)
self._compute_user_agent_appid_config(config_kwargs)
self._compute_request_compression_config(config_kwargs)
s3_config = self.compute_s3_config(client_config)

is_s3_service = self._is_s3_service(service_name)
Expand Down Expand Up @@ -543,6 +550,53 @@ def _compute_connect_timeout(self, config_kwargs):
if connect_timeout:
config_kwargs['connect_timeout'] = connect_timeout

def _compute_request_compression_config(self, config_kwargs):
min_size = config_kwargs.get('request_min_compression_size_bytes')
disabled = config_kwargs.get('disable_request_compression')
if min_size is None:
min_size = self._config_store.get_config_variable(
'request_min_compression_size_bytes'
)
# conversion func is skipped so input validation must be done here
# regardless if the value is coming from the config store or the
# config object
min_size = self._validate_min_compression_size(min_size)
config_kwargs['request_min_compression_size_bytes'] = min_size

if disabled is None:
disabled = self._config_store.get_config_variable(
'disable_request_compression'
)
else:
# if the user provided a value we must check if it's a boolean
disabled = ensure_boolean(disabled)
config_kwargs['disable_request_compression'] = disabled

def _validate_min_compression_size(self, min_size):
min_allowed_min_size = 1
max_allowed_min_size = 1048576
if min_size is not None:
error_msg_base = (
f'Invalid value "{min_size}" for '
'request_min_compression_size_bytes.'
)
try:
min_size = int(min_size)
except (ValueError, TypeError):
msg = (
f'{error_msg_base} Value must be an integer. '
f'Received {type(min_size)} instead.'
)
raise botocore.exceptions.InvalidConfigError(error_msg=msg)
if not min_allowed_min_size <= min_size <= max_allowed_min_size:
msg = (
f'{error_msg_base} Value must be between '
f'{min_allowed_min_size} and {max_allowed_min_size}.'
)
raise botocore.exceptions.InvalidConfigError(error_msg=msg)

return min_size

def _ensure_boolean(self, val):
if isinstance(val, bool):
return val
Expand Down
4 changes: 4 additions & 0 deletions botocore/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from botocore.args import ClientArgsCreator
from botocore.auth import AUTH_TYPE_MAPS
from botocore.awsrequest import prepare_request_dict
from botocore.compress import maybe_compress_request
from botocore.config import Config
from botocore.discovery import (
EndpointDiscoveryHandler,
Expand Down Expand Up @@ -955,6 +956,9 @@ def _make_api_call(self, operation_name, api_params):
if event_response is not None:
http, parsed_response = event_response
else:
maybe_compress_request(
self.meta.config, request_dict, operation_model
)
apply_request_checksum(request_dict)
http, parsed_response = self._make_request(
operation_model, request_dict, request_context
Expand Down
126 changes: 126 additions & 0 deletions botocore/compress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""
NOTE: All functions in this module are considered private and are
subject to abrupt breaking changes. Please do not use them directly.
"""

import io
import logging
from gzip import GzipFile
from gzip import compress as gzip_compress

from botocore.compat import urlencode
from botocore.utils import determine_content_length

logger = logging.getLogger(__name__)


def maybe_compress_request(config, request_dict, operation_model):
    """Compress the request body with the first supported modeled encoding.

    Nothing happens when ``_should_compress_request`` rejects the request
    or when none of the modeled encodings has an entry in
    ``COMPRESSION_MAPPING``. Otherwise ``request_dict['body']`` is replaced
    by the compressed payload and the ``Content-Encoding`` header in
    ``request_dict['headers']`` is updated.
    """
    if not _should_compress_request(config, request_dict, operation_model):
        return

    for encoding in operation_model.request_compression['encodings']:
        compress = COMPRESSION_MAPPING.get(encoding)
        if compress is None:
            logger.debug('Unsupported compression encoding: %s', encoding)
            continue
        logger.debug('Compressing request with %s encoding.', encoding)
        request_dict['body'] = compress(request_dict['body'])
        _set_compression_header(request_dict['headers'], encoding)
        # Only the first supported encoding is applied.
        return


def _should_compress_request(config, request_dict, operation_model):
if (
config.disable_request_compression is not True
and config.signature_version != 'v2'
and operation_model.request_compression is not None
):
if not _is_compressible_type(request_dict):
body_type = type(request_dict['body'])
log_msg = 'Body type %s does not support compression.'
logger.debug(log_msg, body_type)
return False

if operation_model.has_streaming_input:
streaming_input = operation_model.get_streaming_input()
streaming_metadata = streaming_input.metadata
return 'requiresLength' not in streaming_metadata

body_size = _get_body_size(request_dict['body'])
min_size = config.request_min_compression_size_bytes
return min_size <= body_size

return False


def _is_compressible_type(request_dict):
body = request_dict['body']
# Coerce dict to a format compatible with compression.
if isinstance(body, dict):
body = urlencode(body, doseq=True, encoding='utf-8').encode('utf-8')
request_dict['body'] = body
is_supported_type = isinstance(body, (str, bytes, bytearray))
return is_supported_type or hasattr(body, 'read')


def _get_body_size(body):
    """Return the length of ``body``, or ``0`` if it cannot be determined.

    The length comes from ``determine_content_length``. When that returns
    ``None`` a debug message is logged and ``0`` is returned instead.
    """
    length = determine_content_length(body)
    if length is not None:
        return length
    logger.debug(
        'Unable to get length of the request body: %s. '
        'Skipping compression.',
        body,
    )
    return 0


def _gzip_compress_body(body):
if isinstance(body, str):
return gzip_compress(body.encode('utf-8'))
elif isinstance(body, (bytes, bytearray)):
return gzip_compress(body)
elif hasattr(body, 'read'):
if hasattr(body, 'seek') and hasattr(body, 'tell'):
current_position = body.tell()
compressed_obj = _gzip_compress_fileobj(body)
body.seek(current_position)
return compressed_obj
return _gzip_compress_fileobj(body)


def _gzip_compress_fileobj(body):
compressed_obj = io.BytesIO()
with GzipFile(fileobj=compressed_obj, mode='wb') as gz:
while True:
chunk = body.read(8192)
if not chunk:
break
if isinstance(chunk, str):
chunk = chunk.encode('utf-8')
gz.write(chunk)
compressed_obj.seek(0)
return compressed_obj


def _set_compression_header(headers, encoding):
ce_header = headers.get('Content-Encoding')
if ce_header is None:
headers['Content-Encoding'] = encoding
else:
headers['Content-Encoding'] = f'{ce_header},{encoding}'


# Maps each supported encoding name to the function that compresses a
# request body with it.
COMPRESSION_MAPPING = {
    'gzip': _gzip_compress_body,
}
16 changes: 16 additions & 0 deletions botocore/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,20 @@ class Config:
creating new connections if set to True.
Defaults to False.
:type request_min_compression_size_bytes: int
:param request_min_compression_size_bytes: The minimum size in bytes that a
request body should be to trigger compression. All requests with streaming
input that don't contain the `requiresLength` trait will be compressed
regardless of this setting.
Defaults to None.
:type disable_request_compression: bool
:param disable_request_compression: Disables request body compression if
set to True.
Defaults to None.
"""

OPTION_DEFAULTS = OrderedDict(
Expand All @@ -231,6 +245,8 @@ class Config:
('ignore_configured_endpoint_urls', None),
('defaults_mode', None),
('tcp_keepalive', None),
('request_min_compression_size_bytes', None),
('disable_request_compression', None),
]
)

Expand Down
14 changes: 13 additions & 1 deletion botocore/configprovider.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""This module contains the inteface for controlling how configuration
"""This module contains the interface for controlling how configuration
is loaded.
"""
import copy
Expand Down Expand Up @@ -147,6 +147,18 @@
# whatever the defaults are in _retry.json.
'max_attempts': ('max_attempts', 'AWS_MAX_ATTEMPTS', None, int),
'user_agent_appid': ('sdk_ua_app_id', 'AWS_SDK_UA_APP_ID', None, None),
'request_min_compression_size_bytes': (
'request_min_compression_size_bytes',
'AWS_REQUEST_MIN_COMPRESSION_SIZE_BYTES',
10240,
None,
),
'disable_request_compression': (
'disable_request_compression',
'AWS_DISABLE_REQUEST_COMPRESSION',
False,
utils.ensure_boolean,
),
}
# A mapping for the s3 specific configuration vars. These are the configuration
# vars that typically go in the s3 section of the config file. This mapping
Expand Down
8 changes: 8 additions & 0 deletions botocore/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class Shape:
'union',
'contextParam',
'clientContextParams',
'requiresLength',
]
MAP_TYPE = OrderedDict

Expand Down Expand Up @@ -172,6 +173,9 @@ def metadata(self):
* idempotencyToken
* document
* union
* contextParam
* clientContextParams
* requiresLength
:rtype: dict
:return: Metadata about the shape.
Expand Down Expand Up @@ -614,6 +618,10 @@ def context_parameters(self):
and 'name' in shape.metadata['contextParam']
]

@CachedProperty
def request_compression(self):
    """Return the ``requestcompression`` entry of the operation model.

    :return: Whatever ``self._operation_model.get('requestcompression')``
        yields.
    """
    trait = self._operation_model.get('requestcompression')
    return trait

@CachedProperty
def auth_type(self):
    """Return the ``authtype`` entry of the operation model.

    :return: Whatever ``self._operation_model.get('authtype')`` yields.
    """
    modeled_auth = self._operation_model.get('authtype')
    return modeled_auth
Expand Down
Loading

0 comments on commit bf00aa7

Please sign in to comment.