Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

request compression #2959

Merged
merged 37 commits into from
Jul 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
4208cb3
request compression
Jun 6, 2023
8fb9de7
refactor
Jun 26, 2023
b689db8
add handler test
Jun 26, 2023
6cdd1ed
Another round of refactoring. Move compression to a separate file and…
Jun 30, 2023
165c5e1
cleanup
Jul 7, 2023
b97fa8c
pr feedback
Jul 12, 2023
9326d3f
removed enumerate and set size=-1 instead of returning -1
Jul 12, 2023
afee36d
change to functional implementation
Jul 12, 2023
68630f6
simplify assert compression method
Jul 12, 2023
10cec6b
pr feedback
Jul 13, 2023
6247e28
update wording regarding compression for requests with streaming input
Jul 13, 2023
aa54927
divert byte encoding
Jul 16, 2023
debd186
move min and max min compression size to function
Jul 16, 2023
6e8c0a3
fixed test
Jul 16, 2023
2f6d52e
fixed max allowed min compression size, added test for it and moved c…
Jul 17, 2023
56fd999
cleanup tests and test bad min and bad max separately
Jul 17, 2023
f5202b4
cleanup
Jul 17, 2023
6cecbad
pr feedback. clean up tests and move urlencoding dicts into compression
Jul 18, 2023
053f053
cleaned up tests
Jul 19, 2023
df00a70
added TypeError to min compression validation and a bunch of formatti…
Jul 19, 2023
30d7c12
extract dict type normalization into separate function
Jul 19, 2023
b9334df
refactor unit tests
Jul 25, 2023
e93321f
adjust compression assertion method and move dict compression into pa…
Jul 25, 2023
ebcd98b
formatting cleanup
Jul 25, 2023
ccd81fa
actually convert request_compression_min_size_bytes to int and some m…
Jul 25, 2023
82e6bed
add test case for coercible boolean and small fix to no compression test
Jul 25, 2023
19586f4
fixed test
Jul 25, 2023
acd343d
actually fixed test
Jul 25, 2023
9b6b03a
assert_compression method
Jul 25, 2023
a5071e1
add test cases for non-seekable streams
Jul 26, 2023
8ed62a8
pr feedback
Jul 27, 2023
004a175
put private note in wrong file. Also removed `classes` since there ar…
Jul 27, 2023
bd6249a
remove duplicate test case
Jul 27, 2023
4011831
Refactor unit tests
nateprewitt Jul 27, 2023
d16faf6
Fix incorrect operation model
nateprewitt Jul 27, 2023
3be8638
use compression assertion function in stream test and only use single…
Jul 27, 2023
494c7e3
small fix to functional test and changelog
Jul 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changes/next-release/enhancement-compression-36791.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "enhancement",
"category": "compression",
"description": "Adds support for the ``requestcompression`` operation trait."
}
54 changes: 54 additions & 0 deletions botocore/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,17 @@ def compute_client_args(
tcp_keepalive=client_config.tcp_keepalive,
user_agent_extra=client_config.user_agent_extra,
user_agent_appid=client_config.user_agent_appid,
request_min_compression_size_bytes=(
client_config.request_min_compression_size_bytes
),
disable_request_compression=(
client_config.disable_request_compression
),
nateprewitt marked this conversation as resolved.
Show resolved Hide resolved
)
self._compute_retry_config(config_kwargs)
self._compute_connect_timeout(config_kwargs)
self._compute_user_agent_appid_config(config_kwargs)
self._compute_request_compression_config(config_kwargs)
s3_config = self.compute_s3_config(client_config)

is_s3_service = self._is_s3_service(service_name)
Expand Down Expand Up @@ -543,6 +550,53 @@ def _compute_connect_timeout(self, config_kwargs):
if connect_timeout:
config_kwargs['connect_timeout'] = connect_timeout

def _compute_request_compression_config(self, config_kwargs):
    """Resolve and validate the request-compression client settings.

    A value set explicitly on the client config wins; otherwise the
    config store (env var / shared config file / default) is consulted.
    Results are written back into ``config_kwargs``.
    """
    min_size = config_kwargs.get('request_min_compression_size_bytes')
    if min_size is None:
        min_size = self._config_store.get_config_variable(
            'request_min_compression_size_bytes'
        )
    # The config store's conversion func is skipped, so input validation
    # must happen here regardless of where the value came from.
    config_kwargs['request_min_compression_size_bytes'] = (
        self._validate_min_compression_size(min_size)
    )

    disabled = config_kwargs.get('disable_request_compression')
    if disabled is None:
        disabled = self._config_store.get_config_variable(
            'disable_request_compression'
        )
    else:
        # A user-provided value must be coerced to a strict boolean.
        disabled = ensure_boolean(disabled)
    config_kwargs['disable_request_compression'] = disabled

def _validate_min_compression_size(self, min_size):
min_allowed_min_size = 1
max_allowed_min_size = 1048576
if min_size is not None:
nateprewitt marked this conversation as resolved.
Show resolved Hide resolved
error_msg_base = (
f'Invalid value "{min_size}" for '
'request_min_compression_size_bytes.'
)
try:
min_size = int(min_size)
except (ValueError, TypeError):
msg = (
f'{error_msg_base} Value must be an integer. '
f'Received {type(min_size)} instead.'
)
raise botocore.exceptions.InvalidConfigError(error_msg=msg)
if not min_allowed_min_size <= min_size <= max_allowed_min_size:
msg = (
f'{error_msg_base} Value must be between '
f'{min_allowed_min_size} and {max_allowed_min_size}.'
)
raise botocore.exceptions.InvalidConfigError(error_msg=msg)

return min_size

def _ensure_boolean(self, val):
if isinstance(val, bool):
return val
Expand Down
4 changes: 4 additions & 0 deletions botocore/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from botocore.args import ClientArgsCreator
from botocore.auth import AUTH_TYPE_MAPS
from botocore.awsrequest import prepare_request_dict
from botocore.compress import maybe_compress_request
from botocore.config import Config
from botocore.discovery import (
EndpointDiscoveryHandler,
Expand Down Expand Up @@ -955,6 +956,9 @@ def _make_api_call(self, operation_name, api_params):
if event_response is not None:
http, parsed_response = event_response
else:
maybe_compress_request(
self.meta.config, request_dict, operation_model
)
apply_request_checksum(request_dict)
http, parsed_response = self._make_request(
operation_model, request_dict, request_context
Expand Down
126 changes: 126 additions & 0 deletions botocore/compress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""
NOTE: All functions in this module are considered private and are
subject to abrupt breaking changes. Please do not use them directly.
"""

dlm6693 marked this conversation as resolved.
Show resolved Hide resolved
import io
import logging
from gzip import GzipFile
from gzip import compress as gzip_compress

from botocore.compat import urlencode
from botocore.utils import determine_content_length

logger = logging.getLogger(__name__)


def maybe_compress_request(config, request_dict, operation_model):
    """Compress the request body in place when the operation supports it.

    Walks the operation's modeled encodings in order and applies the
    first one we have an encoder for, recording the encoding in the
    ``Content-Encoding`` header. Encodings without a registered encoder
    are logged and skipped.
    """
    if not _should_compress_request(config, request_dict, operation_model):
        return
    for encoding in operation_model.request_compression['encodings']:
        encoder = COMPRESSION_MAPPING.get(encoding)
        if encoder is None:
            logger.debug('Unsupported compression encoding: %s', encoding)
            continue
        logger.debug('Compressing request with %s encoding.', encoding)
        request_dict['body'] = encoder(request_dict['body'])
        _set_compression_header(request_dict['headers'], encoding)
        return


def _should_compress_request(config, request_dict, operation_model):
    """Decide whether this request's body is eligible for compression."""
    # Compression must not be explicitly disabled, the operation must
    # model the requestcompression trait, and the (legacy) v2 signature
    # version is excluded.
    if config.disable_request_compression is True:
        return False
    if config.signature_version == 'v2':
        return False
    if operation_model.request_compression is None:
        return False

    if not _is_compressible_type(request_dict):
        logger.debug(
            'Body type %s does not support compression.',
            type(request_dict['body']),
        )
        return False

    # Streaming input is compressed regardless of size, unless the shape
    # requires a known content length.
    if operation_model.has_streaming_input:
        metadata = operation_model.get_streaming_input().metadata
        return 'requiresLength' not in metadata

    return config.request_min_compression_size_bytes <= _get_body_size(
        request_dict['body']
    )


def _is_compressible_type(request_dict):
body = request_dict['body']
# Coerce dict to a format compatible with compression.
if isinstance(body, dict):
body = urlencode(body, doseq=True, encoding='utf-8').encode('utf-8')
request_dict['body'] = body
is_supported_type = isinstance(body, (str, bytes, bytearray))
return is_supported_type or hasattr(body, 'read')


def _get_body_size(body):
    """Return the body's content length, or 0 when it can't be measured.

    A size of 0 falls below every valid minimum-size threshold, so an
    unmeasurable body effectively opts out of compression.
    """
    size = determine_content_length(body)
    if size is not None:
        return size
    logger.debug(
        'Unable to get length of the request body: %s. '
        'Skipping compression.',
        body,
    )
    return 0


def _gzip_compress_body(body):
if isinstance(body, str):
return gzip_compress(body.encode('utf-8'))
elif isinstance(body, (bytes, bytearray)):
return gzip_compress(body)
elif hasattr(body, 'read'):
if hasattr(body, 'seek') and hasattr(body, 'tell'):
current_position = body.tell()
compressed_obj = _gzip_compress_fileobj(body)
body.seek(current_position)
return compressed_obj
return _gzip_compress_fileobj(body)


def _gzip_compress_fileobj(body):
compressed_obj = io.BytesIO()
with GzipFile(fileobj=compressed_obj, mode='wb') as gz:
while True:
chunk = body.read(8192)
if not chunk:
break
if isinstance(chunk, str):
chunk = chunk.encode('utf-8')
gz.write(chunk)
compressed_obj.seek(0)
return compressed_obj


def _set_compression_header(headers, encoding):
ce_header = headers.get('Content-Encoding')
if ce_header is None:
headers['Content-Encoding'] = encoding
else:
headers['Content-Encoding'] = f'{ce_header},{encoding}'


# Maps a modeled encoding name to the callable that compresses a request
# body with that encoding. Encodings missing from this mapping are skipped.
COMPRESSION_MAPPING = {'gzip': _gzip_compress_body}
16 changes: 16 additions & 0 deletions botocore/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,20 @@ class Config:
creating new connections if set to True.
Defaults to False.
:type request_min_compression_size_bytes: int
:param request_min_compression_size_bytes: The minimum size in bytes that a
request body should be to trigger compression. All requests with streaming
input that don't contain the `requiresLength` trait will be compressed
regardless of this setting.
Defaults to None.
:type disable_request_compression: bool
:param disable_request_compression: Disables request body compression if
set to True.
Defaults to None.
"""

OPTION_DEFAULTS = OrderedDict(
Expand All @@ -231,6 +245,8 @@ class Config:
('ignore_configured_endpoint_urls', None),
('defaults_mode', None),
('tcp_keepalive', None),
('request_min_compression_size_bytes', None),
('disable_request_compression', None),
]
)

Expand Down
14 changes: 13 additions & 1 deletion botocore/configprovider.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""This module contains the inteface for controlling how configuration
"""This module contains the interface for controlling how configuration
is loaded.
"""
import copy
Expand Down Expand Up @@ -147,6 +147,18 @@
# whatever the defaults are in _retry.json.
'max_attempts': ('max_attempts', 'AWS_MAX_ATTEMPTS', None, int),
'user_agent_appid': ('sdk_ua_app_id', 'AWS_SDK_UA_APP_ID', None, None),
'request_min_compression_size_bytes': (
'request_min_compression_size_bytes',
'AWS_REQUEST_MIN_COMPRESSION_SIZE_BYTES',
10240,
None,
dlm6693 marked this conversation as resolved.
Show resolved Hide resolved
),
'disable_request_compression': (
'disable_request_compression',
'AWS_DISABLE_REQUEST_COMPRESSION',
False,
utils.ensure_boolean,
),
}
# A mapping for the s3 specific configuration vars. These are the configuration
# vars that typically go in the s3 section of the config file. This mapping
Expand Down
8 changes: 8 additions & 0 deletions botocore/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class Shape:
'union',
'contextParam',
'clientContextParams',
'requiresLength',
dlm6693 marked this conversation as resolved.
Show resolved Hide resolved
]
MAP_TYPE = OrderedDict

Expand Down Expand Up @@ -172,6 +173,9 @@ def metadata(self):
* idempotencyToken
* document
* union
* contextParam
* clientContextParams
* requiresLength
:rtype: dict
:return: Metadata about the shape.
Expand Down Expand Up @@ -614,6 +618,10 @@ def context_parameters(self):
and 'name' in shape.metadata['contextParam']
]

@CachedProperty
def request_compression(self):
    """The operation's modeled ``requestcompression`` trait.

    :rtype: dict or None
    :return: The request compression configuration from the service
        model (e.g. containing the supported ``encodings``), or None
        if the operation does not model the trait.
    """
    return self._operation_model.get('requestcompression')

@CachedProperty
def auth_type(self):
    """The operation's modeled ``authtype`` value, or None if unset."""
    return self._operation_model.get('authtype')
Expand Down
Loading