diff --git a/botocore/client.py b/botocore/client.py index 173331550c..d66d8a4894 100644 --- a/botocore/client.py +++ b/botocore/client.py @@ -377,9 +377,10 @@ def _service_model(self): return self.meta.service_model def _make_api_call(self, operation_name, api_params): + request_context = {} operation_model = self._service_model.operation_model(operation_name) request_dict = self._convert_to_request_dict( - api_params, operation_model) + api_params, operation_model, context=request_context) http, parsed_response = self._endpoint.make_request( operation_model, request_dict) @@ -388,7 +389,7 @@ def _make_api_call(self, operation_name, api_params): endpoint_prefix=self._service_model.endpoint_prefix, operation_name=operation_name), http_response=http, parsed=parsed_response, - model=operation_model + model=operation_model, context=request_context ) if http.status_code >= 300: @@ -396,7 +397,8 @@ def _make_api_call(self, operation_name, api_params): else: return parsed_response - def _convert_to_request_dict(self, api_params, operation_model): + def _convert_to_request_dict(self, api_params, operation_model, + context=None): # Given the API params provided by the user and the operation_model # we can serialize the request to a request_dict. operation_name = operation_model.name @@ -408,7 +410,7 @@ def _convert_to_request_dict(self, api_params, operation_model): 'provide-client-params.{endpoint_prefix}.{operation_name}'.format( endpoint_prefix=self._service_model.endpoint_prefix, operation_name=operation_name), - params=api_params, model=operation_model) + params=api_params, model=operation_model, context=context) api_params = first_non_none_response(responses, default=api_params) event_name = ( @@ -417,7 +419,7 @@ def _convert_to_request_dict(self, api_params, operation_model): event_name.format( endpoint_prefix=self._service_model.endpoint_prefix, operation_name=operation_name), - params=api_params, model=operation_model) + params=api_params, model=operation_model, context=context) request_dict = self._serializer.serialize_to_request( api_params, operation_model) @@ -428,7 +430,7 @@ def _convert_to_request_dict(self, api_params, operation_model): endpoint_prefix=self._service_model.endpoint_prefix, operation_name=operation_name), model=operation_model, params=request_dict, - request_signer=self._request_signer + request_signer=self._request_signer, context=context ) return request_dict diff --git a/botocore/handlers.py b/botocore/handlers.py index a82b1b967d..8387ed0e60 100644 --- a/botocore/handlers.py +++ b/botocore/handlers.py @@ -22,10 +22,10 @@ import xml.etree.cElementTree import copy import re -import string import warnings -from botocore.compat import urlsplit, urlunsplit, unquote, json, quote, six +from botocore.compat import urlsplit, urlunsplit, unquote, \ + json, quote, six, unquote_str from botocore.docs.utils import AutoPopulatedParam from botocore.docs.utils import HideParamFromOperations from botocore.docs.utils import AppendParamDocumentation @@ -519,6 +519,24 @@ def change_get_to_post(request, **kwargs): request.url, request.data = request.url.split('?', 1) +def set_list_objects_encoding_type_url(params, context, **kwargs): + if 'EncodingType' not in params: + # We set this context so that we know it wasn't the customer that + # requested the encoding. + context['EncodingTypeAutoSet'] = True + params['EncodingType'] = 'url' + + +def decode_list_object(parsed, context, **kwargs): + # This is needed because we are passing url as the encoding type. Since the + # paginator is based on the key, we need to handle it before it can be + # round tripped. + if 'Contents' in parsed and parsed.get('EncodingType') == 'url' and \ + context.get('EncodingTypeAutoSet'): + for content in parsed['Contents']: + content['Key'] = unquote_str(content['Key']) + + # This is a list of (event_name, handler). # When a Session is created, everything in this list will be # automatically registered with that Session. @@ -535,6 +553,8 @@ def change_get_to_post(request, **kwargs): ('before-parameter-build.s3', validate_bucket_name), + ('before-parameter-build.s3.ListObjects', + set_list_objects_encoding_type_url), ('before-call.s3.PutBucketTagging', calculate_md5), ('before-call.s3.PutBucketLifecycle', calculate_md5), ('before-call.s3.PutBucketLifecycleConfiguration', calculate_md5), @@ -588,6 +608,7 @@ def change_get_to_post(request, **kwargs): base64_encode_user_data), ('before-parameter-build.route53', fix_route53_ids), ('before-parameter-build.glacier', inject_account_id), + ('after-call.s3.ListObjects', decode_list_object), # Cloudsearchdomain search operation will be sent by HTTP POST ('request-created.cloudsearchdomain.Search', diff --git a/tests/integration/test_s3.py b/tests/integration/test_s3.py index 7df606f899..e0fd884763 100644 --- a/tests/integration/test_s3.py +++ b/tests/integration/test_s3.py @@ -333,6 +333,20 @@ def test_unicode_key_put_list(self): Bucket=self.bucket_name, Key=key_name) self.assertEqual(parsed['Body'].read().decode('utf-8'), 'foo') + def test_unicode_system_character(self): + # Verify we can use a unicode system character which would normally + # break the xml parser + key_name = 'foo\x08' + self.create_object(key_name) + parsed = self.client.list_objects(Bucket=self.bucket_name) + self.assertEqual(len(parsed['Contents']), 1) + self.assertEqual(parsed['Contents'][0]['Key'], key_name) + + parsed = self.client.list_objects(Bucket=self.bucket_name, + EncodingType='url') + self.assertEqual(len(parsed['Contents']), 1) + self.assertEqual(parsed['Contents'][0]['Key'], 'foo%08') + def test_thread_safe_auth(self): self.auth_paths = [] self.session.register('before-sign', self.increment_auth) diff --git a/tests/unit/test_handlers.py b/tests/unit/test_handlers.py index 291c7966bd..5df4fd010e 100644 --- a/tests/unit/test_handlers.py +++ b/tests/unit/test_handlers.py @@ -563,6 +563,34 @@ def test_valid_bucket_name_period(self): def test_validation_is_noop_if_no_bucket_param_exists(self): self.assertIsNone(handlers.validate_bucket_name(params={})) + def test_set_encoding_type(self): + params = {} + context = {} + handlers.set_list_objects_encoding_type_url(params, context=context) + self.assertEqual(params['EncodingType'], 'url') + self.assertTrue(context['EncodingTypeAutoSet']) + + params['EncodingType'] = 'new_value' + handlers.set_list_objects_encoding_type_url(params, context={}) + self.assertEqual(params['EncodingType'], 'new_value') + + def test_decode_list_objects(self): + parsed = { + 'Contents': [{'Key': "%C3%A7%C3%B6s%25asd%08"}], + 'EncodingType': 'url', + } + context = {'EncodingTypeAutoSet': True} + handlers.decode_list_object(parsed, context=context) + self.assertEqual(parsed['Contents'][0]['Key'], u'\xe7\xf6s%asd\x08') + + def test_decode_list_objects_does_not_decode_without_context(self): + parsed = { + 'Contents': [{'Key': "%C3%A7%C3%B6s%25asd"}], + 'EncodingType': 'url', + } + handlers.decode_list_object(parsed, context={}) + self.assertEqual(parsed['Contents'][0]['Key'], u'%C3%A7%C3%B6s%25asd') + class TestRetryHandlerOrder(BaseSessionTest): def get_handler_names(self, responses): diff --git a/tests/unit/test_s3_addressing.py b/tests/unit/test_s3_addressing.py index b9b2d98163..15467945ae 100644 --- a/tests/unit/test_s3_addressing.py +++ b/tests/unit/test_s3_addressing.py @@ -18,10 +18,8 @@ from tests import BaseSessionTest from mock import patch, Mock -import botocore.session -from botocore import auth -from botocore import credentials -from botocore.exceptions import ServiceNotInRegionError +from botocore.compat import OrderedDict +from botocore.handlers import set_list_objects_encoding_type_url class TestS3Addressing(BaseSessionTest): @@ -35,6 +33,8 @@ def setUp(self): self.mock_response.content = '' self.mock_response.headers = {} self.mock_response.status_code = 200 + self.session.unregister('before-parameter-build.s3.ListObjects', + set_list_objects_encoding_type_url) def get_prepared_request(self, operation, params, force_hmacv1=False): @@ -76,7 +76,8 @@ def test_list_objects_dns_name_non_classic(self): def test_list_objects_unicode_query_string_eu_central_1(self): self.region_name = 'eu-central-1' - params = {'Bucket': 'safename', 'Marker': u'\xe4\xf6\xfc-01.txt'} + params = OrderedDict([('Bucket', 'safename'), + ('Marker', u'\xe4\xf6\xfc-01.txt')]) prepared_request = self.get_prepared_request('list_objects', params) self.assertEqual( prepared_request.url,