diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c435dfcd540d..4a1434a2061f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,10 @@ CHANGELOG Next Release (TBD) ================== +* feature:Page Size: Add a ``--page-size`` option, that + controls page size when perfoming an operation that + uses pagination. + (`issue 889 `__) * bugfix:``aws s3``: Added support for ignoring and warning about files that do not exist, user does not have read permissions, or are special files (i.e. sockets, FIFOs, diff --git a/awscli/clidriver.py b/awscli/clidriver.py index 996869098f28..945a154bcec8 100644 --- a/awscli/clidriver.py +++ b/awscli/clidriver.py @@ -546,6 +546,8 @@ def invoke(self, operation_object, parameters, parsed_globals): endpoint_url=parsed_globals.endpoint_url, verify=parsed_globals.verify_ssl) if operation_object.can_paginate and parsed_globals.paginate: + if parsed_globals.page_size: + parameters['page_size'] = parsed_globals.page_size pages = operation_object.paginate(endpoint, **parameters) self._display_response(operation_object, pages, parsed_globals) diff --git a/awscli/customizations/s3/filegenerator.py b/awscli/customizations/s3/filegenerator.py index d5d74c349240..b53be0c45939 100644 --- a/awscli/customizations/s3/filegenerator.py +++ b/awscli/customizations/s3/filegenerator.py @@ -115,11 +115,12 @@ class FileGenerator(object): ``FileInfo`` objects to send to a ``Comparator`` or ``S3Handler``. """ def __init__(self, service, endpoint, operation_name, - follow_symlinks=True, result_queue=None): + follow_symlinks=True, page_size=None, result_queue=None): self._service = service self._endpoint = endpoint self.operation_name = operation_name self.follow_symlinks = follow_symlinks + self.page_size = page_size self.result_queue = result_queue if not result_queue: self.result_queue = queue.Queue() @@ -284,7 +285,8 @@ def list_objects(self, s3_path, dir_op): else: operation = self._service.get_operation('ListObjects') lister = BucketLister(operation, self._endpoint) - for key in lister.list_objects(bucket=bucket, prefix=prefix): + for key in lister.list_objects(bucket=bucket, prefix=prefix, + page_size=self.page_size): source_path, size, last_update = key if size == 0 and source_path.endswith('/'): if self.operation_name == 'delete': diff --git a/awscli/customizations/s3/subcommands.py b/awscli/customizations/s3/subcommands.py index 43185f0108b9..5de8a6574102 100644 --- a/awscli/customizations/s3/subcommands.py +++ b/awscli/customizations/s3/subcommands.py @@ -248,16 +248,18 @@ def _run_main(self, parsed_args, parsed_globals): self._list_all_buckets() elif parsed_args.dir_op: # Then --recursive was specified. - self._list_all_objects_recursive(bucket, key) + self._list_all_objects_recursive(bucket, key, + parsed_globals.page_size) else: - self._list_all_objects(bucket, key) + self._list_all_objects(bucket, key, parsed_globals.page_size) return 0 - def _list_all_objects(self, bucket, key): + def _list_all_objects(self, bucket, key, page_size=None): operation = self.service.get_operation('ListObjects') iterator = operation.paginate(self.endpoint, bucket=bucket, - prefix=key, delimiter='/') + prefix=key, delimiter='/', + page_size=page_size) for _, response_data in iterator: self._display_page(response_data) @@ -294,10 +296,10 @@ def _list_all_buckets(self): uni_print(print_str) sys.stdout.flush() - def _list_all_objects_recursive(self, bucket, key): + def _list_all_objects_recursive(self, bucket, key, page_size=None): operation = self.service.get_operation('ListObjects') iterator = operation.paginate(self.endpoint, bucket=bucket, - prefix=key) + prefix=key, page_size=page_size) for _, response_data in iterator: self._display_page(response_data, use_basename=False) @@ -373,6 +375,7 @@ def _run_main(self, parsed_args, parsed_globals): cmd_params.add_region(parsed_globals) cmd_params.add_endpoint_url(parsed_globals) cmd_params.add_verify_ssl(parsed_globals) + cmd_params.add_page_size(parsed_globals) cmd_params.add_paths(parsed_args.paths) cmd_params.check_force(parsed_globals) cmd = CommandArchitecture(self._session, self.NAME, @@ -561,9 +564,11 @@ def run(self): self._source_endpoint, operation_name, self.parameters['follow_symlinks'], + self.parameters['page_size'], result_queue=result_queue) rev_generator = FileGenerator(self._service, self._endpoint, '', self.parameters['follow_symlinks'], + self.parameters['page_size'], result_queue=result_queue) taskinfo = [TaskInfo(src=files['src']['path'], src_type='s3', @@ -796,3 +801,6 @@ def add_endpoint_url(self, parsed_globals): def add_verify_ssl(self, parsed_globals): self.parameters['verify_ssl'] = parsed_globals.verify_ssl + + def add_page_size(self, parsed_globals): + self.parameters['page_size'] = parsed_globals.page_size diff --git a/awscli/customizations/s3/utils.py b/awscli/customizations/s3/utils.py index 8613481a5fbc..eea51a5fbdbc 100644 --- a/awscli/customizations/s3/utils.py +++ b/awscli/customizations/s3/utils.py @@ -338,8 +338,9 @@ def __init__(self, operation, endpoint, date_parser=_date_parser): self._endpoint = endpoint self._date_parser = date_parser - def list_objects(self, bucket, prefix=None): - kwargs = {'bucket': bucket, 'encoding_type': 'url'} + def list_objects(self, bucket, prefix=None, page_size=None): + kwargs = {'bucket': bucket, 'encoding_type': 'url', + 'page_size': page_size} if prefix is not None: kwargs['prefix'] = prefix # This event handler is needed because we use encoding_type url and diff --git a/awscli/data/cli.json b/awscli/data/cli.json index bad4f35404c7..5b282494cb34 100644 --- a/awscli/data/cli.json +++ b/awscli/data/cli.json @@ -31,6 +31,10 @@ "query": { "help": "

A JMESPath query to use in filtering the response data.

" }, + "page-size": { + "type": "int", + "help": "

Specifies the page size when paginating.

" + }, "profile": { "help": "

Use a specific profile from your credential file.

" }, diff --git a/tests/integration/customizations/s3/test_filegenerator.py b/tests/integration/customizations/s3/test_filegenerator.py index 0c3912402533..b78fcd7f8878 100644 --- a/tests/integration/customizations/s3/test_filegenerator.py +++ b/tests/integration/customizations/s3/test_filegenerator.py @@ -19,6 +19,7 @@ import unittest import os +import itertools import botocore.session from awscli import EnvironmentVariables @@ -139,6 +140,25 @@ def test_s3_delete_directory(self): compare_files(self, result_list[1], expected_list[1]) compare_files(self, result_list[2], expected_list[2]) + def test_page_size(self): + input_s3_file = {'src': {'path': self.bucket+'/', 'type': 's3'}, + 'dest': {'path': '', 'type': 'local'}, + 'dir_op': True, 'use_src_name': True} + file_gen = FileGenerator(self.service, self.endpoint, '', + page_size=1).call(input_s3_file) + limited_file_gen = itertools.islice(file_gen, 1) + result_list = list(limited_file_gen) + file_stat = FileStat(src=self.file2, + dest='another_directory' + os.sep + 'text2.txt', + compare_key='another_directory/text2.txt', + size=21, + last_update=result_list[0].last_update, + src_type='s3', + dest_type='local', operation_name='') + # Ensure only one item is returned from ``ListObjects`` + self.assertEqual(len(result_list), 1) + compare_files(self, result_list[0], file_stat) + if __name__ == "__main__": unittest.main() diff --git a/tests/integration/customizations/s3/test_plugin.py b/tests/integration/customizations/s3/test_plugin.py index 8b5affd1d728..65e9d0f42217 100644 --- a/tests/integration/customizations/s3/test_plugin.py +++ b/tests/integration/customizations/s3/test_plugin.py @@ -463,13 +463,14 @@ def test_download_non_existent_key(self): class TestSync(BaseS3CLICommand): def test_sync_with_plus_chars_paginate(self): - # 1. Create > 1000 files with '+' in the filename. - # 2. Sync up to s3. - # 3. Sync up to s3 + # This test ensures pagination tokens are url decoded. + # 1. Create > 2 files with '+' in the filename. + # 2. Sync up to s3 while the page size is 2. + # 3. Sync up to s3 while the page size is 2. # 4. Verify nothing was synced up down from s3 in step 3. bucket_name = self.create_bucket() filenames = [] - for i in range(2000): + for i in range(4): # Create a file with a space char and a '+' char in the filename. # We're interested in testing the filename comparisons, not the # mtime comparisons so we're setting the mtime to some time @@ -480,10 +481,12 @@ def test_sync_with_plus_chars_paginate(self): self.files.create_file('foo +%06d' % i, contents='', mtime=mtime)) - p = aws('s3 sync %s s3://%s/' % (self.files.rootdir, bucket_name)) + p = aws('s3 sync %s s3://%s/ --page-size 2' % + (self.files.rootdir, bucket_name)) self.assert_no_errors(p) - time.sleep(5) - p2 = aws('s3 sync %s s3://%s/' % (self.files.rootdir, bucket_name)) + time.sleep(1) + p2 = aws('s3 sync %s s3://%s/ --page-size 2' + % (self.files.rootdir, bucket_name)) self.assertNotIn('upload:', p2.stdout) self.assertEqual('', p2.stdout) diff --git a/tests/unit/customizations/s3/test_ls_command.py b/tests/unit/customizations/s3/test_ls_command.py index b8b5b01fc6fc..c4908e58a7bc 100644 --- a/tests/unit/customizations/s3/test_ls_command.py +++ b/tests/unit/customizations/s3/test_ls_command.py @@ -39,3 +39,36 @@ def test_errors_out_with_extra_arguments(self): stderr = self.run_cmd('s3 ls --extra-argument-foo', expected_rc=255)[1] self.assertIn('Unknown options', stderr) self.assertIn('--extra-argument-foo', stderr) + + def test_operations_use_page_size(self): + time_utc = "2014-01-09T20:45:49.000Z" + self.parsed_responses = [{"CommonPrefixes": [], "Contents": [ + {"Key": "foo/bar.txt", "Size": 100, + "LastModified": time_utc}]}] + stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --page-size 8', expected_rc=0) + call_args = self.operations_called[0][1] + # We should not be calling the args with any delimiter because we + # want a recursive listing. + self.assertEqual(call_args['prefix'], '') + self.assertEqual(call_args['bucket'], 'bucket') + # The page size gets translated to ``MaxKeys`` in the s3 model + self.assertEqual(call_args['MaxKeys'], 8) + + def test_operations_use_page_size_recursive(self): + time_utc = "2014-01-09T20:45:49.000Z" + self.parsed_responses = [{"CommonPrefixes": [], "Contents": [ + {"Key": "foo/bar.txt", "Size": 100, + "LastModified": time_utc}]}] + stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --page-size 8 --recursive', expected_rc=0) + call_args = self.operations_called[0][1] + # We should not be calling the args with any delimiter because we + # want a recursive listing. + self.assertEqual(call_args['prefix'], '') + self.assertEqual(call_args['bucket'], 'bucket') + # The page size gets translated to ``MaxKeys`` in the s3 model + self.assertEqual(call_args['MaxKeys'], 8) + self.assertNotIn('delimiter', call_args) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/customizations/s3/test_subcommands.py b/tests/unit/customizations/s3/test_subcommands.py index a9c397f65c66..363fc496889f 100644 --- a/tests/unit/customizations/s3/test_subcommands.py +++ b/tests/unit/customizations/s3/test_subcommands.py @@ -57,7 +57,9 @@ def setUp(self): def test_ls_command_for_bucket(self): ls_command = ListCommand(self.session) parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False) - ls_command._run_main(parsed_args, mock.Mock()) + parsed_globals = mock.Mock() + parsed_globals.page_size = '5' + ls_command._run_main(parsed_args, parsed_globals) call = self.session.get_service.return_value.get_operation\ .return_value.call paginate = self.session.get_service.return_value.get_operation\ @@ -69,7 +71,8 @@ def test_ls_command_for_bucket(self): 'ListObjects') self.assertEqual( paginate.call_args[1], {'bucket': u'mybucket', - 'delimiter': '/', 'prefix': u''}) + 'delimiter': '/', 'prefix': u'', + 'page_size': u'5'}) def test_ls_command_with_no_args(self): ls_command = ListCommand(self.session) @@ -194,7 +197,7 @@ def test_run_cp_put(self): 'src': local_file, 'dest': s3_file, 'filters': filters, 'paths_type': 'locals3', 'region': 'us-east-1', 'endpoint_url': None, 'verify_ssl': None, - 'follow_symlinks': True} + 'follow_symlinks': True, 'page_size': None} cmd_arc = CommandArchitecture(self.session, 'cp', params) cmd_arc.create_instructions() cmd_arc.run() @@ -210,7 +213,7 @@ def test_error_on_same_line_as_status(self): 'src': local_file, 'dest': s3_file, 'filters': filters, 'paths_type': 'locals3', 'region': 'us-east-1', 'endpoint_url': None, 'verify_ssl': None, - 'follow_symlinks': True} + 'follow_symlinks': True, 'page_size': None} cmd_arc = CommandArchitecture(self.session, 'cp', params) cmd_arc.create_instructions() cmd_arc.run() @@ -233,7 +236,7 @@ def test_run_cp_get(self): 'src': s3_file, 'dest': local_file, 'filters': filters, 'paths_type': 's3local', 'region': 'us-east-1', 'endpoint_url': None, 'verify_ssl': None, - 'follow_symlinks': True} + 'follow_symlinks': True, 'page_size': None} cmd_arc = CommandArchitecture(self.session, 'cp', params) cmd_arc.create_instructions() cmd_arc.run() @@ -250,7 +253,7 @@ def test_run_cp_copy(self): 'src': s3_file, 'dest': s3_file, 'filters': filters, 'paths_type': 's3s3', 'region': 'us-east-1', 'endpoint_url': None, 'verify_ssl': None, - 'follow_symlinks': True} + 'follow_symlinks': True, 'page_size': None} cmd_arc = CommandArchitecture(self.session, 'cp', params) cmd_arc.create_instructions() cmd_arc.run() @@ -267,7 +270,7 @@ def test_run_mv(self): 'src': s3_file, 'dest': s3_file, 'filters': filters, 'paths_type': 's3s3', 'region': 'us-east-1', 'endpoint_url': None, 'verify_ssl': None, - 'follow_symlinks': True} + 'follow_symlinks': True, 'page_size': None} cmd_arc = CommandArchitecture(self.session, 'mv', params) cmd_arc.create_instructions() cmd_arc.run() @@ -284,7 +287,7 @@ def test_run_remove(self): 'src': s3_file, 'dest': s3_file, 'filters': filters, 'paths_type': 's3', 'region': 'us-east-1', 'endpoint_url': None, 'verify_ssl': None, - 'follow_symlinks': True} + 'follow_symlinks': True, 'page_size': None} cmd_arc = CommandArchitecture(self.session, 'rm', params) cmd_arc.create_instructions() cmd_arc.run() @@ -305,7 +308,7 @@ def test_run_sync(self): 'src': local_dir, 'dest': s3_prefix, 'filters': filters, 'paths_type': 'locals3', 'region': 'us-east-1', 'endpoint_url': None, 'verify_ssl': None, - 'follow_symlinks': True} + 'follow_symlinks': True, 'page_size': None} cmd_arc = CommandArchitecture(self.session, 'sync', params) cmd_arc.create_instructions() cmd_arc.run() @@ -320,7 +323,8 @@ def test_run_mb(self): params = {'dir_op': True, 'dryrun': True, 'quiet': False, 'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3', 'region': 'us-east-1', 'endpoint_url': None, - 'verify_ssl': None, 'follow_symlinks': True} + 'verify_ssl': None, 'follow_symlinks': True, + 'page_size': None} cmd_arc = CommandArchitecture(self.session, 'mb', params) cmd_arc.create_instructions() cmd_arc.run() @@ -335,7 +339,8 @@ def test_run_rb(self): params = {'dir_op': True, 'dryrun': True, 'quiet': False, 'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3', 'region': 'us-east-1', 'endpoint_url': None, - 'verify_ssl': None, 'follow_symlinks': True} + 'verify_ssl': None, 'follow_symlinks': True, + 'page_size': None} cmd_arc = CommandArchitecture(self.session, 'rb', params) cmd_arc.create_instructions() rc = cmd_arc.run() @@ -351,7 +356,8 @@ def test_run_rb_nonzero_rc(self): params = {'dir_op': True, 'dryrun': False, 'quiet': False, 'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3', 'region': 'us-east-1', 'endpoint_url': None, - 'verify_ssl': None, 'follow_symlinks': True} + 'verify_ssl': None, 'follow_symlinks': True, + 'page_size': None} cmd_arc = CommandArchitecture(self.session, 'rb', params) cmd_arc.create_instructions() rc = cmd_arc.run() diff --git a/tests/unit/ec2/test_describe_instances.py b/tests/unit/ec2/test_describe_instances.py index fbcb5c0fe27d..91ac4ef86852 100644 --- a/tests/unit/ec2/test_describe_instances.py +++ b/tests/unit/ec2/test_describe_instances.py @@ -87,6 +87,12 @@ def test_multiple_filters_alternate(self): } self.assert_params_for_cmd(cmdlist, result) + def test_page_size(self): + args = ' --page-size 10' + cmdline = self.prefix + args + result = {'MaxResults': '10'} + self.assert_params_for_cmd(cmdline, result) + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/s3/test_list_objects.py b/tests/unit/s3/test_list_objects.py index 5e861ba67e8d..bfd38304eccd 100644 --- a/tests/unit/s3/test_list_objects.py +++ b/tests/unit/s3/test_list_objects.py @@ -39,6 +39,16 @@ def test_max_items(self): 'headers': {},} self.assert_params_for_cmd(cmdline, result, ignore_params=['payload']) + def test_page_size(self): + cmdline = self.prefix + cmdline += ' --bucket mybucket' + # The max-items is a customization and therefore won't + # show up in the result params. + cmdline += ' --page-size 100' + result = {'uri_params': {'Bucket': 'mybucket', 'MaxKeys': 100}, + 'headers': {},} + self.assert_params_for_cmd(cmdline, result, ignore_params=['payload']) + def test_starting_token(self): # We don't need to test this in depth because botocore # tests this. We just want to make sure this is hooked up diff --git a/tests/unit/test_clidriver.py b/tests/unit/test_clidriver.py index a0a7c35b5576..45fca5b15622 100644 --- a/tests/unit/test_clidriver.py +++ b/tests/unit/test_clidriver.py @@ -72,6 +72,10 @@ "help": "Disable automatic pagination", "dest": "paginate" }, + "page-size": { + "type": "int", + "help": "

Specifies the page size when paginating.

" + }, } }, 'aws/_services': {'s3':{}}, @@ -656,6 +660,33 @@ def test_invoke_with_no_credentials(self): with self.assertRaises(NoCredentialsError): caller.invoke(None, None, None) + def test_invoke_with_page_size(self): + operation_object = mock.Mock() + paginate = operation_object.paginate + operation_object.can_paginate = True + parsed_globals = mock.Mock() + parsed_globals.paginate = True + parsed_globals.page_size = '10' + parameters = {} + caller = CLIOperationCaller(self.session) + with mock.patch('awscli.clidriver.CLIOperationCaller._display_response'): + caller.invoke(operation_object, parameters, parsed_globals) + self.assertEqual(paginate.call_args[1], {'page_size': u'10'}) + + def test_invoke_with_no_page_size(self): + operation_object = mock.Mock() + paginate = operation_object.paginate + operation_object.can_paginate = True + parsed_globals = mock.Mock() + parsed_globals.paginate = True + parsed_globals.page_size = None + parameters = {} + caller = CLIOperationCaller(self.session) + with mock.patch('awscli.clidriver.CLIOperationCaller._display_response'): + caller.invoke(operation_object, parameters, parsed_globals) + # No parameters were passed to it (i.e. only self and endpoint). + self.assertEqual(len(paginate.call_args), 2) + class TestVerifyArgument(BaseAWSCommandParamsTest): def setUp(self): diff --git a/tests/unit/test_completer.py b/tests/unit/test_completer.py index 06788f46f2f3..fc5365b40a69 100644 --- a/tests/unit/test_completer.py +++ b/tests/unit/test_completer.py @@ -23,7 +23,7 @@ LOG = logging.getLogger(__name__) GLOBALOPTS = ['--debug', '--endpoint-url', '--no-verify-ssl', - '--no-paginate', '--output', '--profile', + '--no-paginate', '--output', '--page-size', '--profile', '--region', '--version', '--color', '--query', '--no-sign-request'] COMPLETIONS = [ @@ -62,7 +62,7 @@ set(['--filters', '--dry-run', '--no-dry-run', '--endpoint-url', '--no-verify-ssl', '--no-paginate', '--no-sign-request', '--output', '--profile', '--starting-token', '--max-items', - '--region', '--version', '--color', '--query'])), + '--region', '--version', '--color', '--query', '--page-size'])), ('aws s3', -1, set(['cp', 'mv', 'rm', 'mb', 'rb', 'ls', 'sync', 'website'])), ('aws s3 m', -1, set(['mv', 'mb'])), ('aws s3 cp -', -1, set(['--no-guess-mime-type', '--dryrun',