Skip to content

Commit

Permalink
Enabled --page-size as global argument.
Browse files Browse the repository at this point in the history
The page size controls the number of items returned by
each page while paginating. Smoke tests were added for ``ec2``
and ``s3api`` to ensure the parameter does what it is expected
to do for known paginating operations.
  • Loading branch information
kyleknap committed Aug 21, 2014
1 parent c560dfe commit 6bf5851
Show file tree
Hide file tree
Showing 14 changed files with 161 additions and 31 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ CHANGELOG
Next Release (TBD)
==================

* feature:Page Size: Add a ``--page-size`` option that
controls the page size when performing an operation that
uses pagination.
(`issue 889 <https://github.com/aws/aws-cli/pull/889>`__)
* bugfix:``aws s3``: Added support for ignoring and warning
about files that do not exist, user does not have read
permissions, or are special files (i.e. sockets, FIFOs,
Expand Down
2 changes: 2 additions & 0 deletions awscli/clidriver.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,8 @@ def invoke(self, operation_object, parameters, parsed_globals):
endpoint_url=parsed_globals.endpoint_url,
verify=parsed_globals.verify_ssl)
if operation_object.can_paginate and parsed_globals.paginate:
if parsed_globals.page_size:
parameters['page_size'] = parsed_globals.page_size
pages = operation_object.paginate(endpoint, **parameters)
self._display_response(operation_object, pages,
parsed_globals)
Expand Down
6 changes: 4 additions & 2 deletions awscli/customizations/s3/filegenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,12 @@ class FileGenerator(object):
``FileInfo`` objects to send to a ``Comparator`` or ``S3Handler``.
"""
def __init__(self, service, endpoint, operation_name,
follow_symlinks=True, result_queue=None):
follow_symlinks=True, page_size=None, result_queue=None):
self._service = service
self._endpoint = endpoint
self.operation_name = operation_name
self.follow_symlinks = follow_symlinks
self.page_size = page_size
self.result_queue = result_queue
if not result_queue:
self.result_queue = queue.Queue()
Expand Down Expand Up @@ -284,7 +285,8 @@ def list_objects(self, s3_path, dir_op):
else:
operation = self._service.get_operation('ListObjects')
lister = BucketLister(operation, self._endpoint)
for key in lister.list_objects(bucket=bucket, prefix=prefix):
for key in lister.list_objects(bucket=bucket, prefix=prefix,
page_size=self.page_size):
source_path, size, last_update = key
if size == 0 and source_path.endswith('/'):
if self.operation_name == 'delete':
Expand Down
20 changes: 14 additions & 6 deletions awscli/customizations/s3/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,16 +248,18 @@ def _run_main(self, parsed_args, parsed_globals):
self._list_all_buckets()
elif parsed_args.dir_op:
# Then --recursive was specified.
self._list_all_objects_recursive(bucket, key)
self._list_all_objects_recursive(bucket, key,
parsed_globals.page_size)
else:
self._list_all_objects(bucket, key)
self._list_all_objects(bucket, key, parsed_globals.page_size)
return 0

def _list_all_objects(self, bucket, key):
def _list_all_objects(self, bucket, key, page_size=None):

operation = self.service.get_operation('ListObjects')
iterator = operation.paginate(self.endpoint, bucket=bucket,
prefix=key, delimiter='/')
prefix=key, delimiter='/',
page_size=page_size)
for _, response_data in iterator:
self._display_page(response_data)

Expand Down Expand Up @@ -294,10 +296,10 @@ def _list_all_buckets(self):
uni_print(print_str)
sys.stdout.flush()

def _list_all_objects_recursive(self, bucket, key):
def _list_all_objects_recursive(self, bucket, key, page_size=None):
operation = self.service.get_operation('ListObjects')
iterator = operation.paginate(self.endpoint, bucket=bucket,
prefix=key)
prefix=key, page_size=page_size)
for _, response_data in iterator:
self._display_page(response_data, use_basename=False)

Expand Down Expand Up @@ -373,6 +375,7 @@ def _run_main(self, parsed_args, parsed_globals):
cmd_params.add_region(parsed_globals)
cmd_params.add_endpoint_url(parsed_globals)
cmd_params.add_verify_ssl(parsed_globals)
cmd_params.add_page_size(parsed_globals)
cmd_params.add_paths(parsed_args.paths)
cmd_params.check_force(parsed_globals)
cmd = CommandArchitecture(self._session, self.NAME,
Expand Down Expand Up @@ -561,9 +564,11 @@ def run(self):
self._source_endpoint,
operation_name,
self.parameters['follow_symlinks'],
self.parameters['page_size'],
result_queue=result_queue)
rev_generator = FileGenerator(self._service, self._endpoint, '',
self.parameters['follow_symlinks'],
self.parameters['page_size'],
result_queue=result_queue)
taskinfo = [TaskInfo(src=files['src']['path'],
src_type='s3',
Expand Down Expand Up @@ -796,3 +801,6 @@ def add_endpoint_url(self, parsed_globals):

def add_verify_ssl(self, parsed_globals):
    """Copy the parsed global ``verify_ssl`` value into the parameter map."""
    verify = parsed_globals.verify_ssl
    self.parameters['verify_ssl'] = verify

def add_page_size(self, parsed_globals):
    """Copy the parsed global ``page_size`` value into the parameter map."""
    page_size = parsed_globals.page_size
    self.parameters['page_size'] = page_size
5 changes: 3 additions & 2 deletions awscli/customizations/s3/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,9 @@ def __init__(self, operation, endpoint, date_parser=_date_parser):
self._endpoint = endpoint
self._date_parser = date_parser

def list_objects(self, bucket, prefix=None):
kwargs = {'bucket': bucket, 'encoding_type': 'url'}
def list_objects(self, bucket, prefix=None, page_size=None):
kwargs = {'bucket': bucket, 'encoding_type': 'url',
'page_size': page_size}
if prefix is not None:
kwargs['prefix'] = prefix
# This event handler is needed because we use encoding_type url and
Expand Down
4 changes: 4 additions & 0 deletions awscli/data/cli.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@
"query": {
"help": "<p>A JMESPath query to use in filtering the response data.</p>"
},
"page-size": {
"type": "int",
"help": "<p>Specifies the page size when paginating.</p>"
},
"profile": {
"help": "<p>Use a specific profile from your credential file.</p>"
},
Expand Down
20 changes: 20 additions & 0 deletions tests/integration/customizations/s3/test_filegenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import unittest
import os
import itertools

import botocore.session
from awscli import EnvironmentVariables
Expand Down Expand Up @@ -139,6 +140,25 @@ def test_s3_delete_directory(self):
compare_files(self, result_list[1], expected_list[1])
compare_files(self, result_list[2], expected_list[2])

def test_page_size(self):
    """A ``page_size`` of 1 should limit each ListObjects page to one key."""
    input_s3_file = {'src': {'path': self.bucket+'/', 'type': 's3'},
                     'dest': {'path': '', 'type': 'local'},
                     'dir_op': True, 'use_src_name': True}
    # NOTE(review): assumes the bucket fixture contains more than one key
    # (``self.file2`` among them) so a one-item page is a real truncation
    # -- confirm against the class setUp.
    file_gen = FileGenerator(self.service, self.endpoint, '',
                             page_size=1).call(input_s3_file)
    # Consume only the first generated item; with page_size=1 this should
    # need no more than a single ListObjects page.
    limited_file_gen = itertools.islice(file_gen, 1)
    result_list = list(limited_file_gen)
    # Expected stat for the first key returned; last_update is taken from
    # the result itself since it is server-assigned.
    file_stat = FileStat(src=self.file2,
                         dest='another_directory' + os.sep + 'text2.txt',
                         compare_key='another_directory/text2.txt',
                         size=21,
                         last_update=result_list[0].last_update,
                         src_type='s3',
                         dest_type='local', operation_name='')
    # Ensure only one item is returned from ``ListObjects``
    self.assertEqual(len(result_list), 1)
    compare_files(self, result_list[0], file_stat)


if __name__ == "__main__":
unittest.main()
17 changes: 10 additions & 7 deletions tests/integration/customizations/s3/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,13 +463,14 @@ def test_download_non_existent_key(self):

class TestSync(BaseS3CLICommand):
def test_sync_with_plus_chars_paginate(self):
# 1. Create > 1000 files with '+' in the filename.
# 2. Sync up to s3.
# 3. Sync up to s3
# This test ensures pagination tokens are url decoded.
# 1. Create > 2 files with '+' in the filename.
# 2. Sync up to s3 while the page size is 2.
# 3. Sync up to s3 while the page size is 2.
# 4. Verify nothing was synced up or down from s3 in step 3.
bucket_name = self.create_bucket()
filenames = []
for i in range(2000):
for i in range(4):
# Create a file with a space char and a '+' char in the filename.
# We're interested in testing the filename comparisons, not the
# mtime comparisons so we're setting the mtime to some time
Expand All @@ -480,10 +481,12 @@ def test_sync_with_plus_chars_paginate(self):
self.files.create_file('foo +%06d' % i,
contents='',
mtime=mtime))
p = aws('s3 sync %s s3://%s/' % (self.files.rootdir, bucket_name))
p = aws('s3 sync %s s3://%s/ --page-size 2' %
(self.files.rootdir, bucket_name))
self.assert_no_errors(p)
time.sleep(5)
p2 = aws('s3 sync %s s3://%s/' % (self.files.rootdir, bucket_name))
time.sleep(1)
p2 = aws('s3 sync %s s3://%s/ --page-size 2'
% (self.files.rootdir, bucket_name))
self.assertNotIn('upload:', p2.stdout)
self.assertEqual('', p2.stdout)

Expand Down
33 changes: 33 additions & 0 deletions tests/unit/customizations/s3/test_ls_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,36 @@ def test_errors_out_with_extra_arguments(self):
stderr = self.run_cmd('s3 ls --extra-argument-foo', expected_rc=255)[1]
self.assertIn('Unknown options', stderr)
self.assertIn('--extra-argument-foo', stderr)

def test_operations_use_page_size(self):
    """``s3 ls --page-size`` should forward the page size to ListObjects."""
    time_utc = "2014-01-09T20:45:49.000Z"
    # Canned single-key ListObjects response for the stubbed service.
    self.parsed_responses = [{"CommonPrefixes": [], "Contents": [
        {"Key": "foo/bar.txt", "Size": 100,
         "LastModified": time_utc}]}]
    stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --page-size 8', expected_rc=0)
    call_args = self.operations_called[0][1]
    # The bucket and (empty) prefix should be forwarded unchanged.  This
    # is the non-recursive listing, so no claim is made about the
    # delimiter here (contrast with the recursive test below).
    self.assertEqual(call_args['prefix'], '')
    self.assertEqual(call_args['bucket'], 'bucket')
    # The page size gets translated to ``MaxKeys`` in the s3 model
    self.assertEqual(call_args['MaxKeys'], 8)

def test_operations_use_page_size_recursive(self):
    """``--page-size`` should still map to ``MaxKeys`` with ``--recursive``."""
    time_utc = "2014-01-09T20:45:49.000Z"
    # Canned single-key ListObjects response for the stubbed service.
    self.parsed_responses = [{"CommonPrefixes": [], "Contents": [
        {"Key": "foo/bar.txt", "Size": 100,
         "LastModified": time_utc}]}]
    stdout, _, _ = self.run_cmd('s3 ls s3://bucket/ --page-size 8 --recursive', expected_rc=0)
    call_args = self.operations_called[0][1]
    # We should not be calling the args with any delimiter because we
    # want a recursive listing.
    self.assertEqual(call_args['prefix'], '')
    self.assertEqual(call_args['bucket'], 'bucket')
    # The page size gets translated to ``MaxKeys`` in the s3 model
    self.assertEqual(call_args['MaxKeys'], 8)
    self.assertNotIn('delimiter', call_args)


if __name__ == "__main__":
unittest.main()
30 changes: 18 additions & 12 deletions tests/unit/customizations/s3/test_subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ def setUp(self):
def test_ls_command_for_bucket(self):
ls_command = ListCommand(self.session)
parsed_args = FakeArgs(paths='s3://mybucket/', dir_op=False)
ls_command._run_main(parsed_args, mock.Mock())
parsed_globals = mock.Mock()
parsed_globals.page_size = '5'
ls_command._run_main(parsed_args, parsed_globals)
call = self.session.get_service.return_value.get_operation\
.return_value.call
paginate = self.session.get_service.return_value.get_operation\
Expand All @@ -69,7 +71,8 @@ def test_ls_command_for_bucket(self):
'ListObjects')
self.assertEqual(
paginate.call_args[1], {'bucket': u'mybucket',
'delimiter': '/', 'prefix': u''})
'delimiter': '/', 'prefix': u'',
'page_size': u'5'})

def test_ls_command_with_no_args(self):
ls_command = ListCommand(self.session)
Expand Down Expand Up @@ -194,7 +197,7 @@ def test_run_cp_put(self):
'src': local_file, 'dest': s3_file, 'filters': filters,
'paths_type': 'locals3', 'region': 'us-east-1',
'endpoint_url': None, 'verify_ssl': None,
'follow_symlinks': True}
'follow_symlinks': True, 'page_size': None}
cmd_arc = CommandArchitecture(self.session, 'cp', params)
cmd_arc.create_instructions()
cmd_arc.run()
Expand All @@ -210,7 +213,7 @@ def test_error_on_same_line_as_status(self):
'src': local_file, 'dest': s3_file, 'filters': filters,
'paths_type': 'locals3', 'region': 'us-east-1',
'endpoint_url': None, 'verify_ssl': None,
'follow_symlinks': True}
'follow_symlinks': True, 'page_size': None}
cmd_arc = CommandArchitecture(self.session, 'cp', params)
cmd_arc.create_instructions()
cmd_arc.run()
Expand All @@ -233,7 +236,7 @@ def test_run_cp_get(self):
'src': s3_file, 'dest': local_file, 'filters': filters,
'paths_type': 's3local', 'region': 'us-east-1',
'endpoint_url': None, 'verify_ssl': None,
'follow_symlinks': True}
'follow_symlinks': True, 'page_size': None}
cmd_arc = CommandArchitecture(self.session, 'cp', params)
cmd_arc.create_instructions()
cmd_arc.run()
Expand All @@ -250,7 +253,7 @@ def test_run_cp_copy(self):
'src': s3_file, 'dest': s3_file, 'filters': filters,
'paths_type': 's3s3', 'region': 'us-east-1',
'endpoint_url': None, 'verify_ssl': None,
'follow_symlinks': True}
'follow_symlinks': True, 'page_size': None}
cmd_arc = CommandArchitecture(self.session, 'cp', params)
cmd_arc.create_instructions()
cmd_arc.run()
Expand All @@ -267,7 +270,7 @@ def test_run_mv(self):
'src': s3_file, 'dest': s3_file, 'filters': filters,
'paths_type': 's3s3', 'region': 'us-east-1',
'endpoint_url': None, 'verify_ssl': None,
'follow_symlinks': True}
'follow_symlinks': True, 'page_size': None}
cmd_arc = CommandArchitecture(self.session, 'mv', params)
cmd_arc.create_instructions()
cmd_arc.run()
Expand All @@ -284,7 +287,7 @@ def test_run_remove(self):
'src': s3_file, 'dest': s3_file, 'filters': filters,
'paths_type': 's3', 'region': 'us-east-1',
'endpoint_url': None, 'verify_ssl': None,
'follow_symlinks': True}
'follow_symlinks': True, 'page_size': None}
cmd_arc = CommandArchitecture(self.session, 'rm', params)
cmd_arc.create_instructions()
cmd_arc.run()
Expand All @@ -305,7 +308,7 @@ def test_run_sync(self):
'src': local_dir, 'dest': s3_prefix, 'filters': filters,
'paths_type': 'locals3', 'region': 'us-east-1',
'endpoint_url': None, 'verify_ssl': None,
'follow_symlinks': True}
'follow_symlinks': True, 'page_size': None}
cmd_arc = CommandArchitecture(self.session, 'sync', params)
cmd_arc.create_instructions()
cmd_arc.run()
Expand All @@ -320,7 +323,8 @@ def test_run_mb(self):
params = {'dir_op': True, 'dryrun': True, 'quiet': False,
'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3',
'region': 'us-east-1', 'endpoint_url': None,
'verify_ssl': None, 'follow_symlinks': True}
'verify_ssl': None, 'follow_symlinks': True,
'page_size': None}
cmd_arc = CommandArchitecture(self.session, 'mb', params)
cmd_arc.create_instructions()
cmd_arc.run()
Expand All @@ -335,7 +339,8 @@ def test_run_rb(self):
params = {'dir_op': True, 'dryrun': True, 'quiet': False,
'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3',
'region': 'us-east-1', 'endpoint_url': None,
'verify_ssl': None, 'follow_symlinks': True}
'verify_ssl': None, 'follow_symlinks': True,
'page_size': None}
cmd_arc = CommandArchitecture(self.session, 'rb', params)
cmd_arc.create_instructions()
rc = cmd_arc.run()
Expand All @@ -351,7 +356,8 @@ def test_run_rb_nonzero_rc(self):
params = {'dir_op': True, 'dryrun': False, 'quiet': False,
'src': s3_prefix, 'dest': s3_prefix, 'paths_type': 's3',
'region': 'us-east-1', 'endpoint_url': None,
'verify_ssl': None, 'follow_symlinks': True}
'verify_ssl': None, 'follow_symlinks': True,
'page_size': None}
cmd_arc = CommandArchitecture(self.session, 'rb', params)
cmd_arc.create_instructions()
rc = cmd_arc.run()
Expand Down
6 changes: 6 additions & 0 deletions tests/unit/ec2/test_describe_instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ def test_multiple_filters_alternate(self):
}
self.assert_params_for_cmd(cmdlist, result)

def test_page_size(self):
    """``--page-size`` should be sent as the ``MaxResults`` request param."""
    cmdline = '%s --page-size 10' % self.prefix
    expected_params = {'MaxResults': '10'}
    self.assert_params_for_cmd(cmdline, expected_params)


if __name__ == "__main__":
unittest.main()
10 changes: 10 additions & 0 deletions tests/unit/s3/test_list_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@ def test_max_items(self):
'headers': {},}
self.assert_params_for_cmd(cmdline, result, ignore_params=['payload'])

def test_page_size(self):
    """``--page-size`` should be sent to the service as ``MaxKeys``."""
    cmdline = self.prefix
    cmdline += ' --bucket mybucket'
    # Unlike ``--max-items`` (a client-side pagination customization that
    # does not appear in the request -- see test_max_items above),
    # ``--page-size`` maps onto the operation's ``MaxKeys`` parameter and
    # therefore shows up in the result params.
    cmdline += ' --page-size 100'
    result = {'uri_params': {'Bucket': 'mybucket', 'MaxKeys': 100},
              'headers': {},}
    self.assert_params_for_cmd(cmdline, result, ignore_params=['payload'])

def test_starting_token(self):
# We don't need to test this in depth because botocore
# tests this. We just want to make sure this is hooked up
Expand Down
Loading

0 comments on commit 6bf5851

Please sign in to comment.