Combining get_all_blobs and Bucket.iterator. #802

Merged 1 commit on Apr 8, 2015
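In brief: this PR folds the eager `get_all_blobs()` helper and the lazy `iterator()` method into a single `list_blobs()` method. A before/after sketch of the API change, assuming a `bucket` obtained as in the docs below (names illustrative):

>>> # Before: two separate entry points.
>>> blobs = bucket.get_all_blobs()              # eager list
>>> iterator = bucket.iterator(prefix='logs/')  # lazy iterator
>>> # After: one method serves both styles.
>>> blobs = list(bucket.list_blobs())           # materialize eagerly
>>> for blob in bucket.list_blobs(prefix='logs/'):  # or iterate lazily
...     print blob.name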
4 changes: 2 additions & 2 deletions docs/_components/storage-getting-started.rst
@@ -156,9 +156,9 @@ bucket object::
>>> bucket = storage.get_bucket('my-bucket', connection=connection)

If you want to get all the blobs in the bucket, you can use
:func:`get_all_blobs <gcloud.storage.bucket.Bucket.get_all_blobs>`::
:func:`list_blobs <gcloud.storage.bucket.Bucket.list_blobs>`::

>>> blobs = bucket.get_all_blobs()
>>> blobs = bucket.list_blobs()

However, if you're looking to iterate through the blobs, you can use the
bucket itself as an iterator::
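The doctest that follows is truncated in this view; with `__iter__` now delegating to `list_blobs()` (see bucket.py below), the idiom is simply, as a sketch:

>>> for blob in bucket:
...     print blob.name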
2 changes: 1 addition & 1 deletion docs/_components/storage-quickstart.rst
@@ -67,7 +67,7 @@ you can create buckets and blobs::
>>> blob = blob.upload_from_string('this is test content!')
>>> print blob.download_as_string()
'this is test content!'
>>> print bucket.get_all_blobs()
>>> print list(bucket.list_blobs())
[<Blob: my-new-bucket, my-test-file.txt>]
>>> blob.delete()
>>> bucket.delete()
84 changes: 48 additions & 36 deletions gcloud/storage/bucket.py
@@ -23,9 +23,9 @@
False

If you want to get all the blobs in the bucket, you can use
:func:`get_all_blobs <gcloud.storage.bucket.Bucket.get_all_blobs>`::
:func:`list_blobs <gcloud.storage.bucket.Bucket.list_blobs>`::

>>> blobs = bucket.get_all_blobs()
>>> blobs = bucket.list_blobs()

You can also use the bucket as an iterator::

@@ -104,7 +104,7 @@ def __repr__(self):
return '<Bucket: %s>' % self.name

def __iter__(self):
return iter(self._iterator_class(bucket=self))
return iter(self.list_blobs())

def __contains__(self, blob_name):
blob = Blob(blob_name, bucket=self)
@@ -223,56 +223,68 @@ def get_blob(self, blob_name):
except NotFound:
return None

def get_all_blobs(self):
"""List all the blobs in this bucket.

This will **not** retrieve all the data for all the blobs, it
will only retrieve the blob paths.

This is equivalent to::

blobs = [blob for blob in bucket]
def list_blobs(self, max_results=None, page_token=None, prefix=None,
delimiter=None, versions=None,
projection='noAcl', fields=None):
"""Return an iterator used to find blobs in the bucket.

:rtype: list of :class:`gcloud.storage.blob.Blob`
:returns: A list of all the Blob objects in this bucket.
"""
return list(self)
:type max_results: integer or ``NoneType``
:param max_results: maximum number of blobs to return.

def iterator(self, prefix=None, delimiter=None, max_results=None,
versions=None):
"""Return an iterator used to find blobs in the bucket.
:type page_token: string
:param page_token: opaque marker for the next "page" of blobs. If not
passed, will return the first page of blobs.

:type prefix: string or None
:type prefix: string or ``NoneType``
:param prefix: optional prefix used to filter blobs.

:type delimiter: string or None
:type delimiter: string or ``NoneType``
:param delimiter: optional delimiter, used with ``prefix`` to
emulate hierarchy.

:type max_results: integer or None
:param max_results: maximum number of blobs to return.

:type versions: boolean or None
:type versions: boolean or ``NoneType``
:param versions: whether object versions should be returned as
separate blobs.

:rtype: :class:`_BlobIterator`
:type projection: string or ``NoneType``
:param projection: If used, must be 'full' or 'noAcl'. Defaults to
'noAcl'. Specifies the set of properties to return.

:type fields: string or ``NoneType``
:param fields: Selector specifying which fields to include in a
partial response. Must be a comma-separated string of
fields. For example, to get a partial response with just
the next page token and the language of each blob returned:
'items/contentLanguage,nextPageToken'

:rtype: :class:`_BlobIterator`
:returns: An iterator of blobs.
"""
extra_params = {}

if max_results is not None:
extra_params['maxResults'] = max_results

if prefix is not None:
extra_params['prefix'] = prefix

if delimiter is not None:
extra_params['delimiter'] = delimiter

if max_results is not None:
extra_params['maxResults'] = max_results

if versions is not None:
extra_params['versions'] = versions

return self._iterator_class(self, extra_params=extra_params)
extra_params['projection'] = projection

if fields is not None:
extra_params['fields'] = fields

result = self._iterator_class(self, extra_params=extra_params)
# Page token must be handled specially since the base `Iterator`
# class has it as a reserved property.
if page_token is not None:
result.next_page_token = page_token
return result
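The keyword arguments map one-for-one onto query parameters of the underlying JSON API listing request ('maxResults', 'pageToken', 'prefix', 'delimiter', 'versions', 'projection', 'fields'). A usage sketch, assuming a `bucket` as in the module docstring (names illustrative):

>>> # Up to 10 blobs under 'photos/', without descending into
>>> # sub-"folders", with minimal ('noAcl') metadata.
>>> for blob in bucket.list_blobs(max_results=10, prefix='photos/',
...                               delimiter='/'):
...     print blob.name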

def delete(self, force=False):
"""Delete this bucket.
@@ -297,7 +309,7 @@ def delete(self, force=False):
contains more than 256 objects / blobs.
"""
if force:
blobs = list(self.iterator(
blobs = list(self.list_blobs(
max_results=self._MAX_OBJECTS_FOR_BUCKET_DELETE + 1))
if len(blobs) > self._MAX_OBJECTS_FOR_BUCKET_DELETE:
message = (
@@ -325,7 +337,7 @@ def delete_blob(self, blob_name):
>>> from gcloud import storage
>>> connection = storage.get_connection()
>>> bucket = storage.get_bucket('my-bucket', connection=connection)
>>> print bucket.get_all_blobs()
>>> print list(bucket.list_blobs())
[<Blob: my-bucket, my-file.txt>]
>>> bucket.delete_blob('my-file.txt')
>>> try:
@@ -408,7 +420,7 @@ def upload_file(self, filename, blob_name=None):
>>> connection = storage.get_connection()
>>> bucket = storage.get_bucket('my-bucket', connection=connection)
>>> bucket.upload_file('~/my-file.txt', 'remote-text-file.txt')
>>> print bucket.get_all_blobs()
>>> print list(bucket.list_blobs())
[<Blob: my-bucket, remote-text-file.txt>]

If you don't provide a blob name, we will try to upload the file
@@ -418,7 +430,7 @@
>>> connection = storage.get_connection()
>>> bucket = storage.get_bucket('my-bucket', connection=connection)
>>> bucket.upload_file('~/my-file.txt')
>>> print bucket.get_all_blobs()
>>> print list(bucket.list_blobs())
[<Blob: my-bucket, my-file.txt>]

:type filename: string
@@ -450,7 +462,7 @@ def upload_file_object(self, file_obj, blob_name=None):
>>> connection = storage.get_connection()
>>> bucket = storage.get_bucket('my-bucket', connection=connection)
>>> bucket.upload_file_object(open('~/my-file.txt'), 'remote-text-file.txt')
>>> print bucket.get_all_blobs()
>>> print list(bucket.list_blobs())
[<Blob: my-bucket, remote-text-file.txt>]

If you don't provide a blob name, we will try to upload the file
@@ -460,7 +472,7 @@
>>> connection = storage.get_connection()
>>> bucket = storage.get_bucket('my-bucket', connection=connection)
>>> bucket.upload_file_object(open('~/my-file.txt'))
>>> print bucket.get_all_blobs()
>>> print list(bucket.list_blobs())
[<Blob: my-bucket, my-file.txt>]

:type file_obj: file
68 changes: 28 additions & 40 deletions gcloud/storage/test_bucket.py
@@ -101,7 +101,7 @@ def test___iter___empty(self):
kw, = connection._requested
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o' % NAME)
self.assertEqual(kw['query_params'], {})
self.assertEqual(kw['query_params'], {'projection': 'noAcl'})

def test___iter___non_empty(self):
NAME = 'name'
@@ -115,7 +115,7 @@ def test___iter___non_empty(self):
kw, = connection._requested
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o' % NAME)
self.assertEqual(kw['query_params'], {})
self.assertEqual(kw['query_params'], {'projection': 'noAcl'})

def test___contains___miss(self):
NAME = 'name'
@@ -269,58 +269,46 @@ def test_get_blob_hit(self):
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o/%s' % (NAME, BLOB_NAME))

def test_get_all_blobs_empty(self):
def test_list_blobs_defaults(self):
NAME = 'name'
connection = _Connection({'items': []})
bucket = self._makeOne(NAME, connection)
blobs = bucket.get_all_blobs()
self.assertEqual(blobs, [])
kw, = connection._requested
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o' % NAME)
self.assertEqual(kw['query_params'], {})

def test_get_all_blobs_non_empty(self):
NAME = 'name'
BLOB_NAME = 'blob-name'
connection = _Connection({'items': [{'name': BLOB_NAME}]})
bucket = self._makeOne(NAME, connection)
blobs = bucket.get_all_blobs()
blob, = blobs
self.assertTrue(blob.bucket is bucket)
self.assertEqual(blob.name, BLOB_NAME)
kw, = connection._requested
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o' % NAME)
self.assertEqual(kw['query_params'], {})

def test_iterator_defaults(self):
NAME = 'name'
connection = _Connection({'items': []})
bucket = self._makeOne(NAME, connection)
iterator = bucket.iterator()
iterator = bucket.list_blobs()
blobs = list(iterator)
self.assertEqual(blobs, [])
kw, = connection._requested
self.assertEqual(kw['method'], 'GET')
self.assertEqual(kw['path'], '/b/%s/o' % NAME)
self.assertEqual(kw['query_params'], {})
self.assertEqual(kw['query_params'], {'projection': 'noAcl'})

def test_iterator_explicit(self):
def test_list_blobs_explicit(self):
NAME = 'name'
MAX_RESULTS = 10
PAGE_TOKEN = 'ABCD'
PREFIX = 'subfolder'
DELIMITER = '/'
VERSIONS = True
PROJECTION = 'full'
FIELDS = 'items/contentLanguage,nextPageToken'
EXPECTED = {
'prefix': 'subfolder',
'delimiter': '/',
'maxResults': 10,
'versions': True,
'pageToken': PAGE_TOKEN,
'prefix': PREFIX,
'delimiter': DELIMITER,
'versions': VERSIONS,
'projection': PROJECTION,
'fields': FIELDS,
}
connection = _Connection({'items': []})
bucket = self._makeOne(NAME, connection)
iterator = bucket.iterator(
prefix='subfolder',
delimiter='/',
max_results=10,
versions=True,
iterator = bucket.list_blobs(
max_results=MAX_RESULTS,
page_token=PAGE_TOKEN,
prefix=PREFIX,
delimiter=DELIMITER,
versions=VERSIONS,
projection=PROJECTION,
fields=FIELDS,
)
blobs = list(iterator)
self.assertEqual(blobs, [])
@@ -1069,7 +1057,7 @@ def get_items_from_response(self, response):
self.assertEqual(kw[0]['query_params'], {'projection': 'full'})
self.assertEqual(kw[1]['method'], 'GET')
self.assertEqual(kw[1]['path'], '/b/%s/o' % NAME)
self.assertEqual(kw[1]['query_params'], {})
self.assertEqual(kw[1]['query_params'], {'projection': 'noAcl'})


class _Connection(object):
15 changes: 8 additions & 7 deletions regression/storage.py
@@ -205,13 +205,13 @@ def tearDownClass(cls):
blob.delete()

def test_list_files(self):
all_blobs = self.bucket.get_all_blobs()
all_blobs = list(self.bucket.list_blobs())
self.assertEqual(len(all_blobs), len(self.FILENAMES))

def test_paginate_files(self):
truncation_size = 1
count = len(self.FILENAMES) - truncation_size
iterator = self.bucket.iterator(max_results=count)
iterator = self.bucket.list_blobs(max_results=count)
response = iterator.get_next_page_response()
blobs = list(iterator.get_items_from_response(response))
self.assertEqual(len(blobs), count)
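Note: the new `page_token` argument plugs into this same machinery; a sketch of resuming a listing from a saved token, assuming the iterator's base class exposes `next_page_token` as the diff above indicates:

>>> iterator = bucket.list_blobs(max_results=2)
>>> response = iterator.get_next_page_response()
>>> first_page = list(iterator.get_items_from_response(response))
>>> token = iterator.next_page_token  # None once the listing is exhausted
>>> rest = list(bucket.list_blobs(page_token=token))  # resumes after first_page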
@@ -254,7 +254,7 @@ def tearDownClass(cls):
blob.delete()

def test_root_level_w_delimiter(self):
iterator = self.bucket.iterator(delimiter='/')
iterator = self.bucket.list_blobs(delimiter='/')
response = iterator.get_next_page_response()
blobs = list(iterator.get_items_from_response(response))
self.assertEqual([blob.name for blob in blobs], ['file01.txt'])
@@ -263,7 +263,7 @@ def test_root_level_w_delimiter(self):
self.assertEqual(iterator.prefixes, ('parent/',))
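These hierarchy tests exercise the `prefix`/`delimiter` pair documented on `list_blobs`: names extending past the delimiter are reported via `iterator.prefixes` rather than returned as blobs. The same idiom outside the test harness, as a sketch against the fixture layout used here:

>>> iterator = bucket.list_blobs(delimiter='/', prefix='parent/')
>>> response = iterator.get_next_page_response()
>>> [blob.name for blob in iterator.get_items_from_response(response)]
['parent/file11.txt']
>>> iterator.prefixes  # immediate "subdirectories" below the prefix
('parent/child/',)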

def test_first_level(self):
iterator = self.bucket.iterator(delimiter='/', prefix='parent/')
iterator = self.bucket.list_blobs(delimiter='/', prefix='parent/')
response = iterator.get_next_page_response()
blobs = list(iterator.get_items_from_response(response))
self.assertEqual([blob.name for blob in blobs], ['parent/file11.txt'])
@@ -272,7 +272,8 @@ def test_first_level(self):
self.assertEqual(iterator.prefixes, ('parent/child/',))

def test_second_level(self):
iterator = self.bucket.iterator(delimiter='/', prefix='parent/child/')
iterator = self.bucket.list_blobs(delimiter='/',
prefix='parent/child/')
response = iterator.get_next_page_response()
blobs = list(iterator.get_items_from_response(response))
self.assertEqual([blob.name for blob in blobs],
@@ -288,8 +289,8 @@ def test_third_level(self):
# of 1024 characters in the UTF-8 encoded name:
# https://cloud.google.com/storage/docs/bucketnaming#objectnames
# Exercise a layer deeper to illustrate this.
iterator = self.bucket.iterator(delimiter='/',
prefix='parent/child/grand/')
iterator = self.bucket.list_blobs(delimiter='/',
prefix='parent/child/grand/')
response = iterator.get_next_page_response()
blobs = list(iterator.get_items_from_response(response))
self.assertEqual([blob.name for blob in blobs],