Skip to content

Commit

Permalink
Make storage upload/download have no chunk size by default.
Browse files Browse the repository at this point in the history
  • Loading branch information
dhermes committed Apr 9, 2015
1 parent f08e71b commit 31dc5ae
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 19 deletions.
48 changes: 39 additions & 9 deletions gcloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,19 @@ class Blob(_PropertyMixin):
:param bucket: The bucket to which this blob belongs. Required, unless the
implicit default bucket has been set.
:type chunk_size: integer
:param chunk_size: The size of a chunk of data whenever iterating.
This must be a multiple of 256 KB per the API
specification. Defaults to ``None`` (no chunk size is set).
:type properties: dict
:param properties: All the other data provided by Cloud Storage.
"""

CHUNK_SIZE = 1024 * 1024 # 1 MB.
"""The size of a chunk of data whenever iterating (1 MB).
This must be a multiple of 256 KB per the API specification.
"""
_CHUNK_SIZE_MULTIPLE = 256 * 1024
"""Number (256 KB, in bytes) that must divide the chunk size."""

def __init__(self, name, bucket=None):
def __init__(self, name, bucket=None, chunk_size=None):
if bucket is None:
bucket = _implicit_environ.get_default_bucket()

Expand All @@ -70,9 +72,35 @@ def __init__(self, name, bucket=None):

super(Blob, self).__init__(name=name)

self._chunk_size = None # Needs to be defined in __init__.
self.chunk_size = chunk_size # Check that setter accepts value.
self.bucket = bucket
self._acl = ObjectACL(self)

@property
def chunk_size(self):
    """Return the chunk size currently configured on this blob.

    :rtype: integer or ``NoneType``
    :returns: The value stored in ``_chunk_size``; ``None`` means no
              chunk size has been set.
    """
    return self._chunk_size

@chunk_size.setter
def chunk_size(self, value):
    """Validate and store a new chunk size for this blob.

    :type value: integer or ``NoneType``
    :param value: The chunk size to store, or ``None`` to leave the
                  chunk size unset.

    :raises: :class:`ValueError` if ``value`` is not ``None`` and is not
             a multiple of ``_CHUNK_SIZE_MULTIPLE``.
    """
    # Accept the "unset" marker and any exact multiple; everything else
    # falls through to the error below without touching stored state.
    if value is None or value % self._CHUNK_SIZE_MULTIPLE == 0:
        self._chunk_size = value
        return

    raise ValueError('Chunk size must be a multiple of %d.' % (
        self._CHUNK_SIZE_MULTIPLE,))

@staticmethod
def path_helper(bucket_path, blob_name):
"""Relative URL path for a blob.
Expand Down Expand Up @@ -226,8 +254,10 @@ def download_to_file(self, file_obj):

# Use apitools 'Download' facility.
download = transfer.Download.FromStream(file_obj, auto_transfer=False)
download.chunksize = self.CHUNK_SIZE
headers = {'Range': 'bytes=0-%d' % (self.CHUNK_SIZE - 1)}
headers = {}
if self.chunk_size is not None:
download.chunksize = self.chunk_size
headers['Range'] = 'bytes=0-%d' % (self.chunk_size - 1,)
request = http_wrapper.Request(download_url, 'GET', headers)

download.InitializeDownload(request, self.connection.http)
Expand Down Expand Up @@ -319,7 +349,7 @@ def upload_from_file(self, file_obj, rewind=False, size=None,

upload = transfer.Upload(file_obj, content_type, total_bytes,
auto_transfer=False,
chunksize=self.CHUNK_SIZE)
chunksize=self.chunk_size)

url_builder = _UrlBuilder(bucket_name=self.bucket.name,
object_name=self.name)
Expand Down
71 changes: 61 additions & 10 deletions gcloud/storage/test_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,41 @@ def test_ctor_explicit(self):
self.assertFalse(blob._acl.loaded)
self.assertTrue(blob._acl.blob is blob)

def test_chunk_size_ctor(self):
    """A ``chunk_size`` given to the constructor is stored on the blob."""
    from gcloud.storage.blob import Blob

    # Any multiple of the class constant passes the setter's validation.
    expected = Blob._CHUNK_SIZE_MULTIPLE * 10
    blob = self._makeOne('blob-name', bucket=object(),
                         chunk_size=expected)
    self.assertEqual(blob._chunk_size, expected)

def test_chunk_size_getter(self):
    """``chunk_size`` starts as ``None`` and mirrors ``_chunk_size``."""
    blob = self._makeOne('blob-name', bucket=object())
    self.assertEqual(blob.chunk_size, None)

    # Write the private attribute directly (bypassing the setter) and
    # check that the property hands back the very same object.
    sentinel = object()
    blob._chunk_size = sentinel
    self.assertTrue(blob.chunk_size is sentinel)

def test_chunk_size_setter(self):
    """Assigning a multiple of ``_CHUNK_SIZE_MULTIPLE`` updates the blob."""
    blob = self._makeOne('blob-name', bucket=object())
    self.assertEqual(blob._chunk_size, None)

    # Override the multiple on this instance so a small value is valid.
    blob._CHUNK_SIZE_MULTIPLE = 10
    new_size = 20
    blob.chunk_size = new_size
    self.assertEqual(blob._chunk_size, new_size)

def test_chunk_size_setter_bad_value(self):
    """Assigning a non-multiple of ``_CHUNK_SIZE_MULTIPLE`` raises."""
    blob = self._makeOne('blob-name', bucket=object())
    self.assertEqual(blob._chunk_size, None)

    blob._CHUNK_SIZE_MULTIPLE = 10
    # 11 is not divisible by 10; ``setattr`` goes through the property
    # setter exactly like a plain attribute assignment would.
    self.assertRaises(ValueError, setattr, blob, 'chunk_size', 11)

def test_acl_property(self):
from gcloud.storage.acl import ObjectACL
FAKE_BUCKET = _Bucket(None)
Expand Down Expand Up @@ -242,7 +277,7 @@ def test_delete(self):
blob.delete()
self.assertFalse(blob.exists())

def test_download_to_file(self):
def _download_to_file_helper(self, chunk_size=None):
from six.moves.http_client import OK
from six.moves.http_client import PARTIAL_CONTENT
from io import BytesIO
Expand All @@ -259,11 +294,19 @@ def test_download_to_file(self):
MEDIA_LINK = 'http://example.com/media/'
properties = {'mediaLink': MEDIA_LINK}
blob = self._makeOne(BLOB_NAME, bucket=bucket, properties=properties)
blob.CHUNK_SIZE = 3
if chunk_size is not None:
blob._CHUNK_SIZE_MULTIPLE = 1
blob.chunk_size = chunk_size
fh = BytesIO()
blob.download_to_file(fh)
self.assertEqual(fh.getvalue(), b'abcdef')

def test_download_to_file_default(self):
    """Run the download helper with no chunk size set on the blob."""
    self._download_to_file_helper(chunk_size=None)

def test_download_to_file_with_chunk_size(self):
    """Run the download helper with a chunk size of 3 set on the blob."""
    size = 3
    self._download_to_file_helper(chunk_size=size)

def test_download_to_filename(self):
import os
import time
Expand All @@ -284,7 +327,8 @@ def test_download_to_filename(self):
properties = {'mediaLink': MEDIA_LINK,
'updated': '2014-12-06T13:13:50.690Z'}
blob = self._makeOne(BLOB_NAME, bucket=bucket, properties=properties)
blob.CHUNK_SIZE = 3
blob._CHUNK_SIZE_MULTIPLE = 1
blob.chunk_size = 3
with NamedTemporaryFile() as f:
blob.download_to_filename(f.name)
f.flush()
Expand All @@ -311,7 +355,8 @@ def test_download_as_string(self):
MEDIA_LINK = 'http://example.com/media/'
properties = {'mediaLink': MEDIA_LINK}
blob = self._makeOne(BLOB_NAME, bucket=bucket, properties=properties)
blob.CHUNK_SIZE = 3
blob._CHUNK_SIZE_MULTIPLE = 1
blob.chunk_size = 3
fetched = blob.download_as_string()
self.assertEqual(fetched, b'abcdef')

Expand All @@ -330,7 +375,8 @@ def _upload_from_file_simple_test_helper(self, properties=None,
)
bucket = _Bucket(connection)
blob = self._makeOne(BLOB_NAME, bucket=bucket, properties=properties)
blob.CHUNK_SIZE = 5
blob._CHUNK_SIZE_MULTIPLE = 1
blob.chunk_size = 5
with NamedTemporaryFile() as fh:
fh.write(DATA)
fh.flush()
Expand Down Expand Up @@ -398,7 +444,8 @@ def test_upload_from_file_resumable(self):
)
bucket = _Bucket(connection)
blob = self._makeOne(BLOB_NAME, bucket=bucket)
blob.CHUNK_SIZE = 5
blob._CHUNK_SIZE_MULTIPLE = 1
blob.chunk_size = 5
# Set the threshold low enough that we force a resumable upload.
with _Monkey(transfer, _RESUMABLE_UPLOAD_THRESHOLD=5):
with NamedTemporaryFile() as fh:
Expand Down Expand Up @@ -455,7 +502,8 @@ def test_upload_from_file_w_slash_in_name(self):
)
bucket = _Bucket(connection)
blob = self._makeOne(BLOB_NAME, bucket=bucket)
blob.CHUNK_SIZE = 5
blob._CHUNK_SIZE_MULTIPLE = 1
blob.chunk_size = 5
with NamedTemporaryFile() as fh:
fh.write(DATA)
fh.flush()
Expand Down Expand Up @@ -502,7 +550,8 @@ def _upload_from_filename_test_helper(self, properties=None,
bucket = _Bucket(connection)
blob = self._makeOne(BLOB_NAME, bucket=bucket,
properties=properties)
blob.CHUNK_SIZE = 5
blob._CHUNK_SIZE_MULTIPLE = 1
blob.chunk_size = 5
with NamedTemporaryFile(suffix='.jpeg') as fh:
fh.write(DATA)
fh.flush()
Expand Down Expand Up @@ -565,7 +614,8 @@ def test_upload_from_string_w_bytes(self):
)
bucket = _Bucket(connection)
blob = self._makeOne(BLOB_NAME, bucket=bucket)
blob.CHUNK_SIZE = 5
blob._CHUNK_SIZE_MULTIPLE = 1
blob.chunk_size = 5
blob.upload_from_string(DATA)
rq = connection.http._requested
self.assertEqual(len(rq), 1)
Expand Down Expand Up @@ -603,7 +653,8 @@ def test_upload_from_string_w_text(self):
)
bucket = _Bucket(connection)
blob = self._makeOne(BLOB_NAME, bucket=bucket)
blob.CHUNK_SIZE = 5
blob._CHUNK_SIZE_MULTIPLE = 1
blob.chunk_size = 5
blob.upload_from_string(DATA)
rq = connection.http._requested
self.assertEqual(len(rq), 1)
Expand Down

0 comments on commit 31dc5ae

Please sign in to comment.