From adf5054fcf6aa256b7ff77b0378f66a73a7f949f Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 2 May 2017 16:13:47 -0700 Subject: [PATCH] Factoring out some Blob helpers. (#3357) This is prep work for swapping out the upload implementation to use `google-resumable-media`. --- storage/google/cloud/storage/blob.py | 124 +++++++++++++++++++-------- storage/tests/unit/test_blob.py | 70 +++++++++++++++ 2 files changed, 159 insertions(+), 35 deletions(-) diff --git a/storage/google/cloud/storage/blob.py b/storage/google/cloud/storage/blob.py index b8590164bba4..3691b0c8ba19 100644 --- a/storage/google/cloud/storage/blob.py +++ b/storage/google/cloud/storage/blob.py @@ -27,7 +27,6 @@ import time import httplib2 -import six from six.moves.urllib.parse import quote import google.auth.transport.requests @@ -50,8 +49,10 @@ _API_ACCESS_ENDPOINT = 'https://storage.googleapis.com' +_DEFAULT_CONTENT_TYPE = u'application/octet-stream' _DOWNLOAD_URL_TEMPLATE = ( u'https://www.googleapis.com/download/storage/v1{path}?alt=media') +_CONTENT_TYPE = 'contentType' class Blob(_PropertyMixin): @@ -192,7 +193,7 @@ def public_url(self): :returns: The public URL for this blob. """ return '{storage_base_url}/{bucket_name}/{quoted_name}'.format( - storage_base_url='https://storage.googleapis.com', + storage_base_url=_API_ACCESS_ENDPOINT, bucket_name=self.bucket.name, quoted_name=_quote(self.name)) @@ -269,7 +270,7 @@ def generate_signed_url(self, expiration, method='GET', if credentials is None: client = self._require_client(client) - credentials = client._base_connection.credentials + credentials = client._credentials return generate_signed_url( credentials, resource=resource, @@ -324,6 +325,23 @@ def delete(self, client=None): """ return self.bucket.delete_blob(self.name, client=client) + def _make_transport(self, client): + """Make an authenticated transport with a client's credentials. + + :type client: :class:`~google.cloud.storage.client.Client` + :param client: (Optional) The client to use. If not passed, falls back + to the ``client`` stored on the blob's bucket. + :rtype transport: + :class:`~google.auth.transport.requests.AuthorizedSession` + :returns: The transport (with credentials) that will + make authenticated requests. + """ + client = self._require_client(client) + # Create a ``requests`` transport with the client's credentials. + transport = google.auth.transport.requests.AuthorizedSession( + client._credentials) + return transport + def _get_download_url(self): """Get the download URL for the current blob. @@ -403,14 +421,9 @@ def download_to_file(self, file_obj, client=None): :raises: :class:`google.cloud.exceptions.NotFound` """ - client = self._require_client(client) - # Get the download URL. download_url = self._get_download_url() - # Get any extra headers for the request. headers = _get_encryption_headers(self._encryption_key) - # Create a ``requests`` transport with the client's credentials. - transport = google.auth.transport.requests.AuthorizedSession( - client._credentials) + transport = self._make_transport(client) try: self._do_download(transport, file_obj, download_url, headers) @@ -457,6 +470,36 @@ def download_as_string(self, client=None): self.download_to_file(string_buffer, client=client) return string_buffer.getvalue() + def _get_content_type(self, content_type, filename=None): + """Determine the content type from the current object. + + The return value will be determined in order of precedence: + + - The value passed in to this method (if not :data:`None`) + - The value stored on the current blob + - The default value ('application/octet-stream') + + :type content_type: str + :param content_type: (Optional) type of content. + + :type filename: str + :param filename: (Optional) The name of the file where the content + is stored. + + :rtype: str + :returns: Type of content gathered from the object. + """ + if content_type is None: + content_type = self.content_type + + if content_type is None and filename is not None: + content_type, _ = mimetypes.guess_type(filename) + + if content_type is None: + content_type = _DEFAULT_CONTENT_TYPE + + return content_type + def _create_upload( self, client, file_obj=None, size=None, content_type=None, chunk_size=None, strategy=None, extra_headers=None): @@ -509,8 +552,7 @@ def _create_upload( # API_BASE_URL and build_api_url). connection = client._base_connection - content_type = (content_type or self._properties.get('contentType') or - 'application/octet-stream') + content_type = self._get_content_type(content_type) headers = { 'Accept': 'application/json', @@ -575,10 +617,12 @@ def upload_from_file(self, file_obj, rewind=False, size=None, content_type=None, num_retries=6, client=None): """Upload the contents of this blob from a file-like object. - The content type of the upload will either be - - The value passed in to the function (if any) + The content type of the upload will be determined in order + of precedence: + + - The value passed in to this method (if not :data:`None`) - The value stored on the current blob - - The default value of 'application/octet-stream' + - The default value ('application/octet-stream') .. note:: The effect of uploading to an existing blob depends on the @@ -640,10 +684,7 @@ def upload_from_file(self, file_obj, rewind=False, size=None, # API_BASE_URL and build_api_url). connection = client._base_connection - # Rewind the file if desired. - if rewind: - file_obj.seek(0, os.SEEK_SET) - + _maybe_rewind(file_obj, rewind=rewind) # Get the basic stats about the file. total_bytes = size if total_bytes is None: @@ -679,18 +720,19 @@ def upload_from_file(self, file_obj, rewind=False, size=None, self._check_response_error(request, http_response) response_content = http_response.content - if not isinstance(response_content, - six.string_types): # pragma: NO COVER Python3 - response_content = response_content.decode('utf-8') + response_content = _bytes_to_unicode(response_content) self._set_properties(json.loads(response_content)) def upload_from_filename(self, filename, content_type=None, client=None): """Upload this blob's contents from the content of a named file. - The content type of the upload will either be - - The value passed in to the function (if any) + The content type of the upload will be determined in order + of precedence: + + - The value passed in to this method (if not :data:`None`) - The value stored on the current blob - - The value given by mimetypes.guess_type + - The value given by ``mimetypes.guess_type`` + - The default value ('application/octet-stream') .. note:: The effect of uploading to an existing blob depends on the @@ -714,9 +756,7 @@ def upload_from_filename(self, filename, content_type=None, client=None): :param client: Optional. The client to use. If not passed, falls back to the ``client`` stored on the blob's bucket. """ - content_type = content_type or self._properties.get('contentType') - if content_type is None: - content_type, _ = mimetypes.guess_type(filename) + content_type = self._get_content_type(content_type, filename=filename) with open(filename, 'rb') as file_obj: self.upload_from_file( @@ -749,8 +789,7 @@ def upload_from_string(self, data, content_type='text/plain', client=None): :param client: Optional. The client to use. If not passed, falls back to the ``client`` stored on the blob's bucket. """ - if isinstance(data, six.text_type): - data = data.encode('utf-8') + data = _to_bytes(data, encoding='utf-8') string_buffer = BytesIO() string_buffer.write(data) self.upload_from_file( @@ -777,10 +816,12 @@ def create_resumable_upload_session( .. _documentation on signed URLs: https://cloud.google.com/storage\ /docs/access-control/signed-urls#signing-resumable - The content type of the upload will either be - - The value passed in to the function (if any) + The content type of the upload will be determined in order + of precedence: + + - The value passed in to this method (if not :data:`None`) - The value stored on the current blob - - The default value of 'application/octet-stream' + - The default value ('application/octet-stream') .. note:: The effect of uploading to an existing blob depends on the @@ -1080,7 +1121,7 @@ def update_storage_class(self, new_class, client=None): :rtype: str or ``NoneType`` """ - content_type = _scalar_property('contentType') + content_type = _scalar_property(_CONTENT_TYPE) """HTTP 'Content-Type' header for this object. See: https://tools.ietf.org/html/rfc2616#section-14.17 and @@ -1353,8 +1394,8 @@ def _get_encryption_headers(key, source=False): key = _to_bytes(key) key_hash = hashlib.sha256(key).digest() - key_hash = base64.b64encode(key_hash).rstrip() - key = base64.b64encode(key).rstrip() + key_hash = base64.b64encode(key_hash) + key = base64.b64encode(key) if source: prefix = 'X-Goog-Copy-Source-Encryption-' @@ -1384,3 +1425,16 @@ def _quote(value): """ value = _to_bytes(value, encoding='utf-8') return quote(value, safe='') + + +def _maybe_rewind(stream, rewind=False): + """Rewind the stream if desired. + + :type stream: IO[Bytes] + :param stream: A bytes IO object open for reading. + + :type rewind: bool + :param rewind: Indicates if we should seek to the beginning of the stream. + """ + if rewind: + stream.seek(0, os.SEEK_SET) diff --git a/storage/tests/unit/test_blob.py b/storage/tests/unit/test_blob.py index cade3a458160..f1d761d79b12 100644 --- a/storage/tests/unit/test_blob.py +++ b/storage/tests/unit/test_blob.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest import mock @@ -325,6 +326,15 @@ def test_delete(self): self.assertFalse(blob.exists()) self.assertEqual(bucket._deleted, [(BLOB_NAME, None)]) + @mock.patch('google.auth.transport.requests.AuthorizedSession') + def test__make_transport(self, fake_session_factory): + client = mock.Mock(spec=[u'_credentials']) + blob = self._make_one(u'blob-name', bucket=None) + transport = blob._make_transport(client) + + self.assertIs(transport, fake_session_factory.return_value) + fake_session_factory.assert_called_once_with(client._credentials) + def test__get_download_url_with_media_link(self): blob_name = 'something.txt' bucket = mock.Mock(spec=[]) @@ -674,6 +684,32 @@ def test_download_as_string(self, fake_session_factory): self._check_session_mocks(client, fake_session_factory, media_link) + def test__get_content_type_explicit(self): + blob = self._make_one(u'blob-name', bucket=None) + + content_type = u'text/plain' + return_value = blob._get_content_type(content_type) + self.assertEqual(return_value, content_type) + + def test__get_content_type_from_blob(self): + blob = self._make_one(u'blob-name', bucket=None) + blob.content_type = u'video/mp4' + + return_value = blob._get_content_type(None) + self.assertEqual(return_value, blob.content_type) + + def test__get_content_type_from_filename(self): + blob = self._make_one(u'blob-name', bucket=None) + + return_value = blob._get_content_type(None, filename='archive.tar') + self.assertEqual(return_value, 'application/x-tar') + + def test__get_content_type_default(self): + blob = self._make_one(u'blob-name', bucket=None) + + return_value = blob._get_content_type(None) + self.assertEqual(return_value, u'application/octet-stream') + def test_upload_from_file_size_failure(self): BLOB_NAME = 'blob-name' connection = _Connection() @@ -2270,6 +2306,36 @@ def test_bad_type(self): self._call_fut(None) +class Test__maybe_rewind(unittest.TestCase): + + @staticmethod + def _call_fut(*args, **kwargs): + from google.cloud.storage.blob import _maybe_rewind + + return _maybe_rewind(*args, **kwargs) + + def test_default(self): + stream = mock.Mock(spec=[u'seek']) + ret_val = self._call_fut(stream) + self.assertIsNone(ret_val) + + stream.seek.assert_not_called() + + def test_do_not_rewind(self): + stream = mock.Mock(spec=[u'seek']) + ret_val = self._call_fut(stream, rewind=False) + self.assertIsNone(ret_val) + + stream.seek.assert_not_called() + + def test_do_rewind(self): + stream = mock.Mock(spec=[u'seek']) + ret_val = self._call_fut(stream, rewind=True) + self.assertIsNone(ret_val) + + stream.seek.assert_called_once_with(0, os.SEEK_SET) + + class _Responder(object): def __init__(self, *responses): @@ -2363,6 +2429,10 @@ def __init__(self, connection): def _connection(self): return self._base_connection + @property + def _credentials(self): + return self._base_connection.credentials + class _Stream(object): _closed = False