Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(storage): add opt-in raw download support #9572

Merged
merged 11 commits into from
Nov 5, 2019
2 changes: 1 addition & 1 deletion bigquery/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
dependencies = [
'enum34; python_version < "3.4"',
"google-cloud-core >= 1.0.3, < 2.0dev",
"google-resumable-media >= 0.3.1, != 0.4.0, < 0.5.0dev",
"google-resumable-media >= 0.3.1, != 0.4.0, < 0.6.0dev",
"protobuf >= 3.6.0",
]
extras = {
Expand Down
73 changes: 64 additions & 9 deletions storage/google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
from google import resumable_media
from google.resumable_media.requests import ChunkedDownload
from google.resumable_media.requests import Download
from google.resumable_media.requests import RawDownload
from google.resumable_media.requests import RawChunkedDownload
from google.resumable_media.requests import MultipartUpload
from google.resumable_media.requests import ResumableUpload

Expand Down Expand Up @@ -591,7 +593,14 @@ def _get_download_url(self):
return _add_query_parameters(base_url, name_value_pairs)

def _do_download(
self, transport, file_obj, download_url, headers, start=None, end=None
self,
transport,
file_obj,
download_url,
headers,
start=None,
end=None,
raw_download=False,
):
"""Perform a download without any error handling.

Expand All @@ -617,14 +626,30 @@ def _do_download(

:type end: int
:param end: Optional, The last byte in a range to be downloaded.

:type raw_download: bool
:param raw_download:
Optional, If true, download the object without any expansion.
"""
if self.chunk_size is None:
download = Download(
if raw_download:
klass = RawDownload
else:
klass = Download

download = klass(
download_url, stream=file_obj, headers=headers, start=start, end=end
)
download.consume(transport)

else:
download = ChunkedDownload(

if raw_download:
klass = RawChunkedDownload
else:
klass = ChunkedDownload

download = klass(
download_url,
self.chunk_size,
file_obj,
Expand All @@ -636,7 +661,9 @@ def _do_download(
while not download.finished:
download.consume_next_chunk(transport)

def download_to_file(self, file_obj, client=None, start=None, end=None):
def download_to_file(
self, file_obj, client=None, start=None, end=None, raw_download=False
):
"""Download the contents of this blob into a file-like object.

.. note::
Expand Down Expand Up @@ -676,6 +703,10 @@ def download_to_file(self, file_obj, client=None, start=None, end=None):
:type end: int
:param end: Optional, The last byte in a range to be downloaded.

:type raw_download: bool
:param raw_download:
Optional, If true, download the object without any expansion.

:raises: :class:`google.cloud.exceptions.NotFound`
"""
download_url = self._get_download_url()
Expand All @@ -684,11 +715,15 @@ def download_to_file(self, file_obj, client=None, start=None, end=None):

transport = self._get_transport(client)
try:
self._do_download(transport, file_obj, download_url, headers, start, end)
self._do_download(
transport, file_obj, download_url, headers, start, end, raw_download
)
except resumable_media.InvalidResponse as exc:
_raise_from_invalid_response(exc)

def download_to_filename(self, filename, client=None, start=None, end=None):
def download_to_filename(
self, filename, client=None, start=None, end=None, raw_download=False
):
"""Download the contents of this blob into a named file.

If :attr:`user_project` is set on the bucket, bills the API request
Expand All @@ -708,11 +743,21 @@ def download_to_filename(self, filename, client=None, start=None, end=None):
:type end: int
:param end: Optional, The last byte in a range to be downloaded.

:type raw_download: bool
:param raw_download:
Optional, If true, download the object without any expansion.

:raises: :class:`google.cloud.exceptions.NotFound`
"""
try:
with open(filename, "wb") as file_obj:
self.download_to_file(file_obj, client=client, start=start, end=end)
self.download_to_file(
file_obj,
client=client,
start=start,
end=end,
raw_download=raw_download,
)
except resumable_media.DataCorruption:
# Delete the corrupt downloaded file.
os.remove(filename)
Expand All @@ -723,7 +768,7 @@ def download_to_filename(self, filename, client=None, start=None, end=None):
mtime = time.mktime(updated.timetuple())
os.utime(file_obj.name, (mtime, mtime))

def download_as_string(self, client=None, start=None, end=None):
def download_as_string(self, client=None, start=None, end=None, raw_download=False):
"""Download the contents of this blob as a bytes object.

If :attr:`user_project` is set on the bucket, bills the API request
Expand All @@ -740,12 +785,22 @@ def download_as_string(self, client=None, start=None, end=None):
:type end: int
:param end: Optional, The last byte in a range to be downloaded.

:type raw_download: bool
:param raw_download:
Optional, If true, download the object without any expansion.

:rtype: bytes
:returns: The data stored in this blob.
:raises: :class:`google.cloud.exceptions.NotFound`
"""
string_buffer = BytesIO()
self.download_to_file(string_buffer, client=client, start=start, end=end)
self.download_to_file(
string_buffer,
client=client,
start=start,
end=end,
raw_download=raw_download,
)
return string_buffer.getvalue()

def _get_content_type(self, content_type, filename=None):
Expand Down
2 changes: 1 addition & 1 deletion storage/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
dependencies = [
"google-auth >= 1.2.0",
"google-cloud-core >= 1.0.3, < 2.0dev",
"google-resumable-media >= 0.3.1, != 0.4.0, < 0.5dev",
"google-resumable-media >= 0.5.0, < 0.6dev",
]
extras = {}

Expand Down
19 changes: 19 additions & 0 deletions storage/tests/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

import base64
import datetime
import gzip
import hashlib
import io
import os
import re
import tempfile
Expand Down Expand Up @@ -620,6 +622,23 @@ def test_download_blob_w_uri(self):

self.assertEqual(file_contents, stored_contents)

def test_upload_gzip_encoded_download_raw(self):
payload = b"DEADBEEF" * 1000
raw_stream = io.BytesIO()
with gzip.GzipFile(fileobj=raw_stream, mode="wb") as gzip_stream:
gzip_stream.write(payload)
zipped = raw_stream.getvalue()

blob = self.bucket.blob("test_gzipped.gz")
blob.content_encoding = "gzip"
blob.upload_from_file(raw_stream, rewind=True)

expanded = blob.download_as_string()
self.assertEqual(expanded, payload)

raw = blob.download_as_string(raw_download=True)
self.assertEqual(raw, zipped)


class TestUnicode(unittest.TestCase):
@unittest.skipIf(RUNNING_IN_VPCSC, "Test is not VPCSC compatible.")
Expand Down
Loading