From 439573e7266d8e309f9a1d0364fa91379e3a7b21 Mon Sep 17 00:00:00 2001
From: Travis Hobrla
Date: Fri, 19 Feb 2016 14:42:03 -0800
Subject: [PATCH] Fix gsutil interactions with transcoding and compression

This change sends accept-encoding: gzip to the service only when the
requested object has content-encoding: gzip or the entire range of the
object is being requested. This prevents compressive transcoding by the
service. Compressive transcoding can cause the service to respond to a
range request with all of the bytes of the object, which, while legal
according to the HTTP spec, breaks resumability.

The change also adds cache-control: no-transform to objects that are
uploaded (via the -z or -Z options) with gzipped content-encoding. This
ensures that objects that are stored doubly-compressed are served as-is,
without removing the first layer of compression. This is necessary
because removing the first layer of compression would cause the content
served to the user to differ from the cloud-stored hashes for the
doubly-compressed object.

Fixes https://github.com/GoogleCloudPlatform/gsutil/issues/324
---
 gslib/boto_translation.py    |  8 +++++---
 gslib/cat_helper.py          |  3 +++
 gslib/cloud_api.py           |  2 ++
 gslib/cloud_api_delegator.py |  2 ++
 gslib/copy_helper.py         | 36 +++++++++++++++++++++++++++++++++++++++---------
 gslib/daisy_chain_wrapper.py | 30 +++++++++++++++++++++++++++++++++-----
 gslib/gcs_json_api.py        | 32 +++++++++++++++++++++++-----------
 gslib/tests/test_cp.py       | 13 +++++++++++++
 gslib/tests/util.py          |  7 +++++++
 gslib/util.py                | 38 ++++++++++++++++++++++++++++++++++++++----------
 10 files changed, 133 insertions(+), 38 deletions(-)

diff --git a/gslib/boto_translation.py b/gslib/boto_translation.py
index 8cd603ca23..a92e03ff8d 100644
--- a/gslib/boto_translation.py
+++ b/gslib/boto_translation.py
@@ -84,6 +84,7 @@
 from gslib.translation_helper import LifecycleTranslation
 from gslib.translation_helper import REMOVE_CORS_CONFIG
 from gslib.translation_helper import S3MarkerAclFromObjectMetadata
+from gslib.util import AddAcceptEncodingGzipIfNeeded
 from gslib.util import ConfigureNoOpAuthIfNeeded
 from gslib.util import DEFAULT_FILE_BUFFER_SIZE
 from gslib.util import GetMaxRetryDelay
@@ -424,6 +425,7 @@ def _CurryDigester(self, digester_object):
   def GetObjectMedia(
       self, bucket_name, object_name, download_stream, provider=None,
       generation=None, object_size=None,
+      compressed_encoding=False,
       download_strategy=CloudApi.DownloadStrategy.ONE_SHOT, start_byte=0,
       end_byte=None, progress_callback=None, serialization_data=None,
       digesters=None):
@@ -431,8 +433,8 @@ def GetObjectMedia(
     # This implementation will get the object metadata first if we don't pass it
     # in via serialization_data.
     headers = self._CreateBaseHeaders()
-    if 'accept-encoding' not in headers:
-      headers['accept-encoding'] = 'gzip'
+    AddAcceptEncodingGzipIfNeeded(
+        headers, compressed_encoding=compressed_encoding)
     if end_byte is not None:
       headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte)
     elif start_byte > 0:
@@ -582,7 +584,7 @@ def _PerformResumableDownload(self, fp, start_byte, end_byte, key,
         fp.flush()
         # Download succeeded.
         return
-      except retryable_exceptions, e:
+      except retryable_exceptions, e:  # pylint: disable=catching-non-exception
        if debug >= 1:
          self.logger.info('Caught exception (%s)', repr(e))
        if isinstance(e, IOError) and e.errno == errno.EPIPE:
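
The header policy described in the commit message can be illustrated with the
helper this patch adds in gslib/util.py (sketch only; the header values shown
are what AddAcceptEncodingGzipIfNeeded produces, the range values are made up):

    headers = {'range': 'bytes=1048576-'}   # e.g. resuming from 1 MiB
    AddAcceptEncodingGzipIfNeeded(headers, compressed_encoding=False)
    # headers unchanged: without accept-encoding the service will not
    # compressively transcode, so the requested byte range is honored.

    headers = {'range': 'bytes=1048576-'}
    AddAcceptEncodingGzipIfNeeded(headers, compressed_encoding=True)
    # headers now also contain {'accept-encoding': 'gzip'}: the object is
    # stored gzipped, so its stored bytes are served as-is and resuming and
    # hash validation remain possible.
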
diff --git a/gslib/cat_helper.py b/gslib/cat_helper.py
index 6ba4a3f6ae..cc839330f6 100644
--- a/gslib/cat_helper.py
+++ b/gslib/cat_helper.py
@@ -20,6 +20,7 @@
 from gslib.exception import CommandException
 from gslib.exception import NO_URLS_MATCHED_TARGET
+from gslib.util import ObjectIsGzipEncoded
 from gslib.wildcard_iterator import StorageUrlFromString
@@ -70,8 +71,10 @@ def CatUrlStrings(self, url_strings, show_header=False, start_byte=0,
           cat_object = blr.root_object
           storage_url = StorageUrlFromString(blr.url_string)
           if storage_url.IsCloudUrl():
+            compressed_encoding = ObjectIsGzipEncoded(cat_object)
             self.command_obj.gsutil_api.GetObjectMedia(
                 cat_object.bucket, cat_object.name, cat_outfd,
+                compressed_encoding=compressed_encoding,
                 start_byte=start_byte, end_byte=end_byte,
                 object_size=cat_object.size, generation=storage_url.generation,
                 provider=storage_url.scheme)
diff --git a/gslib/cloud_api.py b/gslib/cloud_api.py
index ad044e32b2..763407ea62 100644
--- a/gslib/cloud_api.py
+++ b/gslib/cloud_api.py
@@ -258,6 +258,7 @@ class DownloadStrategy(object):
   def GetObjectMedia(self, bucket_name, object_name, download_stream,
                      provider=None, generation=None, object_size=None,
+                     compressed_encoding=False,
                      download_strategy=DownloadStrategy.ONE_SHOT, start_byte=0,
                      end_byte=None, progress_callback=None,
                      serialization_data=None, digesters=None):
@@ -271,6 +272,7 @@ def GetObjectMedia(self, bucket_name, object_name, download_stream,
           class-wide default is used.
       generation: Generation of the object to retrieve.
       object_size: Total size of the object being downloaded.
+      compressed_encoding: If true, object is stored with a compressed encoding.
       download_strategy: Cloud API download strategy to use for download.
       start_byte: Starting point for download (for resumable downloads and
                   range requests). Can be set to negative to request a range
diff --git a/gslib/cloud_api_delegator.py b/gslib/cloud_api_delegator.py
index 4f1ac29f07..e15af9c61a 100644
--- a/gslib/cloud_api_delegator.py
+++ b/gslib/cloud_api_delegator.py
@@ -217,11 +217,13 @@ def PatchObjectMetadata(self, bucket_name, object_name, metadata,
   def GetObjectMedia(
       self, bucket_name, object_name, download_stream, provider=None,
       generation=None, object_size=None,
+      compressed_encoding=False,
       download_strategy=CloudApi.DownloadStrategy.ONE_SHOT, start_byte=0,
       end_byte=None, progress_callback=None, serialization_data=None,
       digesters=None):
     return self._GetApi(provider).GetObjectMedia(
         bucket_name, object_name, download_stream,
+        compressed_encoding=compressed_encoding,
         download_strategy=download_strategy, start_byte=start_byte,
         end_byte=end_byte, generation=generation, object_size=object_size,
         progress_callback=progress_callback,
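
Callers outside this patch would thread the new keyword argument through in the
same way cat_helper.py does above; a minimal sketch (obj_metadata, out_stream
and generation are illustrative names, not identifiers from the patch):

    compressed_encoding = ObjectIsGzipEncoded(obj_metadata)
    gsutil_api.GetObjectMedia(
        obj_metadata.bucket, obj_metadata.name, out_stream,
        compressed_encoding=compressed_encoding,
        start_byte=0, end_byte=None, object_size=obj_metadata.size,
        generation=generation, provider='gs')
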
diff --git a/gslib/copy_helper.py b/gslib/copy_helper.py
index 42fde9e570..9ab8ef7f9f 100644
--- a/gslib/copy_helper.py
+++ b/gslib/copy_helper.py
@@ -112,6 +112,7 @@
 from gslib.util import IsCloudSubdirPlaceholder
 from gslib.util import MakeHumanReadable
 from gslib.util import MIN_SIZE_COMPUTE_LOGGING
+from gslib.util import ObjectIsGzipEncoded
 from gslib.util import ResumableThreshold
 from gslib.util import TEN_MIB
 from gslib.util import UsingCrcmodExtension
@@ -143,7 +144,7 @@
 
 # For debugging purposes; if True, files and objects that fail hash validation
 # will be saved with the below suffix appended.
-_RENAME_ON_HASH_MISMATCH = False
+_RENAME_ON_HASH_MISMATCH = True
 _RENAME_ON_HASH_MISMATCH_SUFFIX = '_corrupt'
 
 PARALLEL_UPLOAD_TEMP_NAMESPACE = (
@@ -1081,7 +1082,7 @@ def _ShouldDoParallelCompositeUpload(logger, allow_splitting, src_url, dst_url,
         '"parallel_composite_upload_threshold" value in your .boto '
         'configuration file. However, note that if you do this large files '
         'will be uploaded as '
-        '`composite objects `_,'
+        '`composite objects `_,'  # pylint: disable=line-too-long
         'which means that any user who downloads such objects will need to '
         'have a compiled crcmod installed (see "gsutil help crcmod"). This '
         'is because without a compiled crcmod, computing checksums on '
@@ -1579,11 +1580,23 @@ def _UploadFileToObject(src_url, src_obj_filestream, src_obj_size,
   upload_size = src_obj_size
   zipped_file = False
   if (gzip_exts == GZIP_ALL_FILES or
-       (gzip_exts and len(fname_parts) > 1 and fname_parts[-1] in gzip_exts)):
+      (gzip_exts and len(fname_parts) > 1 and fname_parts[-1] in gzip_exts)):
     upload_url, upload_size = _CompressFileForUpload(
         src_url, src_obj_filestream, src_obj_size, logger)
     upload_stream = open(upload_url.object_name, 'rb')
     dst_obj_metadata.contentEncoding = 'gzip'
+    # If we're sending an object with gzip encoding, it's possible it also
+    # has an incompressible content type. Google Cloud Storage will remove
+    # the top layer of compression when serving the object, which would cause
+    # the served content not to match the CRC32C/MD5 hashes stored and make
+    # integrity checking impossible. Therefore we set cache control to
+    # no-transform to ensure it is served in its original form. The caveat is
+    # that to read this object, other clients must then support
+    # accept-encoding:gzip.
+    if not dst_obj_metadata.cacheControl:
+      dst_obj_metadata.cacheControl = 'no-transform'
+    elif 'no-transform' not in dst_obj_metadata.cacheControl.lower():
+      dst_obj_metadata.cacheControl += ',no-transform'
     zipped_file = True
 
   elapsed_time = None
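
The cache-control handling just added merges no-transform into any existing
value rather than overwriting it; a standalone sketch of the same logic
(_WithNoTransform is an illustrative helper, not part of the patch):

    def _WithNoTransform(cache_control):
      # Mirrors the branches added to _UploadFileToObject above.
      if not cache_control:
        return 'no-transform'
      if 'no-transform' not in cache_control.lower():
        return cache_control + ',no-transform'
      return cache_control

    _WithNoTransform(None)                   # 'no-transform'
    _WithNoTransform('public,max-age=3600')  # 'public,max-age=3600,no-transform'
    _WithNoTransform('No-Transform')         # unchanged; the check ignores case
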
@@ -1708,8 +1721,7 @@ def _GetDownloadFile(dst_url, src_obj_metadata, logger):
   # server sends decompressed bytes for a file that is stored compressed
   # (double compressed case), there is no way we can validate the hash and
   # we will fail our hash check for the object.
-  if (src_obj_metadata.contentEncoding and
-      src_obj_metadata.contentEncoding.lower().endswith('gzip')):
+  if ObjectIsGzipEncoded(src_obj_metadata):
     need_to_unzip = True
     download_file_name = _GetDownloadTempZipFileName(dst_url)
     logger.info(
@@ -2059,8 +2071,7 @@
                                     component_lengths[i])
 
   bytes_transferred = 0
-  expect_gzip = (src_obj_metadata.contentEncoding and
-                 src_obj_metadata.contentEncoding.lower().endswith('gzip'))
+  expect_gzip = ObjectIsGzipEncoded(src_obj_metadata)
   for cp_result in cp_results:
     bytes_transferred += cp_result.bytes_transferred
     server_gzip = (cp_result.server_encoding and
@@ -2183,6 +2194,8 @@ def _DownloadObjectToFileResumable(src_url, src_obj_metadata, dst_url,
     fp = SlicedDownloadFileWrapper(fp, tracker_file_name, src_obj_metadata,
                                    start_byte, end_byte)
 
+  compressed_encoding = ObjectIsGzipEncoded(src_obj_metadata)
+
   # TODO: With gzip encoding (which may occur on-the-fly and not be part of
   # the object's metadata), when we request a range to resume, it's possible
   # that the server will just resend the entire object, which means our
@@ -2194,6 +2207,7 @@
     server_encoding = gsutil_api.GetObjectMedia(
         src_url.bucket_name, src_url.object_name, fp,
         start_byte=download_start_byte, end_byte=end_byte,
+        compressed_encoding=compressed_encoding,
         generation=src_url.generation, object_size=src_obj_metadata.size,
         download_strategy=CloudApi.DownloadStrategy.RESUMABLE,
         provider=src_url.scheme, serialization_data=serialization_data,
@@ -2572,9 +2586,13 @@ def _CopyObjToObjDaisyChainMode(src_url, src_obj_metadata, dst_url,
     with open(global_copy_helper_opts.test_callback_file, 'rb') as test_fp:
       progress_callback = pickle.loads(test_fp.read()).call
 
+  compressed_encoding = ObjectIsGzipEncoded(src_obj_metadata)
+
   start_time = time.time()
-  upload_fp = DaisyChainWrapper(src_url, src_obj_metadata.size, gsutil_api,
-                                progress_callback=progress_callback)
+  upload_fp = DaisyChainWrapper(
+      src_url, src_obj_metadata.size, gsutil_api,
+      compressed_encoding=compressed_encoding,
+      progress_callback=progress_callback)
   uploaded_object = None
   if src_obj_metadata.size == 0:
     # Resumable uploads of size 0 are not supported.
diff --git a/gslib/daisy_chain_wrapper.py b/gslib/daisy_chain_wrapper.py
index 4e5717df80..4435341cf5 100644
--- a/gslib/daisy_chain_wrapper.py
+++ b/gslib/daisy_chain_wrapper.py
@@ -15,6 +15,7 @@
 """Wrapper for use in daisy-chained copies."""
 
 from collections import deque
+from contextlib import contextmanager
 import os
 import threading
 import time
@@ -60,6 +61,14 @@ def write(self, data):  # pylint: disable=invalid-name
       self.daisy_chain_wrapper.bytes_buffered += data_len
 
 
+@contextmanager
+def AcquireLockWithTimeout(lock, timeout):
+  result = lock.acquire(timeout=timeout)
+  yield result
+  if result:
+    lock.release()
+
+
 class DaisyChainWrapper(object):
   """Wrapper class for daisy-chaining a cloud download to an upload.
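
A usage sketch for the AcquireLockWithTimeout helper added above (some_lock is
an illustrative name; the lock's acquire() must accept a timeout argument):

    with AcquireLockWithTimeout(some_lock, 30) as acquired:
      if acquired:
        pass  # lock is held for the body and released automatically on exit
      else:
        pass  # acquisition timed out after 30 seconds; lock was never held
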
@@ -73,7 +82,8 @@ class DaisyChainWrapper(object):
       used.
   """
 
-  def __init__(self, src_url, src_obj_size, gsutil_api, progress_callback=None,
+  def __init__(self, src_url, src_obj_size, gsutil_api,
+               compressed_encoding=False, progress_callback=None,
                download_chunk_size=_DEFAULT_DOWNLOAD_CHUNK_SIZE):
     """Initializes the daisy chain wrapper.
@@ -81,6 +91,7 @@ def __init__(self, src_url, src_obj_size, gsutil_api, progress_callback=None,
       src_url: Source CloudUrl to copy from.
       src_obj_size: Size of source object.
       gsutil_api: gsutil Cloud API to use for the copy.
+      compressed_encoding: If true, source object has content-encoding: gzip.
       progress_callback: Optional callback function for progress notifications
           for the download thread. Receives calls with arguments
           (bytes_transferred, total_size).
@@ -114,6 +125,7 @@ def __init__(self, src_url, src_obj_size, gsutil_api, progress_callback=None,
 
     self.src_obj_size = src_obj_size
     self.src_url = src_url
+    self.compressed_encoding = compressed_encoding
 
     # This is safe to use the upload and download thread because the download
     # thread calls only GetObjectMedia, which creates a new HTTP connection
@@ -126,6 +138,7 @@ def __init__(self, src_url, src_obj_size, gsutil_api, progress_callback=None,
     self.download_exception = None
     self.download_thread = None
     self.progress_callback = progress_callback
+    self.thread_started = threading.Event()
     self.stop_download = threading.Event()
     self.StartDownloadThread(progress_callback=self.progress_callback)
@@ -150,10 +163,12 @@ def PerformDownload(start_byte, progress_callback):
       # object to support seek() and tell() which requires coordination with
       # the upload.
       try:
+        self.thread_started.set()
         while start_byte + self._download_chunk_size < self.src_obj_size:
           self.gsutil_api.GetObjectMedia(
               self.src_url.bucket_name, self.src_url.object_name,
-              BufferWrapper(self), start_byte=start_byte,
+              BufferWrapper(self), compressed_encoding=self.compressed_encoding,
+              start_byte=start_byte,
               end_byte=start_byte + self._download_chunk_size - 1,
               generation=self.src_url.generation, object_size=self.src_obj_size,
               download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
@@ -165,8 +180,9 @@ def PerformDownload(start_byte, progress_callback):
         start_byte += self._download_chunk_size
         self.gsutil_api.GetObjectMedia(
             self.src_url.bucket_name, self.src_url.object_name,
-            BufferWrapper(self), start_byte=start_byte,
-            generation=self.src_url.generation, object_size=self.src_obj_size,
+            BufferWrapper(self), compressed_encoding=self.compressed_encoding,
+            start_byte=start_byte, generation=self.src_url.generation,
+            object_size=self.src_obj_size,
             download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
             provider=self.src_url.scheme, progress_callback=progress_callback)
       # We catch all exceptions here because we want to store them.
@@ -181,6 +197,7 @@ def PerformDownload(start_byte, progress_callback):
         target=PerformDownload,
         args=(start_byte, progress_callback))
     self.download_thread.start()
+    self.thread_started.wait()
 
   def read(self, amt=None):  # pylint: disable=invalid-name
     """Exposes a stream from the in-memory buffer to the upload."""
@@ -197,11 +214,14 @@ def read(self, amt=None):  # pylint: disable=invalid-name
       with self.lock:
         if self.buffer:
           break
-      with self.download_exception_lock:
+      if AcquireLockWithTimeout(self.download_exception_lock, 30):
        if self.download_exception:
          # Download thread died, so we will never recover. Raise the
          # exception that killed it.
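
The thread_started event added above is a small startup handshake between the
constructor and the download thread; the same pattern in isolation (names are
illustrative):

    import threading

    started = threading.Event()

    def _Download():
      started.set()  # signal that the worker thread is actually running
      # ... issue chunked GetObjectMedia calls into the buffer ...

    worker = threading.Thread(target=_Download)
    worker.start()
    started.wait()   # return only once the worker is alive, so later
                     # download_thread.is_alive() checks in read() are meaningful
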
          raise self.download_exception  # pylint: disable=raising-bad-type
+      else:
+        if not self.download_thread.is_alive():
+          raise Exception('Download thread died suddenly.')
       # Buffer was empty, yield thread priority so the download thread can fill.
       time.sleep(0)
     with self.lock:
diff --git a/gslib/gcs_json_api.py b/gslib/gcs_json_api.py
index 48f8e14bb9..cb593c5a3a 100644
--- a/gslib/gcs_json_api.py
+++ b/gslib/gcs_json_api.py
@@ -35,10 +35,6 @@
 import boto
 from boto import config
 from gcs_oauth2_boto_plugin import oauth2_helper
-import httplib2
-import oauth2client
-from oauth2client import devshell
-from oauth2client import multistore_file
 
 from gslib.cloud_api import AccessDeniedException
 from gslib.cloud_api import ArgumentException
@@ -79,6 +75,7 @@
 from gslib.translation_helper import DEFAULT_CONTENT_TYPE
 from gslib.translation_helper import PRIVATE_DEFAULT_OBJ_ACL
 from gslib.translation_helper import REMOVE_CORS_CONFIG
+from gslib.util import AddAcceptEncodingGzipIfNeeded
 from gslib.util import GetBotoConfigFileList
 from gslib.util import GetCertsFile
 from gslib.util import GetCredentialStoreFilename
@@ -90,6 +87,11 @@
 from gslib.util import GetPrintableExceptionString
 from gslib.util import JsonResumableChunkSizeDefined
 
+import httplib2
+import oauth2client
+from oauth2client import devshell
+from oauth2client import multistore_file
+
 # Implementation supports only 'gs' URLs, so provider is unused.
 # pylint: disable=unused-argument
@@ -679,6 +681,7 @@ def GetObjectMetadata(self, bucket_name, object_name, generation=None,
   def GetObjectMedia(
       self, bucket_name, object_name, download_stream, provider=None,
       generation=None, object_size=None,
+      compressed_encoding=False,
       download_strategy=CloudApi.DownloadStrategy.ONE_SHOT, start_byte=0,
       end_byte=None, progress_callback=None, serialization_data=None,
       digesters=None):
@@ -741,13 +744,16 @@ def GetObjectMedia(
         return self._PerformResumableDownload(
             bucket_name, object_name, download_stream, apitools_request,
             apitools_download, bytes_downloaded_container,
+            compressed_encoding=compressed_encoding,
             generation=generation, start_byte=start_byte, end_byte=end_byte,
             serialization_data=serialization_data)
       else:
         return self._PerformDownload(
             bucket_name, object_name, download_stream, apitools_request,
-            apitools_download, generation=generation, start_byte=start_byte,
-            end_byte=end_byte, serialization_data=serialization_data)
+            apitools_download, generation=generation,
+            compressed_encoding=compressed_encoding,
+            start_byte=start_byte, end_byte=end_byte,
+            serialization_data=serialization_data)
     except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
       self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                        object_name=object_name,
@@ -756,14 +762,16 @@ def GetObjectMedia(
   def _PerformResumableDownload(
       self, bucket_name, object_name, download_stream, apitools_request,
       apitools_download, bytes_downloaded_container, generation=None,
-      start_byte=0, end_byte=None, serialization_data=None):
+      compressed_encoding=False, start_byte=0, end_byte=None,
+      serialization_data=None):
     retries = 0
     last_progress_byte = start_byte
     while retries <= self.num_retries:
       try:
         return self._PerformDownload(
             bucket_name, object_name, download_stream, apitools_request,
-            apitools_download, generation=generation, start_byte=start_byte,
+            apitools_download, generation=generation,
+            compressed_encoding=compressed_encoding, start_byte=start_byte,
             end_byte=end_byte, serialization_data=serialization_data)
       except HTTP_TRANSFER_EXCEPTIONS, e:
         self._ValidateHttpAccessTokenRefreshError(e)
@@ -789,8 +797,8 @@ def _PerformResumableDownload(
 
   def _PerformDownload(
       self, bucket_name, object_name, download_stream, apitools_request,
-      apitools_download, generation=None, start_byte=0, end_byte=None,
-      serialization_data=None):
+      apitools_download, generation=None, compressed_encoding=False,
+      start_byte=0, end_byte=None, serialization_data=None):
     if not serialization_data:
       try:
         self.api_client.objects.Get(apitools_request,
@@ -816,9 +824,11 @@ def _NoOpCallback(unused_response, unused_download_object):
     # Since bytes_http is created in this function, we don't get the
     # user-agent header from api_client's http automatically.
     additional_headers = {
-        'accept-encoding': 'gzip',
         'user-agent': self.api_client.user_agent
     }
+    AddAcceptEncodingGzipIfNeeded(additional_headers,
+                                  compressed_encoding=compressed_encoding)
+
     self._AddPerfTraceTokenToHeaders(additional_headers)
 
     if start_byte or end_byte is not None:
diff --git a/gslib/tests/test_cp.py b/gslib/tests/test_cp.py
index 2abcd281f2..4d6f8af7a1 100644
--- a/gslib/tests/test_cp.py
+++ b/gslib/tests/test_cp.py
@@ -51,6 +51,7 @@
 from gslib.tests.testcase.base import NotParallelizable
 from gslib.tests.testcase.integration_testcase import SkipForS3
 from gslib.tests.util import GenerationFromURI as urigen
+from gslib.tests.util import HAS_GS_PORT
 from gslib.tests.util import HAS_S3_CREDS
 from gslib.tests.util import ObjectToURI as suri
 from gslib.tests.util import SequentialAndParallelTransfer
@@ -864,6 +865,9 @@ def _Check():
       self.assertEqual(acl_json, new_acl_json)
     _Check()
 
+  @unittest.skipUnless(
+      not HAS_GS_PORT, 'gs_port is defined in config which can cause '
+      'problems when uploading and downloading to the same local host port')
   def test_daisy_chain_cp_download_failure(self):
     """Tests cp with the -D option when the download thread dies."""
     bucket1_uri = self.CreateBucket()
@@ -1812,6 +1816,15 @@ def test_cp_resumable_download_tracker_file_not_matches(self):
     if os.path.exists(tracker_filename):
       os.unlink(tracker_filename)
 
+  def test_cp_double_gzip(self):
+    """Tests that upload and download of a doubly-gzipped file succeeds."""
+    bucket_uri = self.CreateBucket()
+    fpath = self.CreateTempFile(file_name='looks-zipped.gz', contents='foo')
+    self.RunGsUtil(['-h', 'content-type:application/gzip', 'cp', '-Z',
+                    suri(fpath), suri(bucket_uri, 'foo')])
+    self.RunGsUtil(['cp', suri(bucket_uri, 'foo'), fpath])
+
+
   @SequentialAndParallelTransfer
   def test_cp_resumable_download_gzip(self):
     """Tests that download can be resumed successfully with a gzipped file."""
diff --git a/gslib/tests/util.py b/gslib/tests/util.py
index d4da90a658..7b67d2d930 100644
--- a/gslib/tests/util.py
+++ b/gslib/tests/util.py
@@ -55,6 +55,13 @@ def _HasGSHost():
 
 HAS_GS_HOST = _HasGSHost()
 
 
+def _HasGSPort():
+  return boto.config.get('Credentials', 'gs_port', None) is not None
+
+
+HAS_GS_PORT = _HasGSPort()
+
+
 def _UsingJSONApi():
   return boto.config.get('GSUtil', 'prefer_api', 'json').upper() != 'XML'
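
Given the changes above, the object uploaded by the new test_cp_double_gzip
test would be expected to carry roughly the following metadata (illustrative,
not output from a real run):

    Content-Type:     application/gzip   # from the -h header in the test
    Content-Encoding: gzip               # added by cp -Z
    Cache-Control:    no-transform       # added by this change

so the doubly-compressed bytes are served unmodified and the stored hashes
stay valid for the bytes the client receives.
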
diff --git a/gslib/util.py b/gslib/util.py
index 10f03539dc..5d54f33e54 100644
--- a/gslib/util.py
+++ b/gslib/util.py
@@ -40,9 +40,6 @@
 from boto.gs.connection import GSConnection
 from boto.provider import Provider
 from boto.pyami.config import BotoConfigLocations
-import httplib2
-from oauth2client.client import HAS_CRYPTO
-from retry_decorator import retry_decorator
 
 import gslib
 from gslib.exception import CommandException
@@ -53,6 +50,10 @@
 from gslib.translation_helper import S3_DELETE_MARKER_GUID
 from gslib.translation_helper import S3_MARKER_GUIDS
 
+import httplib2
+from oauth2client.client import HAS_CRYPTO
+from retry_decorator import retry_decorator
+
 # Detect platform types.
 PLATFORM = str(sys.platform).lower()
 IS_WINDOWS = 'win32' in PLATFORM
@@ -188,6 +189,30 @@ def UsingCrcmodExtension(crcmod):
           getattr(crcmod.crcmod, '_usingExtension', None)))
 
 
+def ObjectIsGzipEncoded(obj_metadata):
+  """Returns true if source apitools Object has gzip content-encoding."""
+  return (obj_metadata.contentEncoding and
+          obj_metadata.contentEncoding.lower().endswith('gzip'))
+
+
+def AddAcceptEncodingGzipIfNeeded(headers_dict, compressed_encoding=False):
+  if compressed_encoding:
+    # If we send accept-encoding: gzip with a range request, the service
+    # may respond with the whole object, which would be bad for resuming.
+    # So only accept gzip encoding if the object we are downloading has
+    # a gzip content encoding.
+    # TODO: If we want to support compressive transcoding fully in the client,
+    # condition on whether we are requesting the entire range of the object.
+    # In this case, we can accept the first bytes of the object compressively
+    # transcoded, but we must perform data integrity checking on bytes after
+    # they are decompressed on-the-fly, and any connection break must be
+    # resumed without compressive transcoding since we cannot specify an
+    # offset. We would also need to ensure that hashes for downloaded data
+    # from objects stored with content-encoding:gzip continue to be calculated
+    # prior to our own on-the-fly decompression so they match the stored hashes.
+    headers_dict['accept-encoding'] = 'gzip'
+
+
 def CheckFreeSpace(path):
   """Return path/drive free space (in bytes)."""
   if IS_WINDOWS:
@@ -711,13 +736,6 @@ def ResumableThreshold():
   return config.getint('GSUtil', 'resumable_threshold', EIGHT_MIB)
 
 
-def AddAcceptEncoding(headers):
-  """Adds accept-encoding:gzip to the dictionary of headers."""
-  # If Accept-Encoding is not already set, set it to enable gzip.
-  if 'accept-encoding' not in headers:
-    headers['accept-encoding'] = 'gzip'
-
-
 # pylint: disable=too-many-statements
 def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
   """Print full info for given object (like what displays for gsutil ls -L).
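
Rough behavior of ObjectIsGzipEncoded as defined above (any object exposing a
contentEncoding attribute works, e.g. an apitools Object message; the stand-in
class is illustrative):

    class _FakeMetadata(object):
      def __init__(self, encoding):
        self.contentEncoding = encoding

    ObjectIsGzipEncoded(_FakeMetadata('gzip'))      # True
    ObjectIsGzipEncoded(_FakeMetadata('x-gzip'))    # True (endswith check)
    ObjectIsGzipEncoded(_FakeMetadata('identity'))  # False
    ObjectIsGzipEncoded(_FakeMetadata(None))        # falsy (None short-circuits)
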