From 439573e7266d8e309f9a1d0364fa91379e3a7b21 Mon Sep 17 00:00:00 2001
From: Travis Hobrla
Date: Fri, 19 Feb 2016 14:42:03 -0800
Subject: [PATCH] Fix gsutil interactions with transcoding and compression

This change sends accept-encoding: gzip to the service only when the
requested object has content-encoding: gzip or the entire range of the
object is being requested. This prevents compressive transcoding by the
service. Compressive transcoding can cause the service to respond to a
range request with all of the bytes of the object, which, while legal
according to the HTTP spec, breaks resumability.

The change also adds cache-control: no-transform to objects that are
uploaded (via the -z or -Z options) with gzipped content-encoding. This
ensures that objects that are stored doubly-compressed are served as-is,
without removing the first layer of compression. This is necessary
because removing the first layer of compression would cause the content
served to the user to differ from the cloud-stored hashes for the
doubly-compressed object.

Fixes https://github.com/GoogleCloudPlatform/gsutil/issues/324
---
 gslib/boto_translation.py    |  8 +++++---
 gslib/cat_helper.py          |  3 +++
 gslib/cloud_api.py           |  2 ++
 gslib/cloud_api_delegator.py |  2 ++
 gslib/copy_helper.py         | 36 +++++++++++++++++++++++++++++++++++++++---------
 gslib/daisy_chain_wrapper.py | 30 +++++++++++++++++++++++++++++++++-----
 gslib/gcs_json_api.py        | 32 +++++++++++++++++++++++-----------
 gslib/tests/test_cp.py       | 13 +++++++++++++
 gslib/tests/util.py          |  7 +++++++
 gslib/util.py                | 38 ++++++++++++++++++++++++++++++++++++++----------
 10 files changed, 133 insertions(+), 38 deletions(-)

diff --git a/gslib/boto_translation.py b/gslib/boto_translation.py
index 8cd603ca23..a92e03ff8d 100644
--- a/gslib/boto_translation.py
+++ b/gslib/boto_translation.py
@@ -84,6 +84,7 @@
 from gslib.translation_helper import LifecycleTranslation
 from gslib.translation_helper import REMOVE_CORS_CONFIG
 from gslib.translation_helper import S3MarkerAclFromObjectMetadata
+from gslib.util import AddAcceptEncodingGzipIfNeeded
 from gslib.util import ConfigureNoOpAuthIfNeeded
 from gslib.util import DEFAULT_FILE_BUFFER_SIZE
 from gslib.util import GetMaxRetryDelay
@@ -424,6 +425,7 @@ def _CurryDigester(self, digester_object):
   def GetObjectMedia(
       self, bucket_name, object_name, download_stream, provider=None,
       generation=None, object_size=None,
+      compressed_encoding=False,
       download_strategy=CloudApi.DownloadStrategy.ONE_SHOT, start_byte=0,
       end_byte=None, progress_callback=None, serialization_data=None,
       digesters=None):
@@ -431,8 +433,8 @@ def GetObjectMedia(
     # This implementation will get the object metadata first if we don't pass it
     # in via serialization_data.
     headers = self._CreateBaseHeaders()
-    if 'accept-encoding' not in headers:
-      headers['accept-encoding'] = 'gzip'
+    AddAcceptEncodingGzipIfNeeded(
+        headers, compressed_encoding=compressed_encoding)
     if end_byte is not None:
       headers['range'] = 'bytes=%s-%s' % (start_byte, end_byte)
     elif start_byte > 0:
@@ -582,7 +584,7 @@ def _PerformResumableDownload(self, fp, start_byte, end_byte, key,
         fp.flush()
         # Download succeeded.
         return
-      except retryable_exceptions, e:
+      except retryable_exceptions, e:  # pylint: disable=catching-non-exception
        if debug >= 1:
          self.logger.info('Caught exception (%s)', repr(e))
        if isinstance(e, IOError) and e.errno == errno.EPIPE:
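
The header policy described in the commit message can be illustrated with the
helper this patch adds in gslib/util.py (sketch only; the header values shown
are what AddAcceptEncodingGzipIfNeeded produces, the range values are made up):

    headers = {'range': 'bytes=1048576-'}   # e.g. resuming from 1 MiB
    AddAcceptEncodingGzipIfNeeded(headers, compressed_encoding=False)
    # headers unchanged: without accept-encoding the service will not
    # compressively transcode, so the requested byte range is honored.

    headers = {'range': 'bytes=1048576-'}
    AddAcceptEncodingGzipIfNeeded(headers, compressed_encoding=True)
    # headers now also contain {'accept-encoding': 'gzip'}: the object is
    # stored gzipped, so its stored bytes are served as-is and resuming and
    # hash validation remain possible.
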
diff --git a/gslib/cat_helper.py b/gslib/cat_helper.py
index 6ba4a3f6ae..cc839330f6 100644
--- a/gslib/cat_helper.py
+++ b/gslib/cat_helper.py
@@ -20,6 +20,7 @@
 from gslib.exception import CommandException
 from gslib.exception import NO_URLS_MATCHED_TARGET
+from gslib.util import ObjectIsGzipEncoded
 from gslib.wildcard_iterator import StorageUrlFromString
@@ -70,8 +71,10 @@ def CatUrlStrings(self, url_strings, show_header=False, start_byte=0,
           cat_object = blr.root_object
           storage_url = StorageUrlFromString(blr.url_string)
           if storage_url.IsCloudUrl():
+            compressed_encoding = ObjectIsGzipEncoded(cat_object)
             self.command_obj.gsutil_api.GetObjectMedia(
                 cat_object.bucket, cat_object.name, cat_outfd,
+                compressed_encoding=compressed_encoding,
                 start_byte=start_byte, end_byte=end_byte,
                 object_size=cat_object.size, generation=storage_url.generation,
                 provider=storage_url.scheme)
diff --git a/gslib/cloud_api.py b/gslib/cloud_api.py
index ad044e32b2..763407ea62 100644
--- a/gslib/cloud_api.py
+++ b/gslib/cloud_api.py
@@ -258,6 +258,7 @@ class DownloadStrategy(object):
   def GetObjectMedia(self, bucket_name, object_name, download_stream,
                      provider=None, generation=None, object_size=None,
+                     compressed_encoding=False,
                      download_strategy=DownloadStrategy.ONE_SHOT, start_byte=0,
                      end_byte=None, progress_callback=None,
                      serialization_data=None, digesters=None):
@@ -271,6 +272,7 @@ def GetObjectMedia(self, bucket_name, object_name, download_stream,
           class-wide default is used.
       generation: Generation of the object to retrieve.
       object_size: Total size of the object being downloaded.
+      compressed_encoding: If true, object is stored with a compressed encoding.
       download_strategy: Cloud API download strategy to use for download.
       start_byte: Starting point for download (for resumable downloads and
                   range requests). Can be set to negative to request a range
diff --git a/gslib/cloud_api_delegator.py b/gslib/cloud_api_delegator.py
index 4f1ac29f07..e15af9c61a 100644
--- a/gslib/cloud_api_delegator.py
+++ b/gslib/cloud_api_delegator.py
@@ -217,11 +217,13 @@ def PatchObjectMetadata(self, bucket_name, object_name, metadata,
   def GetObjectMedia(
       self, bucket_name, object_name, download_stream, provider=None,
       generation=None, object_size=None,
+      compressed_encoding=False,
       download_strategy=CloudApi.DownloadStrategy.ONE_SHOT, start_byte=0,
       end_byte=None, progress_callback=None, serialization_data=None,
       digesters=None):
     return self._GetApi(provider).GetObjectMedia(
         bucket_name, object_name, download_stream,
+        compressed_encoding=compressed_encoding,
         download_strategy=download_strategy, start_byte=start_byte,
         end_byte=end_byte, generation=generation, object_size=object_size,
         progress_callback=progress_callback,
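
Callers outside this patch would thread the new keyword argument through in the
same way cat_helper.py does above; a minimal sketch (obj_metadata, out_stream
and generation are illustrative names, not identifiers from the patch):

    compressed_encoding = ObjectIsGzipEncoded(obj_metadata)
    gsutil_api.GetObjectMedia(
        obj_metadata.bucket, obj_metadata.name, out_stream,
        compressed_encoding=compressed_encoding,
        start_byte=0, end_byte=None, object_size=obj_metadata.size,
        generation=generation, provider='gs')
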
diff --git a/gslib/copy_helper.py b/gslib/copy_helper.py
index 42fde9e570..9ab8ef7f9f 100644
--- a/gslib/copy_helper.py
+++ b/gslib/copy_helper.py
@@ -112,6 +112,7 @@
 from gslib.util import IsCloudSubdirPlaceholder
 from gslib.util import MakeHumanReadable
 from gslib.util import MIN_SIZE_COMPUTE_LOGGING
+from gslib.util import ObjectIsGzipEncoded
 from gslib.util import ResumableThreshold
 from gslib.util import TEN_MIB
 from gslib.util import UsingCrcmodExtension
@@ -143,7 +144,7 @@
 
 # For debugging purposes; if True, files and objects that fail hash validation
 # will be saved with the below suffix appended.
-_RENAME_ON_HASH_MISMATCH = False
+_RENAME_ON_HASH_MISMATCH = True
 _RENAME_ON_HASH_MISMATCH_SUFFIX = '_corrupt'
 
 PARALLEL_UPLOAD_TEMP_NAMESPACE = (
@@ -1081,7 +1082,7 @@ def _ShouldDoParallelCompositeUpload(logger, allow_splitting, src_url, dst_url,
         '"parallel_composite_upload_threshold" value in your .boto '
         'configuration file. However, note that if you do this large files '
         'will be uploaded as '
-        '`composite objects `_,'
+        '`composite objects `_,'  # pylint: disable=line-too-long
         'which means that any user who downloads such objects will need to '
         'have a compiled crcmod installed (see "gsutil help crcmod"). This '
         'is because without a compiled crcmod, computing checksums on '
@@ -1579,11 +1580,23 @@ def _UploadFileToObject(src_url, src_obj_filestream, src_obj_size,
   upload_size = src_obj_size
   zipped_file = False
   if (gzip_exts == GZIP_ALL_FILES or
-       (gzip_exts and len(fname_parts) > 1 and fname_parts[-1] in gzip_exts)):
+      (gzip_exts and len(fname_parts) > 1 and fname_parts[-1] in gzip_exts)):
     upload_url, upload_size = _CompressFileForUpload(
         src_url, src_obj_filestream, src_obj_size, logger)
     upload_stream = open(upload_url.object_name, 'rb')
     dst_obj_metadata.contentEncoding = 'gzip'
+    # If we're sending an object with gzip encoding, it's possible it also
+    # has an incompressible content type. Google Cloud Storage will remove
+    # the top layer of compression when serving the object, which would cause
+    # the served content not to match the CRC32C/MD5 hashes stored and make
+    # integrity checking impossible. Therefore we set cache control to
+    # no-transform to ensure it is served in its original form. The caveat is
+    # that to read this object, other clients must then support
+    # accept-encoding:gzip.
+    if not dst_obj_metadata.cacheControl:
+      dst_obj_metadata.cacheControl = 'no-transform'
+    elif 'no-transform' not in dst_obj_metadata.cacheControl.lower():
+      dst_obj_metadata.cacheControl += ',no-transform'
     zipped_file = True
 
   elapsed_time = None
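
The cache-control handling just added merges no-transform into any existing
value rather than overwriting it; a standalone sketch of the same logic
(_WithNoTransform is an illustrative helper, not part of the patch):

    def _WithNoTransform(cache_control):
      # Mirrors the branches added to _UploadFileToObject above.
      if not cache_control:
        return 'no-transform'
      if 'no-transform' not in cache_control.lower():
        return cache_control + ',no-transform'
      return cache_control

    _WithNoTransform(None)                   # 'no-transform'
    _WithNoTransform('public,max-age=3600')  # 'public,max-age=3600,no-transform'
    _WithNoTransform('No-Transform')         # unchanged; the check ignores case
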
@@ -1708,8 +1721,7 @@ def _GetDownloadFile(dst_url, src_obj_metadata, logger):
   # server sends decompressed bytes for a file that is stored compressed
   # (double compressed case), there is no way we can validate the hash and
   # we will fail our hash check for the object.
-  if (src_obj_metadata.contentEncoding and
-      src_obj_metadata.contentEncoding.lower().endswith('gzip')):
+  if ObjectIsGzipEncoded(src_obj_metadata):
     need_to_unzip = True
     download_file_name = _GetDownloadTempZipFileName(dst_url)
     logger.info(
@@ -2059,8 +2071,7 @@
                                     component_lengths[i])
 
   bytes_transferred = 0
-  expect_gzip = (src_obj_metadata.contentEncoding and
-                 src_obj_metadata.contentEncoding.lower().endswith('gzip'))
+  expect_gzip = ObjectIsGzipEncoded(src_obj_metadata)
   for cp_result in cp_results:
     bytes_transferred += cp_result.bytes_transferred
     server_gzip = (cp_result.server_encoding and
@@ -2183,6 +2194,8 @@ def _DownloadObjectToFileResumable(src_url, src_obj_metadata, dst_url,
     fp = SlicedDownloadFileWrapper(fp, tracker_file_name, src_obj_metadata,
                                    start_byte, end_byte)
 
+  compressed_encoding = ObjectIsGzipEncoded(src_obj_metadata)
+
   # TODO: With gzip encoding (which may occur on-the-fly and not be part of
   # the object's metadata), when we request a range to resume, it's possible
   # that the server will just resend the entire object, which means our
@@ -2194,6 +2207,7 @@
     server_encoding = gsutil_api.GetObjectMedia(
         src_url.bucket_name, src_url.object_name, fp,
         start_byte=download_start_byte, end_byte=end_byte,
+        compressed_encoding=compressed_encoding,
         generation=src_url.generation, object_size=src_obj_metadata.size,
         download_strategy=CloudApi.DownloadStrategy.RESUMABLE,
         provider=src_url.scheme, serialization_data=serialization_data,
@@ -2572,9 +2586,13 @@ def _CopyObjToObjDaisyChainMode(src_url, src_obj_metadata, dst_url,
     with open(global_copy_helper_opts.test_callback_file, 'rb') as test_fp:
       progress_callback = pickle.loads(test_fp.read()).call
 
+  compressed_encoding = ObjectIsGzipEncoded(src_obj_metadata)
+
   start_time = time.time()
-  upload_fp = DaisyChainWrapper(src_url, src_obj_metadata.size, gsutil_api,
-                                progress_callback=progress_callback)
+  upload_fp = DaisyChainWrapper(
+      src_url, src_obj_metadata.size, gsutil_api,
+      compressed_encoding=compressed_encoding,
+      progress_callback=progress_callback)
   uploaded_object = None
   if src_obj_metadata.size == 0:
     # Resumable uploads of size 0 are not supported.
diff --git a/gslib/daisy_chain_wrapper.py b/gslib/daisy_chain_wrapper.py
index 4e5717df80..4435341cf5 100644
--- a/gslib/daisy_chain_wrapper.py
+++ b/gslib/daisy_chain_wrapper.py
@@ -15,6 +15,7 @@
 """Wrapper for use in daisy-chained copies."""
 
 from collections import deque
+from contextlib import contextmanager
 import os
 import threading
 import time
@@ -60,6 +61,14 @@ def write(self, data):  # pylint: disable=invalid-name
       self.daisy_chain_wrapper.bytes_buffered += data_len
 
 
+@contextmanager
+def AcquireLockWithTimeout(lock, timeout):
+  result = lock.acquire(timeout=timeout)
+  yield result
+  if result:
+    lock.release()
+
+
 class DaisyChainWrapper(object):
   """Wrapper class for daisy-chaining a cloud download to an upload.
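
A usage sketch for the AcquireLockWithTimeout helper added above (some_lock is
an illustrative name; the lock's acquire() must accept a timeout argument):

    with AcquireLockWithTimeout(some_lock, 30) as acquired:
      if acquired:
        pass  # lock is held for the body and released automatically on exit
      else:
        pass  # acquisition timed out after 30 seconds; lock was never held
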
@@ -73,7 +82,8 @@ class DaisyChainWrapper(object):
       used.
   """
 
-  def __init__(self, src_url, src_obj_size, gsutil_api, progress_callback=None,
+  def __init__(self, src_url, src_obj_size, gsutil_api,
+               compressed_encoding=False, progress_callback=None,
                download_chunk_size=_DEFAULT_DOWNLOAD_CHUNK_SIZE):
     """Initializes the daisy chain wrapper.
@@ -81,6 +91,7 @@ def __init__(self, src_url, src_obj_size, gsutil_api, progress_callback=None,
       src_url: Source CloudUrl to copy from.
       src_obj_size: Size of source object.
       gsutil_api: gsutil Cloud API to use for the copy.
+      compressed_encoding: If true, source object has content-encoding: gzip.
       progress_callback: Optional callback function for progress notifications
           for the download thread. Receives calls with arguments
           (bytes_transferred, total_size).
@@ -114,6 +125,7 @@ def __init__(self, src_url, src_obj_size, gsutil_api, progress_callback=None,
 
     self.src_obj_size = src_obj_size
     self.src_url = src_url
+    self.compressed_encoding = compressed_encoding
 
     # This is safe to use the upload and download thread because the download
     # thread calls only GetObjectMedia, which creates a new HTTP connection
@@ -126,6 +138,7 @@ def __init__(self, src_url, src_obj_size, gsutil_api, progress_callback=None,
     self.download_exception = None
     self.download_thread = None
     self.progress_callback = progress_callback
+    self.thread_started = threading.Event()
     self.stop_download = threading.Event()
     self.StartDownloadThread(progress_callback=self.progress_callback)
@@ -150,10 +163,12 @@ def PerformDownload(start_byte, progress_callback):
       # object to support seek() and tell() which requires coordination with
       # the upload.
       try:
+        self.thread_started.set()
         while start_byte + self._download_chunk_size < self.src_obj_size:
           self.gsutil_api.GetObjectMedia(
               self.src_url.bucket_name, self.src_url.object_name,
-              BufferWrapper(self), start_byte=start_byte,
+              BufferWrapper(self), compressed_encoding=self.compressed_encoding,
+              start_byte=start_byte,
               end_byte=start_byte + self._download_chunk_size - 1,
               generation=self.src_url.generation, object_size=self.src_obj_size,
               download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
@@ -165,8 +180,9 @@ def PerformDownload(start_byte, progress_callback):
         start_byte += self._download_chunk_size
         self.gsutil_api.GetObjectMedia(
             self.src_url.bucket_name, self.src_url.object_name,
-            BufferWrapper(self), start_byte=start_byte,
-            generation=self.src_url.generation, object_size=self.src_obj_size,
+            BufferWrapper(self), compressed_encoding=self.compressed_encoding,
+            start_byte=start_byte, generation=self.src_url.generation,
+            object_size=self.src_obj_size,
             download_strategy=CloudApi.DownloadStrategy.ONE_SHOT,
             provider=self.src_url.scheme, progress_callback=progress_callback)
       # We catch all exceptions here because we want to store them.
@@ -181,6 +197,7 @@ def PerformDownload(start_byte, progress_callback):
         target=PerformDownload,
         args=(start_byte, progress_callback))
     self.download_thread.start()
+    self.thread_started.wait()
 
   def read(self, amt=None):  # pylint: disable=invalid-name
     """Exposes a stream from the in-memory buffer to the upload."""
@@ -197,11 +214,14 @@ def read(self, amt=None):  # pylint: disable=invalid-name
       with self.lock:
         if self.buffer:
           break
-      with self.download_exception_lock:
+      if AcquireLockWithTimeout(self.download_exception_lock, 30):
        if self.download_exception:
          # Download thread died, so we will never recover. Raise the
          # exception that killed it.
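
The thread_started event added above is a small startup handshake between the
constructor and the download thread; the same pattern in isolation (names are
illustrative):

    import threading

    started = threading.Event()

    def _Download():
      started.set()  # signal that the worker thread is actually running
      # ... issue chunked GetObjectMedia calls into the buffer ...

    worker = threading.Thread(target=_Download)
    worker.start()
    started.wait()   # return only once the worker is alive, so later
                     # download_thread.is_alive() checks in read() are meaningful
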
          raise self.download_exception  # pylint: disable=raising-bad-type
+      else:
+        if not self.download_thread.is_alive():
+          raise Exception('Download thread died suddenly.')
       # Buffer was empty, yield thread priority so the download thread can fill.
       time.sleep(0)
     with self.lock:
diff --git a/gslib/gcs_json_api.py b/gslib/gcs_json_api.py
index 48f8e14bb9..cb593c5a3a 100644
--- a/gslib/gcs_json_api.py
+++ b/gslib/gcs_json_api.py
@@ -35,10 +35,6 @@
 import boto
 from boto import config
 from gcs_oauth2_boto_plugin import oauth2_helper
-import httplib2
-import oauth2client
-from oauth2client import devshell
-from oauth2client import multistore_file
 
 from gslib.cloud_api import AccessDeniedException
 from gslib.cloud_api import ArgumentException
@@ -79,6 +75,7 @@
 from gslib.translation_helper import DEFAULT_CONTENT_TYPE
 from gslib.translation_helper import PRIVATE_DEFAULT_OBJ_ACL
 from gslib.translation_helper import REMOVE_CORS_CONFIG
+from gslib.util import AddAcceptEncodingGzipIfNeeded
 from gslib.util import GetBotoConfigFileList
 from gslib.util import GetCertsFile
 from gslib.util import GetCredentialStoreFilename
@@ -90,6 +87,11 @@
 from gslib.util import GetPrintableExceptionString
 from gslib.util import JsonResumableChunkSizeDefined
 
+import httplib2
+import oauth2client
+from oauth2client import devshell
+from oauth2client import multistore_file
+
 # Implementation supports only 'gs' URLs, so provider is unused.
 # pylint: disable=unused-argument
@@ -679,6 +681,7 @@ def GetObjectMetadata(self, bucket_name, object_name, generation=None,
   def GetObjectMedia(
       self, bucket_name, object_name, download_stream, provider=None,
       generation=None, object_size=None,
+      compressed_encoding=False,
       download_strategy=CloudApi.DownloadStrategy.ONE_SHOT, start_byte=0,
       end_byte=None, progress_callback=None, serialization_data=None,
       digesters=None):
@@ -741,13 +744,16 @@ def GetObjectMedia(
         return self._PerformResumableDownload(
             bucket_name, object_name, download_stream, apitools_request,
             apitools_download, bytes_downloaded_container,
+            compressed_encoding=compressed_encoding,
             generation=generation, start_byte=start_byte, end_byte=end_byte,
             serialization_data=serialization_data)
       else:
         return self._PerformDownload(
             bucket_name, object_name, download_stream, apitools_request,
-            apitools_download, generation=generation, start_byte=start_byte,
-            end_byte=end_byte, serialization_data=serialization_data)
+            apitools_download, generation=generation,
+            compressed_encoding=compressed_encoding,
+            start_byte=start_byte, end_byte=end_byte,
+            serialization_data=serialization_data)
     except TRANSLATABLE_APITOOLS_EXCEPTIONS, e:
       self._TranslateExceptionAndRaise(e, bucket_name=bucket_name,
                                        object_name=object_name,
@@ -756,14 +762,16 @@ def GetObjectMedia(
   def _PerformResumableDownload(
       self, bucket_name, object_name, download_stream, apitools_request,
       apitools_download, bytes_downloaded_container, generation=None,
-      start_byte=0, end_byte=None, serialization_data=None):
+      compressed_encoding=False, start_byte=0, end_byte=None,
+      serialization_data=None):
     retries = 0
     last_progress_byte = start_byte
     while retries <= self.num_retries:
       try:
         return self._PerformDownload(
             bucket_name, object_name, download_stream, apitools_request,
-            apitools_download, generation=generation, start_byte=start_byte,
+            apitools_download, generation=generation,
+            compressed_encoding=compressed_encoding, start_byte=start_byte,
             end_byte=end_byte, serialization_data=serialization_data)
       except HTTP_TRANSFER_EXCEPTIONS, e:
         self._ValidateHttpAccessTokenRefreshError(e)
@@ -789,8 +797,8 @@ def _PerformResumableDownload(
 
   def _PerformDownload(
       self, bucket_name, object_name, download_stream, apitools_request,
-      apitools_download, generation=None, start_byte=0, end_byte=None,
-      serialization_data=None):
+      apitools_download, generation=None, compressed_encoding=False,
+      start_byte=0, end_byte=None, serialization_data=None):
     if not serialization_data:
       try:
         self.api_client.objects.Get(apitools_request,
@@ -816,9 +824,11 @@ def _NoOpCallback(unused_response, unused_download_object):
     # Since bytes_http is created in this function, we don't get the
     # user-agent header from api_client's http automatically.
     additional_headers = {
-        'accept-encoding': 'gzip',
         'user-agent': self.api_client.user_agent
     }
+    AddAcceptEncodingGzipIfNeeded(additional_headers,
+                                  compressed_encoding=compressed_encoding)
+
     self._AddPerfTraceTokenToHeaders(additional_headers)
 
     if start_byte or end_byte is not None:
diff --git a/gslib/tests/test_cp.py b/gslib/tests/test_cp.py
index 2abcd281f2..4d6f8af7a1 100644
--- a/gslib/tests/test_cp.py
+++ b/gslib/tests/test_cp.py
@@ -51,6 +51,7 @@
 from gslib.tests.testcase.base import NotParallelizable
 from gslib.tests.testcase.integration_testcase import SkipForS3
 from gslib.tests.util import GenerationFromURI as urigen
+from gslib.tests.util import HAS_GS_PORT
 from gslib.tests.util import HAS_S3_CREDS
 from gslib.tests.util import ObjectToURI as suri
 from gslib.tests.util import SequentialAndParallelTransfer
@@ -864,6 +865,9 @@ def _Check():
       self.assertEqual(acl_json, new_acl_json)
     _Check()
 
+  @unittest.skipUnless(
+      not HAS_GS_PORT, 'gs_port is defined in config which can cause '
+      'problems when uploading and downloading to the same local host port')
   def test_daisy_chain_cp_download_failure(self):
     """Tests cp with the -D option when the download thread dies."""
     bucket1_uri = self.CreateBucket()
@@ -1812,6 +1816,15 @@ def test_cp_resumable_download_tracker_file_not_matches(self):
     if os.path.exists(tracker_filename):
       os.unlink(tracker_filename)
 
+  def test_cp_double_gzip(self):
+    """Tests that upload and download of a doubly-gzipped file succeeds."""
+    bucket_uri = self.CreateBucket()
+    fpath = self.CreateTempFile(file_name='looks-zipped.gz', contents='foo')
+    self.RunGsUtil(['-h', 'content-type:application/gzip', 'cp', '-Z',
+                    suri(fpath), suri(bucket_uri, 'foo')])
+    self.RunGsUtil(['cp', suri(bucket_uri, 'foo'), fpath])
+
+
   @SequentialAndParallelTransfer
   def test_cp_resumable_download_gzip(self):
     """Tests that download can be resumed successfully with a gzipped file."""
diff --git a/gslib/tests/util.py b/gslib/tests/util.py
index d4da90a658..7b67d2d930 100644
--- a/gslib/tests/util.py
+++ b/gslib/tests/util.py
@@ -55,6 +55,13 @@ def _HasGSHost():
 
 HAS_GS_HOST = _HasGSHost()
 
 
+def _HasGSPort():
+  return boto.config.get('Credentials', 'gs_port', None) is not None
+
+
+HAS_GS_PORT = _HasGSPort()
+
+
 def _UsingJSONApi():
   return boto.config.get('GSUtil', 'prefer_api', 'json').upper() != 'XML'
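
Given the changes above, the object uploaded by the new test_cp_double_gzip
test would be expected to carry roughly the following metadata (illustrative,
not output from a real run):

    Content-Type:     application/gzip   # from the -h header in the test
    Content-Encoding: gzip               # added by cp -Z
    Cache-Control:    no-transform       # added by this change

so the doubly-compressed bytes are served unmodified and the stored hashes
stay valid for the bytes the client receives.
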
diff --git a/gslib/util.py b/gslib/util.py
index 10f03539dc..5d54f33e54 100644
--- a/gslib/util.py
+++ b/gslib/util.py
@@ -40,9 +40,6 @@
 from boto.gs.connection import GSConnection
 from boto.provider import Provider
 from boto.pyami.config import BotoConfigLocations
-import httplib2
-from oauth2client.client import HAS_CRYPTO
-from retry_decorator import retry_decorator
 
 import gslib
 from gslib.exception import CommandException
@@ -53,6 +50,10 @@
 from gslib.translation_helper import S3_DELETE_MARKER_GUID
 from gslib.translation_helper import S3_MARKER_GUIDS
 
+import httplib2
+from oauth2client.client import HAS_CRYPTO
+from retry_decorator import retry_decorator
+
 # Detect platform types.
 PLATFORM = str(sys.platform).lower()
 IS_WINDOWS = 'win32' in PLATFORM
@@ -188,6 +189,30 @@ def UsingCrcmodExtension(crcmod):
           getattr(crcmod.crcmod, '_usingExtension', None)))
 
 
+def ObjectIsGzipEncoded(obj_metadata):
+  """Returns true if source apitools Object has gzip content-encoding."""
+  return (obj_metadata.contentEncoding and
+          obj_metadata.contentEncoding.lower().endswith('gzip'))
+
+
+def AddAcceptEncodingGzipIfNeeded(headers_dict, compressed_encoding=False):
+  if compressed_encoding:
+    # If we send accept-encoding: gzip with a range request, the service
+    # may respond with the whole object, which would be bad for resuming.
+    # So only accept gzip encoding if the object we are downloading has
+    # a gzip content encoding.
+    # TODO: If we want to support compressive transcoding fully in the client,
+    # condition on whether we are requesting the entire range of the object.
+    # In this case, we can accept the first bytes of the object compressively
+    # transcoded, but we must perform data integrity checking on bytes after
+    # they are decompressed on-the-fly, and any connection break must be
+    # resumed without compressive transcoding since we cannot specify an
+    # offset. We would also need to ensure that hashes for downloaded data
+    # from objects stored with content-encoding:gzip continue to be calculated
+    # prior to our own on-the-fly decompression so they match the stored hashes.
+    headers_dict['accept-encoding'] = 'gzip'
+
+
 def CheckFreeSpace(path):
   """Return path/drive free space (in bytes)."""
   if IS_WINDOWS:
@@ -711,13 +736,6 @@ def ResumableThreshold():
   return config.getint('GSUtil', 'resumable_threshold', EIGHT_MIB)
 
 
-def AddAcceptEncoding(headers):
-  """Adds accept-encoding:gzip to the dictionary of headers."""
-  # If Accept-Encoding is not already set, set it to enable gzip.
-  if 'accept-encoding' not in headers:
-    headers['accept-encoding'] = 'gzip'
-
-
 # pylint: disable=too-many-statements
 def PrintFullInfoAboutObject(bucket_listing_ref, incl_acl=True):
   """Print full info for given object (like what displays for gsutil ls -L).
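
Rough behavior of ObjectIsGzipEncoded as defined above (any object exposing a
contentEncoding attribute works, e.g. an apitools Object message; the stand-in
class is illustrative):

    class _FakeMetadata(object):
      def __init__(self, encoding):
        self.contentEncoding = encoding

    ObjectIsGzipEncoded(_FakeMetadata('gzip'))      # True
    ObjectIsGzipEncoded(_FakeMetadata('x-gzip'))    # True (endswith check)
    ObjectIsGzipEncoded(_FakeMetadata('identity'))  # False
    ObjectIsGzipEncoded(_FakeMetadata(None))        # falsy (None short-circuits)
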