Skip to content

Commit

Permalink
Fixes KeyError when retrieving empty but existing object from S3 (#771)
Browse files Browse the repository at this point in the history
* fix: Fixes KeyError when retrieving empty file from S3

* Add test
  • Loading branch information
Darkheir authored Sep 6, 2023
1 parent 3bbfc5d commit 085b711
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 6 deletions.
26 changes: 20 additions & 6 deletions smart_open/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,10 +324,13 @@ def open(

def _get(client, bucket, key, version, range_string):
try:
params = dict(Bucket=bucket, Key=key)
if version:
return client.get_object(Bucket=bucket, Key=key, VersionId=version, Range=range_string)
else:
return client.get_object(Bucket=bucket, Key=key, Range=range_string)
params["VersionId"] = version
if range_string:
params["Range"] = range_string

return client.get_object(**params)
except botocore.client.ClientError as error:
wrapped_error = IOError(
'unable to access bucket: %r key: %r version: %r error: %s' % (
Expand Down Expand Up @@ -447,8 +450,19 @@ def _open_body(self, start=None, stop=None):
error_response = _unwrap_ioerror(ioe)
if error_response is None or error_response.get('Code') != _OUT_OF_RANGE:
raise
self._position = self._content_length = int(error_response['ActualObjectSize'])
self._body = io.BytesIO()
try:
self._position = self._content_length = int(error_response['ActualObjectSize'])
self._body = io.BytesIO()
except KeyError:
response = _get(
self._client,
self._bucket,
self._key,
self._version_id,
None,
)
self._position = self._content_length = response["ContentLength"]
self._body = response["Body"]
else:
#
# Keep track of how many times boto3's built-in retry mechanism
Expand All @@ -461,7 +475,7 @@ def _open_body(self, start=None, stop=None):
self,
response['ResponseMetadata']['RetryAttempts'],
)
units, start, stop, length = smart_open.utils.parse_content_range(response['ContentRange'])
_, start, stop, length = smart_open.utils.parse_content_range(response['ContentRange'])
self._content_length = length
self._position = start
self._body = response['Body']
Expand Down
11 changes: 11 additions & 0 deletions smart_open/tests/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ def mock_get(*args, **kwargs):
error_response['ActualObjectSize'] = actual_size
error_response['Code'] = 'InvalidRange'
error_response['Message'] = 'The requested range is not satisfiable'
if actual_size is None:
error_response.pop('ActualObjectSize', None)
raise

with mock.patch('smart_open.s3._get', new=mock_get):
Expand Down Expand Up @@ -399,6 +401,15 @@ def test_read_empty_file(self):

self.assertEqual(data, b'')

def test_read_empty_file_no_actual_size(self):
    """Read an empty object when the range error omits ``ActualObjectSize``.

    ``patch_invalid_range_response(None)`` is used so that the mocked
    ``InvalidRange`` error response carries no ``ActualObjectSize`` entry.
    The reader is expected to issue exactly two ``GetObject`` calls and to
    return an empty byte string.
    """
    # Store a zero-byte object under the shared test bucket/key.
    empty_object = _resource('s3').Object(BUCKET_NAME, KEY_NAME)
    empty_object.put(Body=b'')

    # Context managers are entered in the same order as the combined
    # ``with a, b:`` form: API-call accounting first, then the patch.
    with self.assertApiCalls(GetObject=2):
        with patch_invalid_range_response(None):
            with smart_open.s3.Reader(BUCKET_NAME, KEY_NAME) as reader:
                payload = reader.read()

    self.assertEqual(payload, b'')


@moto.mock_s3
class MultipartWriterTest(unittest.TestCase):
Expand Down

0 comments on commit 085b711

Please sign in to comment.