Skip to content

Commit

Permalink
Adaptive buffer size by default
Browse files Browse the repository at this point in the history
  • Loading branch information
belltailjp committed Dec 28, 2021
1 parent fda9ae6 commit 6405725
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 9 deletions.
20 changes: 11 additions & 9 deletions pfio/v2/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from .fs import FS, FileStat


DEFAULT_BUFFER_SIZE = 32 * 1024 * 1024
DEFAULT_MAX_BUFFER_SIZE = 16 * 1024 * 1024


def _normalize_key(key: str) -> str:
Expand Down Expand Up @@ -293,11 +293,12 @@ class S3(FS):
- ``aws_secret_access_key``, ``AWS_SECRET_ACCESS_KEY``
- ``endpoint``, ``S3_ENDPOINT``
When opening a file in read mode, by default it enables buffering.
The default buffer size is pfio.v2.S3.DEFAULT_BUFFER_SIZE,
which is used when `buffering` is set to -1.
If `buffering=0` it disables buffering, and if `buffering>0`,
the specified value is used as the buffer size.
It supports buffering when opening a file in binary read mode ("rb").
When ``buffering`` is set to -1 (default), the buffer size will be
the size of the file or ``pfio.v2.S3.DEFAULT_MAX_BUFFER_SIZE``,
whichever is smaller.
``buffering=0`` disables buffering, and ``buffering>0`` forcibly sets the
specified value as the buffer size in bytes.
'''

def __init__(self, bucket, prefix=None,
Expand All @@ -316,8 +317,6 @@ def __init__(self, bucket, prefix=None,

self.mpu_chunksize = mpu_chunksize
self.buffering = buffering
if self.buffering < 0:
self.buffering = DEFAULT_BUFFER_SIZE

# boto3.set_stream_logger()

Expand Down Expand Up @@ -380,7 +379,10 @@ def open(self, path, mode='r', **kwargs):
obj = _ObjectReader(self.client, self.bucket, path, mode, kwargs)
if 'b' in mode:
if self.buffering:
obj = io.BufferedReader(obj, buffer_size=self.buffering)
bs = self.buffering
if bs < 0:
bs = min(obj.content_length, DEFAULT_MAX_BUFFER_SIZE)
obj = io.BufferedReader(obj, buffer_size=bs)
else:
obj = io.TextIOWrapper(obj)

Expand Down
1 change: 1 addition & 0 deletions tests/v2_tests/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def test_s3_files(s3_fixture):
assert not s3.isdir("/bas")


# TODO: Find a way to determine the buffer size used by a BufferedReader
@pytest.mark.parametrize("buffering, reader_type",
[(-1, io.BufferedReader),
(0, _ObjectReader),
Expand Down

0 comments on commit 6405725

Please sign in to comment.