Skip to content

Commit

Permalink
Merge branch 'master' into rj/fetch-runs
Browse files Browse the repository at this point in the history
  • Loading branch information
Raalsky authored May 16, 2022
2 parents 4fec5c3 + 9b68fb7 commit ede36f0
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 5 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
## [UNRELEASED] neptune-client 0.16.3

## Fixes
- Fix computation of the multipart upload chunk size ([#897](https://github.com/neptune-ai/neptune-client/pull/897))
- Match all listed tags instead of any when calling `fetch_runs_table` ([#899](https://github.com/neptune-ai/neptune-client/pull/899))


## neptune-client 0.16.2

## Features
Expand Down
5 changes: 3 additions & 2 deletions neptune/internal/storage/datastream.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#
import dataclasses
import io
import math
import os
import tarfile
from typing import Any, BinaryIO, Generator, Union
Expand Down Expand Up @@ -53,12 +54,12 @@ def _get_chunk_size(self) -> int:
f"File {self._filename} is too big to upload:"
f" {self._total_size} bytes exceeds max size {max_size}"
)
if self._total_size < self._max_chunk_count * self._min_chunk_size:
if self._total_size <= self._max_chunk_count * self._min_chunk_size:
# can be done as minimal size chunks -- go for it!
return self._min_chunk_size
else:
# need larger chunks -- split more or less equally
return self._total_size // (self._max_chunk_count + 1)
return math.ceil(self._total_size / self._max_chunk_count)

def generate(self) -> Generator[FileChunk, Any, None]:
chunk_size = self._get_chunk_size()
Expand Down
61 changes: 59 additions & 2 deletions tests/neptune/internal/storage/test_datastream.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,20 @@
# limitations under the License.
#

# pylint: disable=protected-access
import unittest
from io import BytesIO

from mock import patch
import pytest
from mock import Mock, patch

from neptune.internal.storage.datastream import FileChunk, FileChunkStream
from neptune.internal.api_clients.client_config import MultipartConfig
from neptune.internal.storage.datastream import FileChunk, FileChunker, FileChunkStream
from neptune.internal.storage.storage_utils import (
AttributeUploadConfiguration,
UploadEntry,
)
from neptune.new.exceptions import InternalClientError


class TestFileChunkStream(unittest.TestCase):
Expand Down Expand Up @@ -86,5 +90,58 @@ def test_generate_chunks_from_stream(self):
)


class TestFileChunker:
    """Tests for the chunk size selected by ``FileChunker._get_chunk_size``."""

    # Limits shared by every test case in this class.
    multipart_config = MultipartConfig(
        min_chunk_size=5_242_880,  # 5 MB
        max_chunk_size=1_073_741_824,  # 1 GB
        max_chunk_count=1_000,
        max_single_part_size=5_242_880,  # 5 MB
    )

    def get_chunk_count(self, file_size, chunk_size):
        """Return how many chunks of ``chunk_size`` bytes cover ``file_size`` bytes.

        The result is never lower than 1, so an empty file still counts as a
        single chunk. ``chunk_size`` must be positive.
        """
        # Ceiling division done in integer arithmetic, so very large file
        # sizes are not subject to float rounding.
        return max(1, -(-file_size // chunk_size))

    @pytest.mark.parametrize(
        "file_size, expected_chunk_size, expected_chunk_count",
        (
            # Smaller than a single minimal chunk.
            (1_000_000, 5_242_880, 1),
            # Slightly more than one minimal chunk.
            (6_000_000, 5_242_880, 2),
            # Exactly max_chunk_count minimal chunks -- still minimal size.
            (5_242_880_000, 5_242_880, 1_000),
            # One byte over the boundary -- the chunk has to grow.
            (5_242_880_001, 5_242_881, 1_000),
            (5_242_891_001, 5_242_892, 1_000),
            # Exactly max_chunk_count maximal chunks.
            (1_073_741_824_000, 1_073_741_824, 1_000),
        ),
    )
    def test_chunk_size_for_small_file(
        self, file_size, expected_chunk_size, expected_chunk_count
    ):
        """Check the chosen chunk size and resulting chunk count against the limits.

        Despite its name, the parametrization ranges from files smaller than
        one minimal chunk up to ``max_chunk_count`` chunks of ``max_chunk_size``.
        """
        chunker = FileChunker(
            Mock(), Mock(), total_size=file_size, multipart_config=self.multipart_config
        )

        chunk_size = chunker._get_chunk_size()

        chunk_count = self.get_chunk_count(file_size, chunk_size)
        assert chunk_count == expected_chunk_count
        assert chunk_size == expected_chunk_size
        assert chunk_count <= self.multipart_config.max_chunk_count
        assert chunk_size <= self.multipart_config.max_chunk_size
        assert chunk_size >= self.multipart_config.min_chunk_size

    def test_too_large_file(self):
        """A file one byte over ``max_chunk_count * max_chunk_size`` is rejected."""
        file_size = 1_073_741_824_001

        chunker = FileChunker(
            Mock(), Mock(), total_size=file_size, multipart_config=self.multipart_config
        )

        with pytest.raises(InternalClientError):
            chunker._get_chunk_size()


# NOTE(review): unittest.main() collects unittest.TestCase subclasses only, so the
# pytest-style TestFileChunker class (plain class using pytest.mark.parametrize) is
# presumably exercised only when this module is run through pytest -- confirm.
if __name__ == "__main__":
unittest.main()

0 comments on commit ede36f0

Please sign in to comment.