From 8b5f6cb9b89d4622081736fa3ebba74aa76f5682 Mon Sep 17 00:00:00 2001 From: Artsiom Tserashkovich Date: Wed, 11 Oct 2023 15:05:54 +0200 Subject: [PATCH 1/7] add "NEPTUNE_DISABLE_LOCALFILES_CLEANUP" flag for disable deleting local files --- src/neptune/envs.py | 3 ++ src/neptune/internal/disk_queue.py | 4 +- .../operation_processors/operation_storage.py | 6 +++ .../neptune/new/attributes/atoms/test_file.py | 12 ++++++ .../neptune/new/internal/test_disk_queue.py | 39 +++++++++++++++++++ 5 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/neptune/envs.py b/src/neptune/envs.py index 9cec19ac5..9ed466010 100644 --- a/src/neptune/envs.py +++ b/src/neptune/envs.py @@ -34,6 +34,7 @@ "NEPTUNE_NON_RAISING_ON_DISK_ISSUE", "NEPTUNE_ENABLE_DEFAULT_ASYNC_LAG_CALLBACK", "NEPTUNE_ENABLE_DEFAULT_ASYNC_NO_PROGRESS_CALLBACK", + "NEPTUNE_DISABLE_LOCALFILES_CLEANUP", ] from neptune.common.envs import ( @@ -77,4 +78,6 @@ NEPTUNE_NON_RAISING_ON_DISK_ISSUE = "NEPTUNE_NON_RAISING_ON_DISK_ISSUE" +NEPTUNE_DISABLE_LOCALFILES_CLEANUP = "NEPTUNE_DISABLE_LOCALFILES_CLEANUP" + S3_ENDPOINT_URL = "S3_ENDPOINT_URL" diff --git a/src/neptune/internal/disk_queue.py b/src/neptune/internal/disk_queue.py index 19ed0ee3a..2d61fd401 100644 --- a/src/neptune/internal/disk_queue.py +++ b/src/neptune/internal/disk_queue.py @@ -32,6 +32,7 @@ TypeVar, ) +from neptune.envs import NEPTUNE_DISABLE_LOCALFILES_CLEANUP from neptune.exceptions import MalformedOperation from neptune.internal.utils.json_file_splitter import JsonFileSplitter from neptune.internal.utils.sync_offset_file import SyncOffsetFile @@ -177,7 +178,8 @@ def cleanup_if_empty(self) -> None: """ Remove underlying files if queue is empty """ - if self.is_empty(): + disable_cleanup = os.getenv(NEPTUNE_DISABLE_LOCALFILES_CLEANUP, "false").lower() in ("true", "1", "t") + if self.is_empty() and not disable_cleanup: self._remove_data() def _remove_data(self): diff --git a/src/neptune/internal/operation_processors/operation_storage.py b/src/neptune/internal/operation_processors/operation_storage.py index 3fc31b465..41b289ee7 100644 --- a/src/neptune/internal/operation_processors/operation_storage.py +++ b/src/neptune/internal/operation_processors/operation_storage.py @@ -21,6 +21,7 @@ from typing import Optional from neptune.constants import NEPTUNE_DATA_DIRECTORY +from neptune.envs import NEPTUNE_DISABLE_LOCALFILES_CLEANUP from neptune.internal.container_type import ContainerType from neptune.internal.id_formats import UniqueId from neptune.internal.utils.logger import logger @@ -55,6 +56,11 @@ def upload_path(self) -> Path: return self.data_path / "upload_path" def cleanup(self) -> None: + disable_cleanup = os.getenv(NEPTUNE_DISABLE_LOCALFILES_CLEANUP, "false").lower() in ("true", "1", "t") + if not disable_cleanup: + self._remove_storage_folder() + + def _remove_storage_folder(self): shutil.rmtree(self.data_path, ignore_errors=True) parent = self.data_path.parent diff --git a/tests/unit/neptune/new/attributes/atoms/test_file.py b/tests/unit/neptune/new/attributes/atoms/test_file.py index 21305b458..af6789259 100644 --- a/tests/unit/neptune/new/attributes/atoms/test_file.py +++ b/tests/unit/neptune/new/attributes/atoms/test_file.py @@ -36,6 +36,7 @@ FileSetVal, ) from neptune.common.utils import IS_WINDOWS +from neptune.envs import NEPTUNE_DISABLE_LOCALFILES_CLEANUP from neptune.internal.operation import ( UploadFile, UploadFileSet, @@ -187,3 +188,14 @@ def test_clean_files_on_close(self): run.stop() assert not os.path.exists(data_path) + + @patch.dict(os.environ, {NEPTUNE_DISABLE_LOCALFILES_CLEANUP: "True"}) + def test_clean_files_on_close_when_cleanup_disabled(self): + with self._exp() as run: + data_path = run._op_processor._operation_storage.data_path + + assert os.path.exists(data_path) + + run.stop() + + assert os.path.exists(data_path) diff --git a/tests/unit/neptune/new/internal/test_disk_queue.py b/tests/unit/neptune/new/internal/test_disk_queue.py index 74bcb73ec..875fd956f 100644 --- a/tests/unit/neptune/new/internal/test_disk_queue.py +++ b/tests/unit/neptune/new/internal/test_disk_queue.py @@ -14,6 +14,7 @@ # limitations under the License. # import json +import os import random import threading import unittest @@ -21,6 +22,9 @@ from pathlib import Path from tempfile import TemporaryDirectory +import mock + +from neptune.envs import NEPTUNE_DISABLE_LOCALFILES_CLEANUP from neptune.internal.disk_queue import ( DiskQueue, QueueElement, @@ -138,6 +142,41 @@ def test_batch_limit(self): queue.close() + def test_cleanup_if_empty(self): + with TemporaryDirectory() as dirpath: + obj_size = self.get_obj_size_bytes(TestDiskQueue.Obj(1, "1"), 1) + queue = DiskQueue[TestDiskQueue.Obj]( + Path(dirpath), + self._serializer, + self._deserializer, + threading.RLock(), + max_file_size=100, + max_batch_size_bytes=obj_size * 3, + ) + assert os.path.exists(dirpath) + + queue.cleanup_if_empty() + + assert not os.path.exists(dirpath) + + @mock.patch.dict(os.environ, {NEPTUNE_DISABLE_LOCALFILES_CLEANUP: "True"}) + def test_cleanup_if_empty_when_cleanup_disabled(self): + with TemporaryDirectory() as dirpath: + obj_size = self.get_obj_size_bytes(TestDiskQueue.Obj(1, "1"), 1) + queue = DiskQueue[TestDiskQueue.Obj]( + Path(dirpath), + self._serializer, + self._deserializer, + threading.RLock(), + max_file_size=100, + max_batch_size_bytes=obj_size * 3, + ) + assert os.path.exists(dirpath) + + queue.cleanup_if_empty() + + assert os.path.exists(dirpath) + def test_resuming_queue(self): with TemporaryDirectory() as dirpath: queue = DiskQueue[TestDiskQueue.Obj]( From 7a93e43666477e993fbb95e1205e157139f35b47 Mon Sep 17 00:00:00 2001 From: Artsiom Tserashkovich Date: Wed, 11 Oct 2023 15:34:10 +0200 Subject: [PATCH 2/7] Update CHANGELOG.md to reflect all latest changes --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 979366562..ccc552ea8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## [UNRELEASED] neptune 1.8.2rc0 + +### Changes +- Safety (errors suppressing) execution mode ([#1503])(https://github.com/neptune-ai/neptune-client/pull/1503) +- Allow to disable handling of remote signals ([#1508])(https://github.com/neptune-ai/neptune-client/pull/1508) +- Allow to disable local files cleanup ([#1511])(https://github.com/neptune-ai/neptune-client/pull/1511) + + ## neptune 1.8.2 ### Changes From a206d6185e071cf2f56efb77bb2b352b89a21890 Mon Sep 17 00:00:00 2001 From: Artsiom Tserashkovich Date: Wed, 11 Oct 2023 16:18:14 +0200 Subject: [PATCH 3/7] Change behavior of flag for disable local cleanup --- CHANGELOG.md | 2 +- src/neptune/envs.py | 4 +- src/neptune/internal/disk_queue.py | 12 ++++-- .../operation_processors/operation_storage.py | 14 +++---- .../neptune/new/attributes/atoms/test_file.py | 10 +++-- .../neptune/new/internal/test_disk_queue.py | 39 ------------------- 6 files changed, 24 insertions(+), 57 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ccc552ea8..a7757b0be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ### Changes - Safety (errors suppressing) execution mode ([#1503])(https://github.com/neptune-ai/neptune-client/pull/1503) - Allow to disable handling of remote signals ([#1508])(https://github.com/neptune-ai/neptune-client/pull/1508) -- Allow to disable local files cleanup ([#1511])(https://github.com/neptune-ai/neptune-client/pull/1511) +- Allow to disable delete of local parent folder ([#1511])(https://github.com/neptune-ai/neptune-client/pull/1511) ## neptune 1.8.2 diff --git a/src/neptune/envs.py b/src/neptune/envs.py index 9ed466010..5ad693578 100644 --- a/src/neptune/envs.py +++ b/src/neptune/envs.py @@ -34,7 +34,7 @@ "NEPTUNE_NON_RAISING_ON_DISK_ISSUE", "NEPTUNE_ENABLE_DEFAULT_ASYNC_LAG_CALLBACK", "NEPTUNE_ENABLE_DEFAULT_ASYNC_NO_PROGRESS_CALLBACK", - "NEPTUNE_DISABLE_LOCALFILES_CLEANUP", + "NEPTUNE_DISABLE_PARENT_DIR_DELETION", ] from neptune.common.envs import ( @@ -78,6 +78,6 @@ NEPTUNE_NON_RAISING_ON_DISK_ISSUE = "NEPTUNE_NON_RAISING_ON_DISK_ISSUE" -NEPTUNE_DISABLE_LOCALFILES_CLEANUP = "NEPTUNE_DISABLE_LOCALFILES_CLEANUP" +NEPTUNE_DISABLE_PARENT_DIR_DELETION = "NEPTUNE_DISABLE_PARENT_DIR_DELETION" S3_ENDPOINT_URL = "S3_ENDPOINT_URL" diff --git a/src/neptune/internal/disk_queue.py b/src/neptune/internal/disk_queue.py index 2d61fd401..fb6463a8b 100644 --- a/src/neptune/internal/disk_queue.py +++ b/src/neptune/internal/disk_queue.py @@ -32,7 +32,7 @@ TypeVar, ) -from neptune.envs import NEPTUNE_DISABLE_LOCALFILES_CLEANUP +from neptune.envs import NEPTUNE_DISABLE_PARENT_DIR_DELETION from neptune.exceptions import MalformedOperation from neptune.internal.utils.json_file_splitter import JsonFileSplitter from neptune.internal.utils.sync_offset_file import SyncOffsetFile @@ -178,8 +178,7 @@ def cleanup_if_empty(self) -> None: """ Remove underlying files if queue is empty """ - disable_cleanup = os.getenv(NEPTUNE_DISABLE_LOCALFILES_CLEANUP, "false").lower() in ("true", "1", "t") - if self.is_empty() and not disable_cleanup: + if self.is_empty(): self._remove_data() def _remove_data(self): @@ -193,7 +192,12 @@ def _remove_data(self): except FileNotFoundError: files = [] - if len(files) == 0: + disable_parent_dir_deletion = os.getenv(NEPTUNE_DISABLE_PARENT_DIR_DELETION, "false").lower() in ( + "true", + "1", + "t", + ) + if len(files) == 0 and not disable_parent_dir_deletion: try: os.rmdir(parent) except OSError: diff --git a/src/neptune/internal/operation_processors/operation_storage.py b/src/neptune/internal/operation_processors/operation_storage.py index 41b289ee7..a26666ebe 100644 --- a/src/neptune/internal/operation_processors/operation_storage.py +++ b/src/neptune/internal/operation_processors/operation_storage.py @@ -21,7 +21,7 @@ from typing import Optional from neptune.constants import NEPTUNE_DATA_DIRECTORY -from neptune.envs import NEPTUNE_DISABLE_LOCALFILES_CLEANUP +from neptune.envs import NEPTUNE_DISABLE_PARENT_DIR_DELETION from neptune.internal.container_type import ContainerType from neptune.internal.id_formats import UniqueId from neptune.internal.utils.logger import logger @@ -56,17 +56,17 @@ def upload_path(self) -> Path: return self.data_path / "upload_path" def cleanup(self) -> None: - disable_cleanup = os.getenv(NEPTUNE_DISABLE_LOCALFILES_CLEANUP, "false").lower() in ("true", "1", "t") - if not disable_cleanup: - self._remove_storage_folder() - - def _remove_storage_folder(self): shutil.rmtree(self.data_path, ignore_errors=True) parent = self.data_path.parent files = os.listdir(parent) - if len(files) == 0: + disable_parent_dir_deletion = os.getenv(NEPTUNE_DISABLE_PARENT_DIR_DELETION, "false").lower() in ( + "true", + "1", + "t", + ) + if len(files) == 0 and not disable_parent_dir_deletion: try: os.rmdir(parent) except OSError: diff --git a/tests/unit/neptune/new/attributes/atoms/test_file.py b/tests/unit/neptune/new/attributes/atoms/test_file.py index af6789259..24b164c7e 100644 --- a/tests/unit/neptune/new/attributes/atoms/test_file.py +++ b/tests/unit/neptune/new/attributes/atoms/test_file.py @@ -36,7 +36,7 @@ FileSetVal, ) from neptune.common.utils import IS_WINDOWS -from neptune.envs import NEPTUNE_DISABLE_LOCALFILES_CLEANUP +from neptune.envs import NEPTUNE_DISABLE_PARENT_DIR_DELETION from neptune.internal.operation import ( UploadFile, UploadFileSet, @@ -187,9 +187,10 @@ def test_clean_files_on_close(self): run.stop() - assert not os.path.exists(data_path) + assert not os.path.exists(data_path) # exec folder + assert not os.path.exists(data_path.parent) # run folder - @patch.dict(os.environ, {NEPTUNE_DISABLE_LOCALFILES_CLEANUP: "True"}) + @patch.dict(os.environ, {NEPTUNE_DISABLE_PARENT_DIR_DELETION: "True"}) def test_clean_files_on_close_when_cleanup_disabled(self): with self._exp() as run: data_path = run._op_processor._operation_storage.data_path @@ -198,4 +199,5 @@ def test_clean_files_on_close_when_cleanup_disabled(self): run.stop() - assert os.path.exists(data_path) + assert not os.path.exists(data_path) # exec folder + assert os.path.exists(data_path.parent) # run folder diff --git a/tests/unit/neptune/new/internal/test_disk_queue.py b/tests/unit/neptune/new/internal/test_disk_queue.py index 875fd956f..74bcb73ec 100644 --- a/tests/unit/neptune/new/internal/test_disk_queue.py +++ b/tests/unit/neptune/new/internal/test_disk_queue.py @@ -14,7 +14,6 @@ # limitations under the License. # import json -import os import random import threading import unittest @@ -22,9 +21,6 @@ from pathlib import Path from tempfile import TemporaryDirectory -import mock - -from neptune.envs import NEPTUNE_DISABLE_LOCALFILES_CLEANUP from neptune.internal.disk_queue import ( DiskQueue, QueueElement, @@ -142,41 +138,6 @@ def test_batch_limit(self): queue.close() - def test_cleanup_if_empty(self): - with TemporaryDirectory() as dirpath: - obj_size = self.get_obj_size_bytes(TestDiskQueue.Obj(1, "1"), 1) - queue = DiskQueue[TestDiskQueue.Obj]( - Path(dirpath), - self._serializer, - self._deserializer, - threading.RLock(), - max_file_size=100, - max_batch_size_bytes=obj_size * 3, - ) - assert os.path.exists(dirpath) - - queue.cleanup_if_empty() - - assert not os.path.exists(dirpath) - - @mock.patch.dict(os.environ, {NEPTUNE_DISABLE_LOCALFILES_CLEANUP: "True"}) - def test_cleanup_if_empty_when_cleanup_disabled(self): - with TemporaryDirectory() as dirpath: - obj_size = self.get_obj_size_bytes(TestDiskQueue.Obj(1, "1"), 1) - queue = DiskQueue[TestDiskQueue.Obj]( - Path(dirpath), - self._serializer, - self._deserializer, - threading.RLock(), - max_file_size=100, - max_batch_size_bytes=obj_size * 3, - ) - assert os.path.exists(dirpath) - - queue.cleanup_if_empty() - - assert os.path.exists(dirpath) - def test_resuming_queue(self): with TemporaryDirectory() as dirpath: queue = DiskQueue[TestDiskQueue.Obj]( From cdae807b7c3028754726c7ea66db5f6551532d3a Mon Sep 17 00:00:00 2001 From: Artsiom Tserashkovich Date: Wed, 11 Oct 2023 16:21:10 +0200 Subject: [PATCH 4/7] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9662a6cb..df4979448 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ### Changes - Safety (errors suppressing) execution mode ([#1503])(https://github.com/neptune-ai/neptune-client/pull/1503) - Allow to disable handling of remote signals ([#1508])(https://github.com/neptune-ai/neptune-client/pull/1508) -- Allow to disable delete of local parent folder ([#1511])(https://github.com/neptune-ai/neptune-client/pull/1511) +- Allow to disable deletion of local parent folder ([#1511])(https://github.com/neptune-ai/neptune-client/pull/1511) ### Fixes - Added more safe checking to last ack ([#1510](https://github.com/neptune-ai/neptune-client/pull/1510)) From d82cb55a9c0aa34448ddcf19ebcae47f385e0980 Mon Sep 17 00:00:00 2001 From: Artsiom Tserashkovich Date: Wed, 11 Oct 2023 17:30:07 +0200 Subject: [PATCH 5/7] Extract remove parent folder code to utils --- src/neptune/internal/disk_queue.py | 21 ++--------- .../operation_processors/operation_storage.py | 18 ++-------- src/neptune/internal/utils/files.py | 35 +++++++++++++++++++ 3 files changed, 39 insertions(+), 35 deletions(-) create mode 100644 src/neptune/internal/utils/files.py diff --git a/src/neptune/internal/disk_queue.py b/src/neptune/internal/disk_queue.py index fb6463a8b..1517117f9 100644 --- a/src/neptune/internal/disk_queue.py +++ b/src/neptune/internal/disk_queue.py @@ -32,8 +32,8 @@ TypeVar, ) -from neptune.envs import NEPTUNE_DISABLE_PARENT_DIR_DELETION from neptune.exceptions import MalformedOperation +from neptune.internal.utils.files import remove_parent_folder_if_allowed from neptune.internal.utils.json_file_splitter import JsonFileSplitter from neptune.internal.utils.sync_offset_file import SyncOffsetFile @@ -184,24 +184,7 @@ def cleanup_if_empty(self) -> None: def _remove_data(self): path = self._dir_path shutil.rmtree(path, ignore_errors=True) - - parent = path.parent - - try: - files = os.listdir(parent) - except FileNotFoundError: - files = [] - - disable_parent_dir_deletion = os.getenv(NEPTUNE_DISABLE_PARENT_DIR_DELETION, "false").lower() in ( - "true", - "1", - "t", - ) - if len(files) == 0 and not disable_parent_dir_deletion: - try: - os.rmdir(parent) - except OSError: - _logger.info(f"Cannot remove directory: {parent}") + remove_parent_folder_if_allowed(path) def wait_for_empty(self, seconds: Optional[float] = None) -> bool: with self._empty_cond: diff --git a/src/neptune/internal/operation_processors/operation_storage.py b/src/neptune/internal/operation_processors/operation_storage.py index a26666ebe..a54529448 100644 --- a/src/neptune/internal/operation_processors/operation_storage.py +++ b/src/neptune/internal/operation_processors/operation_storage.py @@ -21,10 +21,9 @@ from typing import Optional from neptune.constants import NEPTUNE_DATA_DIRECTORY -from neptune.envs import NEPTUNE_DISABLE_PARENT_DIR_DELETION from neptune.internal.container_type import ContainerType from neptune.internal.id_formats import UniqueId -from neptune.internal.utils.logger import logger +from neptune.internal.utils.files import remove_parent_folder_if_allowed def get_container_dir( @@ -57,17 +56,4 @@ def upload_path(self) -> Path: def cleanup(self) -> None: shutil.rmtree(self.data_path, ignore_errors=True) - - parent = self.data_path.parent - files = os.listdir(parent) - - disable_parent_dir_deletion = os.getenv(NEPTUNE_DISABLE_PARENT_DIR_DELETION, "false").lower() in ( - "true", - "1", - "t", - ) - if len(files) == 0 and not disable_parent_dir_deletion: - try: - os.rmdir(parent) - except OSError: - logger.debug(f"Cannot remove directory: {parent}") + remove_parent_folder_if_allowed(self.data_path) diff --git a/src/neptune/internal/utils/files.py b/src/neptune/internal/utils/files.py new file mode 100644 index 000000000..0a6a4accd --- /dev/null +++ b/src/neptune/internal/utils/files.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2023, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +__all__ = ["remove_parent_folder_if_allowed"] + +import os +from pathlib import Path + +from neptune.envs import NEPTUNE_DISABLE_PARENT_DIR_DELETION +from neptune.internal.utils.logger import logger + + +def remove_parent_folder_if_allowed(path: Path) -> None: + deletion_disabled = os.getenv(NEPTUNE_DISABLE_PARENT_DIR_DELETION, "false").lower() in ("true", "1", "t") + + if not deletion_disabled: + parent = path.parent + files = os.listdir(parent) + if len(files) == 0: + try: + os.rmdir(parent) + except OSError: + logger.debug(f"Cannot remove directory: {parent}") From 0529248512d593f7b34c59054e25140b85e187a2 Mon Sep 17 00:00:00 2001 From: Artsiom Tserashkovich Date: Wed, 11 Oct 2023 17:32:07 +0200 Subject: [PATCH 6/7] Adjust CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index df4979448..065625bd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## [UNRELEASED] neptune 1.8.2rc0 +## [UNRELEASED] neptune 1.8.3 ### Changes - Safety (errors suppressing) execution mode ([#1503])(https://github.com/neptune-ai/neptune-client/pull/1503) From 5c0d5fa9a0f2dafc19e18253ec458cf2477afadf Mon Sep 17 00:00:00 2001 From: Artsiom Tserashkovich Date: Wed, 11 Oct 2023 17:41:52 +0200 Subject: [PATCH 7/7] Wrap listdir by try catch --- src/neptune/internal/utils/files.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/neptune/internal/utils/files.py b/src/neptune/internal/utils/files.py index 0a6a4accd..aa0579985 100644 --- a/src/neptune/internal/utils/files.py +++ b/src/neptune/internal/utils/files.py @@ -27,7 +27,12 @@ def remove_parent_folder_if_allowed(path: Path) -> None: if not deletion_disabled: parent = path.parent - files = os.listdir(parent) + + try: + files = os.listdir(parent) + except FileNotFoundError: + files = [] + if len(files) == 0: try: os.rmdir(parent)