diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index f78411d..e385978 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -31,4 +31,3 @@ python:
     - requirements: docs/requirements.txt
     - method: pip
       path: .
-
diff --git a/src/algbench/benchmark.py b/src/algbench/benchmark.py
index d849886..67a6a57 100644
--- a/src/algbench/benchmark.py
+++ b/src/algbench/benchmark.py
@@ -272,7 +272,7 @@ def clear(self):
     def delete_if(self, condition: typing.Callable[[typing.Dict], bool]):
         """
         Delete entries if a specific condition is met (return True).
-        Recreates the internal 'results' folder for this porpose. 
+        Recreates the internal 'results' folder for this porpose.
         Use `front` to get a preview on how an entry that is passed
         to the condition looks like.
 
@@ -289,14 +289,14 @@ def func(entry) -> typing.Optional[typing.Dict]:
 
     def apply(self, func: typing.Callable[[typing.Dict], typing.Optional[typing.Dict]]):
         """
-        Allows to modify all entries (in place !) inside this benchmark, 
+        Allows to modify all entries (in place !) inside this benchmark,
         based on the provided callable. It is being called for every
-        entry inside the database, and the returned entry will be stored 
+        entry inside the database, and the returned entry will be stored
         instead. If None is returned, the provided entry will be deleted
-        from the database. 
+        from the database.
 
         NOT THREAD-SAFE, execute this while no other instance is accessing
-        the file system. 
+        the file system.
         """
         self._db.apply(func)
         self.compress()
@@ -310,7 +310,7 @@ def __len__(self):
         However, this is not recommended, as it is slow.
         """
         return self._db.__len__()
-    
+
     def empty(self):
         """
         Return True if the database is empty, False otherwise.
diff --git a/src/algbench/benchmark_db.py b/src/algbench/benchmark_db.py
index 8e3effb..f09a1b3 100644
--- a/src/algbench/benchmark_db.py
+++ b/src/algbench/benchmark_db.py
@@ -74,12 +74,16 @@ def clear(self):
         self._data.clear()
         self._env_data.clear()
 
-    def get_env_info(self, env_fingerprint, env_data: typing.Optional[NfsJsonDict]=None):
+    def get_env_info(
+        self, env_fingerprint, env_data: typing.Optional[NfsJsonDict] = None
+    ):
         if not env_data:
             env_data = self._env_data
         return env_data[env_fingerprint]
 
-    def _create_entry_with_env(self, entry: typing.Dict, env_data: typing.Optional[NfsJsonDict]=None):
+    def _create_entry_with_env(
+        self, entry: typing.Dict, env_data: typing.Optional[NfsJsonDict] = None
+    ):
         entry = entry.copy()
         try:
             if not env_data:
@@ -88,7 +92,7 @@ def _create_entry_with_env(self, entry: typing.Dict, env_data: typing.Optional[N
             return entry
         except KeyError:
             return None
-    
+
     def __iter__(self):
         for entry in self._data:
             entry_with_env = self._create_entry_with_env(entry)
@@ -125,4 +129,3 @@ def apply(self, func: typing.Callable[[typing.Dict], typing.Optional[typing.Dict]
 
     def __len__(self):
         return len(self._arg_fingerprints)
-    
\ No newline at end of file
diff --git a/src/algbench/db/nfs_json_dict.py b/src/algbench/db/nfs_json_dict.py
index 24318b8..f531a2c 100644
--- a/src/algbench/db/nfs_json_dict.py
+++ b/src/algbench/db/nfs_json_dict.py
@@ -62,6 +62,6 @@ def delete(self):
     def move_directory(self, new_path: str):
         """
         Changes the internal directory of this NFSJsonDict.
-        Not thread safe! 
+        Not thread safe!
         """
-        self._db.move_directory(new_path)
\ No newline at end of file
+        self._db.move_directory(new_path)
diff --git a/src/algbench/db/nfs_json_list.py b/src/algbench/db/nfs_json_list.py
index 121f16d..53fae9d 100644
--- a/src/algbench/db/nfs_json_list.py
+++ b/src/algbench/db/nfs_json_list.py
@@ -9,7 +9,7 @@
 import socket
 import typing
 import zipfile
-from zipfile import ZipFile, BadZipFile
+from zipfile import BadZipFile, ZipFile
 
 from .json_serializer import to_json
 
@@ -22,7 +22,9 @@ class NfsJsonList:
     even for slurm pools with NFS.
     """
 
-    def __init__(self, path: typing.Union[str, pathlib.Path], file_split_mb: float=30):
+    def __init__(
+        self, path: typing.Union[str, pathlib.Path], file_split_mb: float = 30
+    ):
         self.path: typing.Union[str, pathlib.Path] = path
         if not os.path.exists(path):
             # Could fail in very few unlucky cases on an NFS (parallel creations)
@@ -100,13 +102,15 @@ def flush(self):
         if not os.path.isfile(path):
             msg = "Could not write to disk for unknown reasons."
             raise RuntimeError(msg)
-        
+
         if self._filesize > self._file_split_size:
             new_unique_name = self._get_unique_name()
-            _log.info(f"File {self._subfile_path} exceeds {self._file_split_size} MB, starting new file {new_unique_name}.")
+            _log.info(
+                f"File {self._subfile_path} exceeds {self._file_split_size} MB, starting new file {new_unique_name}."
+            )
             self._subfile_path = new_unique_name
             self._filesize = 0
-        
+
         _log.info(f"Wrote {len(self._cache)} entries to disk.")
         self._cache.clear()
 
@@ -192,10 +196,10 @@ def delete(self):
 
     def move_directory(self, new_path: str):
         """
-        Changes the internal directory where the db files are 
+        Changes the internal directory where the db files are
         stored while keeping all files (renames the directory).
         This operation is not thread safe, especially not
-        over other nodes or instances of this script. 
+        over other nodes or instances of this script.
         """
 
         if os.path.exists(new_path) or os.path.isfile(new_path):
@@ -206,4 +210,4 @@ def move_directory(self, new_path: str):
         _log.info(f"Moved NFSJsonList from {self.path} to {new_path}.")
         self._subfile_path = self._get_unique_name()
         self._filesize = 0
-        self.path = new_path
\ No newline at end of file
+        self.path = new_path
diff --git a/src/algbench/db/nfs_json_set.py b/src/algbench/db/nfs_json_set.py
index e61e421..129051a 100644
--- a/src/algbench/db/nfs_json_set.py
+++ b/src/algbench/db/nfs_json_set.py
@@ -51,10 +51,10 @@ def delete(self):
 
     def __len__(self):
         return len(self._values)
-    
+
     def move_directory(self, new_path: str):
         """
-        Changes the internal directory of this NFSJsonSet (essentially 
-        moves the directory). Not thread safe! 
+        Changes the internal directory of this NFSJsonSet (essentially
+        moves the directory). Not thread safe!
         """
         self._db.move_directory(new_path)
diff --git a/src/algbench/environment.py b/src/algbench/environment.py
index 80a8c61..7d8b280 100644
--- a/src/algbench/environment.py
+++ b/src/algbench/environment.py
@@ -27,6 +27,7 @@ def get_git_revision() -> typing.Optional[str]:
         label = None
     return label
 
+
 def get_python_file() -> typing.Optional[str]:
     """
     Return the path of the calling python file.
diff --git a/tests/test_apply.py b/tests/test_apply.py
index 3a946a2..6d3caf7 100644
--- a/tests/test_apply.py
+++ b/tests/test_apply.py
@@ -1,16 +1,17 @@
-import os
 import typing
+
 from algbench import Benchmark
 
 
-def test_apply(): 
+def test_apply():
     benchmark = Benchmark("./test_apply")
 
-    def generate_entry(args): 
-        return {"entry_index" : args["index"], "data" : f"Data for entry {args['index']}"}
+    def generate_entry(args):
+        return {"entry_index": args["index"], "data": f"Data for entry {args['index']}"}
+
     for i in range(20):
-        benchmark.add(generate_entry, {"index" : i})
-    
+        benchmark.add(generate_entry, {"index": i})
+
     benchmark.compress()
 
     def db_count_entries(b: Benchmark) -> int:
@@ -18,7 +19,6 @@ def db_count_entries(b: Benchmark) -> int:
 
     assert db_count_entries(benchmark) == 20
 
-
     def func(entry: typing.Dict):
         if entry["result"]["entry_index"] % 2 == 1:
             return None
@@ -26,6 +26,7 @@ def func(entry: typing.Dict):
         del entry["timestamp"]
         del entry["runtime"]
         return entry
+
     benchmark.apply(func)
 
     assert db_count_entries(benchmark) == 10
@@ -34,7 +35,7 @@ def func(entry: typing.Dict):
         assert "timestamp" not in entry
         assert "result" in entry
 
-    benchmark.repair() # implies both a delete_if() and apply() call
+    benchmark.repair()  # implies both a delete_if() and apply() call
     assert db_count_entries(benchmark) == 10
 
-    benchmark.delete()
\ No newline at end of file
+    benchmark.delete()
diff --git a/tests/test_basics.py b/tests/test_basics.py
index 30ff6ba..c115f24 100644
--- a/tests/test_basics.py
+++ b/tests/test_basics.py
@@ -5,6 +5,9 @@
 
 def test_simple():
     benchmark = Benchmark("./test_benchmark")
+    if not benchmark.empty():
+        benchmark.delete()
+        benchmark = Benchmark("./test_benchmark")
 
     def f(x, _test=2, default="default"):
         print(x)
diff --git a/tests/test_extensive.py b/tests/test_extensive.py
index 4d2fe80..a47a1d6 100644
--- a/tests/test_extensive.py
+++ b/tests/test_extensive.py
@@ -9,7 +9,7 @@ def test_extensive_use_case():
     def f(x, _test=2, default="default"):
         print(x)
         return {"r1": x, "r2": "test"}
-    
+
     for x in range(500):
         benchmark.add(f, x, _test=None)
 
@@ -32,5 +32,6 @@ def f(x, _test=2, default="default"):
     assert len(benchmark_) == 250
     benchmark.delete()
 
-if __name__=="__main__":
-    test_extensive_use_case()
\ No newline at end of file
+
+if __name__ == "__main__":
+    test_extensive_use_case()
diff --git a/tests/test_file_splitting.py b/tests/test_file_splitting.py
index b6fdbd6..ff2de72 100644
--- a/tests/test_file_splitting.py
+++ b/tests/test_file_splitting.py
@@ -1,24 +1,25 @@
 import os
+
 from algbench import Benchmark
 
 
-def test_file_splitting(): 
+def test_file_splitting():
     benchmark = Benchmark("./test_file_split_benchmark")
 
     # random data generation taken from https://stackoverflow.com/questions/16308989/
-    def generate_one_mb_results(args): 
-        offset = ord(b'A')
+    def generate_one_mb_results(args):
+        offset = ord(b"A")
         rand_bytes = os.urandom(1024 * 1024)
         array = bytearray(rand_bytes)
         for i, bt in enumerate(array):
             array[i] = offset + bt % 26
-        return {str(args["index"]) : bytes(array).decode()}
+        return {str(args["index"]): bytes(array).decode()}
+
     for i in range(100):
-        benchmark.add(generate_one_mb_results, {"index" : i})
+        benchmark.add(generate_one_mb_results, {"index": i})
 
-    # Hardcoded for now. The file splits are hardcoded in nfs_json_list to be made at 30 MB. 
-    # For the 100MB + some curly brackets and environments, this should produce 4 files. 
-    assert len(os.listdir("./test_file_split_benchmark/results")) == 4 
+    # Hardcoded for now. The file splits are hardcoded in nfs_json_list to be made at 30 MB.
+    # For the 100MB + some curly brackets and environments, this should produce 4 files.
+    assert len(os.listdir("./test_file_split_benchmark/results")) == 4
 
-    benchmark.delete()
\ No newline at end of file
+    benchmark.delete()
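
A note on the 4-file assertion above: it follows from the split rule visible in NfsJsonList.flush(), which starts a new sub-file once the current one exceeds file_split_mb (default 30). The snippet below is only a rough, illustrative back-of-the-envelope check with made-up variable names; it ignores JSON overhead and the environment entries mentioned in the test comment.

split_mb = 30
files, current_mb = 1, 0.0
for _ in range(100):           # 100 entries of roughly 1 MB each
    current_mb += 1.0          # the entry is written to the current sub-file first
    if current_mb > split_mb:  # only afterwards does the flush roll over to a new file
        files += 1
        current_mb = 0.0
print(files)  # 4

Roughly 31 MB land in each of the first three files and the remaining few megabytes in a fourth, which is what the hardcoded assertion expects.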