Skip to content

Commit

Permalink
CHORES: pre-commit
Browse files Browse the repository at this point in the history
  • Loading branch information
d-krupke committed Dec 30, 2023
1 parent d21f2d3 commit 5c564c9
Show file tree
Hide file tree
Showing 11 changed files with 59 additions and 46 deletions.
1 change: 0 additions & 1 deletion .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,3 @@ python:
- requirements: docs/requirements.txt
- method: pip
path: .

12 changes: 6 additions & 6 deletions src/algbench/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def clear(self):
def delete_if(self, condition: typing.Callable[[typing.Dict], bool]):
"""
Delete entries if a specific condition is met (return True).
Recreates the internal 'results' folder for this porpose.
Recreates the internal 'results' folder for this porpose.
Use `front` to get a preview on how an entry that is
passed to the condition looks like.
Expand All @@ -289,14 +289,14 @@ def func(entry) -> typing.Optional[typing.Dict]:

def apply(self, func: typing.Callable[[typing.Dict], typing.Optional[typing.Dict]]):
"""
Allows to modify all entries (in place !) inside this benchmark,
Allows to modify all entries (in place !) inside this benchmark,
based on the provided callable. It is being called for every
entry inside the database, and the returned entry will be stored
entry inside the database, and the returned entry will be stored
instead. If None is returned, the provided entry will be deleted
from the database.
from the database.
NOT THREAD-SAFE, execute this while no other instance is accessing
the file system.
the file system.
"""
self._db.apply(func)
self.compress()
Expand All @@ -310,7 +310,7 @@ def __len__(self):
However, this is not recommended, as it is slow.
"""
return self._db.__len__()

def empty(self):
"""
Return True if the database is empty, False otherwise.
Expand Down
11 changes: 7 additions & 4 deletions src/algbench/benchmark_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,16 @@ def clear(self):
self._data.clear()
self._env_data.clear()

def get_env_info(self, env_fingerprint, env_data: typing.Optional[NfsJsonDict]=None):
def get_env_info(
self, env_fingerprint, env_data: typing.Optional[NfsJsonDict] = None
):
if not env_data:
env_data = self._env_data
return env_data[env_fingerprint]

def _create_entry_with_env(self, entry: typing.Dict, env_data: typing.Optional[NfsJsonDict]=None):
def _create_entry_with_env(
self, entry: typing.Dict, env_data: typing.Optional[NfsJsonDict] = None
):
entry = entry.copy()
try:
if not env_data:
Expand All @@ -88,7 +92,7 @@ def _create_entry_with_env(self, entry: typing.Dict, env_data: typing.Optional[N
return entry
except KeyError:
return None

def __iter__(self):
for entry in self._data:
entry_with_env = self._create_entry_with_env(entry)
Expand Down Expand Up @@ -125,4 +129,3 @@ def apply(self, func: typing.Callable[[typing.Dict], typing.Optional[typing.Dict

def __len__(self):
return len(self._arg_fingerprints)

4 changes: 2 additions & 2 deletions src/algbench/db/nfs_json_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,6 @@ def delete(self):
def move_directory(self, new_path: str):
"""
Changes the internal directory of this NFSJsonDict.
Not thread safe!
Not thread safe!
"""
self._db.move_directory(new_path)
self._db.move_directory(new_path)
20 changes: 12 additions & 8 deletions src/algbench/db/nfs_json_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import socket
import typing
import zipfile
from zipfile import ZipFile, BadZipFile
from zipfile import BadZipFile, ZipFile

from .json_serializer import to_json

Expand All @@ -22,7 +22,9 @@ class NfsJsonList:
even for slurm pools with NFS.
"""

def __init__(self, path: typing.Union[str, pathlib.Path], file_split_mb: float=30):
def __init__(
self, path: typing.Union[str, pathlib.Path], file_split_mb: float = 30
):
self.path: typing.Union[str, pathlib.Path] = path
if not os.path.exists(path):
# Could fail in very few unlucky cases on an NFS (parallel creations)
Expand Down Expand Up @@ -100,13 +102,15 @@ def flush(self):
if not os.path.isfile(path):
msg = "Could not write to disk for unknown reasons."
raise RuntimeError(msg)

if self._filesize > self._file_split_size:
new_unique_name = self._get_unique_name()
_log.info(f"File {self._subfile_path} exceeds {self._file_split_size} MB, starting new file {new_unique_name}.")
_log.info(
f"File {self._subfile_path} exceeds {self._file_split_size} MB, starting new file {new_unique_name}."
)
self._subfile_path = new_unique_name
self._filesize = 0

_log.info(f"Wrote {len(self._cache)} entries to disk.")
self._cache.clear()

Expand Down Expand Up @@ -192,10 +196,10 @@ def delete(self):

def move_directory(self, new_path: str):
"""
Changes the internal directory where the db files are
Changes the internal directory where the db files are
stored while keeping all files (renames the directory).
This operation is not thread safe, especially not
over other nodes or instances of this script.
over other nodes or instances of this script.
"""

if os.path.exists(new_path) or os.path.isfile(new_path):
Expand All @@ -206,4 +210,4 @@ def move_directory(self, new_path: str):
_log.info(f"Moved NFSJsonList from {self.path} to {new_path}.")
self._subfile_path = self._get_unique_name()
self._filesize = 0
self.path = new_path
self.path = new_path
6 changes: 3 additions & 3 deletions src/algbench/db/nfs_json_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ def delete(self):

def __len__(self):
return len(self._values)

def move_directory(self, new_path: str):
"""
Changes the internal directory of this NFSJsonSet (essentially
moves the directory). Not thread safe!
Changes the internal directory of this NFSJsonSet (essentially
moves the directory). Not thread safe!
"""
self._db.move_directory(new_path)
1 change: 1 addition & 0 deletions src/algbench/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def get_git_revision() -> typing.Optional[str]:
label = None
return label


def get_python_file() -> typing.Optional[str]:
"""
Return the path of the calling python file.
Expand Down
19 changes: 10 additions & 9 deletions tests/test_apply.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,32 @@
import os
import typing

from algbench import Benchmark

def test_apply():

def test_apply():
benchmark = Benchmark("./test_apply")

def generate_entry(args):
return {"entry_index" : args["index"], "data" : f"Data for entry {args['index']}"}
def generate_entry(args):
return {"entry_index": args["index"], "data": f"Data for entry {args['index']}"}

for i in range(20):
benchmark.add(generate_entry, {"index" : i})
benchmark.add(generate_entry, {"index": i})

benchmark.compress()

def db_count_entries(b: Benchmark) -> int:
return len([0 for _ in b])

assert db_count_entries(benchmark) == 20


def func(entry: typing.Dict):
if entry["result"]["entry_index"] % 2 == 1:
return None
else:
del entry["timestamp"]
del entry["runtime"]
return entry

benchmark.apply(func)

assert db_count_entries(benchmark) == 10
Expand All @@ -34,7 +35,7 @@ def func(entry: typing.Dict):
assert "timestamp" not in entry
assert "result" in entry

benchmark.repair() # implies both a delete_if() and apply() call
benchmark.repair() # implies both a delete_if() and apply() call
assert db_count_entries(benchmark) == 10

benchmark.delete()
benchmark.delete()
3 changes: 3 additions & 0 deletions tests/test_basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

def test_simple():
benchmark = Benchmark("./test_benchmark")
if not benchmark.empty():
benchmark.delete()
benchmark = Benchmark("./test_benchmark")

def f(x, _test=2, default="default"):
print(x)
Expand Down
7 changes: 4 additions & 3 deletions tests/test_extensive.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def test_extensive_use_case():
def f(x, _test=2, default="default"):
print(x)
return {"r1": x, "r2": "test"}

for x in range(500):
benchmark.add(f, x, _test=None)

Expand All @@ -32,5 +32,6 @@ def f(x, _test=2, default="default"):
assert len(benchmark_) == 250
benchmark.delete()

if __name__=="__main__":
test_extensive_use_case()

if __name__ == "__main__":
test_extensive_use_case()
21 changes: 11 additions & 10 deletions tests/test_file_splitting.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
import os

from algbench import Benchmark

def test_file_splitting():

def test_file_splitting():
benchmark = Benchmark("./test_file_split_benchmark")

# random data generation taken from https://stackoverflow.com/questions/16308989/
def generate_one_mb_results(args):
offset = ord(b'A')
def generate_one_mb_results(args):
offset = ord(b"A")
rand_bytes = os.urandom(1024 * 1024)
array = bytearray(rand_bytes)
for i, bt in enumerate(array):
array[i] = offset + bt % 26
return {str(args["index"]) : bytes(array).decode()}
return {str(args["index"]): bytes(array).decode()}

for i in range(100):
benchmark.add(generate_one_mb_results, {"index" : i})
benchmark.add(generate_one_mb_results, {"index": i})

# Hardcoded for now. The file splits are hardcoded in nfs_json_list to be made at 30 MB.
# For the 100MB + some curly brackets and environments, this should produce 4 files.
assert len(os.listdir("./test_file_split_benchmark/results")) == 4
# Hardcoded for now. The file splits are hardcoded in nfs_json_list to be made at 30 MB.
# For the 100MB + some curly brackets and environments, this should produce 4 files.
assert len(os.listdir("./test_file_split_benchmark/results")) == 4

benchmark.delete()
benchmark.delete()

0 comments on commit 5c564c9

Please sign in to comment.