Skip to content

Commit

Permalink
add options for lmdb store to benchmark
Browse files Browse the repository at this point in the history
fix git ignore to not exclude test_datasets; add lmdb requirement

catch incorrect store error in benchmark

benchmark fixes for lmdb/pi

fix benchmark for pi
  • Loading branch information
misko committed Mar 31, 2024
1 parent f290dc6 commit 7995c23
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 14 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
spf/data
**/*.npy
**/*.zip
test_data
test_data.zip
test_data_dl.zip
test_data.txt
Expand All @@ -20,4 +19,5 @@ test_data.txt
**/*.tmp
**/mav.*
**/testdata*
**/temp
**/temp
**/sessions*
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ joblib==1.3.2
kiwisolver==1.4.4
libaio==0.9.1
llvmlite==0.42.0
lmdb==1.4.1
lxml==5.1.0
MarkupSafe==2.1.3
matplotlib==3.7.2
Expand Down
45 changes: 33 additions & 12 deletions spf/sdrpluto/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def benchmark(
filename=None,
compress=None,
chunk_size=1024,
store=None,
):

z = None
Expand All @@ -56,8 +57,16 @@ def benchmark(
)
else:
raise NotImplementedError
if store is None or store == "directory":
store = zarr.DirectoryStore(filename)
elif store == "lmdb":
store = zarr.LMDBStore(
filename, map_size=2**38, writemap=True, map_async=True
)
else:
raise NotImplementedError
z = zarr.open(
filename,
store=store,
mode="w",
shape=(2, total_samples),
chunks=(1, 1024 * chunk_size),
Expand Down Expand Up @@ -144,6 +153,14 @@ def write_to_disk():
# NOTE: with nargs="+", argparse collects user-supplied values into a list but
# uses `default` verbatim when the flag is omitted. A bare string default would
# then be iterated character by character by the loops over args.compress and
# args.stores, so both defaults are single-element lists.
parser.add_argument(
    "--compress",
    type=str,
    help="compression option(s) to benchmark",
    required=False,
    default=["none"],
    nargs="+",
)
parser.add_argument(
    "--stores",
    type=str,
    help="zarr store backend(s) to benchmark: directory or lmdb",
    required=False,
    default=["directory"],
    nargs="+",
)

args = parser.parse_args()

Expand All @@ -155,15 +172,19 @@ def numify(x):

buffer_sizes = [numify(buffer_size) for buffer_size in args.buffer_sizes]
args.total_samples = numify(args.total_samples)
print("compression\tbuffer_size\tchunk_size\tproperty\tvalue")
print("compression\tstore\tbuffer_size\tchunk_size\tproperty\tvalue")
for compress in args.compress:
for buffer_size in buffer_sizes:
for chunk_size in args.chunk_sizes:
for k, v in benchmark(
args.uri,
buffer_size,
filename=args.write_to_file,
compress=compress,
chunk_size=chunk_size,
).items():
print(f"{compress}\t{buffer_size}\t{chunk_size}\t{k}\t{v}")
for store in args.stores:
for buffer_size in buffer_sizes:
for chunk_size in args.chunk_sizes:
for k, v in benchmark(
args.uri,
buffer_size,
store=store,
filename=args.write_to_file,
compress=compress,
chunk_size=chunk_size,
).items():
print(
f"{compress}\t{store}\t{buffer_size}\t{chunk_size}\t{k}\t{v}"
)

0 comments on commit 7995c23

Please sign in to comment.