Skip to content

Commit

Permalink
Oxidize ZipStorage (#1909)
Browse files Browse the repository at this point in the history
Expose the (read-only for now) Rust ZipStorage and use it instead of the regular (read-write) ZipStorage.

If writing is needed, fall back to the current ZipStorage (as a stopgap while the Rust one doesn't support writing).

This PR also removes Python 3.7 support (which we actually dropped in #1839), and adds Python 3.9 and Python 3.10 to the setup.cfg classifiers.

* init rust zipstorage
* unify into ZipStorage
* benchmark that works with previous ZipStorage
* benchmark for loading small data from ZipStorage
* ouroboros seems to work
* peakmem benchmark for zipstorage
* add docker target in nix
  • Loading branch information
luizirber committed Apr 8, 2022
1 parent 1229dc1 commit a998a6a
Show file tree
Hide file tree
Showing 20 changed files with 576 additions and 114 deletions.
43 changes: 43 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ include/sourmash.h: src/core/src/lib.rs \
src/core/src/ffi/nodegraph.rs \
src/core/src/ffi/index/mod.rs \
src/core/src/ffi/index/revindex.rs \
src/core/src/ffi/storage.rs \
src/core/src/errors.rs
cd src/core && \
RUSTC_BOOTSTRAP=1 cbindgen -c cbindgen.toml . -o ../../$@
Expand Down
61 changes: 61 additions & 0 deletions benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import os
import random
from pathlib import Path
from tempfile import NamedTemporaryFile


from sourmash.sbt_storage import ZipStorage
from sourmash.minhash import MinHash


Expand Down Expand Up @@ -139,3 +143,60 @@ class PeakmemMinAbundanceSuite(PeakmemMinHashSuite):
def setup(self):
PeakmemMinHashSuite.setup(self)
self.mh = MinHash(500, 21, track_abundance=True)

####################

class TimeZipStorageSuite:
    """Timing benchmarks for loading entries out of a ``ZipStorage``.

    ``setup`` builds an uncompressed zip archive in a named temporary file
    containing many tiny entries plus one larger entry called ``"sig1"``.
    Each ``time_*`` method then opens the archive by path with
    ``ZipStorage`` and loads a single entry repeatedly.

    The fixture dimensions are class attributes so a subclass (or a test)
    can shrink or grow the archive without editing the method bodies.
    """

    # Number of one-byte filler entries, named "0" .. str(n - 1).
    n_small_entries = 100_000
    # Size in bytes of the single larger entry stored under "sig1".
    big_entry_size = 1_000_000
    # How many times each benchmark loads its entry per invocation.
    load_repeats = 20

    def setup(self):
        """Create the temporary zip archive the benchmarks read from."""
        import zipfile

        self.zipfile = NamedTemporaryFile()

        with zipfile.ZipFile(self.zipfile, mode='w',
                             compression=zipfile.ZIP_STORED) as storage:
            for i in range(self.n_small_entries):
                # just so we have lots of entries
                storage.writestr(str(i), b"0")
            # one big-ish entry
            storage.writestr("sig1", b"9" * self.big_entry_size)

        # The benchmarks reopen the archive by path rather than through this
        # handle, so make sure nothing is left in the Python-level buffer.
        self.zipfile.flush()

    def time_load_from_zipstorage(self):
        """Load the large ``"sig1"`` entry ``load_repeats`` times."""
        with ZipStorage(self.zipfile.name) as storage:
            for _ in range(self.load_repeats):
                storage.load("sig1")

    def time_load_small_from_zipstorage(self):
        """Load the last tiny entry ``load_repeats`` times."""
        # Derived from the entry count so the name always exists in the
        # archive; with the default count this is "99999".
        last_small = str(self.n_small_entries - 1)
        with ZipStorage(self.zipfile.name) as storage:
            for _ in range(self.load_repeats):
                storage.load(last_small)

    def teardown(self):
        """Close the temporary file created by ``setup``."""
        self.zipfile.close()


class PeakmemZipStorageSuite:
    """Peak-memory benchmarks for loading entries out of a ``ZipStorage``.

    ``setup`` builds an uncompressed zip archive in a named temporary file
    containing many tiny entries plus one larger entry called ``"sig1"``.
    Each ``peakmem_*`` method then opens the archive by path with
    ``ZipStorage`` and loads a single entry repeatedly.

    The fixture dimensions are class attributes so a subclass (or a test)
    can shrink or grow the archive without editing the method bodies.
    """

    # Number of one-byte filler entries, named "0" .. str(n - 1).
    n_small_entries = 100_000
    # Size in bytes of the single larger entry stored under "sig1".
    big_entry_size = 1_000_000
    # How many times each benchmark loads its entry per invocation.
    load_repeats = 20

    def setup(self):
        """Create the temporary zip archive the benchmarks read from."""
        import zipfile

        self.zipfile = NamedTemporaryFile()

        with zipfile.ZipFile(self.zipfile, mode='w',
                             compression=zipfile.ZIP_STORED) as storage:
            for i in range(self.n_small_entries):
                # just so we have lots of entries
                storage.writestr(str(i), b"0")
            # one big-ish entry
            storage.writestr("sig1", b"9" * self.big_entry_size)

        # The benchmarks reopen the archive by path rather than through this
        # handle, so make sure nothing is left in the Python-level buffer.
        self.zipfile.flush()

    def peakmem_load_from_zipstorage(self):
        """Load the large ``"sig1"`` entry ``load_repeats`` times."""
        with ZipStorage(self.zipfile.name) as storage:
            for _ in range(self.load_repeats):
                storage.load("sig1")

    def peakmem_load_small_from_zipstorage(self):
        """Load the last tiny entry ``load_repeats`` times."""
        # Derived from the entry count so the name always exists in the
        # archive; with the default count this is "99999".
        last_small = str(self.n_small_entries - 1)
        with ZipStorage(self.zipfile.name) as storage:
            for _ in range(self.load_repeats):
                storage.load(last_small)

    def teardown(self):
        """Close the temporary file created by ``setup``."""
        self.zipfile.close()
2 changes: 1 addition & 1 deletion doc/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ channels:
- defaults
dependencies:
- rust
- python =3.7
- python =3.8
24 changes: 12 additions & 12 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,20 @@
DYLD_LIBRARY_PATH = "${self.packages.${system}.lib}/lib";
NO_BUILD = "1";
};
docker =
let
bin = self.defaultPackage.${system};
in
pkgs.dockerTools.buildLayeredImage {
name = bin.pname;
tag = bin.version;
contents = [ bin ];

config = {
Cmd = [ "/bin/sourmash" ];
WorkingDir = "/";
};
};
};

defaultPackage = self.packages.${system}.sourmash;
Expand Down
21 changes: 21 additions & 0 deletions include/sourmash.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ typedef struct SourmashSearchResult SourmashSearchResult;

typedef struct SourmashSignature SourmashSignature;

typedef struct SourmashZipStorage SourmashZipStorage;

/**
* Represents a string.
*/
Expand Down Expand Up @@ -456,4 +458,23 @@ SourmashStr sourmash_str_from_cstr(const char *s);

char sourmash_translate_codon(const char *codon);

SourmashStr **zipstorage_filenames(const SourmashZipStorage *ptr, uintptr_t *size);

void zipstorage_free(SourmashZipStorage *ptr);

SourmashStr **zipstorage_list_sbts(const SourmashZipStorage *ptr, uintptr_t *size);

const uint8_t *zipstorage_load(const SourmashZipStorage *ptr,
const char *path_ptr,
uintptr_t insize,
uintptr_t *size);

SourmashZipStorage *zipstorage_new(const char *ptr, uintptr_t insize);

SourmashStr zipstorage_path(const SourmashZipStorage *ptr);

void zipstorage_set_subdir(SourmashZipStorage *ptr, const char *path_ptr, uintptr_t insize);

SourmashStr zipstorage_subdir(const SourmashZipStorage *ptr);

#endif /* SOURMASH_H_INCLUDED */
5 changes: 3 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ classifiers =
Operating System :: POSIX :: Linux
Operating System :: MacOS :: MacOS X
Programming Language :: Rust
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Topic :: Scientific/Engineering :: Bio-Informatics
project_urls =
Documentation = https://sourmash.readthedocs.io
Expand All @@ -42,7 +43,7 @@ install_requires =
scipy
deprecation>=2.0.6
cachetools>=4,<5
python_requires = >=3.7
python_requires = >=3.8

[bdist_wheel]
universal = 1
Expand Down
1 change: 1 addition & 0 deletions src/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ twox-hash = "1.6.0"
vec-collections = "0.3.4"
piz = "0.4.0"
memmap2 = "0.5.0"
ouroboros = "0.15.0"

[dev-dependencies]
assert_matches = "1.3.0"
Expand Down
1 change: 1 addition & 0 deletions src/core/src/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ pub mod index;
pub mod minhash;
pub mod nodegraph;
pub mod signature;
pub mod storage;

use std::ffi::CStr;
use std::os::raw::c_char;
Expand Down
Loading

0 comments on commit a998a6a

Please sign in to comment.