Skip to content

Commit

Permalink
add ZipStorage, support loading tree from storage (#648)
Browse files Browse the repository at this point in the history
* add ZipStorage, support loading tree from storage
* load and save compressed sigs
* refactor signature loading a bit
* support creating zip SBTs
* prepare v6 for different index types
* change how compression works; compression now defaults to 1
* v5 to v6
* Fix exceptions (based on symbolic updates)
* implement zipstorage deduplication

Co-authored-by: C. Titus Brown <titus@idyll.org>
  • Loading branch information
luizirber and ctb committed Apr 30, 2020
1 parent fc024be commit ccfcb77
Show file tree
Hide file tree
Showing 25 changed files with 785 additions and 323 deletions.
4 changes: 2 additions & 2 deletions include/sourmash.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ uintptr_t nodegraph_ntables(Nodegraph *ptr);

void nodegraph_save(Nodegraph *ptr, const char *filename);

uint8_t *nodegraph_to_buffer(Nodegraph *ptr, uintptr_t *size);
uint8_t *nodegraph_to_buffer(Nodegraph *ptr, uint8_t compression, uintptr_t *size);

void nodegraph_buffer_free(uint8_t *ptr, uintptr_t insize);

Expand Down Expand Up @@ -267,7 +267,7 @@ Signature **signatures_load_path(const char *ptr,
const char *select_moltype,
uintptr_t *size);

SourmashStr signatures_save_buffer(Signature **ptr, uintptr_t size);
uint8_t *signatures_save_buffer(Signature **ptr, uintptr_t size, uint8_t compression, uintptr_t *osize);

char sourmash_aa_to_dayhoff(char aa);

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def build_native(spec):
'sourmash = sourmash.__main__:main'
]
},
"install_requires": ["screed>=0.9", "cffi>=1.14.0", 'numpy',
"install_requires": ["screed>=0.9", "cffi>=1.14.0", "enum34", 'numpy',
'matplotlib', 'scipy', "deprecation>=2.0.6"],
"setup_requires": [
"setuptools>=38.6.0",
Expand Down
3 changes: 3 additions & 0 deletions sourmash/_compat.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import abc
import sys


Expand All @@ -14,6 +15,7 @@ def implements_to_string(cls):
cls.__unicode__ = cls.__str__
cls.__str__ = lambda x: x.__unicode__().encode('utf-8')
return cls
ABC = abc.ABCMeta(str('ABC'), (object,), {'__slots__': ()})
else:
text_type = str
int_types = (int,)
Expand All @@ -22,3 +24,4 @@ def implements_to_string(cls):
itervalues = lambda x: x.values()
NUL = 0
implements_to_string = lambda x: x
from abc import ABC
3 changes: 2 additions & 1 deletion sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,8 @@ def index(args):
nums = set()
scaleds = set()
for f in inp_files:
notify('\r...reading from {} ({} signatures so far)', f, n, end='')
if n % 100 == 0:
notify('\r...reading from {} ({} signatures so far)', f, n, end='')
siglist = sig.load_signatures(f, ksize=args.ksize,
select_moltype=moltype)

Expand Down
41 changes: 32 additions & 9 deletions sourmash/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,47 @@ def __str__(self):
return rv


class IndexNotSupported(SourmashError):
    """Error with a fixed message: the index format is not supported.

    Takes no constructor arguments; the message passed to
    ``SourmashError`` is always the same string.
    """

    def __init__(self):
        message = "This index format is not supported in this version of sourmash"
        SourmashError.__init__(self, message)

def _make_error(error_name, base=SourmashError, code=None):
    """Build a new exception class and register it in this module.

    The class subclasses ``base``, has its ``__name__`` and
    ``__qualname__`` set to ``error_name``, is stored in the module
    globals under that name, and has that name appended to ``__all__``.

    :param error_name: name given to the new exception class.
    :param base: parent class of the new exception.
    :param code: optional value stored as the ``code`` class attribute;
        the attribute is only set when ``code`` is not ``None``.
    :return: the newly created exception class.
    """

    class error_cls(base):
        pass

    # The placeholder class name is overwritten so the class reports
    # ``error_name`` everywhere.
    error_cls.__name__ = error_name
    error_cls.__qualname__ = error_name

    if code is not None:
        error_cls.code = code

    # Publish the class as a module-level name and export it.
    globals()[error_cls.__name__] = error_cls
    __all__.append(error_cls.__name__)
    return error_cls


def _get_error_base(error_name):
    """Pick the base class for an exception called ``error_name``.

    A name of the form ``<Prefix>Error<Suffix>``, with both prefix and
    suffix non-empty, gets ``<Prefix>Error`` as its base. That base is
    looked up in the module globals and created with ``_make_error`` if
    it is not there yet. Any other name gets ``SourmashError``.

    :param error_name: name of the exception class being created.
    :return: the class to use as its parent.
    """
    # Split around the first occurrence of "Error" only.
    prefix, marker, suffix = error_name.partition("Error")
    if not (marker and prefix and suffix):
        return SourmashError

    base_error_name = prefix + "Error"
    base_class = globals().get(base_error_name)
    if base_class is None:
        base_class = _make_error(base_error_name)
    return base_class


def _make_exceptions():
for attr in dir(lib):
if not attr.startswith('SOURMASH_ERROR_CODE_'):
continue

class Exc(SourmashError):
pass

code = getattr(lib, attr)
if code == 1104:
exceptions_by_code[code] = ValueError
elif code < 100 or code > 10000:
Exc.__name__ = attr[20:].title().replace('_', '')
Exc.code = getattr(lib, attr)
globals()[Exc.__name__] = Exc
Exc.code = code
exceptions_by_code[code] = Exc
__all__.append(Exc.__name__)
error_name = attr[20:].title().replace("_", "")
base = _get_error_base(error_name)
exc = _make_error(error_name, base=base, code=getattr(lib, attr))
exceptions_by_code[exc.code] = exc
else:
exceptions_by_code[code] = ValueError

Expand Down
5 changes: 2 additions & 3 deletions sourmash/index.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
"An Abstract Base Class for collections of signatures."

from __future__ import division
from abc import ABCMeta, abstractmethod
from abc import abstractmethod
from collections import namedtuple

# compatible with Python 2 *and* 3:
ABC = ABCMeta("ABC", (object,), {"__slots__": ()})
from ._compat import ABC


class Index(ABC):
Expand Down
4 changes: 2 additions & 2 deletions sourmash/nodegraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ def from_buffer(buf):
def save(self, filename):
    """Save this object to ``filename`` via ``lib.nodegraph_save``.

    ``filename`` is converted with ``to_bytes`` before being passed on.
    """
    self._methodcall(
        lib.nodegraph_save,
        to_bytes(filename),
    )

def to_bytes(self):
def to_bytes(self, compression=1):
size = ffi.new("uintptr_t *")
rawbuf = self._methodcall(lib.nodegraph_to_buffer, size)
rawbuf = self._methodcall(lib.nodegraph_to_buffer, compression, size)
size = size[0]

rawbuf = ffi.gc(rawbuf, lambda o: lib.nodegraph_buffer_free(o, size), size)
Expand Down
Loading

0 comments on commit ccfcb77

Please sign in to comment.