Skip to content

Commit

Permalink
remove tarstorage and replace usage with zipstorage
Browse files Browse the repository at this point in the history
  • Loading branch information
luizirber committed Aug 14, 2020
1 parent 788b2cb commit 8caba6e
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 87 deletions.
8 changes: 4 additions & 4 deletions doc/storage.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ Saves internal SBT data in a hidden directory near the SBT JSON description.
We used to create a tar file of JSON + hidden directory,
which requires extracting and using more disk space.

### TarStorage
### ZipStorage

Similar to FSStorage,
but saves the internal SBT data in a `tar` file.
but saves the internal SBT data in a `zip` file.
- Pros
* easy to distribute (two files)
* easy to distribute (one file)
- Cons
* still need to distribute and download everything
(you need the full tar file available locally to be able to use the SBT).
(you need the full zip file available locally to be able to use the SBT).

### IPFSStorage

Expand Down
7 changes: 3 additions & 4 deletions sourmash/sbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,12 @@ def search_transcript(node, seq, threshold):
from cachetools import Cache

from .exceptions import IndexNotSupported
from .sbt_storage import FSStorage, TarStorage, IPFSStorage, RedisStorage, ZipStorage
from .sbt_storage import FSStorage, IPFSStorage, RedisStorage, ZipStorage
from .logging import error, notify, debug
from .index import Index
from .nodegraph import Nodegraph, extract_nodegraph_info, calc_expected_collisions

STORAGES = {
'TarStorage': TarStorage,
'FSStorage': FSStorage,
'IPFSStorage': IPFSStorage,
'RedisStorage': RedisStorage,
Expand Down Expand Up @@ -1310,8 +1309,8 @@ def convert_cmd(name, backend):
backend = IPFSStorage
elif backend.lower() in ('redis', 'redisstorage'):
backend = RedisStorage
elif backend.lower() in ('tar', 'tarstorage'):
backend = TarStorage
elif backend.lower() in ('zip', 'zipstorage'):
backend = ZipStorage
elif backend.lower() in ('fs', 'fsstorage'):
backend = FSStorage
if options:
Expand Down
41 changes: 0 additions & 41 deletions sourmash/sbt_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,47 +82,6 @@ def load(self, path):
return path.read_bytes()


class TarStorage(Storage):

def __init__(self, path=None):
# TODO: leave it open, or close/open every time?

if path is None:
# TODO: Open a temporary file?
pass # CTB: should raise an exception, no?

self.path = os.path.abspath(path)

dirname = os.path.dirname(self.path)
if not os.path.exists(dirname):
os.makedirs(dirname)

if os.path.exists(self.path):
self.tarfile = tarfile.open(path, 'r')
else:
self.tarfile = tarfile.open(path, 'w:gz')

def save(self, path, content):
info = tarfile.TarInfo(path)
info.size = len(content)

# TODO: check tarfile mode, if read-only reopen as writable
self.tarfile.addfile(info, BytesIO(content))

return path

def load(self, path):
content = self.tarfile.getmember(path)
f = self.tarfile.extractfile(content)
return f.read()

def init_args(self):
return {'path': self.path}

def __exit__(self, type, value, traceback):
self.tarfile.close()


class ZipStorage(Storage):

def __init__(self, path):
Expand Down
37 changes: 1 addition & 36 deletions tests/test_sbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from sourmash.sbt import SBT, GraphFactory, Leaf, Node
from sourmash.sbtmh import (SigLeaf, search_minhashes,
search_minhashes_containment)
from sourmash.sbt_storage import (FSStorage, TarStorage, RedisStorage,
from sourmash.sbt_storage import (FSStorage, RedisStorage,
IPFSStorage, ZipStorage)

from . import sourmash_tst_utils as utils
Expand Down Expand Up @@ -333,41 +333,6 @@ def test_sbt_fsstorage():
assert os.path.exists(os.path.join(location, '.fstree'))


def test_sbt_tarstorage():
factory = GraphFactory(31, 1e5, 4)
with utils.TempDirectory() as location:
tree = SBT(factory)

for f in utils.SIG_FILES:
sig = load_one_signature(utils.get_test_data(f))

leaf = SigLeaf(os.path.basename(f), sig)
tree.add_node(leaf)
to_search = leaf

print('*' * 60)
print("{}:".format(to_search.metadata))
old_result = {str(s) for s in tree.find(search_minhashes,
to_search.data, 0.1)}
print(*old_result, sep='\n')

with TarStorage(os.path.join(location, 'tree.tar.gz')) as storage:
tree.save(os.path.join(location, 'tree'), storage=storage)

with TarStorage(os.path.join(location, 'tree.tar.gz')) as storage:
tree = SBT.load(os.path.join(location, 'tree'),
leaf_loader=SigLeaf.load,
storage=storage)

print('*' * 60)
print("{}:".format(to_search.metadata))
new_result = {str(s) for s in tree.find(search_minhashes,
to_search.data, 0.1)}
print(*new_result, sep='\n')

assert old_result == new_result


def test_sbt_zipstorage(tmpdir):
# create tree, save to a zip, then load and search.
factory = GraphFactory(31, 1e5, 4)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -3893,8 +3893,8 @@ def test_storage_convert():
for (n1, n2) in zip(sorted(original), sorted(ipfs)))

args = ['storage', 'convert',
'-b', """'TarStorage("{}")'""".format(
os.path.join(location, 'v2.sbt.tar.gz')),
'-b', """'ZipStorage("{}")'""".format(
os.path.join(location, 'v2.sbt.zip')),
testsbt]
status, out, err = utils.runscript('sourmash', args,
in_directory=location)
Expand Down

0 comments on commit 8caba6e

Please sign in to comment.