From 8caba6e8f6906428637d8790f4b3877bdccf4c20 Mon Sep 17 00:00:00 2001 From: Luiz Irber Date: Fri, 14 Aug 2020 11:38:22 -0700 Subject: [PATCH] remove tarstorage and replace usage with zipstorage --- doc/storage.md | 8 ++++---- sourmash/sbt.py | 7 +++---- sourmash/sbt_storage.py | 41 ----------------------------------------- tests/test_sbt.py | 37 +------------------------------------ tests/test_sourmash.py | 4 ++-- 5 files changed, 10 insertions(+), 87 deletions(-) diff --git a/doc/storage.md b/doc/storage.md index f688d84ff0..1a3a3c1c00 100644 --- a/doc/storage.md +++ b/doc/storage.md @@ -14,15 +14,15 @@ Saves internal SBT data in a hidden directory near the SBT JSON description. We used to create a tar file of JSON + hidden directory, which requires extracting and using more disk space. -### TarStorage +### ZipStorage Similar to FSStorage, -but saves the internal SBT data in a `tar` file. +but saves the internal SBT data in a `zip` file. - Pros - * easy to distribute (two files) + * easy to distribute (one file) - Cons * still need to distribute and download everything - (you need the full tar file available locally to be able to use the SBT). + (you need the full zip file available locally to be able to use the SBT). ### IPFSStorage diff --git a/sourmash/sbt.py b/sourmash/sbt.py index 37be0343fc..9b39e53f2c 100644 --- a/sourmash/sbt.py +++ b/sourmash/sbt.py @@ -55,13 +55,12 @@ def search_transcript(node, seq, threshold): from cachetools import Cache from .exceptions import IndexNotSupported -from .sbt_storage import FSStorage, TarStorage, IPFSStorage, RedisStorage, ZipStorage +from .sbt_storage import FSStorage, IPFSStorage, RedisStorage, ZipStorage from .logging import error, notify, debug from .index import Index from .nodegraph import Nodegraph, extract_nodegraph_info, calc_expected_collisions STORAGES = { - 'TarStorage': TarStorage, 'FSStorage': FSStorage, 'IPFSStorage': IPFSStorage, 'RedisStorage': RedisStorage, @@ -1310,8 +1309,8 @@ def convert_cmd(name, backend): backend = IPFSStorage elif backend.lower() in ('redis', 'redisstorage'): backend = RedisStorage - elif backend.lower() in ('tar', 'tarstorage'): - backend = TarStorage + elif backend.lower() in ('zip', 'zipstorage'): + backend = ZipStorage elif backend.lower() in ('fs', 'fsstorage'): backend = FSStorage if options: diff --git a/sourmash/sbt_storage.py b/sourmash/sbt_storage.py index 3dc9bcef5c..3d78f7eaee 100644 --- a/sourmash/sbt_storage.py +++ b/sourmash/sbt_storage.py @@ -82,47 +82,6 @@ def load(self, path): return path.read_bytes() -class TarStorage(Storage): - - def __init__(self, path=None): - # TODO: leave it open, or close/open every time? - - if path is None: - # TODO: Open a temporary file? - pass # CTB: should raise an exception, no? - - self.path = os.path.abspath(path) - - dirname = os.path.dirname(self.path) - if not os.path.exists(dirname): - os.makedirs(dirname) - - if os.path.exists(self.path): - self.tarfile = tarfile.open(path, 'r') - else: - self.tarfile = tarfile.open(path, 'w:gz') - - def save(self, path, content): - info = tarfile.TarInfo(path) - info.size = len(content) - - # TODO: check tarfile mode, if read-only reopen as writable - self.tarfile.addfile(info, BytesIO(content)) - - return path - - def load(self, path): - content = self.tarfile.getmember(path) - f = self.tarfile.extractfile(content) - return f.read() - - def init_args(self): - return {'path': self.path} - - def __exit__(self, type, value, traceback): - self.tarfile.close() - - class ZipStorage(Storage): def __init__(self, path): diff --git a/tests/test_sbt.py b/tests/test_sbt.py index c157b5c208..5a4a085241 100644 --- a/tests/test_sbt.py +++ b/tests/test_sbt.py @@ -10,7 +10,7 @@ from sourmash.sbt import SBT, GraphFactory, Leaf, Node from sourmash.sbtmh import (SigLeaf, search_minhashes, search_minhashes_containment) -from sourmash.sbt_storage import (FSStorage, TarStorage, RedisStorage, +from sourmash.sbt_storage import (FSStorage, RedisStorage, IPFSStorage, ZipStorage) from . import sourmash_tst_utils as utils @@ -333,41 +333,6 @@ def test_sbt_fsstorage(): assert os.path.exists(os.path.join(location, '.fstree')) -def test_sbt_tarstorage(): - factory = GraphFactory(31, 1e5, 4) - with utils.TempDirectory() as location: - tree = SBT(factory) - - for f in utils.SIG_FILES: - sig = load_one_signature(utils.get_test_data(f)) - - leaf = SigLeaf(os.path.basename(f), sig) - tree.add_node(leaf) - to_search = leaf - - print('*' * 60) - print("{}:".format(to_search.metadata)) - old_result = {str(s) for s in tree.find(search_minhashes, - to_search.data, 0.1)} - print(*old_result, sep='\n') - - with TarStorage(os.path.join(location, 'tree.tar.gz')) as storage: - tree.save(os.path.join(location, 'tree'), storage=storage) - - with TarStorage(os.path.join(location, 'tree.tar.gz')) as storage: - tree = SBT.load(os.path.join(location, 'tree'), - leaf_loader=SigLeaf.load, - storage=storage) - - print('*' * 60) - print("{}:".format(to_search.metadata)) - new_result = {str(s) for s in tree.find(search_minhashes, - to_search.data, 0.1)} - print(*new_result, sep='\n') - - assert old_result == new_result - - def test_sbt_zipstorage(tmpdir): # create tree, save to a zip, then load and search. factory = GraphFactory(31, 1e5, 4) diff --git a/tests/test_sourmash.py b/tests/test_sourmash.py index 99085d7a1f..bbcdf78023 100644 --- a/tests/test_sourmash.py +++ b/tests/test_sourmash.py @@ -3893,8 +3893,8 @@ def test_storage_convert(): for (n1, n2) in zip(sorted(original), sorted(ipfs))) args = ['storage', 'convert', - '-b', """'TarStorage("{}")'""".format( - os.path.join(location, 'v2.sbt.tar.gz')), + '-b', """'ZipStorage("{}")'""".format( + os.path.join(location, 'v2.sbt.zip')), testsbt] status, out, err = utils.runscript('sourmash', args, in_directory=location)