From 84b200b74ac01515cd10b9830fe02541374e9f3f Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 25 Mar 2022 06:54:52 -0700 Subject: [PATCH] rationalize _signatures_with_internal --- src/sourmash/index/__init__.py | 14 +++++++++----- src/sourmash/lca/lca_db.py | 3 ++- src/sourmash/sbt.py | 2 +- src/sourmash/sourmash_args.py | 9 +-------- tests/test_index.py | 2 +- tests/test_manifest.py | 6 +++--- tests/test_sourmash_args.py | 4 ++-- 7 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/sourmash/index/__init__.py b/src/sourmash/index/__init__.py index 67f5605cbc..f126981a6d 100644 --- a/src/sourmash/index/__init__.py +++ b/src/sourmash/index/__init__.py @@ -71,7 +71,11 @@ def signatures_with_location(self): yield ss, self.location def _signatures_with_internal(self): - """Return an iterator of tuples (ss, location, internal_location). + """Return an iterator of tuples (ss, internal_location). + + Unlike 'signatures_with_location()', this iterator should return + _all_ signatures in the object, not just those that remain after + selection/filtering. This is an internal API for use in generating manifests, and may change without warning. @@ -600,7 +604,7 @@ def load(cls, location, traverse_yield_all=False, use_manifest=True): use_manifest=use_manifest) def _signatures_with_internal(self): - """Return an iterator of tuples (ss, location, internal_location). + """Return an iterator of tuples (ss, internal_location). Note: does not limit signatures to subsets. """ @@ -615,7 +619,7 @@ def _signatures_with_internal(self): self.traverse_yield_all: fp = zf.open(zipinfo) for ss in load_signatures(fp): - yield ss, zf.filename, zipinfo.filename + yield ss, zipinfo.filename def signatures(self): "Load all signatures in the zip file." @@ -888,14 +892,14 @@ def signatures_with_location(self): yield row['signature'], loc def _signatures_with_internal(self): - """Return an iterator of tuples (ss, parent, location) + """Return an iterator of tuples (ss, location) CTB note: here, 'internal_location' is the source file for the index. This is a special feature of this (in memory) class. """ parent = self.parent for row in self.manifest.rows: - yield row['signature'], parent, row['internal_location'] + yield row['signature'], row['internal_location'] def __len__(self): diff --git a/src/sourmash/lca/lca_db.py b/src/sourmash/lca/lca_db.py index fbd2fc8d8b..6b5eb09eaf 100644 --- a/src/sourmash/lca/lca_db.py +++ b/src/sourmash/lca/lca_db.py @@ -181,8 +181,9 @@ def signatures(self): yield v def _signatures_with_internal(self): + "Return all of the signatures in this LCA database." for idx, ss in self._signatures.items(): - yield ss, self.location, idx + yield ss, idx def select(self, ksize=None, moltype=None, num=0, scaled=0, abund=None, containment=False, picklist=None): diff --git a/src/sourmash/sbt.py b/src/sourmash/sbt.py index 6fe0a6cd14..d974c23c91 100644 --- a/src/sourmash/sbt.py +++ b/src/sourmash/sbt.py @@ -189,7 +189,7 @@ def _signatures_with_internal(self): """ for k in self.leaves(): ss = k.data - yield ss, self.location, k._path + yield ss, k._path def select(self, ksize=None, moltype=None, num=0, scaled=0, containment=False, abund=None, picklist=None): diff --git a/src/sourmash/sourmash_args.py b/src/sourmash/sourmash_args.py index a66709d8d9..25372cea02 100644 --- a/src/sourmash/sourmash_args.py +++ b/src/sourmash/sourmash_args.py @@ -775,16 +775,9 @@ def get_manifest(idx, *, require=True, rebuild=False): debug_literal(f"get_manifest: no manifest found / rebuild={rebuild}") - # CTB: CollectionManifest.create_manifest wants (ss, iloc). - # so this is an adaptor function! Might want to just change - # what `create_manifest` takes. - def manifest_iloc_iter(idx): - for (ss, loc, iloc) in idx._signatures_with_internal(): - yield ss, iloc - # need to build one... try: - m = CollectionManifest.create_manifest(manifest_iloc_iter(idx), + m = CollectionManifest.create_manifest(idx._signatures_with_internal(), include_signature=False) debug_literal("get_manifest: rebuilt manifest.") except NotImplementedError: diff --git a/tests/test_index.py b/tests/test_index.py index ea9cc01630..95eebc6d34 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -1271,7 +1271,7 @@ def test_multi_index_load_from_directory(): # also check internal locations and parent value -- assert mi.parent.endswith('prot/protein') - ilocs = [ x[2] for x in mi._signatures_with_internal() ] + ilocs = [ x[1] for x in mi._signatures_with_internal() ] assert endings[0] in ilocs, ilocs assert endings[1] in ilocs, ilocs diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 35f3fec14e..9fe59a83cf 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -17,7 +17,7 @@ def test_generate_manifest(): rows = [] siglist = [] - for (sig, _, loc) in loader._signatures_with_internal(): + for (sig, loc) in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) @@ -43,7 +43,7 @@ def test_manifest_to_picklist(): rows = [] siglist = [] - for (sig, _, loc) in loader._signatures_with_internal(): + for (sig, loc) in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) @@ -64,7 +64,7 @@ def test_save_load_manifest(): rows = [] siglist = [] - for (sig, _, loc) in loader._signatures_with_internal(): + for (sig, loc) in loader._signatures_with_internal(): row = index.CollectionManifest.make_manifest_row(sig, loc) rows.append(row) siglist.append(sig) diff --git a/tests/test_sourmash_args.py b/tests/test_sourmash_args.py index aaff6209b5..969a6b6df8 100644 --- a/tests/test_sourmash_args.py +++ b/tests/test_sourmash_args.py @@ -447,7 +447,7 @@ class FakeIndex(LinearIndex): was_called = 0 def _signatures_with_internal(self): self.was_called = 1 - return [(ss47, "fakeloc", "fakeiloc")] + return [(ss47, "fakeiloc")] idx = FakeIndex([sig47]) @@ -471,7 +471,7 @@ class FakeIndex(LinearIndex): def _signatures_with_internal(self): self.was_called = 1 - return [(ss47, "fakeloc", "fakeiloc")] + return [(ss47, "fakeiloc")] idx = FakeIndex([sig47])