Skip to content

Commit

Permalink
use scaffold in index CLI, fix bug for single-leaf SBT
Browse files: browse the repository at this point in the history
  • Loading branch information
luizirber committed Sep 24, 2020
1 parent 92d8f50 commit c02451a
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 4 deletions.
16 changes: 13 additions & 3 deletions sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from . import signature as sig
from . import sourmash_args
from .logging import notify, error, print_results, set_quiet
from .sbt import scaffold
from .sbtmh import SearchMinHashesFindBest, SigLeaf

from .sourmash_args import DEFAULT_LOAD_K, FileOutput
Expand Down Expand Up @@ -327,10 +328,11 @@ def index(args):
set_quiet(args.quiet)
moltype = sourmash_args.calculate_moltype(args)

tree = None
if args.append:
tree = load_sbt_index(args.sbt_name)
else:
tree = create_sbt_index(args.bf_size, n_children=args.n_children)
#else:
# tree = create_sbt_index(args.bf_size, n_children=args.n_children)

if args.sparseness < 0 or args.sparseness > 1.0:
error('sparseness must be in range [0.0, 1.0].')
Expand Down Expand Up @@ -366,6 +368,7 @@ def index(args):

# load all matching signatures in this file
ss = None
sigs = []
for ss in siglist:
ksizes.add(ss.minhash.ksize)
moltypes.add(sourmash_args.get_moltype(ss))
Expand All @@ -375,12 +378,19 @@ def index(args):
ss.minhash = ss.minhash.downsample(scaled=args.scaled)
scaleds.add(ss.minhash.scaled)

tree.insert(ss)
if tree:
tree.insert(ss)
else:
sigs.append(ss)
n += 1

if not ss:
continue

if not tree:
# TODO: init storage
tree = scaffold(sigs, None)

# check to make sure we aren't loading incompatible signatures
if len(ksizes) > 1 or len(moltypes) > 1:
error('multiple k-mer sizes or molecule types present; fail.')
Expand Down
18 changes: 17 additions & 1 deletion sourmash/sbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1442,7 +1442,23 @@ def scaffold(datasets, storage):

# next_round only contains the root of the SBT
# Convert from binary tree to nodes/leaves
root = next_round.pop()
if total_datasets == 1:
d = datasets.pop()
if isinstance(d, SigLeaf):
common = set(d.data.minhash.hashes)
elif isinstance(d, Leaf):
# TODO: we can't build common as a set;
# use Nodegraph instead?
raise NotImplementedError()
elif isinstance(d, SourmashSignature):
common = set(d.minhash.hashes)
d = SigLeaf(d.md5sum(), d)
else:
raise ValueError("Input not supported")

root = InternalNode(common, BinaryLeaf(d), None)
else:
root = next_round.pop()
assert not next_round

nodes = {}
Expand Down

0 comments on commit c02451a

Please sign in to comment.