Skip to content

Commit

Permalink
Add append option to sbt_index
Browse files Browse the repository at this point in the history
  • Loading branch information
luizirber committed Jan 20, 2017
1 parent 28c11a3 commit 73dd7ed
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 2 deletions.
8 changes: 6 additions & 2 deletions sourmash_lib/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,15 +486,19 @@ def sbt_index(args):
help='signatures to load into SBT')
parser.add_argument('-k', '--ksize', type=int, default=None)
parser.add_argument('--traverse-directory', action='store_true')
parser.add_argument('--append', action='store_true', default=False)
parser.add_argument('-x', '--bf-size', type=float, default=1e5)

sourmash_args.add_moltype_args(parser)

args = parser.parse_args(args)
moltype = sourmash_args.calculate_moltype(args)

factory = GraphFactory(1, args.bf_size, 4)
tree = SBT(factory)
if args.append:
tree = SBT.load(args.sbt_name, leaf_loader=SigLeaf.load)
else:
factory = GraphFactory(1, args.bf_size, 4)
tree = SBT(factory)

if args.traverse_directory:
inp_files = list(sourmash_args.traverse_find_sigs(args.signatures))
Expand Down
4 changes: 4 additions & 0 deletions sourmash_lib/sbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,13 +156,16 @@ def parent(self, pos):
if pos == 0:
return None
p = int(math.floor((pos - 1) / self.d))
self.nodes[p] = self.nodes[p].do_load()
return NodePos(p, self.nodes[p])

def children(self, pos):
    """Return the child entries of the node stored at index ``pos``.

    One entry is produced per child offset in ``range(self.d)``, in
    ascending offset order; each entry is whatever ``self.child`` returns
    for that (parent, offset) pair.
    """
    entries = []
    for offset in range(self.d):
        entries.append(self.child(pos, offset))
    return entries

def child(self, parent, pos):
    """Return a ``NodePos`` for child number ``pos`` of the node at ``parent``.

    The child's slot index is ``self.d * parent + pos + 1``. When that slot
    holds something other than ``None``, it is replaced in ``self.nodes`` by
    the result of its ``do_load()`` before being returned.
    """
    slot = self.d * parent + pos + 1
    node = self.nodes[slot]
    if node is not None:
        # Store the loaded node back so the slot keeps the loaded form.
        node = node.do_load()
        self.nodes[slot] = node
    return NodePos(slot, node)

def save(self, tag):
Expand All @@ -184,6 +187,7 @@ def save(self, tag):
structure[i] = None
continue

node = node.do_load()
basename = os.path.basename(node.name)
data = {
'filename': os.path.join('.sbt.' + basetag,
Expand Down
46 changes: 46 additions & 0 deletions sourmash_lib/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,52 @@ def test_do_sourmash_sbt_index_traverse():
assert testdata2 in out


def test_do_sourmash_sbt_index_append():
    """Check that ``sbt_index --append`` adds a signature to an existing index.

    Steps, all run inside a temporary directory:
      1. compute signatures for short.fa, short2.fa and short3.fa;
      2. index the first two signatures as 'zzz' and search with short3's
         signature: the first two test files must appear in the output and
         the third must not;
      3. re-run ``sbt_index`` with ``--append`` and short3's signature, then
         search again with ``--threshold 0.95``: the second and third test
         files must appear and the first must not.
    """
    with utils.TempDirectory() as location:
        fasta_paths = [utils.get_test_data(name)
                       for name in ('short.fa', 'short2.fa', 'short3.fa')]
        testdata1, testdata2, testdata3 = fasta_paths

        # Paths reused by both the initial and the post-append checks.
        index_json = os.path.join(location, 'zzz.sbt.json')
        sbt_name = os.path.join(location, 'zzz')
        sig_loc = os.path.join(location, 'short3.fa.sig')

        utils.runscript('sourmash',
                        ['compute'] + fasta_paths,
                        in_directory=location)

        # Build the initial index from the first two signatures only.
        build_cmd = ['sbt_index', 'zzz', 'short.fa.sig', 'short2.fa.sig']
        utils.runscript('sourmash', build_cmd, in_directory=location)
        assert os.path.exists(index_json)

        search_cmd = ['sbt_search', sbt_name, sig_loc]
        _, out, _ = utils.runscript('sourmash', search_cmd)
        print(out)

        assert testdata1 in out
        assert testdata2 in out
        assert testdata3 not in out

        # Append the third signature to the existing index.
        append_cmd = ['sbt_index', '--append', 'zzz', 'short3.fa.sig']
        utils.runscript('sourmash', append_cmd, in_directory=location)
        assert os.path.exists(index_json)

        strict_search_cmd = ['sbt_search', '--threshold', '0.95',
                             sbt_name, sig_loc]
        _, out, _ = utils.runscript('sourmash', strict_search_cmd)
        print(out)

        assert testdata1 not in out
        assert testdata2 in out
        assert testdata3 in out


def test_do_sourmash_sbt_search_otherdir():
with utils.TempDirectory() as location:
testdata1 = utils.get_test_data('short.fa')
Expand Down

0 comments on commit 73dd7ed

Please sign in to comment.