Skip to content

Commit

Permalink
Merge branch 'master' of github.com:dib-lab/sourmash into fix/scaled_1
Browse files Browse the repository at this point in the history
  • Loading branch information
ctb committed Apr 9, 2017
2 parents 97c0c06 + 6f728ff commit 086397d
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 43 deletions.
75 changes: 41 additions & 34 deletions sourmash_lib/sbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,6 @@ def create_nodegraph():
return create_nodegraph


class LazyNode(object):
    """Deferred SBT node: records a loader and its arguments for later use."""

    def __init__(self, load_fn, *args):
        # Nothing is loaded here; the call is only remembered.
        self.args = args
        self.load_fn = load_fn

    def do_load(self):
        """Call the stored loader with the stored arguments; return its result."""
        loader, params = self.load_fn, self.args
        return loader(*params)


class SBT(object):

def __init__(self, factory, d=2):
Expand Down Expand Up @@ -149,7 +139,6 @@ def find(self, search_fn, *args, **kwargs):
if node_g is None:
continue

node_g = node_g.do_load()
if node_p not in visited:
visited.add(node_p)
if search_fn(node_g, *args):
Expand Down Expand Up @@ -267,9 +256,9 @@ def _load_v1(jnodes, leaf_loader, dirname):

if 'internal' in jnode['filename']:
jnode['factory'] = factory
sbt_node = LazyNode(Node.load, jnode, dirname)
sbt_node = Node.load(jnode, dirname)
else:
sbt_node = LazyNode(leaf_loader, jnode, dirname)
sbt_node = leaf_loader(jnode, dirname)

sbt_nodes.append(sbt_node)

Expand Down Expand Up @@ -298,9 +287,9 @@ def _load_v2(cls, info, leaf_loader, dirname):

if 'internal' in node['filename']:
node['factory'] = factory
sbt_node = LazyNode(Node.load, node, dirname)
sbt_node = Node.load(node, dirname)
else:
sbt_node = LazyNode(leaf_loader, node, dirname)
sbt_node = leaf_loader(node, dirname)

sbt_nodes.append(sbt_node)

Expand Down Expand Up @@ -350,11 +339,13 @@ def __iter__(self):


class Node(object):
"Internal node of SBT; has 0, 1, or 2 children."
"Internal node of SBT."

def __init__(self, factory, name=None, fullpath=None):
    """Set up an internal node without building or loading its data.

    factory  -- zero-argument callable; stored and only called by the
                ``data`` property when there is no file to load from.
    name     -- optional node name.
    fullpath -- optional path of the saved nodegraph; when given, the
                ``data`` property loads from it on first access.
    """
    self.name = name
    self._factory = factory
    # Filled in lazily by the ``data`` property (or by its setter).
    self._data = None
    self._filename = fullpath

def __str__(self):
return '*Node:{name} [occupied: {nb}, fpr: {fpr:.2}]'.format(
Expand All @@ -364,46 +355,62 @@ def __str__(self):
def save(self, filename):
    """Write this node's data to ``filename`` using the data object's ``save``."""
    graph = self.data
    graph.save(filename)

@property
def data(self):
    """Return the node's data, creating or loading it on first access.

    With no filename the factory builds a fresh object; otherwise the
    nodegraph is read from the stored filename. The result is cached.
    """
    if self._data is not None:
        return self._data

    if self._filename is None:
        loaded = self._factory()
    else:
        loaded = khmer.load_nodegraph(self._filename)
    self._data = loaded
    return loaded

@data.setter
def data(self, new_data):
    """Replace the cached data with ``new_data``."""
    self._data = new_data

@staticmethod
def load(info, dirname):
    """Build a Node from a serialized node description without reading its file.

    ``info`` supplies the ``'factory'``, ``'name'`` and ``'filename'``
    entries; the filename is resolved relative to ``dirname``. Only the
    path is recorded here: the nodegraph is read by the ``data`` property
    on first access, so no eager (and immediately discarded) load is done.
    """
    filename = os.path.join(dirname, info['filename'])
    return Node(info['factory'], name=info['name'], fullpath=filename)

def do_load(self):
    """Return this node itself (quick fix so it can stand in for a lazy node)."""
    return self


class Leaf(object):
    """Leaf of an SBT: a dataset plus its metadata.

    The dataset may be passed in directly (``data``) or loaded lazily from
    ``fullpath`` with ``khmer.load_nodegraph`` the first time ``data`` is
    accessed.
    """

    def __init__(self, metadata, data=None, name=None, fullpath=None):
        """Store metadata, name and either the data or the path to load it from.

        When ``name`` is omitted, ``metadata`` is used as the name.
        """
        self.metadata = metadata
        if name is None:
            name = metadata
        self.name = name
        self._data = data
        self._filename = fullpath

    def do_load(self):
        """Return this leaf itself (kept so code calling ``do_load`` keeps working)."""
        return self

    def __str__(self):
        return '**Leaf:{name} [occupied: {nb}, fpr: {fpr:.2}] -> {metadata}'.format(
            name=self.name, metadata=self.metadata,
            nb=self.data.n_occupied(),
            fpr=khmer.calc_expected_collisions(self.data, True, 1.1))

    @property
    def data(self):
        """Return the leaf's data, loading it from file on first access.

        Raises ValueError when there is neither in-memory data nor a
        filename to load from, instead of handing ``None`` to the loader.
        """
        if self._data is None:
            if self._filename is None:
                raise ValueError(
                    'Leaf {!r} has no data and no file to load it from'.format(
                        self.name))
            self._data = khmer.load_nodegraph(self._filename)
        return self._data

    @data.setter
    def data(self, new_data):
        """Replace the cached data with ``new_data``."""
        self._data = new_data

    def save(self, filename):
        """Write the leaf's data to ``filename``."""
        self.data.save(filename)

    def update(self, parent):
        """Merge this leaf's data into ``parent``'s data."""
        parent.data.update(self.data)

    @classmethod
    def load(cls, info, dirname):
        """Build a lazily loaded leaf from a serialized node description.

        Uses the ``'metadata'``, ``'name'`` and ``'filename'`` entries of
        ``info``; the filename is resolved relative to ``dirname``. Being a
        classmethod, it returns an instance of the class it is called on.
        """
        filename = os.path.join(dirname, info['filename'])
        return cls(info['metadata'], name=info['name'], fullpath=filename)


def filter_distance( filter_a, filter_b, n=1000 ) :
Expand Down
26 changes: 17 additions & 9 deletions sourmash_lib/sbtmh.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import print_function
from __future__ import division
import os

from .sbt import Leaf
from . import Estimators
Expand All @@ -13,21 +12,30 @@ def __str__(self):

def save(self, filename):
    """Write this leaf's signature to ``filename``."""
    from sourmash_lib import signature

    # Read the property before opening the file: a lazily loaded signature
    # must be fetched before the file is reopened for writing, which would
    # overwrite the previous content.
    sig = self.data

    with open(filename, 'w') as fp:
        signature.save_signatures([sig], fp)

def update(self, parent):
    """Count every min value of this leaf's signature into ``parent``'s data."""
    mins = self.data.estimator.mh.get_mins()
    for hashval in mins:
        parent.data.count(hashval)

@staticmethod
def load(info, dirname):
    """Read the signature file named in ``info`` and wrap it in a SigLeaf.

    The file is ``info['filename']`` resolved relative to ``dirname``.
    """
    from sourmash_lib import signature

    sig_path = os.path.join(dirname, info['filename'])
    loaded = list(signature.load_signatures(sig_path))
    data, = loaded  # should only be one signature
    return SigLeaf(info['metadata'], data, name=info['name'])
@property
def data(self):
    """Return the signature, reading it from ``self._filename`` on first access."""
    if self._data is not None:
        return self._data

    from sourmash_lib import signature
    loaded = list(signature.load_signatures(self._filename))
    self._data, = loaded  # should only be one signature
    return self._data

@data.setter
def data(self, new_data):
    """Replace the cached signature with ``new_data``."""
    self._data = new_data


def search_minhashes(node, sig, threshold, results=None):
Expand Down

0 comments on commit 086397d

Please sign in to comment.