Skip to content

Commit

Permalink
Merge pull request #55 from TheJacksonLaboratory/hasten-boolean-queries
Browse files Browse the repository at this point in the history
Speed up the boolean ontology graph queries
  • Loading branch information
ielis authored Feb 13, 2024
2 parents d07e5fc + 5b56440 commit 8cf1d55
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 12 deletions.
22 changes: 20 additions & 2 deletions benches/graph_traversal.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def bench_base_graph(fpath_hpo: str,

results = defaultdict(list)

root = graph.root
for curie, label in CURIE2LABEL.items():
term_id = hpotk.TermId.from_curie(curie)
label = ontology.get_term_name(curie)
Expand All @@ -52,6 +53,11 @@ def bench_base_graph(fpath_hpo: str,
'get_ancestors': lambda: list(graph.get_ancestors(term_id)),
'get_children': lambda: list(graph.get_children(term_id)),
'get_descendants': lambda: list(graph.get_descendants(term_id)),

'is_parent_of': lambda: graph.is_parent_of(root, term_id),
'is_ancestor_of': lambda: graph.is_ancestor_of(root, term_id),
'is_child_of': lambda: graph.is_child_of(term_id, root),
'is_descendant_of': lambda: graph.is_descendant_of(term_id, root),
}

for method in benches:
Expand All @@ -66,12 +72,14 @@ def bench_base_graph(fpath_hpo: str,


def bench_indexed_graph(fpath_hpo: str,
number: int = 1000) -> typing.Mapping[str, typing.Mapping[str, float]]:
number: int = 1000) -> typing.Mapping[str, typing.Sequence]:
factory = hpotk.graph.CsrIndexedGraphFactory()
ontology = hpotk.load_minimal_ontology(fpath_hpo, graph_factory=factory)
graph: hpotk.graph.IndexedOntologyGraph = ontology.graph

results = defaultdict(list)
root = graph.root
root_idx = graph.root_idx
for curie, label in CURIE2LABEL.items():
term_id = hpotk.TermId.from_curie(curie)
idx = graph.node_to_idx(term_id)
Expand All @@ -87,6 +95,15 @@ def bench_indexed_graph(fpath_hpo: str,
'get_children': lambda: list(graph.get_children(term_id)),
'get_descendant_idx': lambda: list(graph.get_descendant_idx(idx)),
'get_descendants': lambda: list(graph.get_descendants(term_id)),

'is_parent_of_idx': lambda: graph.is_parent_of_idx(root_idx, idx),
'is_parent_of': lambda: graph.is_parent_of(root, term_id),
'is_ancestor_of_idx': lambda: graph.is_ancestor_of_idx(root_idx, idx),
'is_ancestor_of': lambda: graph.is_ancestor_of(root, term_id),
'is_child_of_idx': lambda: graph.is_child_of_idx(idx, root_idx),
'is_child_of': lambda: graph.is_child_of(term_id, root),
'is_descendant_of_idx': lambda: graph.is_descendant_of_idx(idx, root_idx),
'is_descendant_of': lambda: graph.is_descendant_of(term_id, root),
}

for method in benches:
Expand Down Expand Up @@ -124,6 +141,7 @@ def bench(fpath_hpo: str, number: int, revision: str):
df = df.set_index(['group', 'method', 'payload', 'revision']).sort_index()

fpath_df = f'graph_traversal-{number}-{revision}.csv'
logger.info('Storing results at `%s`', fpath_df)
df.to_csv(fpath_df)


Expand All @@ -145,7 +163,7 @@ def main() -> int:

args = parser.parse_args(argv)
if args.revision is None:
revision = datetime.datetime.now().strftime('%Y-%m-%d')
revision = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
else:
revision = args.revision
bench(args.hpo, args.number, revision)
Expand Down
136 changes: 126 additions & 10 deletions src/hpotk/graph/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def is_leaf(self, node: typing.Union[str, NODE, Identified]) -> bool:
:return: `True` if the `node` is a leaf node or `False` otherwise.
:raises ValueError: if `node` is not present in the graph.
"""
for _ in self.get_descendants(node):
for _ in self.get_children(node):
return False
return True

Expand Down Expand Up @@ -273,33 +273,149 @@ def get_ancestors(self, source: typing.Union[str, NODE, Identified],
include_source: bool = False) -> typing.Iterator[NODE]:
return self._map_with_iter_func(source, include_source, self.get_ancestor_idx)

def is_leaf(self, node: typing.Union[str, NODE, Identified]) -> bool:
    """
    Test if the `node` is a leaf, i.e. a node that has no children.

    :param node: a CURIE `str`, a NODE, or an :class:`Identified` item.
    :return: `True` if the `node` is a leaf node or `False` otherwise.
    :raises ValueError: if `node` is not present in the graph.
    """
    node_idx = self._map_to_term_idx(node)
    if node_idx is None:
        raise ValueError(f'No graph node found for {node}')

    # Seeing the first child is enough to know that the node is NOT a leaf.
    # Only a node whose child iterator is empty is a leaf.
    for _ in self.get_children_idx(node_idx):
        return False
    return True

def is_parent_of_idx(self, sub: int, obj: int) -> bool:
    """
    Check whether the node at index `sub` is a parent of the node at index `obj`.

    The parent indices of `obj` are scanned and compared with `sub`.

    :param sub: index of the candidate parent node.
    :param obj: index of the node whose parents are inspected.
    :return: `True` if `sub` is found among the parents of `obj`, `False` otherwise.
    :raises ValueError: if no such node exists for the `obj` index
      (presumably raised by `get_parents_idx`; no check is done here).
    """
    for parent_idx in self.get_parents_idx(obj):
        if sub == parent_idx:
            return True
    return False

def is_parent_of(self, sub: typing.Union[str, NODE, Identified],
                 obj: typing.Union[str, NODE, Identified]) -> bool:
    """
    Check whether `sub` is a parent of `obj`.

    :param sub: the candidate parent node.
    :param obj: the node whose parents are inspected.
    :return: `True` if `sub` is found among the parents of `obj`;
      `False` otherwise, including when `sub` is not present in the graph.
    :raises ValueError: if `obj` is not present in the graph.
    """
    # The object must be resolved first: a missing object is an error ...
    obj_idx = self._map_to_term_idx(obj)
    if obj_idx is None:
        raise ValueError(f'No graph node found for {obj}')

    # ... while a missing subject simply cannot be a parent of anything.
    sub_idx = self._map_to_term_idx(sub)
    if sub_idx is not None:
        for parent_idx in self.get_parents_idx(obj_idx):
            if sub_idx == parent_idx:
                return True
    return False

def is_ancestor_of_idx(self, sub: int, obj: int) -> bool:
    """
    Check whether the node at index `sub` is an ancestor of the node at index `obj`.

    The ancestor indices of `obj` are scanned and compared with `sub`.

    :param sub: index of the candidate ancestor node.
    :param obj: index of the node whose ancestors are inspected.
    :return: `True` if `sub` is found among the ancestors of `obj`, `False` otherwise.
    :raises ValueError: if no such node exists for the `obj` index
      (presumably raised by `get_ancestor_idx`; no check is done here).
    """
    for ancestor_idx in self.get_ancestor_idx(obj):
        if sub == ancestor_idx:
            return True
    return False

def is_ancestor_of(self, sub: typing.Union[str, NODE, Identified],
                   obj: typing.Union[str, NODE, Identified]) -> bool:
    """
    Check whether `sub` is an ancestor of `obj`.

    :param sub: the candidate ancestor node.
    :param obj: the node whose ancestors are inspected.
    :return: `True` if `sub` is found among the ancestors of `obj`;
      `False` otherwise, including when `sub` is not present in the graph.
    :raises ValueError: if `obj` is not present in the graph.
    """
    # A missing object is an error, hence it is resolved before the subject.
    obj_idx = self._map_to_term_idx(obj)
    if obj_idx is None:
        raise ValueError(f'No graph node found for {obj}')

    # A subject that is not in the graph cannot be an ancestor of anything.
    sub_idx = self._map_to_term_idx(sub)
    if sub_idx is not None:
        for ancestor_idx in self.get_ancestor_idx(obj_idx):
            if sub_idx == ancestor_idx:
                return True
    return False

def is_child_of_idx(self, sub: int, obj: int) -> bool:
    """
    Check whether the node at index `sub` is a child of the node at index `obj`.

    The check is done from the subject's side: the parent indices of `sub`
    are scanned and compared with `obj`.

    :param sub: index of the candidate child node.
    :param obj: index of the candidate parent node.
    :return: `True` if `obj` is found among the parents of `sub`, `False` otherwise.
    :raises ValueError: if no such node exists for the `sub` index
      (presumably raised by `get_parents_idx`; no check is done here).
    """
    # TODO: ValueError for `sub` may break the pattern
    #  (the sibling `is_*_of_idx` queries document the error for `obj`).
    for parent_idx in self.get_parents_idx(sub):
        if obj == parent_idx:
            return True
    return False

def is_child_of(self, sub: typing.Union[str, NODE, Identified],
                obj: typing.Union[str, NODE, Identified]) -> bool:
    """
    Check whether `sub` is a child of `obj`.

    :param sub: the candidate child node.
    :param obj: the candidate parent node.
    :return: `True` if `obj` is found among the parents of `sub`;
      `False` otherwise, including when `sub` is not present in the graph.
    :raises ValueError: if `obj` is not present in the graph.
    """
    # A missing object is an error, hence it is resolved before the subject.
    obj_idx = self._map_to_term_idx(obj)
    if obj_idx is None:
        raise ValueError(f'No graph node found for {obj}')

    sub_idx = self._map_to_term_idx(sub)
    if sub_idx is not None:
        # Scan the parents of `sub` instead of the children of `obj`:
        # a term has usually fewer parents than children.
        for parent_idx in self.get_parents_idx(sub_idx):
            if obj_idx == parent_idx:
                return True
    return False

def is_descendant_of_idx(self, sub: int, obj: int) -> bool:
    """
    Check whether the node at index `sub` is a descendant of the node at index `obj`.

    The check is done from the subject's side: the ancestor indices of `sub`
    are scanned and compared with `obj`.

    :param sub: index of the candidate descendant node.
    :param obj: index of the candidate ancestor node.
    :return: `True` if `obj` is found among the ancestors of `sub`, `False` otherwise.
    :raises ValueError: if no such node exists for the `sub` index
      (presumably raised by `get_ancestor_idx`; no check is done here).
    """
    # TODO: ValueError for `sub` may break the pattern
    #  (the sibling `is_*_of_idx` queries document the error for `obj`).
    for ancestor_idx in self.get_ancestor_idx(sub):
        if obj == ancestor_idx:
            return True
    return False

def is_descendant_of(self, sub: typing.Union[str, NODE, Identified],
                     obj: typing.Union[str, NODE, Identified]) -> bool:
    """
    Check whether `sub` is a descendant of `obj`.

    :param sub: the candidate descendant node.
    :param obj: the candidate ancestor node.
    :return: `True` if `obj` is found among the ancestors of `sub`;
      `False` otherwise, including when `sub` is not present in the graph.
    :raises ValueError: if `obj` is not present in the graph.
    """
    # A missing object is an error, hence it is resolved before the subject.
    obj_idx = self._map_to_term_idx(obj)
    if obj_idx is None:
        raise ValueError(f'No graph node found for {obj}')

    sub_idx = self._map_to_term_idx(sub)
    if sub_idx is not None:
        # Walk up from `sub` instead of down from `obj`, following the rationale
        # that a term has usually fewer parents than children.
        for ancestor_idx in self.get_ancestor_idx(sub_idx):
            if obj_idx == ancestor_idx:
                return True
    return False

def _map_with_iter_func(self, node: typing.Union[str, NODE, Identified],
include_source: bool,
func: typing.Callable[[int], typing.Iterator[int]]) -> typing.Iterator[NODE]:
term_id = self._map_to_term_id(node)
idx = self.node_to_idx(term_id)
idx = self._map_to_term_idx(node)
if idx is not None:
if include_source:
return itertools.chain((term_id,), map(lambda i: self.idx_to_node(i), func(idx)))
return itertools.chain(
(self.idx_to_node(idx),),
map(self.idx_to_node, func(idx)))
else:
return map(lambda i: self.idx_to_node(i), func(idx))
return map(self.idx_to_node, func(idx))
else:
raise ValueError(f'{node} is not present in the graph!')

def _map_with_seq_func(self, node: typing.Union[str, NODE, Identified],
include_source: bool,
func: typing.Callable[[int], typing.Sequence[int]]) -> typing.Iterator[NODE]:
term_id = self._map_to_term_id(node)
idx = self.node_to_idx(term_id)
idx = self._map_to_term_idx(node)
if idx is not None:
if include_source:
return itertools.chain((term_id,), map(lambda i: self.idx_to_node(i), func(idx)))
return itertools.chain(
(self.idx_to_node(idx),),
map(self.idx_to_node, func(idx)))
else:
return map(lambda i: self.idx_to_node(i), func(idx))
return map(self.idx_to_node, func(idx))
else:
raise ValueError(f'{node} is not present in the graph!')

# TODO: possibly override is_parent, is_leaf, is_ancestor, etc.
def _map_to_term_idx(self, node: typing.Union[str, NODE, Identified]) -> typing.Optional[int]:
    """
    Resolve a `node` into its node index.

    The `node` is first converted with `_map_to_term_id` and the result
    is then looked up with `node_to_idx`.

    :param node: one of the expected node types, including CURIE `str`, NODE, or an :class:`Identified` item.
    :return: the node index or `None` if the node is not present in the graph.
    :raises ValueError: if the node is not in one of the expected types.
    """
    return self.node_to_idx(self._map_to_term_id(node))

# The rest

Expand Down

0 comments on commit 8cf1d55

Please sign in to comment.