diff --git a/.gitignore b/.gitignore
index 2840779..ab75228 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# Benches
+benches/history/
+
 # IDE stuff
 .idea/
 *.iml
diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md
index cf57919..e25360d 100644
--- a/HOW_TO_RELEASE.md
+++ b/HOW_TO_RELEASE.md
@@ -44,7 +44,7 @@ tox run
 python3 -m build
 
 # Deploy
-python3 -m twine upload --sign --identity dist/*
+python3 -m twine upload dist/*
 
 # Clear the deployed files
 rm -rf dist
diff --git a/benches/graph_traversal.py b/benches/graph_traversal.py
index 6db3138..3a5355f 100644
--- a/benches/graph_traversal.py
+++ b/benches/graph_traversal.py
@@ -41,6 +41,7 @@ def bench_base_graph(fpath_hpo: str,
 
     results = defaultdict(list)
 
+    root = graph.root
     for curie, label in CURIE2LABEL.items():
         term_id = hpotk.TermId.from_curie(curie)
         label = ontology.get_term_name(curie)
@@ -52,6 +53,11 @@ def bench_base_graph(fpath_hpo: str,
             'get_ancestors': lambda: list(graph.get_ancestors(term_id)),
             'get_children': lambda: list(graph.get_children(term_id)),
             'get_descendants': lambda: list(graph.get_descendants(term_id)),
+
+            'is_parent_of': lambda: graph.is_parent_of(root, term_id),
+            'is_ancestor_of': lambda: graph.is_ancestor_of(root, term_id),
+            'is_child_of': lambda: graph.is_child_of(term_id, root),
+            'is_descendant_of': lambda: graph.is_descendant_of(term_id, root),
         }
 
         for method in benches:
@@ -66,12 +72,14 @@ def bench_base_graph(fpath_hpo: str,
 
 
 def bench_indexed_graph(fpath_hpo: str,
-                        number: int = 1000) -> typing.Mapping[str, typing.Mapping[str, float]]:
+                        number: int = 1000) -> typing.Mapping[str, typing.Sequence]:
     factory = hpotk.graph.CsrIndexedGraphFactory()
     ontology = hpotk.load_minimal_ontology(fpath_hpo, graph_factory=factory)
     graph: hpotk.graph.IndexedOntologyGraph = ontology.graph
 
     results = defaultdict(list)
+    root = graph.root
+    root_idx = graph.root_idx
     for curie, label in CURIE2LABEL.items():
         term_id = hpotk.TermId.from_curie(curie)
         idx = graph.node_to_idx(term_id)
@@ -87,6 +95,15 @@ def bench_indexed_graph(fpath_hpo: str,
             'get_children': lambda: list(graph.get_children(term_id)),
             'get_descendant_idx': lambda: list(graph.get_descendant_idx(idx)),
             'get_descendants': lambda: list(graph.get_descendants(term_id)),
+
+            'is_parent_of_idx': lambda: graph.is_parent_of_idx(root_idx, idx),
+            'is_parent_of': lambda: graph.is_parent_of(root, term_id),
+            'is_ancestor_of_idx': lambda: graph.is_ancestor_of_idx(root_idx, idx),
+            'is_ancestor_of': lambda: graph.is_ancestor_of(root, term_id),
+            'is_child_of_idx': lambda: graph.is_child_of_idx(idx, root_idx),
+            'is_child_of': lambda: graph.is_child_of(term_id, root),
+            'is_descendant_of_idx': lambda: graph.is_descendant_of_idx(idx, root_idx),
+            'is_descendant_of': lambda: graph.is_descendant_of(term_id, root),
         }
 
         for method in benches:
@@ -121,9 +138,10 @@ def bench(fpath_hpo: str, number: int, revision: str):
 
     df = pd.concat(results)
     df['revision'] = revision
-    df = df.set_index(['revision', 'method', 'group', 'payload']).sort_index()
+    df = df.set_index(['group', 'method', 'payload', 'revision']).sort_index()
 
     fpath_df = f'graph_traversal-{number}-{revision}.csv'
+    logger.info('Storing results at `%s`', fpath_df)
     df.to_csv(fpath_df)
 
 
@@ -145,7 +163,7 @@ def main() -> int:
     args = parser.parse_args(argv)
 
     if args.revision is None:
-        revision = datetime.datetime.now().strftime('%Y-%m-%d')
+        revision = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
     else:
         revision = args.revision
     bench(args.hpo, args.number, revision)
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
index 4e40f05..312ff69 100644
--- a/recipe/meta.yaml
+++ b/recipe/meta.yaml
@@ -1,5 +1,5 @@
 {% set name = "hpo-toolkit" %}
-{% set version = "0.4.1" %}
+{% set version = "0.4.2" %}
 
 package:
   name: {{ name|lower }}
diff --git a/src/hpotk/__init__.py b/src/hpotk/__init__.py
index 5116bd5..f72e015 100644
--- a/src/hpotk/__init__.py
+++ b/src/hpotk/__init__.py
@@ -2,7 +2,7 @@
 HPO toolkit is a library for working with Human Phenotype Ontology and the HPO annotation data.
 """
 
-__version__ = "0.4.1"
+__version__ = "0.4.2"
 
 from . import algorithm
 from . import annotations
diff --git a/src/hpotk/graph/_api.py b/src/hpotk/graph/_api.py
index 521c4c9..3e15777 100644
--- a/src/hpotk/graph/_api.py
+++ b/src/hpotk/graph/_api.py
@@ -91,7 +91,7 @@ def is_leaf(self, node: typing.Union[str, NODE, Identified]) -> bool:
         :return: `True` if the `node` is a leaf node or `False` otherwise.
         :raises ValueError: if `node` is not present in the graph.
         """
-        for _ in self.get_descendants(node):
+        for _ in self.get_children(node):
             return False
         return True
 
@@ -273,33 +273,150 @@ def get_ancestors(self, source: typing.Union[str, NODE, Identified],
                       include_source: bool = False) -> typing.Iterator[NODE]:
         return self._map_with_iter_func(source, include_source, self.get_ancestor_idx)
 
+    def is_leaf(self, node: typing.Union[str, NODE, Identified]) -> bool:
+        node_idx = self._map_to_term_idx(node)
+        if node_idx is None:
+            raise ValueError(f'No graph node found for {node}')
+
+        # A leaf has no children: seeing the first child proves `node` is NOT a leaf.
+        for _ in self.get_children_idx(node_idx):
+            return False
+        return True
+
+    def is_parent_of_idx(self, sub: int, obj: int) -> bool:
+        """
+        Return `True` if the subject `sub` is a parent of the object `obj`.
+
+        :param sub: index of a graph node.
+        :param obj: index of the other graph node.
+        :return: `True` if the `sub` is a parent of the `obj`.
+        :raises ValueError: if no such node exists for the `obj` index.
+        """
+        return any(sub == idx for idx in self.get_parents_idx(obj))
+
+    def is_parent_of(self, sub: typing.Union[str, NODE, Identified],
+                     obj: typing.Union[str, NODE, Identified]) -> bool:
+        obj_idx = self._map_to_term_idx(obj)
+        if obj_idx is None:
+            raise ValueError(f'No graph node found for {obj}')
+
+        sub_idx = self._map_to_term_idx(sub)
+        if sub_idx is None:
+            return False
+
+        return any(sub_idx == idx for idx in self.get_parents_idx(obj_idx))
+
+    def is_ancestor_of_idx(self, sub: int, obj: int) -> bool:
+        """
+        Return `True` if the subject `sub` is an ancestor of the object `obj`.
+
+        :param sub: index of a graph node.
+        :param obj: index of the other graph node.
+        :return: `True` if the `sub` is an ancestor of the `obj`.
+        :raises ValueError: if no such node exists for the `obj` index.
+        """
+        return any(sub == idx for idx in self.get_ancestor_idx(obj))
+
+    def is_ancestor_of(self, sub: typing.Union[str, NODE, Identified],
+                       obj: typing.Union[str, NODE, Identified]) -> bool:
+        obj_idx = self._map_to_term_idx(obj)
+        if obj_idx is None:
+            raise ValueError(f'No graph node found for {obj}')
+
+        sub_idx = self._map_to_term_idx(sub)
+        if sub_idx is None:
+            return False
+
+        return any(sub_idx == idx for idx in self.get_ancestor_idx(obj_idx))
+
+    def is_child_of_idx(self, sub: int, obj: int) -> bool:
+        """
+        Return `True` if the subject `sub` is a child of the object `obj`.
+
+        :param sub: index of a graph node.
+        :param obj: index of the other graph node.
+        :return: `True` if the `sub` is a child of the `obj`.
+        :raises ValueError: if no such node exists for the `sub` index.
+        """
+        # TODO: ValueError for `sub` may break the pattern
+        return any(obj == idx for idx in self.get_parents_idx(sub))
+
+    def is_child_of(self, sub: typing.Union[str, NODE, Identified],
+                    obj: typing.Union[str, NODE, Identified]) -> bool:
+        obj_idx = self._map_to_term_idx(obj)
+        if obj_idx is None:
+            raise ValueError(f'No graph node found for {obj}')
+
+        sub_idx = self._map_to_term_idx(sub)
+        if sub_idx is None:
+            return False
+
+        # Exploit the fact that a term has usually fewer parents than children.
+        return any(obj_idx == idx for idx in self.get_parents_idx(sub_idx))
+
+    def is_descendant_of_idx(self, sub: int, obj: int) -> bool:
+        """
+        Return `True` if the subject `sub` is a descendant of the object `obj`.
+
+        :param sub: index of a graph node.
+        :param obj: index of the other graph node.
+        :return: `True` if the `sub` is a descendant of the `obj`.
+        :raises ValueError: if no such node exists for the `sub` index.
+        """
+        # TODO: ValueError for `sub` may break the pattern
+        return any(obj == idx for idx in self.get_ancestor_idx(sub))
+
+    def is_descendant_of(self, sub: typing.Union[str, NODE, Identified],
+                         obj: typing.Union[str, NODE, Identified]) -> bool:
+        obj_idx = self._map_to_term_idx(obj)
+        if obj_idx is None:
+            raise ValueError(f'No graph node found for {obj}')
+
+        sub_idx = self._map_to_term_idx(sub)
+        if sub_idx is None:
+            return False
+
+        # Exploit the fact that a term has usually fewer parents than children.
+        return any(obj_idx == idx for idx in self.get_ancestor_idx(sub_idx))
+
     def _map_with_iter_func(self, node: typing.Union[str, NODE, Identified],
                             include_source: bool,
                             func: typing.Callable[[int], typing.Iterator[int]]) -> typing.Iterator[NODE]:
-        term_id = self._map_to_term_id(node)
-        idx = self.node_to_idx(term_id)
+        idx = self._map_to_term_idx(node)
         if idx is not None:
             if include_source:
-                return itertools.chain((term_id,), map(lambda i: self.idx_to_node(i), func(idx)))
+                return itertools.chain(
+                    (self.idx_to_node(idx),),
+                    map(self.idx_to_node, func(idx)))
             else:
-                return map(lambda i: self.idx_to_node(i), func(idx))
+                return map(self.idx_to_node, func(idx))
         else:
             raise ValueError(f'{node} is not present in the graph!')
 
     def _map_with_seq_func(self, node: typing.Union[str, NODE, Identified],
                            include_source: bool,
                            func: typing.Callable[[int], typing.Sequence[int]]) -> typing.Iterator[NODE]:
-        term_id = self._map_to_term_id(node)
-        idx = self.node_to_idx(term_id)
+        idx = self._map_to_term_idx(node)
         if idx is not None:
             if include_source:
-                return itertools.chain((term_id,), map(lambda i: self.idx_to_node(i), func(idx)))
+                return itertools.chain(
+                    (self.idx_to_node(idx),),
+                    map(self.idx_to_node, func(idx)))
             else:
-                return map(lambda i: self.idx_to_node(i), func(idx))
+                return map(self.idx_to_node, func(idx))
         else:
             raise ValueError(f'{node} is not present in the graph!')
 
-    # TODO: possibly override is_parent, is_leaf, is_ancestor, etc.
+    def _map_to_term_idx(self, node: typing.Union[str, NODE, Identified]) -> typing.Optional[int]:
+        """
+        A convenience method to convert a `node` into the node index.
+
+        :param node: one of the expected node types, including CURIE `str`, NODE, or an :class:`Identified` item.
+        :return: the node index or `None` if the node is not present in the graph.
+        :raises ValueError: if the node is not in one of the expected types.
+        """
+        term_id = self._map_to_term_id(node)
+        return self.node_to_idx(term_id)
 
     # The rest