From 5ae7ef5cc866e19aa10ffd67ac2725920b4592b9 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 9 Feb 2024 16:19:36 -0500 Subject: [PATCH 1/6] Next development iteration `v0.4.1dev0`. --- benches/graph_traversal.py | 2 +- recipe/meta.yaml | 2 +- src/hpotk/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benches/graph_traversal.py b/benches/graph_traversal.py index 6db3138..c5df1dd 100644 --- a/benches/graph_traversal.py +++ b/benches/graph_traversal.py @@ -121,7 +121,7 @@ def bench(fpath_hpo: str, number: int, revision: str): df = pd.concat(results) df['revision'] = revision - df = df.set_index(['revision', 'method', 'group', 'payload']).sort_index() + df = df.set_index(['group', 'method', 'payload', 'revision']).sort_index() fpath_df = f'graph_traversal-{number}-{revision}.csv' df.to_csv(fpath_df) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 27e3626..56ad40e 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,5 +1,5 @@ {% set name = "hpo-toolkit" %} -{% set version = "0.4.0" %} +{% set version = "0.4.1dev0" %} package: name: {{ name|lower }} diff --git a/src/hpotk/__init__.py b/src/hpotk/__init__.py index 712b632..f8855f1 100644 --- a/src/hpotk/__init__.py +++ b/src/hpotk/__init__.py @@ -2,7 +2,7 @@ HPO toolkit is a library for working with Human Phenotype Ontology and the HPO annotation data. """ -__version__ = "0.4.0" +__version__ = "0.4.1dev0" from . import algorithm from . import annotations From d07e5fc849e0e381bf11438ac149c42ac91f510f Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Fri, 9 Feb 2024 16:21:43 -0500 Subject: [PATCH 2/6] Ignore bench history. 
--- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 2840779..ab75228 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Benches +benches/history/ + # IDE stuff .idea/ *.iml From 1c2bcb49cb97a1c372401abe800189836fb7a831 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 12 Feb 2024 21:06:09 -0500 Subject: [PATCH 3/6] Speed up the boolean queries on the indexed ontology graph. --- src/hpotk/graph/_api.py | 136 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 126 insertions(+), 10 deletions(-) diff --git a/src/hpotk/graph/_api.py b/src/hpotk/graph/_api.py index 521c4c9..3e15777 100644 --- a/src/hpotk/graph/_api.py +++ b/src/hpotk/graph/_api.py @@ -91,7 +91,7 @@ def is_leaf(self, node: typing.Union[str, NODE, Identified]) -> bool: :return: `True` if the `node` is a leaf node or `False` otherwise. :raises ValueError: if `node` is not present in the graph. """ - for _ in self.get_descendants(node): + for _ in self.get_children(node): return False return True @@ -273,33 +273,149 @@ def get_ancestors(self, source: typing.Union[str, NODE, Identified], include_source: bool = False) -> typing.Iterator[NODE]: return self._map_with_iter_func(source, include_source, self.get_ancestor_idx) + def is_leaf(self, node: typing.Union[str, NODE, Identified]) -> bool: + node_idx = self._map_to_term_idx(node) + if node_idx is None: + raise ValueError(f'No graph node found for {node}') + + for _ in self.get_children_idx(node_idx):  # a single child is enough to rule out a leaf + return False + return True + + def is_parent_of_idx(self, sub: int, obj: int) -> bool: + """ + Return `True` if the subject `sub` is a parent of the object `obj`. + + :param sub: index of a graph node. + :param obj: index of the other graph node. + :return: `True` if the `sub` is a parent of the `obj`. + :raises ValueError: if no such node exists for the `obj` index. 
+ """ + return any(sub == idx for idx in self.get_parents_idx(obj)) + + def is_parent_of(self, sub: typing.Union[str, NODE, Identified], + obj: typing.Union[str, NODE, Identified]) -> bool: + obj_idx = self._map_to_term_idx(obj) + if obj_idx is None: + raise ValueError(f'No graph node found for {obj}') + + sub_idx = self._map_to_term_idx(sub) + if sub_idx is None: + return False + + return any(sub_idx == idx for idx in self.get_parents_idx(obj_idx)) + + def is_ancestor_of_idx(self, sub: int, obj: int) -> bool: + """ + Return `True` if the subject `sub` is an ancestor of the object `obj`. + + :param sub: index of a graph node. + :param obj: index of the other graph node. + :return: `True` if the `sub` is an ancestor of the `obj`. + :raises ValueError: if no such node exists for the `obj` index. + """ + return any(sub == idx for idx in self.get_ancestor_idx(obj)) + + def is_ancestor_of(self, sub: typing.Union[str, NODE, Identified], + obj: typing.Union[str, NODE, Identified]) -> bool: + obj_idx = self._map_to_term_idx(obj) + if obj_idx is None: + raise ValueError(f'No graph node found for {obj}') + + sub_idx = self._map_to_term_idx(sub) + if sub_idx is None: + return False + + return any(sub_idx == idx for idx in self.get_ancestor_idx(obj_idx)) + + def is_child_of_idx(self, sub: int, obj: int) -> bool: + """ + Return `True` if the subject `sub` is a child of the object `obj`. + + :param sub: index of a graph node. + :param obj: index of the other graph node. + :return: `True` if the `sub` is a child of the `obj`. + :raises ValueError: if no such node exists for the `sub` index. 
+ """ + # TODO: ValueError for `sub` may break the pattern + return any(obj == idx for idx in self.get_parents_idx(sub)) + + def is_child_of(self, sub: typing.Union[str, NODE, Identified], + obj: typing.Union[str, NODE, Identified]) -> bool: + obj_idx = self._map_to_term_idx(obj) + if obj_idx is None: + raise ValueError(f'No graph node found for {obj}') + + sub_idx = self._map_to_term_idx(sub) + if sub_idx is None: + return False + + # Exploit the fact that a term has usually fewer parents than children. + return any(obj_idx == idx for idx in self.get_parents_idx(sub_idx)) + + def is_descendant_of_idx(self, sub: int, obj: int) -> bool: + """ + Return `True` if the subject `sub` is a descendant of the object `obj`. + + :param sub: index of a graph node. + :param obj: index of the other graph node. + :return: `True` if the `sub` is a descendant of the `obj`. + :raises ValueError: if no such node exists for the `sub` index. + """ + # TODO: ValueError for `sub` may break the pattern + return any(obj == idx for idx in self.get_ancestor_idx(sub)) + + def is_descendant_of(self, sub: typing.Union[str, NODE, Identified], + obj: typing.Union[str, NODE, Identified]) -> bool: + obj_idx = self._map_to_term_idx(obj) + if obj_idx is None: + raise ValueError(f'No graph node found for {obj}') + + sub_idx = self._map_to_term_idx(sub) + if sub_idx is None: + return False + + # Exploit the fact that a term has usually fewer parents than children. 
+ return any(obj_idx == idx for idx in self.get_ancestor_idx(sub_idx)) + def _map_with_iter_func(self, node: typing.Union[str, NODE, Identified], include_source: bool, func: typing.Callable[[int], typing.Iterator[int]]) -> typing.Iterator[NODE]: - term_id = self._map_to_term_id(node) - idx = self.node_to_idx(term_id) + idx = self._map_to_term_idx(node) if idx is not None: if include_source: - return itertools.chain((term_id,), map(lambda i: self.idx_to_node(i), func(idx))) + return itertools.chain( + (self.idx_to_node(idx),), + map(self.idx_to_node, func(idx))) else: - return map(lambda i: self.idx_to_node(i), func(idx)) + return map(self.idx_to_node, func(idx)) else: raise ValueError(f'{node} is not present in the graph!') def _map_with_seq_func(self, node: typing.Union[str, NODE, Identified], include_source: bool, func: typing.Callable[[int], typing.Sequence[int]]) -> typing.Iterator[NODE]: - term_id = self._map_to_term_id(node) - idx = self.node_to_idx(term_id) + idx = self._map_to_term_idx(node) if idx is not None: if include_source: - return itertools.chain((term_id,), map(lambda i: self.idx_to_node(i), func(idx))) + return itertools.chain( + (self.idx_to_node(idx),), + map(self.idx_to_node, func(idx))) else: - return map(lambda i: self.idx_to_node(i), func(idx)) + return map(self.idx_to_node, func(idx)) else: raise ValueError(f'{node} is not present in the graph!') - # TODO: possibly override is_parent, is_leaf, is_ancestor, etc. + def _map_to_term_idx(self, node: typing.Union[str, NODE, Identified]) -> typing.Optional[int]: + """ + A convenience method to convert a `node` into the node index. + + :param node: one of the expected node types, including CURIE `str`, NODE, or an :class:`Identified` item. + :return: the node index or `None` if the node is not present in the graph. + :raises ValueError: if the node is not in one of the expected types. 
+ """ + term_id = self._map_to_term_id(node) + return self.node_to_idx(term_id) # The rest From 5b56440a1be84da6d7a382e916ca2258b0d8c6db Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 12 Feb 2024 21:06:35 -0500 Subject: [PATCH 4/6] Bench boolean ontology graph queries. --- benches/graph_traversal.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/benches/graph_traversal.py b/benches/graph_traversal.py index c5df1dd..3a5355f 100644 --- a/benches/graph_traversal.py +++ b/benches/graph_traversal.py @@ -41,6 +41,7 @@ def bench_base_graph(fpath_hpo: str, results = defaultdict(list) + root = graph.root for curie, label in CURIE2LABEL.items(): term_id = hpotk.TermId.from_curie(curie) label = ontology.get_term_name(curie) @@ -52,6 +53,11 @@ def bench_base_graph(fpath_hpo: str, 'get_ancestors': lambda: list(graph.get_ancestors(term_id)), 'get_children': lambda: list(graph.get_children(term_id)), 'get_descendants': lambda: list(graph.get_descendants(term_id)), + + 'is_parent_of': lambda: graph.is_parent_of(root, term_id), + 'is_ancestor_of': lambda: graph.is_ancestor_of(root, term_id), + 'is_child_of': lambda: graph.is_child_of(term_id, root), + 'is_descendant_of': lambda: graph.is_descendant_of(term_id, root), } for method in benches: @@ -66,12 +72,14 @@ def bench_base_graph(fpath_hpo: str, def bench_indexed_graph(fpath_hpo: str, - number: int = 1000) -> typing.Mapping[str, typing.Mapping[str, float]]: + number: int = 1000) -> typing.Mapping[str, typing.Sequence]: factory = hpotk.graph.CsrIndexedGraphFactory() ontology = hpotk.load_minimal_ontology(fpath_hpo, graph_factory=factory) graph: hpotk.graph.IndexedOntologyGraph = ontology.graph results = defaultdict(list) + root = graph.root + root_idx = graph.root_idx for curie, label in CURIE2LABEL.items(): term_id = hpotk.TermId.from_curie(curie) idx = graph.node_to_idx(term_id) @@ -87,6 +95,15 @@ def bench_indexed_graph(fpath_hpo: str, 'get_children': lambda: 
list(graph.get_children(term_id)), 'get_descendant_idx': lambda: list(graph.get_descendant_idx(idx)), 'get_descendants': lambda: list(graph.get_descendants(term_id)), + + 'is_parent_of_idx': lambda: graph.is_parent_of_idx(root_idx, idx), + 'is_parent_of': lambda: graph.is_parent_of(root, term_id), + 'is_ancestor_of_idx': lambda: graph.is_ancestor_of_idx(root_idx, idx), + 'is_ancestor_of': lambda: graph.is_ancestor_of(root, term_id), + 'is_child_of_idx': lambda: graph.is_child_of_idx(idx, root_idx), + 'is_child_of': lambda: graph.is_child_of(term_id, root), + 'is_descendant_of_idx': lambda: graph.is_descendant_of_idx(idx, root_idx), + 'is_descendant_of': lambda: graph.is_descendant_of(term_id, root), } for method in benches: @@ -124,6 +141,7 @@ def bench(fpath_hpo: str, number: int, revision: str): df = df.set_index(['group', 'method', 'payload', 'revision']).sort_index() fpath_df = f'graph_traversal-{number}-{revision}.csv' + logger.info('Storing results at `%s`', fpath_df) df.to_csv(fpath_df) @@ -145,7 +163,7 @@ def main() -> int: args = parser.parse_args(argv) if args.revision is None: - revision = datetime.datetime.now().strftime('%Y-%m-%d') + revision = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S') else: revision = args.revision bench(args.hpo, args.number, revision) From 64221abe6129bde52f74d108ec5af6349c6d8e3c Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 12 Feb 2024 21:26:06 -0500 Subject: [PATCH 5/6] Bump to `0.4.1post0`. 
--- HOW_TO_RELEASE.md | 2 +- recipe/meta.yaml | 2 +- src/hpotk/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index cf57919..e25360d 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -44,7 +44,7 @@ tox run python3 -m build # Deploy -python3 -m twine upload --sign --identity dist/* +python3 -m twine upload dist/* # Clear the deployed files rm -rf dist diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 4e40f05..e82a95f 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,5 +1,5 @@ {% set name = "hpo-toolkit" %} -{% set version = "0.4.1" %} +{% set version = "0.4.1post0" %} package: name: {{ name|lower }} diff --git a/src/hpotk/__init__.py b/src/hpotk/__init__.py index 5116bd5..da8549a 100644 --- a/src/hpotk/__init__.py +++ b/src/hpotk/__init__.py @@ -2,7 +2,7 @@ HPO toolkit is a library for working with Human Phenotype Ontology and the HPO annotation data. """ -__version__ = "0.4.1" +__version__ = "0.4.1post0" from . import algorithm from . import annotations From 7413db2aa8918c63eb6f9ffd0e9e0d3182a857d3 Mon Sep 17 00:00:00 2001 From: Daniel Danis Date: Mon, 12 Feb 2024 21:28:11 -0500 Subject: [PATCH 6/6] Bump to `0.4.2`. --- recipe/meta.yaml | 2 +- src/hpotk/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index e82a95f..312ff69 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,5 +1,5 @@ {% set name = "hpo-toolkit" %} -{% set version = "0.4.1post0" %} +{% set version = "0.4.2" %} package: name: {{ name|lower }} diff --git a/src/hpotk/__init__.py b/src/hpotk/__init__.py index da8549a..f72e015 100644 --- a/src/hpotk/__init__.py +++ b/src/hpotk/__init__.py @@ -2,7 +2,7 @@ HPO toolkit is a library for working with Human Phenotype Ontology and the HPO annotation data. """ -__version__ = "0.4.1post0" +__version__ = "0.4.2" from . import algorithm from . import annotations