diff --git a/notebooks/Tutorial.ipynb b/notebooks/Tutorial.ipynb index f620dfa..083a0f5 100644 --- a/notebooks/Tutorial.ipynb +++ b/notebooks/Tutorial.ipynb @@ -418,6 +418,50 @@ " print(f\"{term.identifier.value} - {term.name}\")" ] }, + { + "cell_type": "markdown", + "id": "c219878d-ae6e-4279-b7b7-993b4e2110dc", + "metadata": {}, + "source": [ + "## Term relationship tests\n", + "\n", + "We can check if a term is a *parent* or an *ancestor* of another term using the ontology graph:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "9f02ea88-694b-4d4a-a2cd-8e604d40cf18", + "metadata": {}, + "outputs": [], + "source": [ + "arachnodactyly = TermId.from_curie('HP:0001166')\n", + "abnormality_of_the_hand = TermId.from_curie('HP:0001155')\n", + "long_fingers = TermId.from_curie('HP:0100807')\n", + "\n", + "assert o.graph.is_parent_of(long_fingers, arachnodactyly)\n", + "assert o.graph.is_ancestor_of(abnormality_of_the_hand, arachnodactyly)" + ] + }, + { + "cell_type": "markdown", + "id": "5efc4a21-b115-4511-80fd-cf02d3afdc46", + "metadata": {}, + "source": [ + "Similarly, we can check if a term is a *child* or a *descendant* of another term:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "df6344dd-9b72-4d94-88aa-201a9578fc7b", + "metadata": {}, + "outputs": [], + "source": [ + "assert o.graph.is_child_of(arachnodactyly, long_fingers)\n", + "assert o.graph.is_descendant_of(arachnodactyly, long_fingers)" + ] + }, { "cell_type": "markdown", "id": "a94bcf99-fb72-4b83-bc3d-805fd7a30cb6", @@ -439,7 +483,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "id": "3b2f58cc-e008-4153-96b5-1abbd4db9e47", "metadata": {}, "outputs": [], @@ -487,7 +531,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "id": "0e668a54-c146-47be-921e-72ab29ee7d28", "metadata": {}, "outputs": [], @@ -531,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "id": 
"b575229e-1c08-4d02-aaa3-012c9a8c80bd", "metadata": {}, "outputs": [], @@ -571,7 +615,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "id": "654026c8-0e98-45ee-b245-0fa72f8498c3", "metadata": {}, "outputs": [ @@ -616,7 +660,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "215903d1-5c3f-4b0f-8fa0-486ef272f5a8", "metadata": {}, "outputs": [ @@ -645,7 +689,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "id": "0b1e1b30-e70e-4ec9-89c9-5c702d24ea25", "metadata": {}, "outputs": [ @@ -672,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "id": "5c4bf517-3e4c-4444-9ada-8bdd9cdcbbbe", "metadata": { "tags": [] @@ -684,7 +728,7 @@ "HpoFrequency(identifier=HP:0040283, lower_bound=0.05, upper_bound=0.29)" ] }, - "execution_count": 18, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -705,7 +749,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "id": "03e7bc93-a58e-422e-a9e1-b04f40956dfb", "metadata": { "tags": [] @@ -728,7 +772,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 22, "id": "7daaab3e-6ead-4230-88d5-b8c080b3730b", "metadata": { "tags": [] @@ -759,7 +803,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 23, "id": "d7bf2281-d8ed-4373-b094-ef7df0c60aa8", "metadata": { "tags": [] @@ -790,7 +834,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 24, "id": "2ddccd09-43d8-448d-8df1-f55ae8eeb658", "metadata": { "tags": [] @@ -821,7 +865,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 25, "id": "0c183a9c-802b-43a1-8bf9-b0e81e5a26f1", "metadata": { "tags": [] @@ -867,7 +911,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 26, "id": "9b5e5216-abde-4b8e-888b-fd12b43ce92e", "metadata": { "tags": [] @@ -879,7 +923,7 @@ "'Parsed 12429 diseases'" ] 
}, - "execution_count": 24, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -905,7 +949,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 27, "id": "b4303dc6-a653-4720-8a50-0600d655b679", "metadata": { "tags": [] @@ -933,7 +977,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 28, "id": "45035d68-baad-4e69-ab51-5e9c9e0aad86", "metadata": { "tags": [] @@ -962,7 +1006,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 29, "id": "e061b854-2218-4355-8a9f-d6ef9988f588", "metadata": { "tags": [] @@ -995,7 +1039,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 30, "id": "2faad640-4765-49c5-9eba-535018782e5b", "metadata": { "tags": [] @@ -1046,7 +1090,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 31, "id": "fe09955e-9a7d-49dc-9cd7-e7db4b876af5", "metadata": { "tags": [] @@ -1068,7 +1112,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 32, "id": "7ad52558-e398-413f-8d65-0e67d6099d42", "metadata": { "tags": [] @@ -1083,8 +1127,8 @@ } ], "source": [ - "ic_arachnodactyly = term_id2ic[arachnodactyly.identifier]\n", - "print(f'IC of {arachnodactyly.name}: {ic_arachnodactyly} nats')" + "ic_arachnodactyly = term_id2ic[arachnodactyly]\n", + "print(f'IC of Arachnodactyly: {ic_arachnodactyly} nats')" ] }, { @@ -1137,7 +1181,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 33, "id": "deb939b7-847f-4b9f-a823-61ea8e9cbb1a", "metadata": { "tags": [] @@ -1149,7 +1193,7 @@ "{'annotated_items_version': '2023-04-05', 'ontology_version': '2023-04-06'}" ] }, - "execution_count": 31, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1172,16 +1216,14 @@ }, { "cell_type": "code", - "execution_count": 32, - "id": "4e4afd86-1c26-454e-a627-07b29eb00f01", - "metadata": { - "tags": [] - }, + "execution_count": 34, + "id": 
def is_parent_of(self, sub: NODE, obj: NODE) -> bool:
    """
    Test if `sub` is a parent of `obj`.

    The answer is `True` if `sub` compares equal to one of the nodes provided by `get_parents(obj)`.

    :param sub: the graph node tested for being the parent.
    :param obj: the graph node whose parents are inspected.
    :return: `True` if `sub` is a parent of `obj` and `False` otherwise.
    """
    parents_getter = self.get_parents
    return self._run_query(parents_getter, sub, obj)

def is_ancestor_of(self, sub: NODE, obj: NODE) -> bool:
    """
    Test if `sub` is an ancestor of `obj`.

    The answer is `True` if `sub` compares equal to one of the nodes provided by `get_ancestors(obj)`.

    :param sub: the graph node tested for being the ancestor.
    :param obj: the graph node whose ancestors are inspected.
    :return: `True` if `sub` is an ancestor of `obj` and `False` otherwise.
    """
    ancestors_getter = self.get_ancestors
    return self._run_query(ancestors_getter, sub, obj)

def is_child_of(self, sub: NODE, obj: NODE) -> bool:
    """
    Test if `sub` is a child of `obj`.

    The answer is `True` if `sub` compares equal to one of the nodes provided by `get_children(obj)`.

    :param sub: the graph node tested for being the child.
    :param obj: the graph node whose children are inspected.
    :return: `True` if `sub` is a child of `obj` and `False` otherwise.
    """
    children_getter = self.get_children
    return self._run_query(children_getter, sub, obj)

def is_descendant_of(self, sub: NODE, obj: NODE) -> bool:
    """
    Test if `sub` is a descendant of `obj`.

    The answer is `True` if `sub` compares equal to one of the nodes provided by `get_descendants(obj)`.

    :param sub: the graph node tested for being the descendant.
    :param obj: the graph node whose descendants are inspected.
    :return: `True` if `sub` is a descendant of `obj` and `False` otherwise.
    """
    descendants_getter = self.get_descendants
    return self._run_query(descendants_getter, sub, obj)

@staticmethod
def _run_query(func: typing.Callable[[NODE], typing.Iterable[NODE]],
               sub: NODE,
               obj: NODE) -> bool:
    """
    Check if `sub` is equal to any of the nodes that `func` provides for `obj`.

    :param func: a function that maps a node to an iterable of nodes (e.g. `get_parents`).
    :param sub: the node to look for.
    :param obj: the node that is passed to `func`.
    :return: `True` as soon as a node equal to `sub` is found, `False` if the iterable is exhausted without a match.
    """
    for candidate in func(obj):
        # Stop at the first hit - the rest of the iterable is never consumed.
        if sub == candidate:
            return True
    return False
@ddt.ddt
class TestCsrOntologyGraph(unittest.TestCase):
    """
    Exercise the relationship queries of the `OntologyGraph` API (`is_child_of`, `is_descendant_of`,
    `is_parent_of` and `is_ancestor_of`).

    `BisectPoweredCsrOntologyGraph` merely happens to be the implementation backing the tests.
    """

    NODES: np.ndarray
    GRAPH: BisectPoweredCsrOntologyGraph

    @classmethod
    def setUpClass(cls) -> None:
        # A toy ontology with ten nodes. The position of a CURIE in the list
        # doubles as the row/column index of the node in the adjacency matrix.
        curies = [
            'HP:01', 'HP:010', 'HP:011', 'HP:0110',
            'HP:02', 'HP:020', 'HP:021', 'HP:022',
            'HP:03', 'HP:1'
        ]
        term_ids = np.fromiter(map(TermId.from_curie, curies), dtype=object)

        # The three arrays of the CSR matrix.
        # NOTE(review): judging by the expectations in the tests below, `1` marks the column node
        # as a parent of the row node and `-1` marks it as a child - confirm against the graph implementation.
        indptr = [0, 3, 5, 7, 9, 13, 14, 15, 16, 17, 20]
        indices = [1, 2, 9, 0, 3, 0, 3, 1, 2, 5, 6, 7, 9, 4, 4, 4, 9, 0, 4, 8]
        values = [-1, -1, 1, 1, -1, 1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, -1, -1, -1]
        n_nodes = len(term_ids)
        adjacency = ImmutableCsrMatrix(indptr, indices, values, shape=(n_nodes, n_nodes), dtype=int)

        cls.NODES = term_ids
        cls.GRAPH = BisectPoweredCsrOntologyGraph(TermId.from_curie('HP:1'), term_ids, adjacency)

    def _check(self, query, sub, obj, expected):
        """
        Parse the `sub` and `obj` CURIEs into term IDs, run the `query` and compare the outcome with `expected`.
        """
        actual = query(TermId.from_curie(sub), TermId.from_curie(obj))
        self.assertEqual(expected, actual)

    @ddt.data(
        # `sub` is a child of `obj`
        ('HP:01', 'HP:1', True),

        ('HP:010', 'HP:01', True),
        ('HP:011', 'HP:01', True),
        ('HP:0110', 'HP:011', True),

        ('HP:02', 'HP:1', True),
        ('HP:020', 'HP:02', True),
        ('HP:021', 'HP:02', True),
        ('HP:022', 'HP:02', True),

        ('HP:03', 'HP:1', True),

        # `sub` is NOT a child of `obj`
        ('HP:1', 'HP:1', False),
        ('HP:01', 'HP:02', False),
        ('HP:020', 'HP:01', False),
        ('HP:03', 'HP:0110', False),
    )
    @ddt.unpack
    def test_is_child_of(self, sub, obj, expected):
        self._check(self.GRAPH.is_child_of, sub, obj, expected)

    @ddt.data(
        # `sub` is a descendant of `obj`
        ('HP:010', 'HP:1', True),
        ('HP:011', 'HP:1', True),
        ('HP:0110', 'HP:1', True),
        ('HP:0110', 'HP:01', True),

        ('HP:020', 'HP:1', True),
        ('HP:021', 'HP:1', True),
        ('HP:022', 'HP:1', True),

        # `sub` is NOT a descendant of `obj`
        ('HP:1', 'HP:1', False),
        ('HP:1', 'HP:01', False),
    )
    @ddt.unpack
    def test_is_descendant_of(self, sub, obj, expected):
        self._check(self.GRAPH.is_descendant_of, sub, obj, expected)

    @ddt.data(
        # `sub` is a parent of `obj`
        ('HP:1', 'HP:01', True),

        ('HP:01', 'HP:010', True),
        ('HP:01', 'HP:011', True),
        ('HP:011', 'HP:0110', True),

        ('HP:1', 'HP:02', True),
        ('HP:02', 'HP:020', True),
        ('HP:02', 'HP:021', True),
        ('HP:02', 'HP:022', True),

        ('HP:1', 'HP:03', True),

        # `sub` is NOT a parent of `obj`
        ('HP:1', 'HP:1', False),
        ('HP:02', 'HP:01', False),
        ('HP:01', 'HP:020', False),
        ('HP:0110', 'HP:03', False),
    )
    @ddt.unpack
    def test_is_parent_of(self, sub, obj, expected):
        self._check(self.GRAPH.is_parent_of, sub, obj, expected)

    @ddt.data(
        # `sub` is an ancestor of `obj`
        ('HP:1', 'HP:010', True),
        ('HP:1', 'HP:011', True),
        ('HP:1', 'HP:0110', True),
        ('HP:01', 'HP:0110', True),

        ('HP:1', 'HP:020', True),
        ('HP:1', 'HP:021', True),
        ('HP:1', 'HP:022', True),

        # `sub` is NOT an ancestor of `obj`
        ('HP:1', 'HP:1', False),
        ('HP:01', 'HP:1', False),
    )
    @ddt.unpack
    def test_is_ancestor_of(self, sub, obj, expected):
        self._check(self.GRAPH.is_ancestor_of, sub, obj, expected)