diff --git a/.circleci/config.yml b/.circleci/config.yml index aa0f633b..c9b8f2e7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -64,7 +64,7 @@ jobs: - run: name: Install pysal dependencies command: | - sudo apt install libspatialindex-dev xdg-utils shared-mime-info + sudo apt install libspatialindex-dev xdg-utils shared-mime-info desktop-file-utils - run: name: Setup pandoc command: sudo apt update && sudo apt install -y pandoc optipng diff --git a/docs/api.rst b/docs/api.rst index f9b222fa..990414cf 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -43,6 +43,7 @@ causal graph operations. .. autosummary:: :toctree: generated/ + inducing_path is_valid_mec_graph possible_ancestors possible_descendants diff --git a/docs/glossary.rst b/docs/glossary.rst index 18545086..c1fced4a 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -35,14 +35,13 @@ General Concepts API Refers to both the *specific* interfaces for graphs implemented in pywhy-graphs and the *generalized* conventions across types of - graphs as described in this glossary and :ref:`overviewed in the - contributor documentation `. + graphs as described in this glossary. The specific interfaces that constitute pywhy-graphs's public API are largely documented in :ref:`api_ref`. However, we less formally consider anything as public API if none of the identifiers required to access it - begins with ``_``. We generally try to maintain :term:`backwards - compatibility` for all objects in the public API. + begins with ``_``. We generally try to maintain backwards + compatibility for all objects in the public API. Private API, including functions, modules and methods beginning ``_`` are not assured to be stable. 
@@ -85,7 +84,7 @@ General Concepts experimental An experimental tool is already usable but its public API, such as default parameter values or fitted attributes, is still subject to - change in future versions without the usual :term:`deprecation` + change in future versions without the usual deprecation warning policy. F-node diff --git a/docs/whats_new/v0.1.rst b/docs/whats_new/v0.1.rst index 10efc821..a29b4d14 100644 --- a/docs/whats_new/v0.1.rst +++ b/docs/whats_new/v0.1.rst @@ -45,6 +45,8 @@ Changelog - |Feature| Implement pre-commit hooks for development, by `Jaron Lee`_ (:pr:`68`) - |Feature| Implement a new submodule for converting graphs to a functional model, with :func:`pywhy_graphs.functional.make_graph_linear_gaussian`, by `Adam Li`_ (:pr:`75`) - |Feature| Implement a multidomain linear functional graph, with :func:`pywhy_graphs.functional.make_graph_multidomain`, by `Adam Li`_ (:pr:`77`) +- |Feature| Implement and test functions to find inducing paths between two nodes, by `Aryan Roy`_ (:pr:`78`) + Code and Documentation Contributors ----------------------------------- diff --git a/pywhy_graphs/algorithms/generic.py b/pywhy_graphs/algorithms/generic.py index b98caaf5..5b895165 100644 --- a/pywhy_graphs/algorithms/generic.py +++ b/pywhy_graphs/algorithms/generic.py @@ -1,4 +1,4 @@ -from typing import List, Union +from typing import List, Set, Union import networkx as nx @@ -12,6 +12,7 @@ "is_node_common_cause", "set_nodes_as_latent_confounders", "is_valid_mec_graph", + "inducing_path", ] @@ -333,3 +334,265 @@ def _single_shortest_path_early_stop(G, firstlevel, paths, cutoff, join, valid_p nextlevel[w] = 1 level += 1 return paths + + +def _directed_sub_graph_ancestors(G, node: Node): + """Finds the ancestors of a node in the directed subgraph. + + Parameters + ---------- + G : Graph + The graph. + node : Node + The node for which we have to find the ancestors. + + Returns + ------- + out : set + The ancestors of the provided node. 
+ """ + + return nx.ancestors(G.sub_directed_graph(), node) + + +def _directed_sub_graph_parents(G, node: Node): + """Finds the parents of a node in the directed subgraph. + + Parameters + ---------- + G : Graph + The graph. + node : Node + The node for which we have to find the parents. + + Returns + ------- + out : set + The parents of the provided node. + """ + + return set(G.sub_directed_graph().predecessors(node)) + + +def _bidirected_sub_graph_neighbors(G, node: Node): + """Finds the neighbors of a node in the bidirected subgraph. + + Parameters + ---------- + G : Graph + The graph. + node : Node + The node for which we have to find the neighbors. + + Returns + ------- + out : set + The neighbors of the provided node in the bidirected subgraph. + """ + bidirected_parents = set() + + if not isinstance(G, CPDAG): + bidirected_parents = set(G.sub_bidirected_graph().neighbors(node)) + + return bidirected_parents + + +def _is_collider(G, prev_node: Node, cur_node: Node, next_node: Node): + """Checks if the given node is a collider or not. + + Parameters + ---------- + G : graph + The graph. + prev_node : node + The previous node in the path. + cur_node : node + The node to be checked. + next_node : node + The next node in the path. + + Returns + ------- + iscollider : bool + Bool is set true if the node is a collider, false otherwise. + """ + parents = _directed_sub_graph_parents(G, cur_node) + parents = parents.union(_bidirected_sub_graph_neighbors(G, cur_node)) + + if prev_node in parents and next_node in parents: + return True + + return False + + +def _shortest_valid_path( + G, + node_x: Node, + node_y: Node, + L: Set, + S: Set, + visited: Set, + all_ancestors: Set, + cur_node: Node, + prev_node: Node, +): + """Recursively explores a graph to find a path. + + Finds a path that is compliant with the inducing path requirements. + + Parameters + ---------- + G : graph + The graph. + node_x : node + The source node. 
+ node_y : node + The destination node + L : Set + Set containing all the non-colliders. + S : Set + Set containing all the colliders. + visited : Set + Set containing all the nodes already visited. + all_ancestors : Set + Set containing all the ancestors a collider needs to be checked against. + cur_node : node + The current node. + prev_node : node + The previous node in the path. + + Returns + ------- + path : Tuple[bool, path] + A tuple containing a bool and a path which is empty if the bool is false. + """ + path_exists = False + path = [] + visited.add(cur_node) + neighbors = G.neighbors(cur_node) + + if cur_node is node_y: + return (True, [node_y]) + + for elem in neighbors: + if elem in visited: + continue + + else: + # If the current node is a collider, check that it is either an + # ancestor of X, Y or any element of S or that it is + # the destination node itself. + if ( + _is_collider(G, prev_node, cur_node, elem) + and (cur_node not in all_ancestors) + and (cur_node not in S) + and (cur_node is not node_y) + ): + continue + + # If the current node is not a collider, check that it is + # either in L or the destination node itself. + + elif ( + not _is_collider(G, prev_node, cur_node, elem) + and (cur_node not in L) + and (cur_node is not node_y) + ): + continue + + # if it is a valid node and not the destination node, + # check if it has a path to the destination node + + path_exists, temp_path = _shortest_valid_path( + G, node_x, node_y, L, S, visited, all_ancestors, elem, cur_node + ) + + if path_exists: + path.append(cur_node) + path.extend(temp_path) + break + + return (path_exists, path) + + +def inducing_path(G, node_x: Node, node_y: Node, L: Set = None, S: Set = None): + """Checks if an inducing path exists between two nodes. + + An inducing path is defined in :footcite:`Zhang2008`. + + Parameters + ---------- + G : Graph + The graph. + node_x : node + The source node. + node_y : node + The destination node. 
+ L : Set + Nodes that are ignored on the path. Defaults to an empty set. See Notes for details. + S : Set + Nodes that are always conditioned on. Defaults to an empty set. See Notes for details. + + Returns + ------- + path : Tuple[bool, path] + A tuple containing a bool and a path if the bool is true, an empty list otherwise. + + Notes + ----- + An inducing path intuitively is a path between two non-adjacent nodes that + cannot be d-separated. Therefore, the path is always "active" regardless of + what variables we condition on. L contains all the non-colliders; these nodes + are ignored along the path. S contains nodes that are always conditioned on + (hence if a collider is in S, or is an ancestor of a node in S, then that + collider path is always "active"). + + References + ---------- + .. footbibliography:: + """ + if L is None: + L = set() + + if S is None: + S = set() + + nodes = set(G.nodes) + + if node_x not in nodes or node_y not in nodes: + raise ValueError("The provided nodes are not in the graph.") + + if node_x == node_y: + raise ValueError("The source and destination nodes are the same.") + + path = [] # this will contain the path. 
+ + x_ancestors = _directed_sub_graph_ancestors(G, node_x) + y_ancestors = _directed_sub_graph_ancestors(G, node_y) + + xy_ancestors = x_ancestors.union(y_ancestors) + + s_ancestors: set[Node] = set() + + for elem in S: + s_ancestors = s_ancestors.union(_directed_sub_graph_ancestors(G, elem)) + + # ancestors of X, Y and all the elements of S + + all_ancestors = xy_ancestors.union(s_ancestors) + x_neighbors = G.neighbors(node_x) + + path_exists = False + for elem in x_neighbors: + + visited = {node_x} + if elem not in visited: + path_exists, temp_path = _shortest_valid_path( + G, node_x, node_y, L, S, visited, all_ancestors, elem, node_x + ) + if path_exists: + path.append(node_x) + path.extend(temp_path) + break + + return (path_exists, path) diff --git a/pywhy_graphs/algorithms/tests/test_generic.py b/pywhy_graphs/algorithms/tests/test_generic.py index 866d9ade..d0a52981 100644 --- a/pywhy_graphs/algorithms/tests/test_generic.py +++ b/pywhy_graphs/algorithms/tests/test_generic.py @@ -2,6 +2,7 @@ import pytest import pywhy_graphs +from pywhy_graphs import ADMG def test_convert_to_latent_confounder_errors(): @@ -39,3 +40,191 @@ def test_convert_to_latent_confounder(graph_func): G.remove_edge(3, 1, G.bidirected_edge_name) G.add_edge(3, 1, G.directed_edge_name) assert pywhy_graphs.is_node_common_cause(G, 3) + + +def test_inducing_path(): + + admg = ADMG() + + admg.add_edge("X", "Y", admg.directed_edge_name) + admg.add_edge("Z", "Y", admg.bidirected_edge_name) + admg.add_edge("Z", "H", admg.bidirected_edge_name) + + # X -> Y <-> Z <-> H + + S = {"Y", "Z"} + L = set() + assert pywhy_graphs.inducing_path(admg, "X", "H", L, S)[0] + + admg.add_edge("H", "J", admg.directed_edge_name) + + # X -> Y <-> Z <-> H -> J + + S = {"Y", "Z"} + L = {"H"} + + assert pywhy_graphs.inducing_path(admg, "X", "J", L, S)[0] + + admg.add_edge("K", "J", admg.directed_edge_name) + + # X -> Y <-> Z <-> H -> J <- K + S = {"Y", "Z", "J"} + L = {"H"} + + assert pywhy_graphs.inducing_path(admg, "X", 
"K", L, S)[0] # no directed path exists + + admg.add_edge("J", "K", admg.directed_edge_name) + + # X -> Y <-> Z <-> H -> J -> K; K -> J + + S = {"Y", "J"} + L = {"H"} + + assert not pywhy_graphs.inducing_path(admg, "X", "K", L, S)[ + 0 + ] # A collider on the path is not in S + + S = {"Y", "Z"} + L = set() + + assert not pywhy_graphs.inducing_path(admg, "X", "K", L, S)[ + 0 + ] # A non-collider on the path is not in L + + +def test_inducing_path_wihtout_LandS(): + + admg = ADMG() + + admg.add_edge("X", "Y", admg.directed_edge_name) + + L = set() + S = set() + + # X -> Y + + assert pywhy_graphs.inducing_path(admg, "X", "Y", L, S)[0] + + admg.add_edge("Y", "X", admg.directed_edge_name) + + # X -> Y; Y -> X + + assert pywhy_graphs.inducing_path(admg, "X", "Y", L, S)[0] + + +def test_inducing_path_one_direction(): + + admg = ADMG() + + admg.add_edge("A", "B", admg.directed_edge_name) + admg.add_edge("B", "C", admg.directed_edge_name) + admg.add_edge("C", "D", admg.directed_edge_name) + admg.add_edge("B", "C", admg.bidirected_edge_name) + + L = {"C"} + S = {"B"} + + # A -> B -> C -> D + # B <-> C + + assert pywhy_graphs.inducing_path(admg, "A", "D", L, S)[0] + + L = set() + S = {"B"} + + assert not pywhy_graphs.inducing_path(admg, "A", "D", L, S)[0] + + L = {"C"} + S = set() + + assert pywhy_graphs.inducing_path(admg, "A", "D", L, S)[0] + + admg.add_edge("D", "C", admg.bidirected_edge_name) + + # A -> B -> C -> D + # B <-> C + # C <-> D + + L = set() + S = {"B"} + + assert pywhy_graphs.inducing_path(admg, "A", "D", L, S)[0] + + L = set() + S = set() + + assert pywhy_graphs.inducing_path(admg, "A", "D", L, S)[0] + + +def test_inducing_path_corner_cases(): + # X <- Y <-> Z <-> H; Z -> X + admg = ADMG() + admg.add_edge("Y", "X", admg.directed_edge_name) + admg.add_edge("Z", "X", admg.directed_edge_name) + admg.add_edge("Z", "Y", admg.bidirected_edge_name) + admg.add_edge("Z", "H", admg.bidirected_edge_name) + + # not an inducing path, since Y is not a collider and Y is not part of L + S 
= {} + L = {} + assert not pywhy_graphs.inducing_path(admg, "X", "H", L, S)[0] + + # now an inducing path, since Y is not a collider, but is part of L + L = {"Y"} + assert pywhy_graphs.inducing_path(admg, "X", "H", L, S)[0] + + # X <-> Y <-> Z <-> H; Z -> X + admg = ADMG() + admg.add_edge("Y", "X", admg.bidirected_edge_name) + admg.add_edge("Z", "X", admg.directed_edge_name) + admg.add_edge("Z", "Y", admg.bidirected_edge_name) + admg.add_edge("Z", "H", admg.bidirected_edge_name) + + # not an inducing path, since Y is not an ancestor of X, H, or S + S = {} + L = {} + assert not pywhy_graphs.inducing_path(admg, "X", "H", L, S)[0] + + # still not an inducing path, since Y is a collider + L = {"Y"} + assert not pywhy_graphs.inducing_path(admg, "X", "H", L, S)[0] + + # now add an edge Y -> A + admg.add_edge("Y", "A", admg.directed_edge_name) + + # an inducing path, since Y is a collider and is an ancestor of X, H, or S + L = {} + S = {"A"} + assert pywhy_graphs.inducing_path(admg, "X", "H", L, S)[0] + + # an inducing path, since Y is a collider and is an ancestor of X, H, or S + L = {} + S = {"Y"} + assert pywhy_graphs.inducing_path(admg, "X", "H", L, S)[0] + + # X -> Z <- Y, A <- B <- Z + admg = ADMG() + admg.add_edge("X", "Z", admg.directed_edge_name) + admg.add_edge("Y", "Z", admg.directed_edge_name) + admg.add_edge("Z", "B", admg.directed_edge_name) + admg.add_edge("B", "A", admg.directed_edge_name) + + L = {} + S = {"A"} + + assert pywhy_graphs.inducing_path(admg, "X", "Y", L, S)[0] + + +def test_is_collider(): + # Z -> X -> A <- B -> Y; H -> A + admg = ADMG() + admg.add_edge("Z", "X", admg.directed_edge_name) + admg.add_edge("H", "A", admg.directed_edge_name) + admg.add_edge("X", "A", admg.directed_edge_name) + admg.add_edge("B", "A", admg.directed_edge_name) + admg.add_edge("B", "Y", admg.directed_edge_name) + + L = {"X", "B"} + S = {"A"} + + assert pywhy_graphs.inducing_path(admg, "Z", "Y", L, S)[0]