From 7045e11aa03c9425a0522f48c2631ddef22a4ccc Mon Sep 17 00:00:00 2001 From: Tobie Tusing Date: Fri, 7 Apr 2023 09:09:58 -0700 Subject: [PATCH] Remove trivial nodes before building subdag (#7194) * remove trivial nodes before building subdag * add changie * Update graph.py remove comment * further optimize by sorting node search by degree * change degree to product of in and out degree --- .../Under the Hood-20230319-172824.yaml | 6 ++++++ core/dbt/graph/graph.py | 21 ++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 .changes/unreleased/Under the Hood-20230319-172824.yaml diff --git a/.changes/unreleased/Under the Hood-20230319-172824.yaml b/.changes/unreleased/Under the Hood-20230319-172824.yaml new file mode 100644 index 00000000000..06722f57ae6 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20230319-172824.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Improves build times for common selections by improving subgraph calculation +time: 2023-03-19T17:28:24.321555-07:00 +custom: + Author: '@ttusing' + Issue: "7195" diff --git a/core/dbt/graph/graph.py b/core/dbt/graph/graph.py index a5b5e91456e..29f24cae734 100644 --- a/core/dbt/graph/graph.py +++ b/core/dbt/graph/graph.py @@ -74,7 +74,26 @@ def get_subset_graph(self, selected: Iterable[UniqueId]) -> "Graph": new_graph = self.graph.copy() include_nodes = set(selected) - for node in self: + still_removing = True + while still_removing: + nodes_to_remove = list( + node + for node in new_graph + if node not in include_nodes + and (new_graph.in_degree(node) * new_graph.out_degree(node)) == 0 + ) + if len(nodes_to_remove) == 0: + still_removing = False + else: + new_graph.remove_nodes_from(nodes_to_remove) + + # sort remaining nodes by degree + remaining_nodes = list(new_graph.nodes()) + remaining_nodes.sort( + key=lambda node: new_graph.in_degree(node) * new_graph.out_degree(node) + ) + + for node in remaining_nodes: if node not in include_nodes: source_nodes = [x for x, _ 
in new_graph.in_edges(node)] target_nodes = [x for _, x in new_graph.out_edges(node)]