Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add graph edges #762

Merged
merged 4 commits into from
May 8, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ def __init__(self, agate_table=None, **kwargs):
self.agate_table = agate_table
super(ParsedNode, self).__init__(**kwargs)

@property
def depends_on_nodes(self):
    """The node IDs this node depends on.

    This is the list stored under ``depends_on -> nodes`` in the node's
    contents; the stored list itself is returned, not a copy.
    """
    dependencies = self._contents['depends_on']
    return dependencies['nodes']

def to_dict(self):
"""Similar to 'serialize', but tacks the agate_table attribute in too.

Expand Down Expand Up @@ -302,6 +307,21 @@ class ParsedMacros(APIObject):
SCHEMA = PARSED_MACROS_CONTRACT


def build_edges(nodes):
    """Build the forward and backward edges on the given ParsedNodes and
    return them as two separate dictionaries, each mapping unique IDs to
    lists of edges.

    :param nodes: An iterable of objects exposing ``unique_id`` and
        ``depends_on_nodes``. Any iterable is accepted, including a
        single-pass one such as a generator.
    :return: A ``(forward_edges, backward_edges)`` tuple.
        ``backward_edges[uid]`` is a copy of that node's
        ``depends_on_nodes``; ``forward_edges[uid]`` lists the unique IDs of
        the given nodes that depend on ``uid``. Both dictionaries have
        exactly one key per given node.
    """
    # The nodes are walked twice below, so take a snapshot first; otherwise
    # a generator would be exhausted by the pre-population pass and the
    # backward map would come back empty.
    nodes = list(nodes)
    backward_edges = {}
    # pre-populate the forward edge dict for simplicity
    forward_edges = {node.unique_id: [] for node in nodes}
    for node in nodes:
        # copy, so callers can mutate the result without touching the node
        parents = list(node.depends_on_nodes)
        backward_edges[node.unique_id] = parents
        for parent_id in parents:
            # A dependency that is not one of the given nodes has no forward
            # entry. Skip it rather than fail with a bare KeyError; it is
            # still reported in backward_edges.
            if parent_id in forward_edges:
                forward_edges[parent_id].append(node.unique_id)
    return forward_edges, backward_edges


class ParsedManifest(object):
"""The final result of parsing all macros and nodes in a graph."""
def __init__(self, nodes, macros):
Expand All @@ -315,9 +335,13 @@ def serialize(self):
"""Convert the parsed manifest to a nested dict structure that we can
safely serialize to JSON.
"""
forward_edges, backward_edges = build_edges(self.nodes.values())

return {
'nodes': {k: v.serialize() for k, v in self.nodes.items()},
'macros': {k: v.serialize() for k, v in self.macros.items()},
'parent_map': backward_edges,
'child_map': forward_edges,
}

def to_flat_graph(self):
Expand Down
225 changes: 225 additions & 0 deletions test/unit/test_manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
import unittest

import copy
import os

import dbt.flags
from dbt.contracts.graph.parsed import ParsedNode, ParsedManifest

class ManifestTest(unittest.TestCase):
    """Unit tests for ParsedManifest.serialize() and to_flat_graph().

    The fixture graph built in setUp has six models. Arrows point from a
    parent to the node that lists it in ``depends_on['nodes']``::

        model.snowplow.events                      (no edges)
        model.root.events -> model.root.dep -> model.root.nested -+
        model.root.events -> model.root.sibling ------------------+-> model.root.multi
    """

    # Append custom assertion messages to the default diff instead of
    # replacing it (this is already the default on Python 3).
    longMessage = True

    def setUp(self):
        dbt.flags.STRICT_MODE = True

        self.maxDiff = None

        # Shared by every fixture node; must be set before _model() is used.
        self.model_config = {
            'enabled': True,
            'materialized': 'view',
            'post-hook': [],
            'pre-hook': [],
            'vars': {},
            'quoting': {},
            'column_types': {},
        }

        self.nested_nodes = dict([
            self._model('snowplow', 'events', 'events.sql'),
            self._model('root', 'events', 'events.sql'),
            self._model(
                'root', 'dep', 'multi.sql',
                parents=['model.root.events'],
                refs=[['events']]
            ),
            self._model(
                'root', 'nested', 'multi.sql',
                parents=['model.root.dep'],
                refs=[['events']]
            ),
            self._model(
                'root', 'sibling', 'multi.sql',
                parents=['model.root.events'],
                refs=[['events']]
            ),
            self._model(
                'root', 'multi', 'multi.sql',
                parents=['model.root.nested', 'model.root.sibling'],
                refs=[['events']]
            ),
        ])

    def _model(self, package, name, path, parents=None, refs=None):
        """Build one fixture model and return it as (unique_id, ParsedNode).

        :param package: Package name; also the first fqn element.
        :param name: Model name; also the second fqn element.
        :param path: Used for both ``path`` and ``original_file_path``.
        :param parents: Unique IDs for ``depends_on['nodes']`` (default: []).
        :param refs: Value for ``refs`` (default: []).
        """
        unique_id = 'model.{}.{}'.format(package, name)
        node = ParsedNode(
            name=name,
            schema='analytics',
            resource_type='model',
            unique_id=unique_id,
            fqn=[package, name],
            empty=False,
            package_name=package,
            refs=refs if refs is not None else [],
            depends_on={
                'nodes': parents if parents is not None else [],
                'macros': []
            },
            config=self.model_config,
            tags=[],
            path=path,
            original_file_path=path,
            root_path='',
            raw_sql='does not matter'
        )
        return unique_id, node

    def _assert_edges(self, edge_map, expected):
        """Assert each expected ID maps to the expected edges in edge_map."""
        for unique_id, edges in expected.items():
            # order doesn't matter.
            self.assertEqual(
                sorted(edge_map[unique_id]),
                sorted(edges),
                msg='unexpected edges for {}'.format(unique_id)
            )

    def test__no_nodes(self):
        manifest = ParsedManifest(nodes={}, macros={})
        self.assertEqual(
            manifest.serialize(),
            {'nodes': {}, 'macros': {}, 'parent_map': {}, 'child_map': {}}
        )

    def test__nested_nodes(self):
        nodes = copy.copy(self.nested_nodes)
        manifest = ParsedManifest(nodes=nodes, macros={})
        serialized = manifest.serialize()
        parent_map = serialized['parent_map']
        child_map = serialized['child_map']
        # make sure there aren't any extra/missing keys.
        self.assertEqual(set(parent_map), set(nodes))
        self.assertEqual(set(child_map), set(nodes))

        self._assert_edges(parent_map, {
            'model.root.sibling': ['model.root.events'],
            'model.root.nested': ['model.root.dep'],
            'model.root.dep': ['model.root.events'],
            'model.root.multi': ['model.root.nested', 'model.root.sibling'],
            'model.root.events': [],
            'model.snowplow.events': [],
        })
        self._assert_edges(child_map, {
            'model.root.sibling': ['model.root.multi'],
            'model.root.nested': ['model.root.multi'],
            'model.root.dep': ['model.root.nested'],
            'model.root.multi': [],
            'model.root.events': ['model.root.dep', 'model.root.sibling'],
            'model.snowplow.events': [],
        })

    def test__to_flat_graph(self):
        nodes = copy.copy(self.nested_nodes)
        manifest = ParsedManifest(nodes=nodes, macros={})
        flat_graph = manifest.to_flat_graph()
        flat_nodes = flat_graph['nodes']
        self.assertEqual(set(flat_graph), set(['nodes', 'macros']))
        self.assertEqual(flat_graph['macros'], {})
        self.assertEqual(set(flat_nodes), set(self.nested_nodes))
        # every flattened node carries the schema's required keys plus the
        # extra agate_table entry
        expected_keys = set(ParsedNode.SCHEMA['required']) | {'agate_table'}
        for node in flat_nodes.values():
            self.assertEqual(set(node), expected_keys)