Skip to content

Commit

Permalink
Add graph edges (#762)
Browse files Browse the repository at this point in the history
Add parent and child maps to ParsedManifest.serialize() output
  • Loading branch information
beckjake committed May 8, 2018
1 parent f3c835b commit 0879ca7
Show file tree
Hide file tree
Showing 3 changed files with 250 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

- Use Mapping instead of dict as the base class for APIObject ([#756](https://github.com/fishtown-analytics/dbt/pull/756))
- Write JSON manifest file to disk during compilation ([#761](https://github.com/fishtown-analytics/dbt/pull/761))
- Add forward and backward graph edges to the JSON manifest file ([#762](https://github.com/fishtown-analytics/dbt/pull/762))

## dbt 0.10.1 (Unreleased)

Expand Down
24 changes: 24 additions & 0 deletions dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ def __init__(self, agate_table=None, **kwargs):
self.agate_table = agate_table
super(ParsedNode, self).__init__(**kwargs)

@property
def depends_on_nodes(self):
    """Return the node IDs stored under ``depends_on -> nodes``.

    The list is read straight from this node's ``_contents`` and is
    returned as-is (not copied).
    """
    dependencies = self._contents['depends_on']
    return dependencies['nodes']

def to_dict(self):
"""Similar to 'serialize', but tacks the agate_table attribute in too.
Expand Down Expand Up @@ -302,6 +307,21 @@ class ParsedMacros(APIObject):
SCHEMA = PARSED_MACROS_CONTRACT


def build_edges(nodes):
    """Build the forward and backward edges for the given nodes.

    :param nodes: An iterable of objects exposing ``unique_id`` and
        ``depends_on_nodes`` (a list of unique IDs). Any iterable is
        accepted, including one-shot generators.
    :return: A ``(forward_edges, backward_edges)`` tuple of dicts. Each maps
        every node's unique ID to a list of unique IDs: ``forward_edges``
        lists the nodes that depend on the key (its children), and
        ``backward_edges`` lists the nodes the key depends on (its parents).

    A dependency that names an ID not present in ``nodes`` is kept in the
    node's backward edge list, but it gets no forward edge entry, so the
    forward dict only ever contains IDs of the given nodes.
    """
    # Two passes are made over the nodes; materialize once so a generator
    # is not exhausted by the first pass.
    nodes = list(nodes)

    backward_edges = {}
    # pre-populate the forward edge dict so that nodes without children
    # still get an (empty) entry
    forward_edges = {node.unique_id: [] for node in nodes}
    for node in nodes:
        parents = node.depends_on_nodes
        # copy, so callers mutating the result cannot alter the node itself
        backward_edges[node.unique_id] = parents[:]
        for unique_id in parents:
            children = forward_edges.get(unique_id)
            # unknown parent IDs have no entry; skip instead of raising
            # KeyError
            if children is not None:
                children.append(node.unique_id)
    return forward_edges, backward_edges


class ParsedManifest(object):
"""The final result of parsing all macros and nodes in a graph."""
def __init__(self, nodes, macros):
Expand All @@ -315,9 +335,13 @@ def serialize(self):
"""Convert the parsed manifest to a nested dict structure that we can
safely serialize to JSON.
"""
forward_edges, backward_edges = build_edges(self.nodes.values())

return {
'nodes': {k: v.serialize() for k, v in self.nodes.items()},
'macros': {k: v.serialize() for k, v in self.macros.items()},
'parent_map': backward_edges,
'child_map': forward_edges,
}

def to_flat_graph(self):
Expand Down
225 changes: 225 additions & 0 deletions test/unit/test_manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
import unittest

import copy
import os

import dbt.flags
from dbt.contracts.graph.parsed import ParsedNode, ParsedManifest

class ManifestTest(unittest.TestCase):
    """Tests for ParsedManifest.serialize() and ParsedManifest.to_flat_graph().

    The fixture is a small graph of model nodes:

        model.snowplow.events   (isolated)
        model.root.events  ->  model.root.dep  ->  model.root.nested  \\
                           \\->  model.root.sibling  ----------------->  model.root.multi
    """

    def _make_model(self, package, name, path, refs, parents):
        """Build a ParsedNode model fixture.

        Only the package, name, file path, refs and parent node IDs vary
        between fixtures; every other field uses the same fixed value.
        """
        return ParsedNode(
            name=name,
            schema='analytics',
            resource_type='model',
            unique_id='model.{}.{}'.format(package, name),
            fqn=[package, name],
            empty=False,
            package_name=package,
            refs=refs,
            depends_on={
                'nodes': parents,
                'macros': []
            },
            config=self.model_config,
            tags=[],
            path=path,
            original_file_path=path,
            root_path='',
            raw_sql='does not matter'
        )

    def setUp(self):
        dbt.flags.STRICT_MODE = True

        # show full diffs when a dict comparison fails
        self.maxDiff = None

        self.model_config = {
            'enabled': True,
            'materialized': 'view',
            'post-hook': [],
            'pre-hook': [],
            'vars': {},
            'quoting': {},
            'column_types': {},
        }

        # (package, name, path, refs, parent node IDs), in insertion order
        model_specs = [
            ('snowplow', 'events', 'events.sql', [], []),
            ('root', 'events', 'events.sql', [], []),
            ('root', 'dep', 'multi.sql',
             [['events']], ['model.root.events']),
            ('root', 'nested', 'multi.sql',
             [['events']], ['model.root.dep']),
            ('root', 'sibling', 'multi.sql',
             [['events']], ['model.root.events']),
            ('root', 'multi', 'multi.sql',
             [['events']], ['model.root.nested', 'model.root.sibling']),
        ]

        self.nested_nodes = {}
        for package, name, path, refs, parents in model_specs:
            model = self._make_model(package, name, path, refs, parents)
            self.nested_nodes[model.unique_id] = model

    def test__no_nodes(self):
        # an empty manifest serializes to four empty maps
        serialized = ParsedManifest(nodes={}, macros={}).serialize()
        expected = {
            'nodes': {},
            'macros': {},
            'parent_map': {},
            'child_map': {},
        }
        self.assertEqual(serialized, expected)

    def test__nested_nodes(self):
        nodes = copy.copy(self.nested_nodes)
        serialized = ParsedManifest(nodes=nodes, macros={}).serialize()
        parent_map = serialized['parent_map']
        child_map = serialized['child_map']

        # make sure there aren't any extra/missing keys.
        all_ids = set(nodes)
        self.assertEqual(set(parent_map), all_ids)
        self.assertEqual(set(child_map), all_ids)

        # nodes with at most one parent: compare the lists exactly
        exact_parents = [
            ('model.root.sibling', ['model.root.events']),
            ('model.root.nested', ['model.root.dep']),
            ('model.root.dep', ['model.root.events']),
            ('model.root.events', []),
            ('model.snowplow.events', []),
        ]
        for unique_id, expected in exact_parents:
            self.assertEqual(parent_map[unique_id], expected)

        # order doesn't matter.
        self.assertEqual(
            set(parent_map['model.root.multi']),
            {'model.root.nested', 'model.root.sibling'}
        )

        # nodes with at most one child: compare the lists exactly
        exact_children = [
            ('model.root.sibling', ['model.root.multi']),
            ('model.root.nested', ['model.root.multi']),
            ('model.root.dep', ['model.root.nested']),
            ('model.root.multi', []),
            ('model.snowplow.events', []),
        ]
        for unique_id, expected in exact_children:
            self.assertEqual(child_map[unique_id], expected)

        # order doesn't matter.
        self.assertEqual(
            set(child_map['model.root.events']),
            {'model.root.dep', 'model.root.sibling'}
        )

    def test__to_flat_graph(self):
        nodes = copy.copy(self.nested_nodes)
        flat_graph = ParsedManifest(nodes=nodes, macros={}).to_flat_graph()
        flat_nodes = flat_graph['nodes']

        self.assertEqual(set(flat_graph), {'nodes', 'macros'})
        self.assertEqual(flat_graph['macros'], {})
        self.assertEqual(set(flat_nodes), set(self.nested_nodes))

        # every flattened node carries the schema's required keys plus
        # the extra 'agate_table' key
        expected_keys = {'agate_table'}.union(ParsedNode.SCHEMA['required'])
        for flat_node in flat_nodes.values():
            self.assertEqual(set(flat_node), expected_keys)

0 comments on commit 0879ca7

Please sign in to comment.