diff --git a/CHANGELOG.md b/CHANGELOG.md
index 53ae1fbda61..1985bf47f5f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
 
 - Use Mapping instead of dict as the base class for APIObject ([#756](https://github.com/fishtown-analytics/dbt/pull/756))
 - Write JSON manifest file to disk during compilation ([#761](https://github.com/fishtown-analytics/dbt/pull/761))
+- Add forward and backward graph edges to the JSON manifest file ([#762](https://github.com/fishtown-analytics/dbt/pull/762))
 
 ## dbt 0.10.1 (Unreleased)
 
diff --git a/dbt/contracts/graph/parsed.py b/dbt/contracts/graph/parsed.py
index 79c405d17fa..ecd28ccd731 100644
--- a/dbt/contracts/graph/parsed.py
+++ b/dbt/contracts/graph/parsed.py
@@ -255,6 +255,11 @@ def __init__(self, agate_table=None, **kwargs):
         self.agate_table = agate_table
         super(ParsedNode, self).__init__(**kwargs)
 
+    @property
+    def depends_on_nodes(self):
+        """Return the list of node IDs that this node depends on."""
+        return self._contents['depends_on']['nodes']
+
     def to_dict(self):
         """Similar to 'serialize', but tacks the agate_table attribute in too.
 
@@ -302,6 +307,29 @@ class ParsedMacros(APIObject):
     SCHEMA = PARSED_MACROS_CONTRACT
 
 
+def build_edges(nodes):
+    """Build the forward and backward edges on the given list of ParsedNodes
+    and return them as two separate dictionaries, each mapping unique IDs to
+    lists of edges.
+
+    The first dictionary returned holds the forward edges (unique ID -> IDs of
+    the nodes that depend on it), the second the backward edges (unique ID ->
+    copy of that node's depends_on_nodes). A dependency that is not itself
+    one of the given nodes is kept in the backward edges but gets no forward
+    edge entry, instead of raising a KeyError.
+    """
+    backward_edges = {}
+    # pre-populate the forward edge dict for simplicity
+    forward_edges = {node.unique_id: [] for node in nodes}
+    for node in nodes:
+        backward_edges[node.unique_id] = node.depends_on_nodes[:]
+        for unique_id in node.depends_on_nodes:
+            # only nodes passed in have a forward edge list to append to
+            if unique_id in forward_edges:
+                forward_edges[unique_id].append(node.unique_id)
+    return forward_edges, backward_edges
+
+
 class ParsedManifest(object):
     """The final result of parsing all macros and nodes in a graph."""
     def __init__(self, nodes, macros):
@@ -315,9 +343,13 @@ def serialize(self):
         """Convert the parsed manifest to a nested dict structure that we can
         safely serialize to JSON.
         """
+        forward_edges, backward_edges = build_edges(self.nodes.values())
+
         return {
             'nodes': {k: v.serialize() for k, v in self.nodes.items()},
             'macros': {k: v.serialize() for k, v in self.macros.items()},
+            'parent_map': backward_edges,
+            'child_map': forward_edges,
         }
 
     def to_flat_graph(self):
diff --git a/test/unit/test_manifest.py b/test/unit/test_manifest.py
new file mode 100644
index 00000000000..c880ad28a0c
--- /dev/null
+++ b/test/unit/test_manifest.py
@@ -0,0 +1,225 @@
+import unittest
+
+import copy
+import os
+
+import dbt.flags
+from dbt.contracts.graph.parsed import ParsedNode, ParsedManifest
+
+class ManifestTest(unittest.TestCase):
+    def setUp(self):
+        dbt.flags.STRICT_MODE = True
+
+        self.maxDiff = None
+
+        self.model_config = {
+            'enabled': True,
+            'materialized': 'view',
+            'post-hook': [],
+            'pre-hook': [],
+            'vars': {},
+            'quoting': {},
+            'column_types': {},
+        }
+
+        self.nested_nodes = {
+            'model.snowplow.events': ParsedNode(
+                name='events',
+                schema='analytics',
+                resource_type='model',
+                unique_id='model.snowplow.events',
+                fqn=['snowplow', 'events'],
+                empty=False,
+                package_name='snowplow',
+                refs=[],
+                depends_on={
+                    'nodes': [],
+                    'macros': []
+                },
+                config=self.model_config,
+                tags=[],
+                path='events.sql',
+                original_file_path='events.sql',
+                root_path='',
+                raw_sql='does not matter'
+            ),
+            'model.root.events': ParsedNode(
+                name='events',
+                schema='analytics',
+                resource_type='model',
+                unique_id='model.root.events',
+                fqn=['root', 'events'],
+                empty=False,
+                package_name='root',
+                refs=[],
+                depends_on={
+                    'nodes': [],
+                    'macros': []
+                },
+                config=self.model_config,
+                tags=[],
+                path='events.sql',
+                original_file_path='events.sql',
+                root_path='',
+                raw_sql='does not matter'
+            ),
+            'model.root.dep': ParsedNode(
+                name='dep',
+                schema='analytics',
+                resource_type='model',
+                unique_id='model.root.dep',
+                fqn=['root', 'dep'],
+                empty=False,
+                package_name='root',
+                refs=[['events']],
+                depends_on={
+                    'nodes': ['model.root.events'],
+                    'macros': []
+                },
+                config=self.model_config,
+                tags=[],
+                path='multi.sql',
+                original_file_path='multi.sql',
+                root_path='',
+                raw_sql='does not matter'
+            ),
+            'model.root.nested': ParsedNode(
+                name='nested',
+                schema='analytics',
+                resource_type='model',
+                unique_id='model.root.nested',
+                fqn=['root', 'nested'],
+                empty=False,
+                package_name='root',
+                refs=[['events']],
+                depends_on={
+                    'nodes': ['model.root.dep'],
+                    'macros': []
+                },
+                config=self.model_config,
+                tags=[],
+                path='multi.sql',
+                original_file_path='multi.sql',
+                root_path='',
+                raw_sql='does not matter'
+            ),
+            'model.root.sibling': ParsedNode(
+                name='sibling',
+                schema='analytics',
+                resource_type='model',
+                unique_id='model.root.sibling',
+                fqn=['root', 'sibling'],
+                empty=False,
+                package_name='root',
+                refs=[['events']],
+                depends_on={
+                    'nodes': ['model.root.events'],
+                    'macros': []
+                },
+                config=self.model_config,
+                tags=[],
+                path='multi.sql',
+                original_file_path='multi.sql',
+                root_path='',
+                raw_sql='does not matter'
+            ),
+            'model.root.multi': ParsedNode(
+                name='multi',
+                schema='analytics',
+                resource_type='model',
+                unique_id='model.root.multi',
+                fqn=['root', 'multi'],
+                empty=False,
+                package_name='root',
+                refs=[['events']],
+                depends_on={
+                    'nodes': ['model.root.nested', 'model.root.sibling'],
+                    'macros': []
+                },
+                config=self.model_config,
+                tags=[],
+                path='multi.sql',
+                original_file_path='multi.sql',
+                root_path='',
+                raw_sql='does not matter'
+            ),
+        }
+
+    def test__no_nodes(self):
+        manifest = ParsedManifest(nodes={}, macros={})
+        self.assertEqual(
+            manifest.serialize(),
+            {'nodes': {}, 'macros': {}, 'parent_map': {}, 'child_map': {}}
+        )
+
+    def test__nested_nodes(self):
+        nodes = copy.copy(self.nested_nodes)
+        manifest = ParsedManifest(nodes=nodes, macros={})
+        serialized = manifest.serialize()
+        parent_map = serialized['parent_map']
+        child_map = serialized['child_map']
+        # make sure there aren't any extra/missing keys.
+        self.assertEqual(set(parent_map), set(nodes))
+        self.assertEqual(set(child_map), set(nodes))
+        self.assertEqual(
+            parent_map['model.root.sibling'],
+            ['model.root.events']
+        )
+        self.assertEqual(
+            parent_map['model.root.nested'],
+            ['model.root.dep']
+        )
+        self.assertEqual(
+            parent_map['model.root.dep'],
+            ['model.root.events']
+        )
+        # order doesn't matter.
+        self.assertEqual(
+            set(parent_map['model.root.multi']),
+            set(['model.root.nested', 'model.root.sibling'])
+        )
+        self.assertEqual(
+            parent_map['model.root.events'],
+            [],
+        )
+        self.assertEqual(
+            parent_map['model.snowplow.events'],
+            [],
+        )
+
+        self.assertEqual(
+            child_map['model.root.sibling'],
+            ['model.root.multi'],
+        )
+        self.assertEqual(
+            child_map['model.root.nested'],
+            ['model.root.multi'],
+        )
+        self.assertEqual(
+            child_map['model.root.dep'],
+            ['model.root.nested']
+        )
+        self.assertEqual(
+            child_map['model.root.multi'],
+            []
+        )
+        self.assertEqual(
+            set(child_map['model.root.events']),
+            set(['model.root.dep', 'model.root.sibling'])
+        )
+        self.assertEqual(
+            child_map['model.snowplow.events'],
+            []
+        )
+
+    def test__to_flat_graph(self):
+        nodes = copy.copy(self.nested_nodes)
+        manifest = ParsedManifest(nodes=nodes, macros={})
+        flat_graph = manifest.to_flat_graph()
+        flat_nodes = flat_graph['nodes']
+        self.assertEqual(set(flat_graph), set(['nodes', 'macros']))
+        self.assertEqual(flat_graph['macros'], {})
+        self.assertEqual(set(flat_nodes), set(self.nested_nodes))
+        expected_keys = set(ParsedNode.SCHEMA['required']) | {'agate_table'}
+        for node in flat_nodes.values():
+            self.assertEqual(set(node), expected_keys)