Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add generated dates (#864) #877

Merged
merged 6 commits into from
Jul 25, 2018
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dbt.api import APIObject
from dbt.utils import deep_merge
from dbt.utils import deep_merge, timestring
from dbt.node_types import NodeType

import dbt.clients.jinja
Expand Down Expand Up @@ -252,6 +252,9 @@
'properties': {
'nodes': PARSED_NODES_CONTRACT,
'macros': PARSED_MACROS_CONTRACT,
'generated_at': {
'type': 'date-time'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the correct json schema draft4 type for this is:

{
    "type": "string",
    "format": "date-time"
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, I was wondering how tests were passing after this comment - it turns out we never use this schema at all. Should I just remove it entirely?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need at least one json schema for the manifest, and at least one for the catalog. it's actually really important as a piece of documentation to share with the community. so ideally we'd either (a) apply it or (b) just leave it in here but do our best to correct it, even though it's not currently used. up to you which you want to do here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've changed the ParsedManifest to validate the schema (and made sure it fails on the old schema).

}
},
'required': ['nodes', 'macros'],
}
Expand Down Expand Up @@ -333,12 +336,14 @@ def build_edges(nodes):

class ParsedManifest(object):
"""The final result of parsing all macros and nodes in a graph."""
def __init__(self, nodes, macros):
def __init__(self, nodes, macros, generated_at):
"""The constructor. nodes and macros are dictionaries mapping unique
IDs to ParsedNode and ParsedMacro objects, respectively.
IDs to ParsedNode and ParsedMacro objects, respectively. generated_at
is a text timestamp in RFC 3339 format.
"""
self.nodes = nodes
self.macros = macros
self.generated_at = generated_at

def serialize(self):
"""Convert the parsed manifest to a nested dict structure that we can
Expand All @@ -351,6 +356,7 @@ def serialize(self):
'macros': {k: v.serialize() for k, v in self.macros.items()},
'parent_map': backward_edges,
'child_map': forward_edges,
'generated_at': self.generated_at,
}

def _find_by_name(self, name, package, subgraph, nodetype):
Expand Down
4 changes: 3 additions & 1 deletion dbt/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from dbt.node_types import NodeType
from dbt.contracts.graph.parsed import ParsedManifest
from dbt.utils import timestring

import dbt.parser

Expand All @@ -18,7 +19,8 @@ def load_all(cls, root_project, all_projects):
for loader in cls._LOADERS:
nodes.update(loader.load_all(root_project, all_projects, macros))

manifest = ParsedManifest(nodes=nodes, macros=macros)
manifest = ParsedManifest(nodes=nodes, macros=macros,
generated_at=timestring())
manifest = dbt.parser.ParserUtils.process_refs(manifest, root_project)
return manifest

Expand Down
2 changes: 1 addition & 1 deletion dbt/task/generate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import os

from dbt.contracts.graph.parsed import ParsedManifest, ParsedNode, ParsedMacro
from dbt.adapters.factory import get_adapter
from dbt.clients.system import write_file
from dbt.compat import bigint
Expand Down Expand Up @@ -115,6 +114,7 @@ def run(self):
for row in results
]
results = unflatten(results)
results['generated_at'] = dbt.utils.timestring()

path = os.path.join(self.project['target-path'], CATALOG_FILENAME)
write_file(path, json.dumps(results))
Expand Down
7 changes: 7 additions & 0 deletions dbt/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import datetime
import os
import hashlib
import itertools
Expand Down Expand Up @@ -460,3 +461,9 @@ def filter_null_values(input):

def add_ephemeral_model_prefix(s):
return '__dbt__CTE__{}'.format(s)


def timestring():
"""Get the current datetime as an RFC 3339-compliant string"""
# isoformat doesn't include the mandatory trailing 'Z' for UTC.
return datetime.utcnow().isoformat() + 'Z'
43 changes: 33 additions & 10 deletions test/integration/029_docs_generate_tests/test_docs_generate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
from datetime import datetime, timedelta

from test.integration.base import DBTIntegrationTest, use_profile

Expand Down Expand Up @@ -43,6 +44,25 @@ def run_and_generate(self, extra=None):
self.assertEqual(len(self.run_dbt()), 1)
self.run_dbt(['docs', 'generate'])

def assertRecent(self, timestr):
"""Given a timestring in '%Y-%m-%dT%H:%M:%SZ' format (ISO8601), assert
that it represents a time before now and a time after 24h ago.

We can't just set the time via freezegun.freeze_time because that
breaks SSL, and a lot of these tests use SSL.
"""
now = datetime.utcnow()
yesterday = now + timedelta(days=-1)
parsed = datetime.strptime(timestr, '%Y-%m-%dT%H:%M:%S.%fZ')
self.assertLess(
yesterday, parsed,
'parsed date {} happened over 24h ago'.format(parsed)
)
self.assertGreater(
now, parsed,
'parsed date {} happened in the future'.format(parsed)
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hah interesting note about freezegun.freeze_time. i think this is fine as a single-use hack. let's make sure not to overuse this.


def verify_catalog(self, expected):
self.assertTrue(os.path.exists('./target/catalog.json'))

Expand All @@ -51,6 +71,8 @@ def verify_catalog(self, expected):

my_schema_name = self.unique_schema()
self.assertIn(my_schema_name, catalog)
self.assertIn('generated_at', catalog)
self.assertRecent(catalog.pop('generated_at'))
my_schema = catalog[my_schema_name]
self.assertEqual(expected, my_schema)

Expand Down Expand Up @@ -162,14 +184,15 @@ def verify_manifest(self, expected_manifest):

self.assertEqual(
set(manifest),
{'nodes', 'macros', 'parent_map', 'child_map'}
{'nodes', 'macros', 'parent_map', 'child_map', 'generated_at'}
)

self.verify_manifest_macros(manifest)
manifest_without_macros = {
k: v for k, v in manifest.items() if k != 'macros'
manifest_without_extras = {
k: v for k, v in manifest.items()
if k not in {'macros', 'generated_at'}
}
self.assertEqual(manifest_without_macros, expected_manifest)
self.assertEqual(manifest_without_extras, expected_manifest)

@use_profile('postgres')
def test__postgres__run_and_generate(self):
Expand Down Expand Up @@ -298,7 +321,7 @@ def test__bigquery__run_and_generate(self):
{
'name': 'id',
'index': 1,
'type': 'INTEGER',
'type': 'INT64',
'comment': None,
},
{
Expand Down Expand Up @@ -361,31 +384,31 @@ def test__bigquery__nested_models(self):
{
"name": "field_1",
"index": 1,
"type": "INTEGER",
"type": "INT64",
"comment": None
},
{
"name": "field_2",
"index": 2,
"type": "INTEGER",
"type": "INT64",
"comment": None
},
{
"name": "field_3",
"index": 3,
"type": "INTEGER",
"type": "INT64",
"comment": None
},
{
"name": "nested_field.field_4",
"index": 4,
"type": "INTEGER",
"type": "INT64",
"comment": None
},
{
"name": "nested_field.field_5",
"index": 5,
"type": "INTEGER",
"type": "INT64",
"comment": None
}
]
Expand Down
21 changes: 21 additions & 0 deletions test/integration/034_redshift_test/test_late_binding_view.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import json
import os

from nose.plugins.attrib import attr
from test.integration.base import DBTIntegrationTest


class TestLateBindingView(DBTIntegrationTest):
@property
def schema(self):
return 'late_binding_view_033'

@staticmethod
def dir(path):
return os.path.normpath(
os.path.join('test/integration/033_redshift_test', path)
)

@property
def models(self):
return self.dir("models")
17 changes: 13 additions & 4 deletions test/unit/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

import dbt.flags
from dbt.contracts.graph.parsed import ParsedNode, ParsedManifest
from dbt.utils import timestring
import freezegun

class ManifestTest(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -151,17 +153,23 @@ def setUp(self):
),
}

@freezegun.freeze_time('2018-02-14T09:15:13Z')
def test__no_nodes(self):
manifest = ParsedManifest(nodes={}, macros={})
manifest = ParsedManifest(nodes={}, macros={},
generated_at=timestring())
self.assertEqual(
manifest.serialize(),
{'nodes': {}, 'macros': {}, 'parent_map': {}, 'child_map': {}}
{'nodes': {}, 'macros': {}, 'parent_map': {}, 'child_map': {},
'generated_at': '2018-02-14T09:15:13Z'}
)

@freezegun.freeze_time('2018-02-14T09:15:13Z')
def test__nested_nodes(self):
nodes = copy.copy(self.nested_nodes)
manifest = ParsedManifest(nodes=nodes, macros={})
manifest = ParsedManifest(nodes=nodes, macros={},
generated_at=timestring())
serialized = manifest.serialize()
self.assertEqual(serialized['generated_at'], '2018-02-14T09:15:13Z')
parent_map = serialized['parent_map']
child_map = serialized['child_map']
# make sure there aren't any extra/missing keys.
Expand Down Expand Up @@ -220,7 +228,8 @@ def test__nested_nodes(self):

def test__to_flat_graph(self):
nodes = copy.copy(self.nested_nodes)
manifest = ParsedManifest(nodes=nodes, macros={})
manifest = ParsedManifest(nodes=nodes, macros={},
generated_at=timestring())
flat_graph = manifest.to_flat_graph()
flat_nodes = flat_graph['nodes']
self.assertEqual(set(flat_graph), set(['nodes', 'macros']))
Expand Down
2 changes: 2 additions & 0 deletions test/unit/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import dbt.flags
from dbt.parser import ModelParser, MacroParser, DataTestParser, SchemaParser, ParserUtils
from dbt.utils import timestring

from dbt.node_types import NodeType
from dbt.contracts.graph.parsed import ParsedManifest, ParsedNode, ParsedMacro
Expand Down Expand Up @@ -702,6 +703,7 @@ def test__process_refs__packages(self):
manifest = ParsedManifest(
nodes={k: ParsedNode(**v) for (k,v) in graph['nodes'].items()},
macros={k: ParsedMacro(**v) for (k,v) in graph['macros'].items()},
generated_at=timestring(),
)

processed_manifest = ParserUtils.process_refs(manifest, 'root')
Expand Down