Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Write run results to disk (#829) #904

Merged
merged 7 commits into from
Aug 10, 2018
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- Add a 'generated_at' field to both the manifest and the catalog. ([#887](https://github.com/fishtown-analytics/dbt/pull/877))
- Version 2 of schema.yml, which allows users to create table and column comments that end up in the manifest ([#880](https://github.com/fishtown-analytics/dbt/pull/880))
- Add `docs` blocks that users can put into `.md` files and `doc()` value for schema v2 description fields ([#888](https://github.com/fishtown-analytics/dbt/pull/888))
- Write out a 'run_results.json' after dbt invocations. ([#904](https://github.com/fishtown-analytics/dbt/pull/904))

## dbt 0.10.2 - Betsy Ross (August 3, 2018)

Expand Down
6 changes: 6 additions & 0 deletions dbt/clients/system.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import errno
import fnmatch
import json
import os
import os.path
import shutil
Expand All @@ -11,6 +12,7 @@

import dbt.compat
import dbt.exceptions
import dbt.utils

from dbt.logger import GLOBAL_LOGGER as logger

Expand Down Expand Up @@ -117,6 +119,10 @@ def write_file(path, contents=''):
return True


def write_json(path, data):
return write_file(path, json.dumps(data, cls=dbt.utils.JSONEncoder))


def _windows_rmdir_readonly(func, path, exc):
exception_val = exc[1]
if exception_val.errno == errno.EACCES:
Expand Down
1 change: 1 addition & 0 deletions dbt/compat.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import codecs
import json

WHICH_PYTHON = None

Expand Down
11 changes: 2 additions & 9 deletions dbt/compilation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import dbt.loader
from dbt.contracts.graph.compiled import CompiledNode, CompiledGraph

from dbt.clients.system import write_file
from dbt.clients.system import write_json
from dbt.logger import GLOBAL_LOGGER as logger

graph_file_name = 'graph.gpickle'
Expand Down Expand Up @@ -90,13 +90,6 @@ def initialize(self):
dbt.clients.system.make_directory(self.project['target-path'])
dbt.clients.system.make_directory(self.project['modules-path'])

def __write(self, build_filepath, payload):
target_path = os.path.join(self.project['target-path'], build_filepath)

write_file(target_path, payload)

return target_path

def compile_node(self, node, manifest):
logger.debug("Compiling {}".format(node.get('unique_id')))

Expand Down Expand Up @@ -157,7 +150,7 @@ def write_manifest_file(self, manifest):
"""
filename = manifest_file_name
manifest_path = os.path.join(self.project['target-path'], filename)
write_file(manifest_path, json.dumps(manifest.serialize()))
write_json(manifest_path, manifest.serialize())

def write_graph_file(self, linker):
filename = graph_file_name
Expand Down
5 changes: 4 additions & 1 deletion dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,14 @@
'generated_at': {
'type': 'string',
'format': 'date-time',
'description': (
'The time at which the manifest was generated'
),
},
'parent_map': NODE_EDGE_MAP,
'child_map': NODE_EDGE_MAP,
},
'required': ['nodes', 'macros', 'docs'],
'required': ['nodes', 'macros', 'docs', 'generated_at'],
}


Expand Down
2 changes: 1 addition & 1 deletion dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from dbt.api import APIObject
from dbt.utils import deep_merge, timestring
from dbt.utils import deep_merge
from dbt.node_types import NodeType
from dbt.exceptions import raise_duplicate_resource_name, \
raise_patch_targets_not_found
Expand Down
125 changes: 125 additions & 0 deletions dbt/contracts/results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
from dbt.api.object import APIObject
from dbt.utils import deep_merge
from dbt.contracts.graph.manifest import COMPILE_RESULT_NODE_CONTRACT
from dbt.contracts.graph.parsed import PARSED_NODE_CONTRACT
from dbt.contracts.graph.compiled import COMPILED_NODE_CONTRACT
from dbt.contracts.graph.manifest import PARSED_MANIFEST_CONTRACT

RUN_MODEL_RESULT_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': 'The result of a single node being run',
'properties': {
'error': {
'type': ['string', 'null'],
'description': 'The error string, or None if there was no error',
},
'skip': {
'type': 'boolean',
'description': 'True if this node was skipped',
},
# This is assigned by dbt.ui.printer.print_test_result_line, if a test
# has no error and a non-zero status
'fail': {
'type': ['boolean', 'null'],
'description': 'On tests, true if the test failed',
},
'status': {
'type': ['string', 'null', 'number'],
'description': 'The status result of the node execution',
},
'execution_time': {
'type': 'number',
'description': 'The execution time, in seconds',
},
'node': COMPILE_RESULT_NODE_CONTRACT,
},
'required': ['node'],
}


def named_property(name, doc=None):
def get_prop(self):
return self._contents.get(name)

def set_prop(self, value):
self._contents[name] = value
self.validate()

return property(get_prop, set_prop, doc=doc)


class RunModelResult(APIObject):
SCHEMA = RUN_MODEL_RESULT_CONTRACT

def __init__(self, node, error=None, skip=False, status=None, failed=None,
execution_time=0):
super(RunModelResult, self).__init__(node=node, error=error, skip=skip,
status=status, fail=failed,
execution_time=execution_time)

# these all get set after the fact, generally
error = named_property('error',
'If there was an error, the text of that error')
skip = named_property('skip', 'True if the model was skipped')
fail = named_property('fail', 'True if this was a test and it failed')
status = named_property('status', 'The status of the model execution')
execution_time = named_property('execution_time',
'The time in seconds to execute the model')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

love this


@property
def errored(self):
return self.error is not None

@property
def failed(self):
return self.fail

@property
def skipped(self):
return self.skip

def serialize(self):
result = super(RunModelResult, self).serialize()
result['node'] = self.node.serialize()
return result


EXECUTION_RESULT_CONTRACT = {
'type': 'object',
'additionalProperties': False,
'description': 'The result of a single dbt invocation',
'properties': {
'results': {
'type': 'array',
'items': RUN_MODEL_RESULT_CONTRACT,
'description': 'An array of results, one per model',
},
'generated_at': {
'type': 'string',
'format': 'date-time',
'description': (
'The time at which the execution result was generated'
),
},
'elapsed_time': {
'type': 'number',
'description': (
'The time elapsed from before_run to after_run (hooks are not '
'included)'
),
}
},
'required': ['results', 'generated_at', 'elapsed_time'],
}


class ExecutionResult(APIObject):
SCHEMA = EXECUTION_RESULT_CONTRACT

def serialize(self):
return {
'results': [r.serialize() for r in self.results],
'generated_at': self.generated_at,
'elapsed_time': self.elapsed_time,
}
32 changes: 1 addition & 31 deletions dbt/node_runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from dbt.utils import get_nodes_by_tags
from dbt.node_types import NodeType, RunHookType
from dbt.adapters.factory import get_adapter
from dbt.contracts.results import RunModelResult

import dbt.clients.jinja
import dbt.context.runtime
Expand Down Expand Up @@ -39,37 +40,6 @@ def track_model_run(index, num_nodes, run_model_result):
})


class RunModelResult(object):
def __init__(self, node, error=None, skip=False, status=None,
failed=None, execution_time=0):
self.node = node
self.error = error
self.skip = skip
self.fail = failed
self.status = status
self.execution_time = execution_time

@property
def errored(self):
return self.error is not None

@property
def failed(self):
return self.fail

@property
def skipped(self):
return self.skip


class RunOperationResult(RunModelResult):
def __init__(self, node, error=None, skip=False, status=None,
failed=None, execution_time=0, returned=None):
super(RunOperationResult, self).__init__(node, error, skip, status,
failed, execution_time)
self.returned = returned


class BaseRunner(object):
print_header = True

Expand Down
17 changes: 17 additions & 0 deletions dbt/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from dbt.logger import GLOBAL_LOGGER as logger
from dbt.contracts.graph.parsed import ParsedNode
from dbt.contracts.graph.manifest import CompileResultNode
from dbt.contracts.results import ExecutionResult

import dbt.clients.jinja
import dbt.compilation
Expand All @@ -13,12 +14,17 @@
import dbt.tracking
import dbt.model
import dbt.ui.printer
import dbt.utils
from dbt.clients.system import write_json

import dbt.graph.selector

from multiprocessing.dummy import Pool as ThreadPool


RESULT_FILE_NAME = 'run_results.json'


class RunManager(object):
def __init__(self, project, target_path, args):
self.project = project
Expand Down Expand Up @@ -170,6 +176,10 @@ def execute_nodes(self, linker, Runner, manifest, node_dependency_list):

return node_results

def write_results(self, execution_result):
filepath = os.path.join(self.project['target-path'], RESULT_FILE_NAME)
write_json(filepath, execution_result.serialize())

def compile(self, project):
compiler = dbt.compilation.Compiler(project)
compiler.initialize()
Expand Down Expand Up @@ -218,6 +228,13 @@ def run_from_graph(self, Selector, Runner, query):
finally:
adapter.cleanup_connections()

result = ExecutionResult(
results=res,
elapsed_time=elapsed,
generated_at=dbt.utils.timestring(),
)
self.write_results(result)

return res

# ------------------------------------
Expand Down
4 changes: 2 additions & 2 deletions dbt/task/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import shutil

from dbt.adapters.factory import get_adapter
from dbt.clients.system import write_file
from dbt.clients.system import write_json
from dbt.compat import bigint
from dbt.include import DOCS_INDEX_FILE_PATH
import dbt.ui.printer
Expand Down Expand Up @@ -180,7 +180,7 @@ def run(self):
results['generated_at'] = dbt.utils.timestring()

path = os.path.join(self.project['target-path'], CATALOG_FILENAME)
write_file(path, json.dumps(results))
write_json(path, results)

dbt.ui.printer.print_timestamped_line(
'Catalog written to {}'.format(os.path.abspath(path))
Expand Down
13 changes: 13 additions & 0 deletions dbt/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from datetime import datetime
from decimal import Decimal
import os
import hashlib
import itertools
import json
import collections
import copy
import functools
Expand Down Expand Up @@ -441,3 +443,14 @@ def timestring():
"""Get the current datetime as an RFC 3339-compliant string"""
# isoformat doesn't include the mandatory trailing 'Z' for UTC.
return datetime.utcnow().isoformat() + 'Z'


class JSONEncoder(json.JSONEncoder):
"""A 'custom' json encoder that does normal json encoder things, but also
handles `Decimal`s. Naturally, this can lose precision because they get
converted to floats.
"""
def default(self, obj):
if isinstance(obj, Decimal):
return float(obj)
return super(JSONEncoder, self).default(obj)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really don't like the way Python handles JSON encoding. This looks like a good start in handling this better in dbt.

17 changes: 17 additions & 0 deletions test/integration/029_docs_generate_tests/ref_models/docs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{% docs ephemeral_summary %}
A summmary table of the ephemeral copy of the seed data
{% enddocs %}

{% docs summary_first_name %}
The first name being summarized
{% enddocs %}

{% docs summary_count %}
The number of instances of the first name
{% enddocs %}

{% docs view_summary %}
A view of the summary of the ephemeral copy of the seed data
{% enddocs %}


Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{{
config(
materialized = "ephemeral"
)
}}

select * from {{ this.schema }}.seed
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{{
config(
materialized = "table"
)
}}

select first_name, count(*) as ct from {{ref('ephemeral_copy')}}
group by first_name
order by first_name asc
Loading