Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docs blocks (#810) #888

Merged
merged 8 commits into from
Aug 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

### Features

- Extend catalog and manifest to also support Snowflake, BigQuery, and Redshift
- Add a 'generated_at' field to both the manifest and the catalog.
- Version 2 of schema.yml, which allows users to create table and column comments that end up in the manifest
- Extend catalog and manifest to also support Snowflake, BigQuery, and Redshift, in addition to existing Postgres support ([#866](https://github.com/fishtown-analytics/dbt/pull/866), [#857](https://github.com/fishtown-analytics/dbt/pull/857), [#849](https://github.com/fishtown-analytics/dbt/pull/849))
- Add a 'generated_at' field to both the manifest and the catalog. ([#887](https://github.com/fishtown-analytics/dbt/pull/877))
- Version 2 of schema.yml, which allows users to create table and column comments that end up in the manifest ([#880](https://github.com/fishtown-analytics/dbt/pull/880))
- Add `docs` blocks that users can put into `.md` files and `doc()` value for schema v2 description fields ([#888](https://github.com/fishtown-analytics/dbt/pull/888))

## dbt 0.10.2 (unreleased, codename: Betsy Ross)

Expand Down
7 changes: 6 additions & 1 deletion dbt/api/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,17 @@ def get(self, key, default=None):
except KeyError:
return default

def set(self, key, value):
    """Store ``value`` under ``key`` in this object's ``_contents``."""
    self._contents[key] = value

# most users of APIObject also expect the attributes to be available via
# dot-notation because the previous implementation assigned to __dict__.
# we should consider removing this if we fix all uses to have properties.
def __getattr__(self, name):
    """Resolve attributes that normal lookup could not find.

    Looks for ``name`` in ``self._contents`` first, then on the class.
    Raises AttributeError if neither has it.
    """
    # `_contents` itself must never be resolved through `self._contents`:
    # if it has not been assigned yet, that lookup would land back in
    # __getattr__ and recurse without end.
    if name != '_contents' and name in self._contents:
        return self._contents[name]
    elif hasattr(self.__class__, name):
        return getattr(self.__class__, name)
    raise AttributeError((
        "'{}' object has no attribute '{}'"
    ).format(type(self).__name__, name))
16 changes: 16 additions & 0 deletions dbt/clients/jinja.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,21 @@ def parse(self, parser):
return node


class DocumentationExtension(jinja2.ext.Extension):
    """Jinja extension for the ``docs`` tag, closed by ``enddocs``.

    Each docs block is turned into a macro that takes no arguments. The
    macro's name is derived from the block's name by
    ``dbt.utils.get_docs_macro_name`` and its body is the block's contents.
    """
    tags = ['docs']

    def parse(self, parser):
        # Consume the opening `docs` token; its line number anchors the node.
        docs_token = next(parser.stream)
        # Only a plain name is accepted as the block's identifier.
        block_name = parser.parse_assign_target(name_only=True).name

        macro = jinja2.nodes.Macro(lineno=docs_token.lineno)
        macro.args = []
        macro.defaults = []
        macro.name = dbt.utils.get_docs_macro_name(block_name)
        # Everything up to `enddocs` is the body; the end tag is dropped.
        macro.body = parser.parse_statements(
            ('name:enddocs',),
            drop_needle=True,
        )
        return macro


def create_macro_capture_env(node):

class ParserMacroCapture(jinja2.Undefined):
Expand Down Expand Up @@ -169,6 +184,7 @@ def get_template(string, ctx, node=None, capture_macros=False):

args['extensions'].append(MaterializationExtension)
args['extensions'].append(OperationExtension)
args['extensions'].append(DocumentationExtension)

env = MacroFuzzEnvironment(**args)

Expand Down
25 changes: 25 additions & 0 deletions dbt/context/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,31 @@ def ref(*args):
return ref


def docs(unparsed, docrefs, column_name=None):
    """Build the callable that records documentation references.

    The returned function accepts one or two positional arguments: the
    documentation name, optionally followed by the documentation package.
    Every call appends a docref dict to ``docrefs``. ``column_name``, when
    not None, is stored on each docref. ``unparsed`` is only handed to
    ``dbt.exceptions.doc_invalid_args`` when the argument count is wrong.
    """
    def do_docs(*args):
        arg_count = len(args)
        if arg_count not in (1, 2):
            dbt.exceptions.doc_invalid_args(unparsed, args)

        docref = {
            # an empty string stands in for "no package given"
            'documentation_package': args[1] if arg_count == 2 else '',
            'documentation_name': args[0],
        }
        if column_name is not None:
            docref['column_name'] = column_name
        docrefs.append(docref)

        # NOTE(review): the original author marked this return value "IDK";
        # it looks like a placeholder - confirm what callers expect.
        return True

    return do_docs


class Config:
def __init__(self, model):
self.model = model
Expand Down
113 changes: 107 additions & 6 deletions dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import dbt.clients.jinja

from dbt.contracts.graph.unparsed import UNPARSED_NODE_CONTRACT, \
UNPARSED_MACRO_CONTRACT
UNPARSED_MACRO_CONTRACT, UNPARSED_DOCUMENTATION_FILE_CONTRACT

from dbt.logger import GLOBAL_LOGGER as logger # noqa

Expand Down Expand Up @@ -87,6 +87,35 @@
}


# Docrefs are not quite like regular references, as they indicate what they
# apply to as well as what they are referring to (so the doc package + doc
# name, but also the column name if relevant). This is because column
# descriptions are rendered separately from their models.
#
# This is the schema for a single docref entry; the 'docrefs' arrays in this
# module's contracts use it as their item schema.
DOCREF_CONTRACT = {
    'type': 'object',
    'properties': {
        'documentation_name': {
            'type': 'string',
            'description': 'The name of the documentation block referred to',
        },
        'documentation_package': {
            'type': 'string',
            'description': (
                'If provided, the documentation package name referred to'
            ),
        },
        'column_name': {
            'type': 'string',
            'description': (
                'If the documentation refers to a column instead of the '
                'model, the column name should be set'
            ),
        },
    },
    # column_name is the only optional key here.
    'required': ['documentation_name', 'documentation_package']
}


PARSED_NODE_CONTRACT = deep_merge(
UNPARSED_NODE_CONTRACT,
{
Expand Down Expand Up @@ -183,6 +212,10 @@
'The path to the patch source if the node was patched'
),
},
'docrefs': {
'type': 'array',
'items': DOCREF_CONTRACT,
}
},
'required': UNPARSED_NODE_CONTRACT['required'] + [
'unique_id', 'fqn', 'schema', 'refs', 'depends_on', 'empty',
Expand Down Expand Up @@ -218,9 +251,14 @@
'columns': {
'type': 'array',
'items': COLUMN_INFO_CONTRACT,
},
'docrefs': {
'type': 'array',
'items': DOCREF_CONTRACT,
}
},
'required': ['name', 'original_file_path', 'description', 'columns'],
'required': ['name', 'original_file_path', 'description', 'columns',
'docrefs'],
}


Expand Down Expand Up @@ -311,6 +349,49 @@ class ParsedNodePatch(APIObject):
}


# This is just the file + its ID: the unparsed documentation file contract,
# deep-merged with the 'name', 'unique_id' and 'block_contents' properties
# defined below.
PARSED_DOCUMENTATION_CONTRACT = deep_merge(
    UNPARSED_DOCUMENTATION_FILE_CONTRACT,
    {
        'properties': {
            'name': {
                'type': 'string',
                'description': (
                    'Name of this node, as referred to by doc() references'
                ),
            },
            'unique_id': {
                'type': 'string',
                'minLength': 1,
                'maxLength': 255,
                'description': (
                    'The unique ID of this node as stored in the manifest'
                ),
            },
            'block_contents': {
                'type': 'string',
                'description': 'The contents of just the docs block',
            },
        },
        # everything the unparsed contract requires, plus the three
        # properties added above
        'required': UNPARSED_DOCUMENTATION_FILE_CONTRACT['required'] + [
            'name', 'unique_id', 'block_contents',
        ],
    }
)


# Schema for a mapping of parsed docs: every key is matched by the catch-all
# '.*' pattern and every value must satisfy PARSED_DOCUMENTATION_CONTRACT.
PARSED_DOCUMENTATIONS_CONTRACT = {
    'type': 'object',
    'additionalProperties': False,
    'description': (
        'A collection of the parsed docs, stored by their unique IDs.'
    ),
    'patternProperties': {
        '.*': PARSED_DOCUMENTATION_CONTRACT,
    },
}


NODE_EDGE_MAP = {
'type': 'object',
'additionalProperties': False,
Expand All @@ -337,14 +418,15 @@ class ParsedNodePatch(APIObject):
'properties': {
'nodes': PARSED_NODES_CONTRACT,
'macros': PARSED_MACROS_CONTRACT,
'docs': PARSED_DOCUMENTATIONS_CONTRACT,
'generated_at': {
'type': 'string',
'format': 'date-time',
},
'parent_map': NODE_EDGE_MAP,
'child_map': NODE_EDGE_MAP,
},
'required': ['nodes', 'macros'],
'required': ['nodes', 'macros', 'docs'],
}


Expand Down Expand Up @@ -384,6 +466,7 @@ def patch(self, patch):
'patch_path': patch.original_file_path,
'description': patch.description,
'columns': patch.columns,
'docrefs': patch.docrefs,
})
# patches always trigger re-validation
self.validate()
Expand All @@ -407,6 +490,10 @@ def generator(self):
self.template, self._contents)


class ParsedDocumentation(APIObject):
    """A parsed docs block; its schema is PARSED_DOCUMENTATION_CONTRACT."""
    SCHEMA = PARSED_DOCUMENTATION_CONTRACT


class ParsedNodes(APIObject):
SCHEMA = PARSED_NODES_CONTRACT

Expand Down Expand Up @@ -437,13 +524,14 @@ def build_edges(nodes):
class ParsedManifest(APIObject):
SCHEMA = PARSED_MANIFEST_CONTRACT
"""The final result of parsing all macros and nodes in a graph."""
def __init__(self, nodes, macros, generated_at):
def __init__(self, nodes, macros, docs, generated_at):
"""The constructor. nodes and macros are dictionaries mapping unique
IDs to ParsedNode and ParsedMacro objects, respectively. generated_at
is a text timestamp in RFC 3339 format.
"""
self.nodes = nodes
self.macros = macros
self.docs = docs
self.generated_at = generated_at
self._contents = {}
super(ParsedManifest, self).__init__()
Expand All @@ -457,15 +545,15 @@ def serialize(self):
return {
'nodes': {k: v.serialize() for k, v in self.nodes.items()},
'macros': {k: v.serialize() for k, v in self.macros.items()},
'docs': {k: v.serialize() for k, v in self.docs.items()},
'parent_map': backward_edges,
'child_map': forward_edges,
'generated_at': self.generated_at,
}

def _find_by_name(self, name, package, subgraph, nodetype):
"""

Find a node by its given name in the appropraite sugraph.
Find a node by its given name in the appropriate subgraph.
"""
if subgraph == 'nodes':
search = self.nodes
Expand All @@ -481,6 +569,19 @@ def _find_by_name(self, name, package, subgraph, nodetype):
package,
nodetype)

def find_docs_by_name(self, name, package=None):
    """Return the first doc in ``self.docs`` matching name and package.

    Each unique ID is split on '.' into a package part and a name part.
    ``package=None`` matches a doc from any package. Returns None when no
    doc matches.
    """
    for unique_id, doc in self.docs.items():
        id_parts = unique_id.split('.')
        # NOTE(review): a reviewer asked whether IDs should have three parts
        # here (something like docs.project_name.docs_name); this code
        # expects exactly two - confirm against where unique_id is built.
        if len(id_parts) != 2:
            dbt.exceptions.raise_compiler_error(
                "documentation names cannot contain '.' characters",
                doc,
            )

        doc_package, doc_name = id_parts

        if doc_name != name:
            continue
        if package in {None, doc_package}:
            return doc
    return None

def find_operation_by_name(self, name, package):
    """Look up an operation by name and package.

    Delegates to ``_find_by_name``, searching the 'macros' subgraph for
    nodes of type ``NodeType.Operation``.
    """
    return self._find_by_name(name, package, 'macros',
                              [NodeType.Operation])
Expand Down
51 changes: 51 additions & 0 deletions dbt/contracts/graph/unparsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,54 @@ class UnparsedNodeUpdate(APIObject):
to be updated, referencing a certain node (specifically, a Model).
"""
SCHEMA = UNPARSED_NODE_UPDATE_CONTRACT


# Schema for an unparsed documentation file: the package it belongs to, where
# it lives on the filesystem, and its raw contents. All six properties are
# required.
UNPARSED_DOCUMENTATION_FILE_CONTRACT = {
    'type': 'object',
    'additionalProperties': False,
    'properties': {
        'package_name': {
            'type': 'string',
        },
        # filesystem
        'root_path': {
            'type': 'string',
            'description': 'The absolute path to the project root',
        },
        'path': {
            'type': 'string',
            'description': (
                'Relative path to the source file from the project root. '
                'Usually the same as original_file_path, but in some cases '
                'dbt will generate a path.'),
        },
        'original_file_path': {
            'type': 'string',
            'description': (
                'Relative path to the originating file from the project root.'
            ),
        },
        'file_contents': {
            'type': 'string',
            'description': (
                'The raw text provided in the documentation block, presumably '
                'markdown.'
            ),
        },
        # TODO: I would like to remove this, but some graph error handling
        # cares about it.
        'resource_type': {
            # the only accepted value is NodeType.Documentation
            'enum': [
                NodeType.Documentation,
            ]
        },
    },
    'required': [
        'package_name', 'root_path', 'path', 'original_file_path',
        'file_contents', 'resource_type'
    ],
}


class UnparsedDocumentationFile(APIObject):
    """An unparsed documentation file.

    Its schema is UNPARSED_DOCUMENTATION_FILE_CONTRACT.
    """
    SCHEMA = UNPARSED_DOCUMENTATION_FILE_CONTRACT
Loading