dbt-labs · beckjake · Aug 2, 2018 · Jul 26, 2018 · Jul 26, 2018 · Jul 26, 2018
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,9 +2,10 @@
 
 ### Features
 
-- Extend catalog and manifest to also support Snowflake, BigQuery, and Redshift
-- Add a 'generated_at' field to both the manifest and the catalog.
-- Version 2 of schema.yml, which allows users to create table and column comments that end up in the manifest
+- Extend catalog and manifest to also support Snowflake, BigQuery, and Redshift, in addition to existing Postgres support ([#866](https://github.com/fishtown-analytics/dbt/pull/866), [#857](https://github.com/fishtown-analytics/dbt/pull/857), [#849](https://github.com/fishtown-analytics/dbt/pull/849))
+- Add a 'generated_at' field to both the manifest and the catalog. ([#887](https://github.com/fishtown-analytics/dbt/pull/877))
+- Version 2 of schema.yml, which allows users to create table and column comments that end up in the manifest ([#880](https://github.com/fishtown-analytics/dbt/pull/880))
+- Add `docs` blocks that users can put into `.md` files and `doc()` value for schema v2 description fields ([#888](https://github.com/fishtown-analytics/dbt/pull/888))
 
 ## dbt 0.10.2 (unreleased, codename: Betsy Ross)
 

diff --git a/dbt/api/object.py b/dbt/api/object.py
@@ -102,12 +102,17 @@ def get(self, key, default=None):
         except KeyError:
             return default
 
+    def set(self, key, value):
+        self._contents[key] = value
+
     # most users of APIObject also expect the attributes to be available via
     # dot-notation because the previous implementation assigned to __dict__.
     # we should consider removing this if we fix all uses to have properties.
     def __getattr__(self, name):
-        if name in self._contents:
+        if name != '_contents' and name in self._contents:
             return self._contents[name]
+        elif hasattr(self.__class__, name):
+            return getattr(self.__class__, name)
         raise AttributeError((
             "'{}' object has no attribute '{}'"
         ).format(type(self).__name__, name))
diff --git a/dbt/clients/jinja.py b/dbt/clients/jinja.py
@@ -126,6 +126,21 @@ def parse(self, parser):
         return node
 
 
+class DocumentationExtension(jinja2.ext.Extension):
+    tags = ['docs']
+
+    def parse(self, parser):
+        node = jinja2.nodes.Macro(lineno=next(parser.stream).lineno)
+        docs_name = parser.parse_assign_target(name_only=True).name
+
+        node.args = []
+        node.defaults = []
+        node.name = dbt.utils.get_docs_macro_name(docs_name)
+        node.body = parser.parse_statements(('name:enddocs',),
+                                            drop_needle=True)
+        return node
+
+
 def create_macro_capture_env(node):
 
     class ParserMacroCapture(jinja2.Undefined):
@@ -169,6 +184,7 @@ def get_template(string, ctx, node=None, capture_macros=False):
 
         args['extensions'].append(MaterializationExtension)
         args['extensions'].append(OperationExtension)
+        args['extensions'].append(DocumentationExtension)
 
         env = MacroFuzzEnvironment(**args)
 

diff --git a/dbt/context/parser.py b/dbt/context/parser.py
@@ -20,6 +20,31 @@ def ref(*args):
     return ref
 
 
+def docs(unparsed, docrefs, column_name=None):
+
+    def do_docs(*args):
+        if len(args) != 1 and len(args) != 2:
+            dbt.exceptions.doc_invalid_args(unparsed, args)
+        doc_package_name = ''
+        doc_name = args[0]
+        if len(args) == 2:
+            doc_package_name = args[1]
+
+        docref = {
+            'documentation_package': doc_package_name,
+            'documentation_name': doc_name,
+        }
+        if column_name is not None:
+            docref['column_name'] = column_name
+
+        docrefs.append(docref)
+
+        # placeholder result; the docref side effect is what matters (confirm)
+        return True
+
+    return do_docs
+
+
 class Config:
     def __init__(self, model):
         self.model = model

diff --git a/dbt/contracts/graph/parsed.py b/dbt/contracts/graph/parsed.py
@@ -7,7 +7,7 @@
 import dbt.clients.jinja
 
 from dbt.contracts.graph.unparsed import UNPARSED_NODE_CONTRACT, \
-    UNPARSED_MACRO_CONTRACT
+    UNPARSED_MACRO_CONTRACT, UNPARSED_DOCUMENTATION_FILE_CONTRACT
 
 from dbt.logger import GLOBAL_LOGGER as logger  # noqa
 
@@ -87,6 +87,35 @@
 }
 
 
+# Docrefs are not quite like regular references, as they indicate what they
+# apply to as well as what they are referring to (so the doc package + doc
+# name, but also the column name if relevant). This is because column
+# descriptions are rendered separately from their models.
+DOCREF_CONTRACT = {
+    'type': 'object',
+    'properties': {
+        'documentation_name': {
+            'type': 'string',
+            'description': 'The name of the documentation block referred to',
+        },
+        'documentation_package': {
+            'type': 'string',
+            'description': (
+                'If provided, the documentation package name referred to'
+            ),
+        },
+        'column_name': {
+            'type': 'string',
+            'description': (
+                'If the documentation refers to a column instead of the '
+                'model, the column name should be set'
+            ),
+        },
+    },
+    'required': ['documentation_name', 'documentation_package']
+}
+
+
 PARSED_NODE_CONTRACT = deep_merge(
     UNPARSED_NODE_CONTRACT,
     {
@@ -183,6 +212,10 @@
                     'The path to the patch source if the node was patched'
                 ),
             },
+            'docrefs': {
+                'type': 'array',
+                'items': DOCREF_CONTRACT,
+            }
         },
         'required': UNPARSED_NODE_CONTRACT['required'] + [
             'unique_id', 'fqn', 'schema', 'refs', 'depends_on', 'empty',
@@ -218,9 +251,14 @@
         'columns': {
             'type': 'array',
             'items': COLUMN_INFO_CONTRACT,
+        },
+        'docrefs': {
+            'type': 'array',
+            'items': DOCREF_CONTRACT,
         }
     },
-    'required': ['name', 'original_file_path', 'description', 'columns'],
+    'required': ['name', 'original_file_path', 'description', 'columns',
+                 'docrefs'],
 }
 
 
@@ -311,6 +349,49 @@ class ParsedNodePatch(APIObject):
 }
 
 
+# This is just the file + its ID
+PARSED_DOCUMENTATION_CONTRACT = deep_merge(
+    UNPARSED_DOCUMENTATION_FILE_CONTRACT,
+    {
+        'properties': {
+            'name': {
+                'type': 'string',
+                'description': (
+                    'Name of this node, as referred to by doc() references'
+                ),
+            },
+            'unique_id': {
+                'type': 'string',
+                'minLength': 1,
+                'maxLength': 255,
+                'description': (
+                    'The unique ID of this node as stored in the manifest'
+                ),
+            },
+            'block_contents': {
+                'type': 'string',
+                'description': 'The contents of just the docs block',
+            },
+        },
+        'required': UNPARSED_DOCUMENTATION_FILE_CONTRACT['required'] + [
+            'name', 'unique_id', 'block_contents',
+        ],
+    }
+)
+
+
+PARSED_DOCUMENTATIONS_CONTRACT = {
+    'type': 'object',
+    'additionalProperties': False,
+    'description': (
+        'A collection of the parsed docs, stored by their uniqe IDs.'
+    ),
+    'patternProperties': {
+        '.*': PARSED_DOCUMENTATION_CONTRACT,
+    },
+}
+
+
 NODE_EDGE_MAP = {
     'type': 'object',
     'additionalProperties': False,
@@ -337,14 +418,15 @@ class ParsedNodePatch(APIObject):
     'properties': {
         'nodes': PARSED_NODES_CONTRACT,
         'macros': PARSED_MACROS_CONTRACT,
+        'docs': PARSED_DOCUMENTATIONS_CONTRACT,
         'generated_at': {
             'type': 'string',
             'format': 'date-time',
         },
         'parent_map': NODE_EDGE_MAP,
         'child_map': NODE_EDGE_MAP,
     },
-    'required': ['nodes', 'macros'],
+    'required': ['nodes', 'macros', 'docs'],
 }
 
 
@@ -384,6 +466,7 @@ def patch(self, patch):
             'patch_path': patch.original_file_path,
             'description': patch.description,
             'columns': patch.columns,
+            'docrefs': patch.docrefs,
         })
         # patches always trigger re-validation
         self.validate()
@@ -407,6 +490,10 @@ def generator(self):
             self.template, self._contents)
 
 
+class ParsedDocumentation(APIObject):
+    SCHEMA = PARSED_DOCUMENTATION_CONTRACT
+
+
 class ParsedNodes(APIObject):
     SCHEMA = PARSED_NODES_CONTRACT
 
@@ -437,13 +524,14 @@ def build_edges(nodes):
 class ParsedManifest(APIObject):
     SCHEMA = PARSED_MANIFEST_CONTRACT
     """The final result of parsing all macros and nodes in a graph."""
-    def __init__(self, nodes, macros, generated_at):
+    def __init__(self, nodes, macros, docs, generated_at):
         """The constructor. nodes and macros are dictionaries mapping unique
         IDs to ParsedNode and ParsedMacro objects, respectively. generated_at
         is a text timestamp in RFC 3339 format.
         """
         self.nodes = nodes
         self.macros = macros
+        self.docs = docs
         self.generated_at = generated_at
         self._contents = {}
         super(ParsedManifest, self).__init__()
@@ -457,15 +545,15 @@ def serialize(self):
         return {
             'nodes': {k: v.serialize() for k, v in self.nodes.items()},
             'macros': {k: v.serialize() for k, v in self.macros.items()},
+            'docs': {k: v.serialize() for k, v in self.docs.items()},
             'parent_map': backward_edges,
             'child_map': forward_edges,
             'generated_at': self.generated_at,
         }
 
     def _find_by_name(self, name, package, subgraph, nodetype):
         """
-
-        Find a node by its given name in the appropraite sugraph.
+        Find a node by its given name in the appropriate subgraph.
         """
         if subgraph == 'nodes':
             search = self.nodes
@@ -481,6 +569,19 @@ def _find_by_name(self, name, package, subgraph, nodetype):
             package,
             nodetype)
 
+    def find_docs_by_name(self, name, package=None):
+        for unique_id, doc in self.docs.items():
+            parts = unique_id.split('.')
+            if len(parts) != 2:
+                msg = "documentation names cannot contain '.' characters"
+                dbt.exceptions.raise_compiler_error(msg, doc)
+
+            found_package, found_node = parts
+
+            if (name == found_node and package in {None, found_package}):
+                return doc
+        return None
+
     def find_operation_by_name(self, name, package):
         return self._find_by_name(name, package, 'macros',
                                   [NodeType.Operation])

diff --git a/dbt/contracts/graph/unparsed.py b/dbt/contracts/graph/unparsed.py
@@ -160,3 +160,54 @@ class UnparsedNodeUpdate(APIObject):
     to be updated, referencing a certain node (specifically, a Model).
     """
     SCHEMA = UNPARSED_NODE_UPDATE_CONTRACT
+
+
+UNPARSED_DOCUMENTATION_FILE_CONTRACT = {
+    'type': 'object',
+    'additionalProperties': False,
+    'properties': {
+        'package_name': {
+            'type': 'string',
+        },
+        # filesystem
+        'root_path': {
+            'type': 'string',
+            'description': 'The absolute path to the project root',
+        },
+        'path': {
+            'type': 'string',
+            'description': (
+                'Relative path to the source file from the project root. '
+                'Usually the same as original_file_path, but in some cases '
+                'dbt will generate a path.'),
+        },
+        'original_file_path': {
+            'type': 'string',
+            'description': (
+                'Relative path to the originating file from the project root.'
+                ),
+        },
+        'file_contents': {
+            'type': 'string',
+            'description': (
+                'The raw text provided in the documentation block, presumably '
+                'markdown.'
+            ),
+        },
+        # TODO: I would like to remove this, but some graph error handling
+        # cares about it.
+        'resource_type': {
+            'enum': [
+                NodeType.Documentation,
+            ]
+        },
+    },
+    'required': [
+        'package_name', 'root_path', 'path', 'original_file_path',
+        'file_contents', 'resource_type'
+    ],
+}
+
+
+class UnparsedDocumentationFile(APIObject):
+    SCHEMA = UNPARSED_DOCUMENTATION_FILE_CONTRACT