Skip to content

Commit

Permalink
Merge pull request #1513 from fishtown-analytics/fix/only-parse-docs-blocks
Browse files Browse the repository at this point in the history

Only parse docs blocks contents when reading md files (#988)
  • Loading branch information
beckjake authored Jun 6, 2019
2 parents ddb1785 + a4a9221 commit c1387c5
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 25 deletions.
43 changes: 40 additions & 3 deletions core/dbt/clients/_jinja_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def end_pat(self):
RAW_START_PATTERN = regex(
r'(?:\s*\{\%\-|\{\%)\s*(?P<raw_start>(raw))\s*(?:\-\%\}\s*|\%\})'
)
EXPR_START_PATTERN = regex(r'(?P<expr_start>(\{\{\s*))')
EXPR_END_PATTERN = regex(r'(?P<expr_end>(\s*\}\}))')

BLOCK_START_PATTERN = regex(''.join((
r'(?:\s*\{\%\-|\{\%)\s*',
Expand Down Expand Up @@ -92,6 +94,8 @@ def end_pat(self):
r'"([^"\\]*(?:\\.[^"\\]*)*)"))'
)

QUOTE_START_PATTERN = regex(r'''(?P<quote>(['"]))''')

# any number of non-quote characters, followed by:
# - quote: a quote mark indicating start of a string (you'll want to backtrack
# the regex end on quotes and then match with the string pattern)
Expand Down Expand Up @@ -179,6 +183,31 @@ def _expect_match(self, expected_name, *patterns, **kwargs):
dbt.exceptions.raise_compiler_error(msg)
return match

def handle_expr(self):
    """Consume the body of a jinja expression up to and including its
    closing ``}}``.

    On entry the parser sits just past the opening ``{{``::

        {{ 1 + 2 }}
           ^ right here

    Blocks and comments are not allowed inside an expression, so the
    only thing that can hide a ``}}`` from us is a quoted string --
    think ``{{ 2 * "}}" }}``. Whenever a quote turns up before the
    closing braces, swallow the whole string literal and keep scanning.
    """
    while True:
        found = self._expect_match('}}',
                                   EXPR_END_PATTERN,
                                   QUOTE_START_PATTERN)
        if found.groupdict().get('expr_end') is None:
            # A quote came first. We have not advanced past this match
            # yet, so just consume the entire string literal and resume
            # scanning -- no rewind needed.
            literal = self._expect_match('string', STRING_PATTERN)
            self.advance(literal.end())
        else:
            # Genuine end of the expression: step past the ``}}``.
            self.advance(found.end())
            return

def handle_block(self, match, block_start=None):
"""Handle a block. The current state of the parser should be after the
open block is completed:
Expand All @@ -197,12 +226,18 @@ def handle_block(self, match, block_start=None):

self._block_contents = ''

search = [found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN,
EXPR_START_PATTERN]

# docs and macros do not honor embedded quotes
if found.block_type_name not in ('docs', 'macro'):
# is this right?
search.append(QUOTE_START_PATTERN)

# you can have as many comments in your block as you'd like!
while True:
match = self._expect_match(
'"{}"'.format(found.end_block_type_name),
found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN,
regex('''(?P<quote>(['"]))''')
'"{}"'.format(found.end_block_type_name), *search
)
groups = match.groupdict()
if groups.get('endblock') is not None:
Expand All @@ -218,6 +253,8 @@ def handle_block(self, match, block_start=None):
self.rewind()
match = self._expect_match('any string', STRING_PATTERN)
self.advance(match.end())
elif groups.get('expr_start') is not None:
self.handle_expr()
else:
raise dbt.exceptions.InternalException(
'unhandled regex in handle_block, no match: {}'
Expand Down
2 changes: 1 addition & 1 deletion core/dbt/node_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class NodeType(object):
Macro = 'macro'
Operation = 'operation'
Seed = 'seed'
Documentation = 'documentation'
Documentation = 'docs'
Source = 'source'
RPCCall = 'rpc'

Expand Down
34 changes: 24 additions & 10 deletions core/dbt/parser/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from dbt.parser.base import BaseParser
from dbt.contracts.graph.unparsed import UnparsedDocumentationFile
from dbt.contracts.graph.parsed import ParsedDocumentation
from dbt.clients.jinja import extract_toplevel_blocks, get_template
from dbt.clients import system

import jinja2.runtime
import os
Expand All @@ -16,14 +18,12 @@ def load_file(cls, package_name, root_dir, relative_dirs):
"""
extension = "[!.#~]*.md"

file_matches = dbt.clients.system.find_matching(
root_dir,
relative_dirs,
extension)
file_matches = system.find_matching(root_dir, relative_dirs, extension)

for file_match in file_matches:
file_contents = dbt.clients.system.load_file_contents(
file_match.get('absolute_path'), strip=False)
file_contents = system.load_file_contents(
file_match.get('absolute_path'),
strip=False)

parts = dbt.utils.split_path(file_match.get('relative_path', ''))
name, _ = os.path.splitext(parts[-1])
Expand All @@ -44,12 +44,26 @@ def load_file(cls, package_name, root_dir, relative_dirs):

def parse(self, docfile):
    """Parse a single documentation file, yielding parsed documentation
    nodes for each ``{% docs %}`` block it contains.

    Only the contents of docs blocks are rendered as jinja templates;
    any other (possibly invalid) jinja-looking text in the md file is
    left alone.

    :param docfile: an UnparsedDocumentationFile whose ``file_contents``
        will be scanned for docs blocks.
    :raises dbt.exceptions.CompilationException: if block extraction or
        template rendering fails; the exception is annotated with the
        offending docfile before being re-raised.
    """
    # NOTE(review): the scraped diff interleaved the deleted pre-commit
    # lines with the added ones here (producing a `try` with a statement
    # between two `except` clauses); this is the reconstructed
    # post-commit version.
    try:
        blocks = extract_toplevel_blocks(docfile.file_contents)
    except dbt.exceptions.CompilationException as exc:
        if exc.node is None:
            exc.node = docfile
        raise

    for block in blocks:
        # skip anything that is not a docs block
        if block.block_type_name != NodeType.Documentation:
            continue

        try:
            template = get_template(block.full_block, {})
        except dbt.exceptions.CompilationException as e:
            e.node = docfile
            raise
        # in python 3.x this can just be "yield from" instead of a loop
        for d in self._parse_template_docs(template, docfile):
            yield d

def _parse_template_docs(self, template, docfile):
for key, item in template.module.__dict__.items():
if type(item) != jinja2.runtime.Macro:
continue
Expand Down
1 change: 1 addition & 0 deletions test/integration/029_docs_generate_tests/models/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is a readme.md file with {{ invalid-ish jinja }} in it
12 changes: 6 additions & 6 deletions test/integration/029_docs_generate_tests/test_docs_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1282,7 +1282,7 @@ def expected_postgres_references_manifest(self, model_database=None):
'original_file_path': docs_path,
'package_name': 'test',
'path': 'docs.md',
'resource_type': 'documentation',
'resource_type': 'docs',
'root_path': os.getcwd(),
'unique_id': 'test.ephemeral_summary'
},
Expand All @@ -1293,7 +1293,7 @@ def expected_postgres_references_manifest(self, model_database=None):
'original_file_path': docs_path,
'package_name': 'test',
'path': 'docs.md',
'resource_type': 'documentation',
'resource_type': 'docs',
'root_path': os.getcwd(),
'unique_id': 'test.source_info',
},
Expand All @@ -1304,7 +1304,7 @@ def expected_postgres_references_manifest(self, model_database=None):
'original_file_path': docs_path,
'package_name': 'test',
'path': 'docs.md',
'resource_type': 'documentation',
'resource_type': 'docs',
'root_path': os.getcwd(),
'unique_id': 'test.summary_count'
},
Expand All @@ -1315,7 +1315,7 @@ def expected_postgres_references_manifest(self, model_database=None):
'original_file_path': docs_path,
'package_name': 'test',
'path': 'docs.md',
'resource_type': 'documentation',
'resource_type': 'docs',
'root_path': os.getcwd(),
'unique_id': 'test.summary_first_name'
},
Expand All @@ -1326,7 +1326,7 @@ def expected_postgres_references_manifest(self, model_database=None):
'original_file_path': docs_path,
'package_name': 'test',
'path': 'docs.md',
'resource_type': 'documentation',
'resource_type': 'docs',
'root_path': os.getcwd(),
'unique_id': 'test.table_info'
},
Expand All @@ -1340,7 +1340,7 @@ def expected_postgres_references_manifest(self, model_database=None):
'original_file_path': docs_path,
'package_name': 'test',
'path': 'docs.md',
'resource_type': 'documentation',
'resource_type': 'docs',
'root_path': os.getcwd(),
'unique_id': 'test.view_summary'
},
Expand Down
7 changes: 2 additions & 5 deletions test/unit/test_docs_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,12 @@
import mock
import unittest

from dbt.config import RuntimeConfig
from dbt.node_types import NodeType
import dbt.utils
from dbt.parser import docs
from dbt.contracts.graph.unparsed import UnparsedDocumentationFile

from .utils import config_from_parts_or_dicts

#DocumentationParser


SNOWPLOW_SESSIONS_DOCS = r'''
This table contains one record for every session recorded by Snowplow.
Expand Down Expand Up @@ -98,7 +94,8 @@ def setUp(self):
self.subdir_project_config = config_from_parts_or_dicts(
project=subdir_project, profile=profile_data
)
@mock.patch('dbt.clients.system')

@mock.patch('dbt.parser.docs.system')
def test_load_file(self, system):
system.load_file_contents.return_value = TEST_DOCUMENTATION_FILE
system.find_matching.return_value = [{
Expand Down
22 changes: 22 additions & 0 deletions test/unit/test_jinja.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,28 @@ def test_quoted_endblock_within_block(self):
self.assertEqual(blocks[0].block_type_name, 'myblock')
self.assertEqual(blocks[0].contents, '{% set x = ("{% endmyblock %}") %} ')

def test_docs_block(self):
    """Two docs blocks in one body: comments inside a docs block do not
    hide an enddocs tag, and neither does a stray quote character."""
    body = '{% docs __my_doc__ %} asdf {# nope {% enddocs %}} #} {% enddocs %} {% docs __my_other_doc__ %} asdf "{% enddocs %}'
    found = [
        b for b in extract_toplevel_blocks(body)
        if b.block_type_name != '__dbt__data'
    ]
    expected = [
        ('__my_doc__', ' asdf {# nope {% enddocs %}} #} '),
        ('__my_other_doc__', ' asdf "'),
    ]
    self.assertEqual(len(found), len(expected))
    for block, (name, contents) in zip(found, expected):
        self.assertEqual(block.block_type_name, 'docs')
        self.assertEqual(block.block_name, name)
        self.assertEqual(block.contents, contents)

def test_docs_block_expr(self):
    """An expression inside a docs block may contain strings holding
    both an enddocs tag and closing braces without ending the block."""
    body = '{% docs more_doc %} asdf {{ "{% enddocs %}" ~ "}}" }}{% enddocs %}'
    found = [
        b for b in extract_toplevel_blocks(body)
        if b.block_type_name != '__dbt__data'
    ]
    self.assertEqual(len(found), 1)
    block = found[0]
    self.assertEqual(block.block_type_name, 'docs')
    self.assertEqual(block.block_name, 'more_doc')
    self.assertEqual(block.contents, ' asdf {{ "{% enddocs %}" ~ "}}" }}')


bar_block = '''{% mytype bar %}
{# a comment
that inside it has
Expand Down

0 comments on commit c1387c5

Please sign in to comment.