Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Partial parse updates (#1835) #1836

Merged
merged 1 commit into from
Oct 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/dbt/contracts/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def from_dict(cls, data, validate=True):
class UserConfig(ExtensibleJsonSchemaMixin, Replaceable):
send_anonymous_usage_stats: bool = DEFAULT_SEND_ANONYMOUS_USAGE_STATS
use_colors: bool = DEFAULT_USE_COLORS
partial_parse: Optional[bool] = None
printer_width: Optional[int] = None

def set_values(self, cookie_dir):
Expand Down
2 changes: 1 addition & 1 deletion core/dbt/flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def set_from_args(args):

TEST_NEW_PARSER = getattr(args, 'test_new_parser', TEST_NEW_PARSER)
WRITE_JSON = getattr(args, 'write_json', WRITE_JSON)
PARTIAL_PARSE = getattr(args, 'partial_parse', PARTIAL_PARSE)
PARTIAL_PARSE = getattr(args, 'partial_parse', None)
MP_CONTEXT = _get_context()


Expand Down
21 changes: 18 additions & 3 deletions core/dbt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -789,17 +789,32 @@ def parse_args(args, cls=DBTArgumentParser):
'''
)

p.add_argument(
partial_flag = p.add_mutually_exclusive_group()
partial_flag.add_argument(
'--partial-parse',
action='store_true',
action='store_const',
const=True,
dest='partial_parse',
default=None,
help='''
Allow for partial parsing by looking for and writing to a pickle file
in the target directory.
in the target directory. This overrides the user configuration file.

WARNING: This can result in unexpected behavior if you use env_var()!
'''
)

partial_flag.add_argument(
'--no-partial-parse',
action='store_const',
const=False,
default=None,
dest='partial_parse',
help='''
Disallow partial parsing. This overrides the user configuration file.
'''
)

# if set, run dbt in single-threaded mode: thread count is ignored, and
# calls go through `map` instead of the thread pool. This is useful for
# getting performance information about aspects of dbt that normally run in
Expand Down
32 changes: 29 additions & 3 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

PARTIAL_PARSE_FILE_NAME = 'partial_parse.pickle'
PARSING_STATE = DbtProcessState('parsing')
DEFAULT_PARTIAL_PARSE = False


_parser_types = [
Expand Down Expand Up @@ -174,6 +175,8 @@ def load_only_macros(self) -> Manifest:

def load(self, internal_manifest: Optional[Manifest] = None):
old_results = self.read_parse_results()
if old_results is not None:
logger.debug('Got an acceptable cached parse result')
self._load_macros(old_results, internal_manifest=internal_manifest)
# make a manifest with just the macros to get the context
macro_manifest = Manifest.from_macros(
Expand All @@ -192,11 +195,23 @@ def write_parse_results(self):
with open(path, 'wb') as fp:
pickle.dump(self.results, fp)

def _matching_parse_results(self, result: ParseResult) -> bool:
def matching_parse_results(self, result: ParseResult) -> bool:
"""Compare the global hashes of the read-in parse results' values to
the known ones, and return if it is ok to re-use the results.
"""
try:
if result.dbt_version != __version__:
logger.debug(
'dbt version mismatch: {} != {}, cache invalidated'
.format(result.dbt_version, __version__)
)
return False
except AttributeError:
logger.debug('malformed result file, cache invalidated')
return False

valid = True

if self.results.vars_hash != result.vars_hash:
logger.debug('vars hash mismatch, cache invalidated')
valid = False
Expand Down Expand Up @@ -227,8 +242,19 @@ def _matching_parse_results(self, result: ParseResult) -> bool:
valid = False
return valid

def _partial_parse_enabled(self):
    """Return whether partial parsing should be used for this run.

    Resolution order: the CLI flag (``--partial-parse`` /
    ``--no-partial-parse``) wins, then the user configuration file,
    and finally the module-level default.
    """
    for choice in (dbt.flags.PARTIAL_PARSE,
                   self.root_project.config.partial_parse):
        # both sources use None to mean "not set"; first explicit
        # True/False wins
        if choice is not None:
            return choice
    return DEFAULT_PARTIAL_PARSE

def read_parse_results(self) -> Optional[ParseResult]:
if not dbt.flags.PARTIAL_PARSE:
if not self._partial_parse_enabled():
logger.debug('Partial parsing not enabled')
return None
path = os.path.join(self.root_project.target_path,
PARTIAL_PARSE_FILE_NAME)
Expand All @@ -240,7 +266,7 @@ def read_parse_results(self) -> Optional[ParseResult]:
# keep this check inside the try/except in case something about
# the file has changed in weird ways, perhaps due to being a
# different version of dbt
if self._matching_parse_results(result):
if self.matching_parse_results(result):
return result
except Exception as exc:
logger.debug(
Expand Down
6 changes: 4 additions & 2 deletions core/dbt/parser/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
ParsedSourceDefinition, ParsedAnalysisNode, ParsedHookNode, ParsedRPCNode,
ParsedModelNode, ParsedSeedNode, ParsedTestNode, ParsedSnapshotNode,
)
from dbt.contracts.util import Writable
from dbt.contracts.util import Writable, Replaceable
from dbt.exceptions import (
raise_duplicate_resource_name, raise_duplicate_patch_name,
CompilationException, InternalException
)
from dbt.version import __version__


# Parsers can return anything as long as it's a unique ID
Expand Down Expand Up @@ -43,7 +44,7 @@ def dict_field():


@dataclass
class ParseResult(JsonSchemaMixin, Writable):
class ParseResult(JsonSchemaMixin, Writable, Replaceable):
vars_hash: FileHash
profile_hash: FileHash
project_hashes: MutableMapping[str, FileHash]
Expand All @@ -54,6 +55,7 @@ class ParseResult(JsonSchemaMixin, Writable):
patches: MutableMapping[str, ParsedNodePatch] = dict_field()
files: MutableMapping[str, SourceFile] = dict_field()
disabled: MutableMapping[str, List[ParsedNode]] = dict_field()
dbt_version: str = __version__

def get_file(self, source_file: SourceFile) -> SourceFile:
key = source_file.search_key
Expand Down
30 changes: 26 additions & 4 deletions test/unit/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def tearDown(self):
self.mock_filesystem_constructor.stop()
self.mock_hook_constructor.stop()
self.load_patch.stop()
self.load_source_file_ptcher.stop()
self.load_source_file_patcher.stop()

def setUp(self):
dbt.flags.STRICT_MODE = True
Expand All @@ -58,6 +58,7 @@ def setUp(self):
self.get_adapter_patcher_cmn = patch('dbt.context.common.get_adapter')
self.factory_cmn = self.get_adapter_patcher_cmn.start()


def mock_write_gpickle(graph, outfile):
self.graph_result = graph
self.mock_write_gpickle = self.write_gpickle_patcher.start()
Expand Down Expand Up @@ -86,12 +87,19 @@ def _load_projects(config, paths):

self.mock_models = []

def _mock_parse_result(config, all_projects):
return ParseResult(
vars_hash=FileHash.from_contents('vars'),
project_hashes={name: FileHash.from_contents(name) for name in all_projects},
profile_hash=FileHash.from_contents('profile'),
)

self.load_patch = patch('dbt.parser.manifest.make_parse_result')
self.mock_parse_result = self.load_patch.start()
self.mock_parse_result.return_value = ParseResult.rpc()
self.mock_parse_result.side_effect = _mock_parse_result

self.load_source_file_ptcher = patch.object(BaseParser, 'load_file')
self.mock_source_file = self.load_source_file_ptcher.start()
self.load_source_file_patcher = patch.object(BaseParser, 'load_file')
self.mock_source_file = self.load_source_file_patcher.start()
self.mock_source_file.side_effect = lambda path: [n for n in self.mock_models if n.path == path][0]

def filesystem_iter(iter_self):
Expand Down Expand Up @@ -291,3 +299,17 @@ def test__dependency_list(self):
queue.get(block=False)
queue.mark_done(got.unique_id)
self.assertTrue(queue.empty())

def test__partial_parse(self):
    """Cached parse results must match themselves and be invalidated
    by any dbt version mismatch, whether older or newer."""
    config = self.get_config()

    manifest_loader = dbt.parser.manifest.ManifestLoader(
        config, {config.project_name: config}
    )
    manifest_loader.load()
    manifest_loader.create_manifest()
    parse_results = manifest_loader.results

    # a fresh result is always an acceptable cache for itself
    self.assertTrue(manifest_loader.matching_parse_results(parse_results))

    # a result written by a different dbt version — in either
    # direction — must not be reused
    stale_low = parse_results.replace(dbt_version='0.0.1a1')
    self.assertFalse(manifest_loader.matching_parse_results(stale_low))
    stale_high = parse_results.replace(dbt_version='99999.99.99')
    self.assertFalse(manifest_loader.matching_parse_results(stale_high))