From f32519b156c220fa26470be49b6d83df944b67fe Mon Sep 17 00:00:00 2001 From: Jacob Beck <jake@fishtownanalytics.com> Date: Tue, 15 Oct 2019 18:03:40 -0600 Subject: [PATCH] Partial parse updates Use the user's profile config and the CLI arguments to determine whether to try partial parsing - CLI wins Check the dbt version number as part of deciding if a cached result is ok --- core/dbt/contracts/project.py | 1 + core/dbt/flags.py | 2 +- core/dbt/main.py | 21 ++++++++++++++++++--- core/dbt/parser/manifest.py | 32 +++++++++++++++++++++++++++++--- core/dbt/parser/results.py | 6 ++++-- test/unit/test_graph.py | 30 ++++++++++++++++++++++++++---- 6 files changed, 79 insertions(+), 13 deletions(-) diff --git a/core/dbt/contracts/project.py b/core/dbt/contracts/project.py index b3119e86424..b7f27ac282a 100644 --- a/core/dbt/contracts/project.py +++ b/core/dbt/contracts/project.py @@ -174,6 +174,7 @@ def from_dict(cls, data, validate=True): class UserConfig(ExtensibleJsonSchemaMixin, Replaceable): send_anonymous_usage_stats: bool = DEFAULT_SEND_ANONYMOUS_USAGE_STATS use_colors: bool = DEFAULT_USE_COLORS + partial_parse: Optional[bool] = None printer_width: Optional[int] = None def set_values(self, cookie_dir): diff --git a/core/dbt/flags.py b/core/dbt/flags.py index aef5a3f1664..5d0a48c237f 100644 --- a/core/dbt/flags.py +++ b/core/dbt/flags.py @@ -53,7 +53,7 @@ def set_from_args(args): TEST_NEW_PARSER = getattr(args, 'test_new_parser', TEST_NEW_PARSER) WRITE_JSON = getattr(args, 'write_json', WRITE_JSON) - PARTIAL_PARSE = getattr(args, 'partial_parse', PARTIAL_PARSE) + PARTIAL_PARSE = getattr(args, 'partial_parse', None) MP_CONTEXT = _get_context() diff --git a/core/dbt/main.py b/core/dbt/main.py index d6341d73403..fcd7d504fd0 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -789,17 +789,32 @@ def parse_args(args, cls=DBTArgumentParser): ''' ) - p.add_argument( + partial_flag = p.add_mutually_exclusive_group() + partial_flag.add_argument( '--partial-parse', 
action='store_true', + action='store_const', + const=True, + dest='partial_parse', + default=None, help=''' Allow for partial parsing by looking for and writing to a pickle file - in the target directory. + in the target directory. This overrides the user configuration file. WARNING: This can result in unexpected behavior if you use env_var()! ''' ) + partial_flag.add_argument( + '--no-partial-parse', + action='store_const', + const=False, + default=None, + dest='partial_parse', + help=''' + Disallow partial parsing. This overrides the user configuration file. + ''' + ) + # if set, run dbt in single-threaded mode: thread count is ignored, and # calls go through `map` instead of the thread pool. This is useful for # getting performance information about aspects of dbt that normally run in diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index af97ec2f50f..5f332329681 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -32,6 +32,7 @@ PARTIAL_PARSE_FILE_NAME = 'partial_parse.pickle' PARSING_STATE = DbtProcessState('parsing') +DEFAULT_PARTIAL_PARSE = False _parser_types = [ @@ -174,6 +175,8 @@ def load_only_macros(self) -> Manifest: def load(self, internal_manifest: Optional[Manifest] = None): old_results = self.read_parse_results() + if old_results is not None: + logger.debug('Got an acceptable cached parse result') self._load_macros(old_results, internal_manifest=internal_manifest) # make a manifest with just the macros to get the context macro_manifest = Manifest.from_macros( @@ -192,11 +195,23 @@ def write_parse_results(self): with open(path, 'wb') as fp: pickle.dump(self.results, fp) - def _matching_parse_results(self, result: ParseResult) -> bool: + def matching_parse_results(self, result: ParseResult) -> bool: """Compare the global hashes of the read-in parse results' values to the known ones, and return if it is ok to re-use the results. 
""" + try: + if result.dbt_version != __version__: + logger.debug( + 'dbt version mismatch: {} != {}, cache invalidated' + .format(result.dbt_version, __version__) + ) + return False + except AttributeError: + logger.debug('malformed result file, cache invalidated') + return False + valid = True + if self.results.vars_hash != result.vars_hash: logger.debug('vars hash mismatch, cache invalidated') valid = False @@ -227,8 +242,19 @@ def _matching_parse_results(self, result: ParseResult) -> bool: valid = False return valid + def _partial_parse_enabled(self): + # if the CLI is set, follow that + if dbt.flags.PARTIAL_PARSE is not None: + return dbt.flags.PARTIAL_PARSE + # if the config is set, follow that + elif self.root_project.config.partial_parse is not None: + return self.root_project.config.partial_parse + else: + return DEFAULT_PARTIAL_PARSE + def read_parse_results(self) -> Optional[ParseResult]: - if not dbt.flags.PARTIAL_PARSE: + if not self._partial_parse_enabled(): + logger.debug('Partial parsing not enabled') return None path = os.path.join(self.root_project.target_path, PARTIAL_PARSE_FILE_NAME) @@ -240,7 +266,7 @@ def read_parse_results(self) -> Optional[ParseResult]: # keep this check inside the try/except in case something about # the file has changed in weird ways, perhaps due to being a # different version of dbt - if self._matching_parse_results(result): + if self.matching_parse_results(result): return result except Exception as exc: logger.debug( diff --git a/core/dbt/parser/results.py b/core/dbt/parser/results.py index 437c0f26a83..b594159730e 100644 --- a/core/dbt/parser/results.py +++ b/core/dbt/parser/results.py @@ -9,11 +9,12 @@ ParsedSourceDefinition, ParsedAnalysisNode, ParsedHookNode, ParsedRPCNode, ParsedModelNode, ParsedSeedNode, ParsedTestNode, ParsedSnapshotNode, ) -from dbt.contracts.util import Writable +from dbt.contracts.util import Writable, Replaceable from dbt.exceptions import ( raise_duplicate_resource_name, 
raise_duplicate_patch_name, CompilationException, InternalException ) +from dbt.version import __version__ # Parsers can return anything as long as it's a unique ID @@ -43,7 +44,7 @@ def dict_field(): @dataclass -class ParseResult(JsonSchemaMixin, Writable): +class ParseResult(JsonSchemaMixin, Writable, Replaceable): vars_hash: FileHash profile_hash: FileHash project_hashes: MutableMapping[str, FileHash] @@ -54,6 +55,7 @@ class ParseResult(JsonSchemaMixin, Writable): patches: MutableMapping[str, ParsedNodePatch] = dict_field() files: MutableMapping[str, SourceFile] = dict_field() disabled: MutableMapping[str, List[ParsedNode]] = dict_field() + dbt_version: str = __version__ def get_file(self, source_file: SourceFile) -> SourceFile: key = source_file.search_key diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py index c9ac80ae495..95c0e196ed3 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -37,7 +37,7 @@ def tearDown(self): self.mock_filesystem_constructor.stop() self.mock_hook_constructor.stop() self.load_patch.stop() - self.load_source_file_ptcher.stop() + self.load_source_file_patcher.stop() def setUp(self): dbt.flags.STRICT_MODE = True @@ -58,6 +58,7 @@ def setUp(self): self.get_adapter_patcher_cmn = patch('dbt.context.common.get_adapter') self.factory_cmn = self.get_adapter_patcher_cmn.start() + def mock_write_gpickle(graph, outfile): self.graph_result = graph self.mock_write_gpickle = self.write_gpickle_patcher.start() @@ -86,12 +87,19 @@ def _load_projects(config, paths): self.mock_models = [] + def _mock_parse_result(config, all_projects): + return ParseResult( + vars_hash=FileHash.from_contents('vars'), + project_hashes={name: FileHash.from_contents(name) for name in all_projects}, + profile_hash=FileHash.from_contents('profile'), + ) + self.load_patch = patch('dbt.parser.manifest.make_parse_result') self.mock_parse_result = self.load_patch.start() - self.mock_parse_result.return_value = ParseResult.rpc() + 
self.mock_parse_result.side_effect = _mock_parse_result - self.load_source_file_ptcher = patch.object(BaseParser, 'load_file') - self.mock_source_file = self.load_source_file_ptcher.start() + self.load_source_file_patcher = patch.object(BaseParser, 'load_file') + self.mock_source_file = self.load_source_file_patcher.start() self.mock_source_file.side_effect = lambda path: [n for n in self.mock_models if n.path == path][0] def filesystem_iter(iter_self): @@ -291,3 +299,17 @@ def test__dependency_list(self): queue.get(block=False) queue.mark_done(got.unique_id) self.assertTrue(queue.empty()) + + def test__partial_parse(self): + config = self.get_config() + + loader = dbt.parser.manifest.ManifestLoader(config, {config.project_name: config}) + loader.load() + loader.create_manifest() + results = loader.results + + self.assertTrue(loader.matching_parse_results(results)) + too_low = results.replace(dbt_version='0.0.1a1') + self.assertFalse(loader.matching_parse_results(too_low)) + too_high = results.replace(dbt_version='99999.99.99') + self.assertFalse(loader.matching_parse_results(too_high))