From f32519b156c220fa26470be49b6d83df944b67fe Mon Sep 17 00:00:00 2001
From: Jacob Beck <jake@fishtownanalytics.com>
Date: Tue, 15 Oct 2019 18:03:40 -0600
Subject: [PATCH] Partial parse updates

Use the user's profile config and the CLI arguments to determine whether to try partial parsing
  - CLI wins
Check the dbt version number as part of deciding if a cached result is ok
---
 core/dbt/contracts/project.py |  1 +
 core/dbt/flags.py             |  2 +-
 core/dbt/main.py              | 21 ++++++++++++++++++---
 core/dbt/parser/manifest.py   | 32 +++++++++++++++++++++++++++++---
 core/dbt/parser/results.py    |  6 ++++--
 test/unit/test_graph.py       | 30 ++++++++++++++++++++++++++----
 6 files changed, 79 insertions(+), 13 deletions(-)

diff --git a/core/dbt/contracts/project.py b/core/dbt/contracts/project.py
index b3119e86424..b7f27ac282a 100644
--- a/core/dbt/contracts/project.py
+++ b/core/dbt/contracts/project.py
@@ -174,6 +174,7 @@ def from_dict(cls, data, validate=True):
 class UserConfig(ExtensibleJsonSchemaMixin, Replaceable):
     send_anonymous_usage_stats: bool = DEFAULT_SEND_ANONYMOUS_USAGE_STATS
     use_colors: bool = DEFAULT_USE_COLORS
+    partial_parse: Optional[bool] = None
     printer_width: Optional[int] = None
 
     def set_values(self, cookie_dir):
diff --git a/core/dbt/flags.py b/core/dbt/flags.py
index aef5a3f1664..5d0a48c237f 100644
--- a/core/dbt/flags.py
+++ b/core/dbt/flags.py
@@ -53,7 +53,7 @@ def set_from_args(args):
 
     TEST_NEW_PARSER = getattr(args, 'test_new_parser', TEST_NEW_PARSER)
     WRITE_JSON = getattr(args, 'write_json', WRITE_JSON)
-    PARTIAL_PARSE = getattr(args, 'partial_parse', PARTIAL_PARSE)
+    PARTIAL_PARSE = getattr(args, 'partial_parse', None)
     MP_CONTEXT = _get_context()
 
 
diff --git a/core/dbt/main.py b/core/dbt/main.py
index d6341d73403..fcd7d504fd0 100644
--- a/core/dbt/main.py
+++ b/core/dbt/main.py
@@ -789,17 +789,32 @@ def parse_args(args, cls=DBTArgumentParser):
         '''
     )
 
-    p.add_argument(
+    partial_flag = p.add_mutually_exclusive_group()
+    partial_flag.add_argument(
         '--partial-parse',
-        action='store_true',
+        action='store_const',
+        const=True,
+        dest='partial_parse',
+        default=None,
         help='''
         Allow for partial parsing by looking for and writing to a pickle file
-        in the target directory.
+        in the target directory. This overrides the user configuration file.
 
         WARNING: This can result in unexpected behavior if you use env_var()!
         '''
     )
 
+    partial_flag.add_argument(
+        '--no-partial-parse',
+        action='store_const',
+        const=False,
+        default=None,
+        dest='partial_parse',
+        help='''
+        Disallow partial parsing. This overrides the user configuration file.
+        '''
+    )
+
     # if set, run dbt in single-threaded mode: thread count is ignored, and
     # calls go through `map` instead of the thread pool. This is useful for
     # getting performance information about aspects of dbt that normally run in
diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py
index af97ec2f50f..5f332329681 100644
--- a/core/dbt/parser/manifest.py
+++ b/core/dbt/parser/manifest.py
@@ -32,6 +32,7 @@
 
 PARTIAL_PARSE_FILE_NAME = 'partial_parse.pickle'
 PARSING_STATE = DbtProcessState('parsing')
+DEFAULT_PARTIAL_PARSE = False
 
 
 _parser_types = [
@@ -174,6 +175,8 @@ def load_only_macros(self) -> Manifest:
 
     def load(self, internal_manifest: Optional[Manifest] = None):
         old_results = self.read_parse_results()
+        if old_results is not None:
+            logger.debug('Got an acceptable cached parse result')
         self._load_macros(old_results, internal_manifest=internal_manifest)
         # make a manifest with just the macros to get the context
         macro_manifest = Manifest.from_macros(
@@ -192,11 +195,23 @@ def write_parse_results(self):
         with open(path, 'wb') as fp:
             pickle.dump(self.results, fp)
 
-    def _matching_parse_results(self, result: ParseResult) -> bool:
+    def matching_parse_results(self, result: ParseResult) -> bool:
         """Compare the global hashes of the read-in parse results' values to
         the known ones, and return if it is ok to re-use the results.
         """
+        try:
+            if result.dbt_version != __version__:
+                logger.debug(
+                    'dbt version mismatch: {} != {}, cache invalidated'
+                    .format(result.dbt_version, __version__)
+                )
+                return False
+        except AttributeError:
+            logger.debug('malformed result file, cache invalidated')
+            return False
+
         valid = True
+
         if self.results.vars_hash != result.vars_hash:
             logger.debug('vars hash mismatch, cache invalidated')
             valid = False
@@ -227,8 +242,19 @@ def _matching_parse_results(self, result: ParseResult) -> bool:
                     valid = False
         return valid
 
+    def _partial_parse_enabled(self):
+        # if the CLI is set, follow that
+        if dbt.flags.PARTIAL_PARSE is not None:
+            return dbt.flags.PARTIAL_PARSE
+        # if the config is set, follow that
+        elif self.root_project.config.partial_parse is not None:
+            return self.root_project.config.partial_parse
+        else:
+            return DEFAULT_PARTIAL_PARSE
+
     def read_parse_results(self) -> Optional[ParseResult]:
-        if not dbt.flags.PARTIAL_PARSE:
+        if not self._partial_parse_enabled():
+            logger.debug('Partial parsing not enabled')
             return None
         path = os.path.join(self.root_project.target_path,
                             PARTIAL_PARSE_FILE_NAME)
@@ -240,7 +266,7 @@ def read_parse_results(self) -> Optional[ParseResult]:
                 # keep this check inside the try/except in case something about
                 # the file has changed in weird ways, perhaps due to being a
                 # different version of dbt
-                if self._matching_parse_results(result):
+                if self.matching_parse_results(result):
                     return result
             except Exception as exc:
                 logger.debug(
diff --git a/core/dbt/parser/results.py b/core/dbt/parser/results.py
index 437c0f26a83..b594159730e 100644
--- a/core/dbt/parser/results.py
+++ b/core/dbt/parser/results.py
@@ -9,11 +9,12 @@
     ParsedSourceDefinition, ParsedAnalysisNode, ParsedHookNode, ParsedRPCNode,
     ParsedModelNode, ParsedSeedNode, ParsedTestNode, ParsedSnapshotNode,
 )
-from dbt.contracts.util import Writable
+from dbt.contracts.util import Writable, Replaceable
 from dbt.exceptions import (
     raise_duplicate_resource_name, raise_duplicate_patch_name,
     CompilationException, InternalException
 )
+from dbt.version import __version__
 
 
 # Parsers can return anything as long as it's a unique ID
@@ -43,7 +44,7 @@ def dict_field():
 
 
 @dataclass
-class ParseResult(JsonSchemaMixin, Writable):
+class ParseResult(JsonSchemaMixin, Writable, Replaceable):
     vars_hash: FileHash
     profile_hash: FileHash
     project_hashes: MutableMapping[str, FileHash]
@@ -54,6 +55,7 @@ class ParseResult(JsonSchemaMixin, Writable):
     patches: MutableMapping[str, ParsedNodePatch] = dict_field()
     files: MutableMapping[str, SourceFile] = dict_field()
     disabled: MutableMapping[str, List[ParsedNode]] = dict_field()
+    dbt_version: str = __version__
 
     def get_file(self, source_file: SourceFile) -> SourceFile:
         key = source_file.search_key
diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py
index c9ac80ae495..95c0e196ed3 100644
--- a/test/unit/test_graph.py
+++ b/test/unit/test_graph.py
@@ -37,7 +37,7 @@ def tearDown(self):
         self.mock_filesystem_constructor.stop()
         self.mock_hook_constructor.stop()
         self.load_patch.stop()
-        self.load_source_file_ptcher.stop()
+        self.load_source_file_patcher.stop()
 
     def setUp(self):
         dbt.flags.STRICT_MODE = True
@@ -58,6 +58,7 @@ def setUp(self):
         self.get_adapter_patcher_cmn = patch('dbt.context.common.get_adapter')
         self.factory_cmn = self.get_adapter_patcher_cmn.start()
 
+
         def mock_write_gpickle(graph, outfile):
             self.graph_result = graph
         self.mock_write_gpickle = self.write_gpickle_patcher.start()
@@ -86,12 +87,19 @@ def _load_projects(config, paths):
 
         self.mock_models = []
 
+        def _mock_parse_result(config, all_projects):
+            return ParseResult(
+                vars_hash=FileHash.from_contents('vars'),
+                project_hashes={name: FileHash.from_contents(name) for name in all_projects},
+                profile_hash=FileHash.from_contents('profile'),
+            )
+
         self.load_patch = patch('dbt.parser.manifest.make_parse_result')
         self.mock_parse_result = self.load_patch.start()
-        self.mock_parse_result.return_value = ParseResult.rpc()
+        self.mock_parse_result.side_effect = _mock_parse_result
 
-        self.load_source_file_ptcher = patch.object(BaseParser, 'load_file')
-        self.mock_source_file = self.load_source_file_ptcher.start()
+        self.load_source_file_patcher = patch.object(BaseParser, 'load_file')
+        self.mock_source_file = self.load_source_file_patcher.start()
         self.mock_source_file.side_effect = lambda path: [n for n in self.mock_models if n.path == path][0]
 
         def filesystem_iter(iter_self):
@@ -291,3 +299,17 @@ def test__dependency_list(self):
                 queue.get(block=False)
             queue.mark_done(got.unique_id)
         self.assertTrue(queue.empty())
+
+    def test__partial_parse(self):
+        config = self.get_config()
+
+        loader = dbt.parser.manifest.ManifestLoader(config, {config.project_name: config})
+        loader.load()
+        loader.create_manifest()
+        results = loader.results
+
+        self.assertTrue(loader.matching_parse_results(results))
+        too_low = results.replace(dbt_version='0.0.1a1')
+        self.assertFalse(loader.matching_parse_results(too_low))
+        too_high = results.replace(dbt_version='99999.99.99')
+        self.assertFalse(loader.matching_parse_results(too_high))