From 2a4fe40cfd22754900de978b3e73c939b3874629 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Fri, 7 Aug 2020 09:57:29 -0600 Subject: [PATCH] Add state:modified and state:new selectors --- CHANGELOG.md | 9 +- core/dbt/contracts/files.py | 166 ++ core/dbt/contracts/graph/compiled.py | 19 +- core/dbt/contracts/graph/manifest.py | 182 +- core/dbt/contracts/graph/model_config.py | 39 +- core/dbt/contracts/graph/parsed.py | 186 +- core/dbt/contracts/state.py | 13 + core/dbt/contracts/util.py | 6 +- core/dbt/graph/selector.py | 8 +- core/dbt/graph/selector_methods.py | 105 +- core/dbt/main.py | 152 +- core/dbt/parser/base.py | 18 +- core/dbt/parser/hooks.py | 2 +- core/dbt/parser/manifest.py | 3 +- core/dbt/parser/results.py | 6 +- core/dbt/parser/schemas.py | 2 + core/dbt/parser/search.py | 2 +- core/dbt/parser/seeds.py | 9 +- core/dbt/task/compile.py | 1 + core/dbt/task/freshness.py | 1 + core/dbt/task/list.py | 2 + core/dbt/task/run.py | 57 +- core/dbt/task/runnable.py | 10 +- core/dbt/task/seed.py | 1 + core/dbt/task/snapshot.py | 1 + core/dbt/task/test.py | 4 +- core/setup.py | 2 +- .../test_docs_generate.py | 63 +- .../062_defer_state_test/macros/macros.sql | 3 + .../test_modified_state.py | 185 ++ test/integration/base.py | 2 + test/unit/test_compiler.py | 30 +- test/unit/test_context.py | 4 +- test/unit/test_contracts_graph_compiled.py | 905 +++--- test/unit/test_contracts_graph_parsed.py | 2532 ++++++++++------- test/unit/test_docs_blocks.py | 3 +- test/unit/test_graph.py | 3 +- test/unit/test_graph_selector_methods.py | 224 +- test/unit/test_manifest.py | 40 +- test/unit/test_parse_manifest.py | 2 +- test/unit/test_parser.py | 11 +- test/unit/utils.py | 21 + 42 files changed, 3202 insertions(+), 1832 deletions(-) create mode 100644 core/dbt/contracts/files.py create mode 100644 core/dbt/contracts/state.py create mode 100644 test/integration/062_defer_state_test/macros/macros.sql create mode 100644 test/integration/062_defer_state_test/test_modified_state.py 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 897deb3d93f..5236043173e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,5 @@ ## dbt 0.18.0 (Release TBD) - -### Features -- Add support for impersonating a service account using `impersonate_service_account` in the BigQuery profile configuration ([#2677](https://github.com/fishtown-analytics/dbt/issues/2677)) ([docs](https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile#service-account-impersonation)) -- Macros in the current project can override internal dbt macros that are called through `execute_macros`. ([#2301](https://github.com/fishtown-analytics/dbt/issues/2301), [#2686](https://github.com/fishtown-analytics/dbt/pull/2686)) - - ### Breaking changes - `adapter_macro` is no longer a macro, instead it is a builtin context method. Any custom macros that intercepted it by going through `context['dbt']` will need to instead access it via `context['builtins']` ([#2302](https://github.com/fishtown-analytics/dbt/issues/2302), [#2673](https://github.com/fishtown-analytics/dbt/pull/2673)) - `adapter_macro` is now deprecated. Use `adapter.dispatch` instead. @@ -13,6 +7,9 @@ ### Features - Added a `dispatch` method to the context adapter and deprecated `adapter_macro`. 
([#2302](https://github.com/fishtown-analytics/dbt/issues/2302), [#2679](https://github.com/fishtown-analytics/dbt/pull/2679)) - The built-in schema tests now use `adapter.dispatch`, so they can be overridden for adapter plugins ([#2415](https://github.com/fishtown-analytics/dbt/issues/2415), [#2684](https://github.com/fishtown-analytics/dbt/pull/2684)) +- Add support for impersonating a service account using `impersonate_service_account` in the BigQuery profile configuration ([#2677](https://github.com/fishtown-analytics/dbt/issues/2677)) ([docs](https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile#service-account-impersonation)) +- Macros in the current project can override internal dbt macros that are called through `execute_macros`. ([#2301](https://github.com/fishtown-analytics/dbt/issues/2301), [#2686](https://github.com/fishtown-analytics/dbt/pull/2686)) +- Add state:modified and state:new selectors ([#2641](https://github.com/fishtown-analytics/dbt/issues/2641), [#2695](https://github.com/fishtown-analytics/dbt/pull/2695)) Contributors: - [@bbhoss](https://github.com/bbhoss) ([#2677](https://github.com/fishtown-analytics/dbt/pull/2677)) diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py new file mode 100644 index 00000000000..fc36fbaebb5 --- /dev/null +++ b/core/dbt/contracts/files.py @@ -0,0 +1,166 @@ +import hashlib +import os +from dataclasses import dataclass, field +from typing import List, Optional, Union + +from hologram import JsonSchemaMixin + +from dbt.exceptions import InternalException + +from .util import MacroKey, SourceKey + + +MAXIMUM_SEED_SIZE = 1 * 1024 * 1024 +MAXIMUM_SEED_SIZE_NAME = '1MB' + + +@dataclass +class FilePath(JsonSchemaMixin): + searched_path: str + relative_path: str + project_root: str + + @property + def search_key(self) -> str: + # TODO: should this be project name + path relative to project root? 
+ return self.absolute_path + + @property + def full_path(self) -> str: + # useful for symlink preservation + return os.path.join( + self.project_root, self.searched_path, self.relative_path + ) + + @property + def absolute_path(self) -> str: + return os.path.abspath(self.full_path) + + @property + def original_file_path(self) -> str: + # this is mostly used for reporting errors. It doesn't show the project + # name, should it? + return os.path.join( + self.searched_path, self.relative_path + ) + + def seed_too_large(self) -> bool: + """Return whether the file this represents is over the seed size limit + """ + return os.stat(self.full_path).st_size > MAXIMUM_SEED_SIZE + + +@dataclass +class FileHash(JsonSchemaMixin): + name: str # the hash type name + checksum: str # the hashlib.hash_type().hexdigest() of the file contents + + @classmethod + def empty(cls): + return FileHash(name='none', checksum='') + + @classmethod + def path(cls, path: str): + return FileHash(name='path', checksum=path) + + def __eq__(self, other): + if not isinstance(other, FileHash): + return NotImplemented + + if self.name == 'none' or self.name != other.name: + return False + + return self.checksum == other.checksum + + def compare(self, contents: str) -> bool: + """Compare the file contents with the given hash""" + if self.name == 'none': + return False + + return self.from_contents(contents, name=self.name) == self.checksum + + @classmethod + def from_contents(cls, contents: str, name='sha256') -> 'FileHash': + """Create a file hash from the given file contents. The hash is always + the utf-8 encoding of the contents given, because dbt only reads files + as utf-8. 
+ """ + data = contents.encode('utf-8') + checksum = hashlib.new(name, data).hexdigest() + return cls(name=name, checksum=checksum) + + +@dataclass +class RemoteFile(JsonSchemaMixin): + @property + def searched_path(self) -> str: + return 'from remote system' + + @property + def relative_path(self) -> str: + return 'from remote system' + + @property + def absolute_path(self) -> str: + return 'from remote system' + + @property + def original_file_path(self): + return 'from remote system' + + +@dataclass +class SourceFile(JsonSchemaMixin): + """Define a source file in dbt""" + path: Union[FilePath, RemoteFile] # the path information + checksum: FileHash + # we don't want to serialize this + _contents: Optional[str] = None + # the unique IDs contained in this file + nodes: List[str] = field(default_factory=list) + docs: List[str] = field(default_factory=list) + macros: List[str] = field(default_factory=list) + sources: List[str] = field(default_factory=list) + # any node patches in this file. The entries are names, not unique ids! + patches: List[str] = field(default_factory=list) + # any macro patches in this file. The entries are package, name pairs. + macro_patches: List[MacroKey] = field(default_factory=list) + # any source patches in this file. 
The entries are package, name pairs + source_patches: List[SourceKey] = field(default_factory=list) + + @property + def search_key(self) -> Optional[str]: + if isinstance(self.path, RemoteFile): + return None + if self.checksum.name == 'none': + return None + return self.path.search_key + + @property + def contents(self) -> str: + if self._contents is None: + raise InternalException('SourceFile has no contents!') + return self._contents + + @contents.setter + def contents(self, value): + self._contents = value + + @classmethod + def empty(cls, path: FilePath) -> 'SourceFile': + self = cls(path=path, checksum=FileHash.empty()) + self.contents = '' + return self + + @classmethod + def big_seed(cls, path: FilePath) -> 'SourceFile': + """Parse seeds over the size limit with just the path""" + self = cls(path=path, checksum=FileHash.path(path.absolute_path)) + self.contents = '' + return self + + @classmethod + def remote(cls, contents: str) -> 'SourceFile': + self = cls(path=RemoteFile(), checksum=FileHash.empty()) + self.contents = contents + return self diff --git a/core/dbt/contracts/graph/compiled.py b/core/dbt/contracts/graph/compiled.py index 72138b53eb6..58be09742eb 100644 --- a/core/dbt/contracts/graph/compiled.py +++ b/core/dbt/contracts/graph/compiled.py @@ -11,7 +11,8 @@ ParsedSeedNode, ParsedSnapshotNode, ParsedSourceDefinition, - SeedConfig, + SchemaTestMixin, + SeedMixin, TestConfig, ) from dbt.node_types import NodeType @@ -93,14 +94,8 @@ class CompiledRPCNode(CompiledNode): @dataclass -class CompiledSeedNode(CompiledNode): - resource_type: NodeType = field(metadata={'restrict': [NodeType.Seed]}) - config: SeedConfig = field(default_factory=SeedConfig) - - @property - def empty(self): - """ Seeds are never empty""" - return False +class CompiledSeedNode(SeedMixin, CompiledNode): + pass @dataclass @@ -115,10 +110,8 @@ class CompiledDataTestNode(CompiledNode): @dataclass -class CompiledSchemaTestNode(CompiledNode, HasTestMetadata): - resource_type: 
NodeType = field(metadata={'restrict': [NodeType.Test]}) - column_name: Optional[str] = None - config: TestConfig = field(default_factory=TestConfig) +class CompiledSchemaTestNode(SchemaTestMixin, CompiledNode, HasTestMetadata): + pass CompiledTestNode = Union[CompiledDataTestNode, CompiledSchemaTestNode] diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index a186e1189d0..f6a4b10beea 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -1,14 +1,12 @@ import abc import enum -import hashlib -import os from dataclasses import dataclass, field from datetime import datetime from itertools import chain, islice from multiprocessing.synchronize import Lock from typing import ( Dict, List, Optional, Union, Mapping, MutableMapping, Any, Set, Tuple, - TypeVar, Callable, Iterable, Generic, cast + TypeVar, Callable, Iterable, Generic, cast, AbstractSet ) from typing_extensions import Protocol from uuid import UUID @@ -22,10 +20,13 @@ ParsedMacro, ParsedDocumentation, ParsedNodePatch, ParsedMacroPatch, ParsedSourceDefinition ) -from dbt.contracts.util import Readable, Writable, Replaceable +from dbt.contracts.files import SourceFile +from dbt.contracts.util import ( + Readable, Writable, Replaceable, MacroKey, SourceKey +) from dbt.exceptions import ( - raise_duplicate_resource_name, InternalException, raise_compiler_error, - warn_or_error, raise_invalid_patch + raise_duplicate_resource_name, raise_compiler_error, warn_or_error, + raise_invalid_patch, ) from dbt.helper_types import PathSet from dbt.logger import GLOBAL_LOGGER as logger @@ -36,8 +37,6 @@ import dbt.utils NodeEdgeMap = Dict[str, List[str]] -MacroKey = Tuple[str, str] -SourceKey = Tuple[str, str] PackageName = str DocName = str RefName = str @@ -172,153 +171,6 @@ def _search_packages( return [current_project, node_package, None] -@dataclass -class FilePath(JsonSchemaMixin): - searched_path: str - relative_path: str - project_root: 
str - - @property - def search_key(self) -> str: - # TODO: should this be project name + path relative to project root? - return self.absolute_path - - @property - def full_path(self) -> str: - # useful for symlink preservation - return os.path.join( - self.project_root, self.searched_path, self.relative_path - ) - - @property - def absolute_path(self) -> str: - return os.path.abspath(self.full_path) - - @property - def original_file_path(self) -> str: - # this is mostly used for reporting errors. It doesn't show the project - # name, should it? - return os.path.join( - self.searched_path, self.relative_path - ) - - -@dataclass -class FileHash(JsonSchemaMixin): - name: str # the hash type name - checksum: str # the hashlib.hash_type().hexdigest() of the file contents - - @classmethod - def empty(cls): - return FileHash(name='none', checksum='') - - @classmethod - def path(cls, path: str): - return FileHash(name='path', checksum=path) - - def __eq__(self, other): - if not isinstance(other, FileHash): - return NotImplemented - - if self.name == 'none' or self.name != other.name: - return False - - return self.checksum == other.checksum - - def compare(self, contents: str) -> bool: - """Compare the file contents with the given hash""" - if self.name == 'none': - return False - - return self.from_contents(contents, name=self.name) == self.checksum - - @classmethod - def from_contents(cls, contents: str, name='sha256'): - """Create a file hash from the given file contents. The hash is always - the utf-8 encoding of the contents given, because dbt only reads files - as utf-8. 
- """ - data = contents.encode('utf-8') - checksum = hashlib.new(name, data).hexdigest() - return cls(name=name, checksum=checksum) - - -@dataclass -class RemoteFile(JsonSchemaMixin): - @property - def searched_path(self) -> str: - return 'from remote system' - - @property - def relative_path(self) -> str: - return 'from remote system' - - @property - def absolute_path(self) -> str: - return 'from remote system' - - @property - def original_file_path(self): - return 'from remote system' - - -@dataclass -class SourceFile(JsonSchemaMixin): - """Define a source file in dbt""" - path: Union[FilePath, RemoteFile] # the path information - checksum: FileHash - # we don't want to serialize this - _contents: Optional[str] = None - # the unique IDs contained in this file - nodes: List[str] = field(default_factory=list) - docs: List[str] = field(default_factory=list) - macros: List[str] = field(default_factory=list) - sources: List[str] = field(default_factory=list) - # any node patches in this file. The entries are names, not unique ids! - patches: List[str] = field(default_factory=list) - # any macro patches in this file. The entries are package, name pairs. - macro_patches: List[MacroKey] = field(default_factory=list) - # any source patches in this file. 
The entries are package, name pairs - source_patches: List[SourceKey] = field(default_factory=list) - - @property - def search_key(self) -> Optional[str]: - if isinstance(self.path, RemoteFile): - return None - if self.checksum.name == 'none': - return None - return self.path.search_key - - @property - def contents(self) -> str: - if self._contents is None: - raise InternalException('SourceFile has no contents!') - return self._contents - - @contents.setter - def contents(self, value): - self._contents = value - - @classmethod - def empty(cls, path: FilePath) -> 'SourceFile': - self = cls(path=path, checksum=FileHash.empty()) - self.contents = '' - return self - - @classmethod - def seed(cls, path: FilePath) -> 'SourceFile': - """Seeds always parse the same regardless of their content.""" - self = cls(path=path, checksum=FileHash.path(path.absolute_path)) - self.contents = '' - return self - - @classmethod - def remote(cls, contents: str) -> 'SourceFile': - self = cls(path=RemoteFile(), checksum=FileHash.empty()) - self.contents = contents - return self - - @dataclass class ManifestMetadata(JsonSchemaMixin, Replaceable): """Metadata for the manifest.""" @@ -1014,7 +866,7 @@ def resolve_doc( def merge_from_artifact( self, other: 'WritableManifest', - selected: Set[UniqueID], + selected: AbstractSet[UniqueID], ) -> None: """Given the selected unique IDs and a writable manifest, update this manifest by replacing any unselected nodes with their counterpart. @@ -1038,6 +890,24 @@ def merge_from_artifact( f'Merged {len(merged)} items from state (sample: {sample})' ) + # provide support for copy.deepcopy() - we jsut need to avoid the lock! 
+ def __reduce_ex__(self, protocol): + args = ( + self.nodes, + self.sources, + self.macros, + self.docs, + self.generated_at, + self.disabled, + self.files, + self.metadata, + self.flat_graph, + self._docs_cache, + self._sources_cache, + self._refs_cache, + ) + return self.__class__, args + @dataclass class WritableManifest(JsonSchemaMixin, Writable, Readable): diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index 494cc99ee00..d60d1a26150 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -174,16 +174,37 @@ def __delitem__(self, key): else: del self._extra[key] - def __iter__(self): + def _content_iterator(self, include_hidden: bool): + seen = set() for fld, _ in self._get_fields(): - yield fld.name + seen.add(fld.name) + if ( + include_hidden or + ShowBehavior.from_field(fld) != ShowBehavior.Hide + ): + yield fld.name for key in self._extra: - yield key + if key not in seen: + seen.add(key) + yield key + + def __iter__(self): + yield from self._content_iterator(include_hidden=True) def __len__(self): return len(self._get_fields()) + len(self._extra) + def same_contents(self: T, other: T) -> bool: + """This is like __eq__, except it ignores hidden fields.""" + for key in self._content_iterator(include_hidden=False): + try: + if self[key] != other[key]: + return False + except KeyError: + return False + return True + @classmethod def _extract_dict( cls, src: Dict[str, Any], data: Dict[str, Any] @@ -272,6 +293,15 @@ def finalize_and_validate(self: T) -> T: dct = self.to_dict(omit_none=False, validate=False) return self.from_dict(dct) + def replace(self, **kwargs): + dct = self.to_dict(validate=False) + + mapping = self.field_mapping() + for key, value in kwargs.items(): + new_key = mapping.get(key, key) + dct[new_key] = value + return self.from_dict(dct, validate=False) + @dataclass class SourceConfig(BaseConfig): @@ -320,8 +350,7 @@ class NodeConfig(BaseConfig): ) 
tags: Union[List[str], str] = field( default_factory=list_str, - # TODO: hide this one? - metadata=MergeBehavior.Append.meta(), + metadata=ShowBehavior.Hide.meta(MergeBehavior.Append.meta()), ) full_refresh: Optional[bool] = None diff --git a/core/dbt/contracts/graph/parsed.py b/core/dbt/contracts/graph/parsed.py index 140671bcfa9..68212568b83 100644 --- a/core/dbt/contracts/graph/parsed.py +++ b/core/dbt/contracts/graph/parsed.py @@ -10,12 +10,14 @@ Sequence, Tuple, Iterator, + TypeVar, ) from hologram import JsonSchemaMixin from hologram.helpers import ExtensibleJsonSchemaMixin from dbt.clients.system import write_file +from dbt.contracts.files import FileHash, MAXIMUM_SEED_SIZE_NAME from dbt.contracts.graph.unparsed import ( UnparsedNode, UnparsedDocumentation, Quoting, Docs, UnparsedBaseNode, FreshnessThreshold, ExternalTable, @@ -23,6 +25,7 @@ UnparsedSourceTableDefinition, UnparsedColumn, TestDef ) from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin +from dbt.exceptions import warn_or_error from dbt.logger import GLOBAL_LOGGER as logger # noqa from dbt import flags from dbt.node_types import NodeType @@ -45,8 +48,11 @@ @dataclass -class ColumnInfo(AdditionalPropertiesMixin, ExtensibleJsonSchemaMixin, - Replaceable): +class ColumnInfo( + AdditionalPropertiesMixin, + ExtensibleJsonSchemaMixin, + Replaceable +): name: str description: str = '' meta: Dict[str, Any] = field(default_factory=dict) @@ -122,7 +128,7 @@ def patch(self, patch: 'ParsedNodePatch'): self.docs = patch.docs if flags.STRICT_MODE: assert isinstance(self, JsonSchemaMixin) - self.to_dict(validate=True) + self.to_dict(validate=True, omit_none=False) def get_materialization(self): return self.config.materialized @@ -140,6 +146,7 @@ class ParsedNodeMandatory( Replaceable ): alias: str + checksum: FileHash config: NodeConfig = field(default_factory=NodeConfig) @property @@ -177,9 +184,63 @@ def write_node(self, target_path: str, subdirectory: str, payload: str): return full_path +T 
= TypeVar('T', bound='ParsedNode') + + @dataclass class ParsedNode(ParsedNodeDefaults, ParsedNodeMixins): - pass + + def _persist_column_docs(self) -> bool: + return bool(self.config.persist_docs.get('columns')) + + def _persist_relation_docs(self) -> bool: + return bool(self.config.persist_docs.get('relation')) + + def _same_body(self: T, other: T) -> bool: + return self.raw_sql == other.raw_sql + + def _same_description_persisted(self: T, other: T) -> bool: + # the check on configs will handle the case where we have different + # persist settings, so we only have to care about the cases where they + # are the same.. + if self._persist_relation_docs(): + if self.description != other.description: + return False + + if self._persist_column_docs(): + # assert other._persist_column_docs() + column_descriptions = { + k: v.description for k, v in self.columns.items() + } + other_column_descriptions = { + k: v.description for k, v in other.columns.items() + } + if column_descriptions != other_column_descriptions: + return False + + return True + + def _same_name(self: T, old: T) -> bool: + return ( + self.database == old.database and + self.schema == old.schema and + self.identifier == old.identifier and + True + ) + + def same_contents(self: T, old: Optional[T]) -> bool: + if old is None: + return False + + return ( + self.resource_type == old.resource_type and + self._same_body(old) and + self.config.same_contents(old.config) and + self._same_description_persisted(old) and + self._same_name(old) and + self.fqn == old.fqn and + True + ) @dataclass @@ -188,13 +249,66 @@ class ParsedAnalysisNode(ParsedNode): @dataclass -class ParsedHookNode(ParsedNode): +class HookMixin(JsonSchemaMixin): resource_type: NodeType = field( metadata={'restrict': [NodeType.Operation]} ) index: Optional[int] = None +@dataclass +class SeedMixin(JsonSchemaMixin): + resource_type: NodeType = field(metadata={'restrict': [NodeType.Seed]}) + config: SeedConfig = field(default_factory=SeedConfig) + + 
@property + def empty(self): + """ Seeds are never empty""" + return False + + def _same_body(self: 'ParsedSeedNode', other: 'ParsedSeedNode') -> bool: + # for seeds, we check the hashes. If the hashes are different types, + # no match. If the hashes are both the same 'path', log a warning and + # assume they are the same + # if the current checksum is a path, we want to log a warning. + result = self.checksum == other.checksum + + if self.checksum.name == 'path': + msg: str + if other.checksum.name != 'path': + msg = ( + f'Found a seed >{MAXIMUM_SEED_SIZE_NAME} in size. The ' + f'previous file was <={MAXIMUM_SEED_SIZE_NAME}, so it ' + f'has changed' + ) + elif result: + msg = ( + f'Found a seed >{MAXIMUM_SEED_SIZE_NAME} in size at ' + f'the same path, dbt cannot tell if it has changed: ' + f'assuming they are the same' + ) + elif not result: + msg = ( + f'Found a seed >{MAXIMUM_SEED_SIZE_NAME} in size. The ' + f'previous file was in a different location, assuming it ' + f'has changed' + ) + else: + msg = ( + f'Found a seed >{MAXIMUM_SEED_SIZE_NAME} in size. 
The ' + f'previous file had a checksum type of ' + f'{other.checksum.name}, so it has changed' + ) + warn_or_error(msg, node=self) + + return result + + +@dataclass +class ParsedHookNode(HookMixin, ParsedNode): + pass + + @dataclass class ParsedModelNode(ParsedNode): resource_type: NodeType = field(metadata={'restrict': [NodeType.Model]}) @@ -206,18 +320,12 @@ class ParsedRPCNode(ParsedNode): @dataclass -class ParsedSeedNode(ParsedNode): - resource_type: NodeType = field(metadata={'restrict': [NodeType.Seed]}) - config: SeedConfig = field(default_factory=SeedConfig) - - @property - def empty(self): - """ Seeds are never empty""" - return False +class ParsedSeedNode(SeedMixin, ParsedNode): + pass @dataclass -class TestMetadata(JsonSchemaMixin): +class TestMetadata(JsonSchemaMixin, Replaceable): namespace: Optional[str] name: str kwargs: Dict[str, Any] @@ -235,11 +343,22 @@ class ParsedDataTestNode(ParsedNode): @dataclass -class ParsedSchemaTestNode(ParsedNode, HasTestMetadata): +class SchemaTestMixin(JsonSchemaMixin): resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]}) column_name: Optional[str] = None config: TestConfig = field(default_factory=TestConfig) + # make sure to keep this in sync with CompiledSchemaTestNode... 
+ def _same_body( + self: 'ParsedSchemaTestNode', other: 'ParsedSchemaTestNode' + ) -> bool: + return self.test_metadata == other.test_metadata + + +@dataclass +class ParsedSchemaTestNode(SchemaTestMixin, ParsedNode, HasTestMetadata): + pass + @dataclass class IntermediateSnapshotNode(ParsedNode): @@ -306,7 +425,14 @@ def patch(self, patch: ParsedMacroPatch): self.arguments = patch.arguments if flags.STRICT_MODE: assert isinstance(self, JsonSchemaMixin) - self.to_dict(validate=True) + self.to_dict(validate=True, omit_none=False) + + def same_contents(self, other: Optional['ParsedMacro']) -> bool: + if other is None: + return False + # the only thing that makes one macro different from another with the + # same name/package is its content + return self.macro_sql == other.macro_sql @dataclass @@ -318,6 +444,13 @@ class ParsedDocumentation(UnparsedDocumentation, HasUniqueID): def search_name(self): return self.name + def same_contents(self, other: Optional['ParsedDocumentation']) -> bool: + if other is None: + return False + # the only thing that makes one doc different from another with the + # same name/package is its content + return self.block_contents == other.block_contents + def normalize_test(testdef: TestDef) -> Dict[str, Any]: if isinstance(testdef, str): @@ -403,6 +536,27 @@ class ParsedSourceDefinition( config: SourceConfig = field(default_factory=SourceConfig) patch_path: Optional[Path] = None + def same_contents(self, old: Optional['ParsedSourceDefinition']) -> bool: + # existing when it didn't before is a change! + if old is None: + return True + + # config changes are changes (because the only config is "enabled", and + # enabling a source is a change!) + # changing the database/schema/identifier is a change + # messing around with external stuff is a change (uh, right?) 
+ # quoting changes are changes + # freshness changes are changes, I guess + # metadata/tags changes are not "changes" + # patching/description changes are not "changes" + return ( + old.config == self.config and + old.freshness == self.freshness and + old.database == self.database and + old.schema == self.schema and + old.identifier == self.identifier + ) + def get_full_source_name(self): return f'{self.source_name}_{self.name}' diff --git a/core/dbt/contracts/state.py b/core/dbt/contracts/state.py new file mode 100644 index 00000000000..94173b41e68 --- /dev/null +++ b/core/dbt/contracts/state.py @@ -0,0 +1,13 @@ +from pathlib import Path +from .graph.manifest import WritableManifest +from typing import Optional + + +class PreviousState: + def __init__(self, path: Path): + self.path: Path = path + self.manifest: Optional[WritableManifest] = None + + manifest_path = self.path / 'manifest.json' + if manifest_path.exists() and manifest_path.is_file(): + self.manifest = WritableManifest.read(str(manifest_path)) diff --git a/core/dbt/contracts/util.py b/core/dbt/contracts/util.py index 764ee1cd08e..fced32c436d 100644 --- a/core/dbt/contracts/util.py +++ b/core/dbt/contracts/util.py @@ -1,10 +1,14 @@ import dataclasses -from typing import List +from typing import List, Tuple from dbt.clients.system import write_json, read_json from dbt.exceptions import RuntimeException +MacroKey = Tuple[str, str] +SourceKey = Tuple[str, str] + + def list_str() -> List[str]: """Mypy gets upset about stuff like: diff --git a/core/dbt/graph/selector.py b/core/dbt/graph/selector.py index 539340dd873..4cb09ce9b42 100644 --- a/core/dbt/graph/selector.py +++ b/core/dbt/graph/selector.py @@ -1,5 +1,5 @@ -from typing import Set, List, Union +from typing import Set, List, Union, Optional from .graph import Graph, UniqueId from .queue import GraphQueue @@ -16,6 +16,7 @@ from dbt.contracts.graph.compiled import NonSourceNode, CompileResultNode from dbt.contracts.graph.manifest import Manifest from 
dbt.contracts.graph.parsed import ParsedSourceDefinition +from dbt.contracts.state import PreviousState def get_package_names(nodes): @@ -37,9 +38,10 @@ def __init__( self, graph: Graph, manifest: Manifest, + previous_state: Optional[PreviousState] = None, ): + super().__init__(manifest, previous_state) self.full_graph = graph - self.manifest = manifest # build a subgraph containing only non-empty, enabled nodes and enabled # sources. @@ -195,11 +197,13 @@ def __init__( self, graph: Graph, manifest: Manifest, + previous_state: Optional[PreviousState], resource_types: List[NodeType], ): super().__init__( graph=graph, manifest=manifest, + previous_state=previous_state, ) self.resource_types: Set[NodeType] = set(resource_types) diff --git a/core/dbt/graph/selector_methods.py b/core/dbt/graph/selector_methods.py index afc968654a2..dbdf114a66a 100644 --- a/core/dbt/graph/selector_methods.py +++ b/core/dbt/graph/selector_methods.py @@ -1,7 +1,7 @@ import abc from itertools import chain from pathlib import Path -from typing import Set, List, Dict, Iterator, Tuple, Any, Union, Type +from typing import Set, List, Dict, Iterator, Tuple, Any, Union, Type, Optional from hologram.helpers import StrEnum @@ -12,18 +12,21 @@ CompiledSchemaTestNode, NonSourceNode, ) -from dbt.contracts.graph.manifest import Manifest +from dbt.contracts.graph.manifest import Manifest, WritableManifest from dbt.contracts.graph.parsed import ( HasTestMetadata, ParsedDataTestNode, ParsedSchemaTestNode, ParsedSourceDefinition, ) +from dbt.contracts.state import PreviousState +from dbt.logger import GLOBAL_LOGGER as logger from dbt.exceptions import ( InternalException, RuntimeException, ) from dbt.node_types import NodeType +from dbt.ui import warning_tag SELECTOR_GLOB = '*' @@ -40,6 +43,7 @@ class MethodName(StrEnum): TestName = 'test_name' TestType = 'test_type' ResourceType = 'resource_type' + State = 'state' def is_selected_node(real_node, node_selector): @@ -72,8 +76,14 @@ def 
is_selected_node(real_node, node_selector): class SelectorMethod(metaclass=abc.ABCMeta): - def __init__(self, manifest: Manifest, arguments: List[str]): + def __init__( + self, + manifest: Manifest, + previous_state: Optional[PreviousState], + arguments: List[str] + ): self.manifest: Manifest = manifest + self.previous_state = previous_state self.arguments: List[str] = arguments def parsed_nodes( @@ -329,6 +339,85 @@ def search( yield node +class StateSelectorMethod(SelectorMethod): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.macros_were_modified = None + + def _macros_modified(self): + # we checked in the caller! + if self.previous_state is None or self.previous_state.manifest is None: + raise InternalException( + f'No deferred manifest in _macros_modified' + ) + old_macros = self.previous_state.manifest.macros + new_macros = self.manifest.macros + # macros were added/removed + if old_macros.keys() != new_macros.keys(): + return True + + return any( + old_macros[uid].macro_sql != new_macros[uid].macro_sql + for uid in new_macros + ) + + def check_modified( + self, + old: Optional[SelectorTarget], + new: SelectorTarget, + ) -> bool: + # check if there are any changes in macros, if so, log a warning the + # first time + if self.macros_were_modified is None: + self.macros_were_modified = self._macros_modified() + if self.macros_were_modified: + logger.warning(warning_tag( + 'During a state comparison, dbt detected a change in ' + 'macros. This will not be marked as a modification.' 
+ )) + + return not new.same_contents(old) + + def check_new( + self, + old: Optional[SelectorTarget], + new: SelectorTarget, + ) -> bool: + return old is None + + def search( + self, included_nodes: Set[UniqueId], selector: str + ) -> Iterator[UniqueId]: + if self.previous_state is None or self.previous_state.manifest is None: + raise RuntimeException( + f'Got a state selector method, but no deferred manifest' + ) + + state_checks = { + 'modified': self.check_modified, + 'new': self.check_new, + } + if selector in state_checks: + checker = state_checks[selector] + else: + raise RuntimeException( + f'Got an invalid selector "{selector}", expected one of ' + f'"{list(state_checks)}"' + ) + + manifest: WritableManifest = self.previous_state.manifest + + for node, real_node in self.all_nodes(included_nodes): + previous_node: Optional[SelectorTarget] = None + if node in manifest.nodes: + previous_node = manifest.nodes[node] + elif node in manifest.sources: + previous_node = manifest.sources[node] + + if checker(previous_node, real_node): + yield node + + class MethodManager: SELECTOR_METHODS: Dict[MethodName, Type[SelectorMethod]] = { MethodName.FQN: QualifiedNameSelectorMethod, @@ -339,10 +428,16 @@ class MethodManager: MethodName.Config: ConfigSelectorMethod, MethodName.TestName: TestNameSelectorMethod, MethodName.TestType: TestTypeSelectorMethod, + MethodName.State: StateSelectorMethod, } - def __init__(self, manifest: Manifest): + def __init__( + self, + manifest: Manifest, + previous_state: Optional[PreviousState], + ): self.manifest = manifest + self.previous_state = previous_state def get_method( self, method: MethodName, method_arguments: List[str] @@ -354,4 +449,4 @@ def get_method( f'method name, but it is not handled' ) cls: Type[SelectorMethod] = self.SELECTOR_METHODS[method] - return cls(self.manifest, method_arguments) + return cls(self.manifest, self.previous_state, method_arguments) diff --git a/core/dbt/main.py b/core/dbt/main.py index 
d4344f1cbed..576c8948fca 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -453,16 +453,7 @@ def _build_run_subparser(subparsers, base_subparser): ''' ) - # for now, this is a "dbt run"-only thing - run_sub.add_argument( - '--state', - help=''' - If set, use the given directory as the source for json files to compare - with this project. - ''', - type=Path, - default=flags.ARTIFACT_STATE_PATH, - ) + # this is a "dbt run"-only thing, for now run_sub.add_optional_argument_inverse( '--defer', enable_help=''' @@ -511,35 +502,79 @@ def _build_docs_generate_subparser(subparsers, base_subparser): return generate_sub +def _add_models_argument(sub, help_override=None, **kwargs): + help_str = ''' + Specify the models to include. + ''' + if help_override is not None: + help_str = help_override + sub.add_argument( + '-m', + '--models', + dest='models', + nargs='+', + help=help_str, + **kwargs + ) + + +def _add_select_argument(sub, dest='models', help_override=None, **kwargs): + help_str = ''' + Specify the nodes to include. + ''' + if help_override is not None: + help_str = help_override + + sub.add_argument( + '-s', + '--select', + dest=dest, + nargs='+', + help=help_str, + **kwargs + ) + + +def _add_common_selector_arguments(sub): + sub.add_argument( + '--exclude', + required=False, + nargs='+', + help=''' + Specify the models to exclude. + ''', + ) + sub.add_argument( + '--selector', + dest='selector_name', + metavar='SELECTOR_NAME', + help=''' + The selector name to use, as defined in selectors.yml + ''' + ) + sub.add_argument( + '--state', + help=''' + If set, use the given directory as the source for json files to + compare with this project. 
+ ''', + type=Path, + default=flags.ARTIFACT_STATE_PATH, + ) + + def _add_selection_arguments(*subparsers, **kwargs): models_name = kwargs.get('models_name', 'models') for sub in subparsers: - sub.add_argument( - '-{}'.format(models_name[0]), - '--{}'.format(models_name), - dest='models', - required=False, - nargs='+', - help=''' - Specify the models to include. - ''', - ) - sub.add_argument( - '--exclude', - required=False, - nargs='+', - help=''' - Specify the models to exclude. - ''', - ) - sub.add_argument( - '--selector', - dest='selector_name', - metavar='SELECTOR_NAME', - help=''' - The selector name to use, as defined in selectors.yml - ''' - ) + if models_name == 'models': + _add_models_argument(sub) + elif models_name == 'select': + # these still get stored in 'models', so they present the same + # interface to the task + _add_select_argument(sub) + else: + raise InternalException(f'Unknown models style {models_name}') + _add_common_selector_arguments(sub) def _add_table_mutability_arguments(*subparsers): @@ -752,44 +787,24 @@ def _build_list_subparser(subparsers, base_subparser): sub.add_argument('--output', choices=['json', 'name', 'path', 'selector'], default='selector') - sub.add_argument( - '-s', - '--select', - required=False, - nargs='+', - metavar='SELECTOR', - help=''' - Specify the nodes to select. - ''', - ) - sub.add_argument( - '-m', - '--models', - required=False, - nargs='+', - metavar='SELECTOR', - help=''' + + _add_models_argument( + sub, + help_override=''' Specify the models to select and set the resource-type to 'model'. Mutually exclusive with '--select' (or '-s') and '--resource-type' ''', - ) - sub.add_argument( - '--exclude', - required=False, - nargs='+', metavar='SELECTOR', - help=''' - Specify the models to exclude. 
- ''' + required=False ) - sub.add_argument( - '--selector', - metavar='SELECTOR_NAME', - dest='selector_name', - help=''' - The selector name to use, as defined in selectors.yml - ''' + _add_select_argument( + sub, + dest='select', + metavar='SELECTOR', + required=False, ) + _add_common_selector_arguments(sub) + return sub @@ -961,6 +976,7 @@ def parse_args(args, cls=DBTArgumentParser): _add_common_arguments(run_sub, compile_sub, generate_sub, test_sub, rpc_sub, seed_sub) # --models, --exclude + # list_sub sets up its own arguments. _add_selection_arguments(run_sub, compile_sub, generate_sub, test_sub) _add_selection_arguments(snapshot_sub, seed_sub, models_name='select') # --full-refresh diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index 3397fe3024a..e54b54d643c 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -20,9 +20,10 @@ from dbt.context.context_config import ( LegacyContextConfig, ContextConfig, ContextConfigType ) -from dbt.contracts.graph.manifest import ( - Manifest, SourceFile, FilePath, FileHash +from dbt.contracts.files import ( + SourceFile, FilePath, FileHash ) +from dbt.contracts.graph.manifest import Manifest from dbt.contracts.graph.parsed import HasUniqueID from dbt.contracts.graph.unparsed import UnparsedNode from dbt.exceptions import ( @@ -76,11 +77,19 @@ def generate_unique_id(self, resource_name: str) -> str: self.project.project_name, resource_name) - def load_file(self, path: FilePath) -> SourceFile: + def load_file( + self, + path: FilePath, + *, + set_contents: bool = True, + ) -> SourceFile: file_contents = load_file_contents(path.absolute_path, strip=False) checksum = FileHash.from_contents(file_contents) source_file = SourceFile(path=path, checksum=checksum) - source_file.contents = file_contents.strip() + if set_contents: + source_file.contents = file_contents.strip() + else: + source_file.contents = '' return source_file @@ -239,6 +248,7 @@ def _create_parsetime_node( 'raw_sql': 
block.contents, 'unique_id': self.generate_unique_id(name), 'config': self.config_dict(config), + 'checksum': block.file.checksum.to_dict(), } dct.update(kwargs) try: diff --git a/core/dbt/parser/hooks.py b/core/dbt/parser/hooks.py index b7cb39edd9f..bdb3719e248 100644 --- a/core/dbt/parser/hooks.py +++ b/core/dbt/parser/hooks.py @@ -2,7 +2,7 @@ from typing import Iterable, Iterator, Union, List, Tuple from dbt.context.context_config import ContextConfigType -from dbt.contracts.graph.manifest import FilePath +from dbt.contracts.files import FilePath from dbt.contracts.graph.parsed import ParsedHookNode from dbt.exceptions import InternalException from dbt.node_types import NodeType, RunHookType diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index a580f6dd3d0..4e711c3e87d 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -19,8 +19,9 @@ from dbt.clients.system import make_directory from dbt.config import Project, RuntimeConfig from dbt.context.docs import generate_runtime_docs +from dbt.contracts.files import FilePath, FileHash from dbt.contracts.graph.compiled import NonSourceNode -from dbt.contracts.graph.manifest import Manifest, FilePath, FileHash, Disabled +from dbt.contracts.graph.manifest import Manifest, Disabled from dbt.contracts.graph.parsed import ( ParsedSourceDefinition, ParsedNode, ParsedMacro, ColumnInfo, ) diff --git a/core/dbt/parser/results.py b/core/dbt/parser/results.py index 6e55c601b14..33d2cdc30d1 100644 --- a/core/dbt/parser/results.py +++ b/core/dbt/parser/results.py @@ -3,9 +3,7 @@ from hologram import JsonSchemaMixin -from dbt.contracts.graph.manifest import ( - SourceFile, RemoteFile, FileHash, MacroKey, SourceKey -) +from dbt.contracts.files import RemoteFile, FileHash, SourceFile from dbt.contracts.graph.compiled import CompileResultNode from dbt.contracts.graph.parsed import ( HasUniqueID, @@ -24,7 +22,7 @@ UnpatchedSourceDefinition, ) from dbt.contracts.graph.unparsed import 
SourcePatch -from dbt.contracts.util import Writable, Replaceable +from dbt.contracts.util import Writable, Replaceable, MacroKey, SourceKey from dbt.exceptions import ( raise_duplicate_resource_name, raise_duplicate_patch_name, raise_duplicate_macro_patch_name, CompilationException, InternalException, diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 6c79034831a..60d9d45f93b 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -18,6 +18,7 @@ ) from dbt.context.configured import generate_schema_yml from dbt.context.target import generate_target_context +from dbt.contracts.files import FileHash from dbt.contracts.graph.manifest import SourceFile from dbt.contracts.graph.model_config import SourceConfig from dbt.contracts.graph.parsed import ( @@ -321,6 +322,7 @@ def create_test_node( 'config': self.config_dict(config), 'test_metadata': test_metadata, 'column_name': column_name, + 'checksum': FileHash.empty().to_dict(), } try: return self.parse_from_dict(dct) diff --git a/core/dbt/parser/search.py b/core/dbt/parser/search.py index db41626cc6c..0442f045cc9 100644 --- a/core/dbt/parser/search.py +++ b/core/dbt/parser/search.py @@ -7,7 +7,7 @@ from dbt.clients.jinja import extract_toplevel_blocks, BlockTag from dbt.clients.system import find_matching from dbt.config import Project -from dbt.contracts.graph.manifest import SourceFile, FilePath +from dbt.contracts.files import SourceFile, FilePath from dbt.exceptions import CompilationException, InternalException diff --git a/core/dbt/parser/seeds.py b/core/dbt/parser/seeds.py index 041922cf61f..96cd49646f8 100644 --- a/core/dbt/parser/seeds.py +++ b/core/dbt/parser/seeds.py @@ -1,5 +1,5 @@ from dbt.context.context_config import ContextConfigType -from dbt.contracts.graph.manifest import SourceFile, FilePath +from dbt.contracts.files import SourceFile, FilePath from dbt.contracts.graph.parsed import ParsedSeedNode from dbt.node_types import NodeType from dbt.parser.base 
import SimpleSQLParser @@ -29,4 +29,9 @@ def render_with_context( """Seeds don't need to do any rendering.""" def load_file(self, match: FilePath) -> SourceFile: - return SourceFile.seed(match) + if match.seed_too_large(): + # We don't want to calculate a hash of this file. Use the path. + return SourceFile.big_seed(match) + else: + # We want to calculate a hash, but we don't need the contents + return super().load_file(match, set_contents=False) diff --git a/core/dbt/task/compile.py b/core/dbt/task/compile.py index 13a99b4fabd..e7476a5ebbd 100644 --- a/core/dbt/task/compile.py +++ b/core/dbt/task/compile.py @@ -42,6 +42,7 @@ def get_node_selector(self) -> ResourceTypeSelector: return ResourceTypeSelector( graph=self.graph, manifest=self.manifest, + previous_state=self.previous_state, resource_types=NodeType.executable(), ) diff --git a/core/dbt/task/freshness.py b/core/dbt/task/freshness.py index bb942ffdd3c..9637f215208 100644 --- a/core/dbt/task/freshness.py +++ b/core/dbt/task/freshness.py @@ -140,6 +140,7 @@ def get_node_selector(self): return FreshnessSelector( graph=self.graph, manifest=self.manifest, + previous_state=self.previous_state, ) def get_runner_type(self): diff --git a/core/dbt/task/list.py b/core/dbt/task/list.py index d4a39acd094..229927751dd 100644 --- a/core/dbt/task/list.py +++ b/core/dbt/task/list.py @@ -165,11 +165,13 @@ def get_node_selector(self): return TestSelector( graph=self.graph, manifest=self.manifest, + previous_state=self.previous_state, ) else: return ResourceTypeSelector( graph=self.graph, manifest=self.manifest, + previous_state=self.previous_state, resource_types=self.resource_types, ) diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py index 1e2fb304761..917d38dc90b 100644 --- a/core/dbt/task/run.py +++ b/core/dbt/task/run.py @@ -1,7 +1,6 @@ import functools import time -from pathlib import Path -from typing import List, Dict, Any, Iterable, Set, Tuple, Optional +from typing import List, Dict, Any, Iterable, Set, Tuple, 
Optional, AbstractSet from .compile import CompileRunner, CompileTask @@ -247,32 +246,6 @@ def __init__(self, args, config): super().__init__(args, config) self.ran_hooks = [] self._total_executed = 0 - self.deferred_manifest: Optional[WritableManifest] = None - - def _get_state_path(self) -> Path: - if self.args.state is not None: - return self.args.state - else: - raise RuntimeException( - 'Received a --defer argument, but no value was provided ' - 'to --state' - ) - - def _get_deferred_manifest(self) -> Optional[WritableManifest]: - if not self.args.defer: - return None - - path = self._get_state_path() - - if not path.is_absolute(): - path = Path(self.config.project_root) / path - if path.exists() and not path.is_file(): - path = path / 'manifest.json' - if not path.exists(): - raise RuntimeException( - f'Could not find --state path: "{path}"' - ) - return WritableManifest.read(str(path)) def index_offset(self, value: int) -> int: return self._total_executed + value @@ -383,9 +356,26 @@ def print_results_line(self, results, execution_time): "Finished running {stat_line}{execution}." 
.format(stat_line=stat_line, execution=execution)) - def defer_to_manifest(self, selected_uids): - self.deferred_manifest = self._get_deferred_manifest() - if self.deferred_manifest is None: + def _get_deferred_manifest(self) -> Optional[WritableManifest]: + if not self.args.defer: + return None + + state = self.previous_state + if state is None: + raise RuntimeException( + 'Received a --defer argument, but no value was provided ' + 'to --state' + ) + + if state.manifest is None: + raise RuntimeException( + f'Could not find manifest in --state path: "{self.args.state}"' + ) + return state.manifest + + def defer_to_manifest(self, selected_uids: AbstractSet[str]): + deferred_manifest = self._get_deferred_manifest() + if deferred_manifest is None: return if self.manifest is None: raise InternalException( @@ -393,13 +383,13 @@ def defer_to_manifest(self, selected_uids): 'manifest to defer from!' ) self.manifest.merge_from_artifact( - other=self.deferred_manifest, + other=deferred_manifest, selected=selected_uids, ) # TODO: is it wrong to write the manifest here? I think it's right... 
self.write_manifest() - def before_run(self, adapter, selected_uids): + def before_run(self, adapter, selected_uids: AbstractSet[str]): self.defer_to_manifest(selected_uids) with adapter.connection_named('master'): self.create_schemas(adapter, selected_uids) @@ -436,6 +426,7 @@ def get_node_selector(self) -> ResourceTypeSelector: return ResourceTypeSelector( graph=self.graph, manifest=self.manifest, + previous_state=self.previous_state, resource_types=[NodeType.Model], ) diff --git a/core/dbt/task/runnable.py b/core/dbt/task/runnable.py index 401bb5ec15a..90f1e14bd98 100644 --- a/core/dbt/task/runnable.py +++ b/core/dbt/task/runnable.py @@ -4,7 +4,7 @@ from concurrent.futures import as_completed from datetime import datetime from multiprocessing.dummy import Pool as ThreadPool -from typing import Optional, Dict, List, Set, Tuple, Iterable +from typing import Optional, Dict, List, Set, Tuple, Iterable, AbstractSet from .printer import ( print_run_result_error, @@ -32,6 +32,7 @@ from dbt.contracts.graph.manifest import Manifest from dbt.contracts.graph.parsed import ParsedSourceDefinition from dbt.contracts.results import ExecutionResult +from dbt.contracts.state import PreviousState from dbt.exceptions import ( InternalException, NotImplementedException, @@ -88,6 +89,9 @@ def __init__(self, args, config): self.node_results = [] self._skipped_children = {} self._raise_next_tick = None + self.previous_state: Optional[PreviousState] = None + if self.args.state is not None: + self.previous_state = PreviousState(self.args.state) def index_offset(self, value: int) -> int: return value @@ -356,7 +360,7 @@ def populate_adapter_cache(self, adapter): def before_hooks(self, adapter): pass - def before_run(self, adapter, selected_uids): + def before_run(self, adapter, selected_uids: AbstractSet[str]): with adapter.connection_named('master'): self.populate_adapter_cache(adapter) @@ -366,7 +370,7 @@ def after_run(self, adapter, results): def after_hooks(self, adapter, results, 
elapsed): pass - def execute_with_hooks(self, selected_uids): + def execute_with_hooks(self, selected_uids: AbstractSet[str]): adapter = get_adapter(self.config) try: self.before_hooks(adapter) diff --git a/core/dbt/task/seed.py b/core/dbt/task/seed.py index bcc10c20977..47a89d1a5db 100644 --- a/core/dbt/task/seed.py +++ b/core/dbt/task/seed.py @@ -48,6 +48,7 @@ def get_node_selector(self): return ResourceTypeSelector( graph=self.graph, manifest=self.manifest, + previous_state=self.previous_state, resource_types=[NodeType.Seed], ) diff --git a/core/dbt/task/snapshot.py b/core/dbt/task/snapshot.py index 5065211e531..edc5ba9b277 100644 --- a/core/dbt/task/snapshot.py +++ b/core/dbt/task/snapshot.py @@ -30,6 +30,7 @@ def get_node_selector(self): return ResourceTypeSelector( graph=self.graph, manifest=self.manifest, + previous_state=self.previous_state, resource_types=[NodeType.Snapshot], ) diff --git a/core/dbt/task/test.py b/core/dbt/task/test.py index 170250b260a..74bd101abc9 100644 --- a/core/dbt/task/test.py +++ b/core/dbt/task/test.py @@ -107,10 +107,11 @@ def after_execute(self, result): class TestSelector(ResourceTypeSelector): - def __init__(self, graph, manifest): + def __init__(self, graph, manifest, previous_state): super().__init__( graph=graph, manifest=manifest, + previous_state=previous_state, resource_types=[NodeType.Test], ) @@ -153,6 +154,7 @@ def get_node_selector(self) -> TestSelector: return TestSelector( graph=self.graph, manifest=self.manifest, + previous_state=self.previous_state, ) def get_runner_type(self): diff --git a/core/setup.py b/core/setup.py index cfc50bf6864..64c0a11a83d 100644 --- a/core/setup.py +++ b/core/setup.py @@ -64,7 +64,7 @@ def read(fname): 'json-rpc>=1.12,<2', 'werkzeug>=0.15,<0.17', 'dataclasses==0.6;python_version<"3.7"', - 'hologram==0.0.8', + 'hologram==0.0.10', 'logbook>=1.5,<1.6', 'typing-extensions>=3.7.4,<3.8', # the following are all to match snowflake-connector-python diff --git 
a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 82e2fc8a91d..cabfdfd10f1 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -912,6 +912,7 @@ def expected_seeded_manifest(self, model_database=None): second_model_sql_path = os.path.join(models_path, 'second_model.sql') model_schema_yml_path = os.path.join(models_path, 'schema.yml') seed_schema_yml_path = os.path.join(self.dir('seed'), 'schema.yml') + seed_path = self.dir(os.path.join('seed', 'seed.csv')) my_schema_name = self.unique_schema() @@ -1013,6 +1014,7 @@ def expected_seeded_manifest(self, model_database=None): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': ANY, + 'checksum': self._checksum_file(model_sql_path), }, 'model.test.second_model': { 'build_path': Normalized('target/compiled/test/models/second_model.sql'), @@ -1080,7 +1082,9 @@ def expected_seeded_manifest(self, model_database=None): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': ANY, + 'checksum': self._checksum_file(second_model_sql_path), }, + 'seed.test.seed': { 'build_path': None, 'compiled': True, @@ -1105,8 +1109,7 @@ def expected_seeded_manifest(self, model_database=None): 'resource_type': 'seed', 'raw_sql': '', 'package_name': 'test', - 'original_file_path': self.dir(os.path.join('seed', - 'seed.csv')), + 'original_file_path': seed_path, 'refs': [], 'sources': [], 'depends_on': {'nodes': [], 'macros': []}, @@ -1162,6 +1165,7 @@ def expected_seeded_manifest(self, model_database=None): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': '', + 'checksum': self._checksum_file(seed_path), }, 'test.test.not_null_model_id': { 'alias': 'not_null_model_id', @@ -1217,6 +1221,7 @@ def expected_seeded_manifest(self, model_database=None): 'model': "{{ ref('model') }}", }, }, + 'checksum': {'name': 'none', 'checksum': ''}, }, 
'test.test.test_nothing_model_': { 'alias': 'test_nothing_model_', @@ -1271,6 +1276,7 @@ def expected_seeded_manifest(self, model_database=None): 'model': "{{ ref('model') }}", }, }, + 'checksum': {'name': 'none', 'checksum': ''}, }, 'test.test.unique_model_id': { 'alias': 'unique_model_id', @@ -1326,6 +1332,7 @@ def expected_seeded_manifest(self, model_database=None): 'model': "{{ ref('model') }}", }, }, + 'checksum': {'name': 'none', 'checksum': ''}, }, }, 'sources': {}, @@ -1368,6 +1375,10 @@ def expected_postgres_references_manifest(self, model_database=None): model_database = self.default_database my_schema_name = self.unique_schema() docs_path = self.dir('ref_models/docs.md') + ephemeral_copy_path = self.dir('ref_models/ephemeral_copy.sql') + ephemeral_summary_path = self.dir('ref_models/ephemeral_summary.sql') + view_summary_path = self.dir('ref_models/view_summary.sql') + seed_path = self.dir('seed/seed.csv') return { 'nodes': { @@ -1397,7 +1408,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'docs': {'show': True}, 'fqn': ['test', 'ephemeral_copy'], 'name': 'ephemeral_copy', - 'original_file_path': self.dir('ref_models/ephemeral_copy.sql'), + 'original_file_path': ephemeral_copy_path, 'package_name': 'test', 'patch_path': None, 'path': 'ephemeral_copy.sql', @@ -1418,6 +1429,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': ANY, + 'checksum': self._checksum_file(ephemeral_copy_path), }, 'model.test.ephemeral_summary': { 'alias': 'ephemeral_summary', @@ -1460,7 +1472,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'docs': {'show': True}, 'fqn': ['test', 'ephemeral_summary'], 'name': 'ephemeral_summary', - 'original_file_path': self.dir('ref_models/ephemeral_summary.sql'), + 'original_file_path': ephemeral_summary_path, 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 
'ephemeral_summary.sql', @@ -1483,6 +1495,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'extra_ctes_injected': True, 'extra_ctes': [ANY], 'injected_sql': ANY, + 'checksum': self._checksum_file(ephemeral_summary_path), }, 'model.test.view_summary': { 'alias': 'view_summary', @@ -1525,7 +1538,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'docs': {'show': True}, 'fqn': ['test', 'view_summary'], 'name': 'view_summary', - 'original_file_path': self.dir('ref_models/view_summary.sql'), + 'original_file_path': view_summary_path, 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 'view_summary.sql', @@ -1547,6 +1560,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': ANY, + 'checksum': self._checksum_file(view_summary_path), }, 'seed.test.seed': { 'alias': 'seed', @@ -1608,7 +1622,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'docs': {'show': True}, 'fqn': ['test', 'seed'], 'name': 'seed', - 'original_file_path': self.dir('seed/seed.csv'), + 'original_file_path': seed_path, 'package_name': 'test', 'patch_path': self.dir('seed/schema.yml'), 'path': 'seed.csv', @@ -1626,6 +1640,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': '', + 'checksum': self._checksum_file(seed_path), }, }, 'sources': { @@ -1816,7 +1831,9 @@ def expected_bigquery_complex_manifest(self): nested_table_sql_path = self.dir('bq_models/nested_table.sql') clustered_sql_path = self.dir('bq_models/clustered.sql') multi_clustered_sql_path = self.dir('bq_models/multi_clustered.sql') + seed_path = self.dir('seed/seed.csv') my_schema_name = self.unique_schema() + return { 'nodes': { 'model.test.clustered': { @@ -1898,6 +1915,7 @@ def expected_bigquery_complex_manifest(self): 'extra_ctes_injected': True, 'extra_ctes': [], 
'injected_sql': ANY, + 'checksum': self._checksum_file(clustered_sql_path), }, 'model.test.multi_clustered': { 'alias': 'multi_clustered', @@ -1978,6 +1996,7 @@ def expected_bigquery_complex_manifest(self): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': ANY, + 'checksum': self._checksum_file(multi_clustered_sql_path), }, 'model.test.nested_view': { 'alias': 'nested_view', @@ -2059,6 +2078,7 @@ def expected_bigquery_complex_manifest(self): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': ANY, + 'checksum': self._checksum_file(nested_view_sql_path), }, 'model.test.nested_table': { 'alias': 'nested_table', @@ -2104,6 +2124,7 @@ def expected_bigquery_complex_manifest(self): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': ANY, + 'checksum': self._checksum_file(nested_table_sql_path), }, 'seed.test.seed': { 'build_path': None, @@ -2114,7 +2135,7 @@ def expected_bigquery_complex_manifest(self): 'resource_type': 'seed', 'raw_sql': '', 'package_name': 'test', - 'original_file_path': self.dir('seed/seed.csv'), + 'original_file_path': seed_path, 'refs': [], 'sources': [], 'depends_on': { @@ -2186,6 +2207,7 @@ def expected_bigquery_complex_manifest(self): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': '', + 'checksum': self._checksum_file(seed_path), }, }, 'sources': {}, @@ -2245,6 +2267,7 @@ def _absolute_path_to(self, searched_path: str, relative_path: str): def expected_redshift_incremental_view_manifest(self): model_sql_path = self.dir('rs_models/model.sql') my_schema_name = self.unique_schema() + seed_path = self.dir('seed/seed.csv') return { 'nodes': { @@ -2329,6 +2352,7 @@ def expected_redshift_incremental_view_manifest(self): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': ANY, + 'checksum': self._checksum_file(model_sql_path), }, 'seed.test.seed': { 'build_path': None, @@ -2339,7 +2363,7 @@ def expected_redshift_incremental_view_manifest(self): 'resource_type': 'seed', 'raw_sql': '', 
'package_name': 'test', - 'original_file_path': self.dir('seed/seed.csv'), + 'original_file_path': seed_path, 'refs': [], 'sources': [], 'depends_on': { @@ -2411,6 +2435,7 @@ def expected_redshift_incremental_view_manifest(self): 'extra_ctes_injected': True, 'extra_ctes': [], 'injected_sql': ANY, + 'checksum': self._checksum_file(seed_path), }, }, 'sources': {}, @@ -2519,6 +2544,7 @@ def expected_run_results(self, quote_schema=True, quote_model=False, compiled_sql = '\n\nselect * from {}.{}.{}'.format( compiled_database, compiled_schema, compiled_seed ) + seed_path = self.dir('seed/seed.csv') return [ { @@ -2531,6 +2557,7 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'build_path': Normalized( 'target/compiled/test/models/model.sql' ), + 'checksum': self._checksum_file(model_sql_path), 'columns': { 'id': { 'description': 'The user ID number', @@ -2613,6 +2640,7 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'build_path': Normalized( 'target/compiled/test/models/second_model.sql' ), + 'checksum': self._checksum_file(second_model_sql_path), 'columns': { 'id': { 'description': 'The user ID number', @@ -2693,6 +2721,7 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'node': { 'alias': 'seed', 'build_path': None, + 'checksum': self._checksum_file(seed_path), 'columns': { 'id': { 'description': 'The user ID number', @@ -2756,7 +2785,7 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'injected_sql': '', 'meta': {}, 'name': 'seed', - 'original_file_path': self.dir('seed/seed.csv'), + 'original_file_path': seed_path, 'package_name': 'test', 'patch_path': seed_schema_yml_path, 'path': 'seed.csv', @@ -2782,6 +2811,7 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'node': { 'alias': 'not_null_model_id', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/not_null_model_id.sql'), + 'checksum': {'name': 'none', 'checksum': ''}, 
'column_name': 'id', 'columns': {}, 'compiled': True, @@ -2847,6 +2877,7 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'node': { 'alias': 'test_nothing_model_', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/test_nothing_model_.sql'), + 'checksum': {'name': 'none', 'checksum': ''}, 'column_name': None, 'columns': {}, 'compiled': True, @@ -2911,6 +2942,7 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'node': { 'alias': 'unique_model_id', 'build_path': Normalized('target/compiled/test/models/schema.yml/schema_test/unique_model_id.sql'), + 'checksum': {'name': 'none', 'checksum': ''}, 'column_name': 'id', 'columns': {}, 'compiled': True, @@ -2993,6 +3025,10 @@ def expected_postgres_references_run_results(self): 'order by ct asc' ).format(self.default_database, my_schema_name) + ephemeral_summary_path = self.dir('ref_models/ephemeral_summary.sql') + view_summary_path = self.dir('ref_models/view_summary.sql') + seed_path = self.dir('seed/seed.csv') + return [ { 'error': None, @@ -3004,6 +3040,7 @@ def expected_postgres_references_run_results(self): 'build_path': Normalized( 'target/compiled/test/ref_models/ephemeral_summary.sql' ), + 'checksum': self._checksum_file(ephemeral_summary_path), 'columns': { 'first_name': { 'description': 'The first name being summarized', @@ -3052,7 +3089,7 @@ def expected_postgres_references_run_results(self): 'injected_sql': ephemeral_injected_sql, 'meta': {}, 'name': 'ephemeral_summary', - 'original_file_path': self.dir('ref_models/ephemeral_summary.sql'), + 'original_file_path': ephemeral_summary_path, 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 'ephemeral_summary.sql', @@ -3086,6 +3123,7 @@ def expected_postgres_references_run_results(self): 'target/compiled/test/ref_models/view_summary.sql' ), 'alias': 'view_summary', + 'checksum': self._checksum_file(view_summary_path), 'columns': { 'first_name': { 'description': 'The 
first name being summarized', @@ -3133,7 +3171,7 @@ def expected_postgres_references_run_results(self): 'injected_sql': view_compiled_sql, 'meta': {}, 'name': 'view_summary', - 'original_file_path': self.dir('ref_models/view_summary.sql'), + 'original_file_path': view_summary_path, 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 'view_summary.sql', @@ -3163,6 +3201,7 @@ def expected_postgres_references_run_results(self): 'node': { 'alias': 'seed', 'build_path': None, + 'checksum': self._checksum_file(seed_path), 'columns': { 'id': { 'name': 'id', @@ -3226,7 +3265,7 @@ def expected_postgres_references_run_results(self): 'injected_sql': '', 'meta': {}, 'name': 'seed', - 'original_file_path': self.dir('seed/seed.csv'), + 'original_file_path': seed_path, 'package_name': 'test', 'patch_path': self.dir('seed/schema.yml'), 'path': 'seed.csv', diff --git a/test/integration/062_defer_state_test/macros/macros.sql b/test/integration/062_defer_state_test/macros/macros.sql new file mode 100644 index 00000000000..79519c1b60b --- /dev/null +++ b/test/integration/062_defer_state_test/macros/macros.sql @@ -0,0 +1,3 @@ +{% macro my_macro() %} + {% do log('in a macro' ) %} +{% endmacro %} diff --git a/test/integration/062_defer_state_test/test_modified_state.py b/test/integration/062_defer_state_test/test_modified_state.py new file mode 100644 index 00000000000..535a9d11eec --- /dev/null +++ b/test/integration/062_defer_state_test/test_modified_state.py @@ -0,0 +1,185 @@ +from test.integration.base import DBTIntegrationTest, use_profile +import os +import random +import shutil +import string + +import pytest + +from dbt.exceptions import CompilationException + + +class TestModifiedState(DBTIntegrationTest): + @property + def schema(self): + return "modified_state_062" + + @property + def models(self): + return "models" + + @property + def project_config(self): + return { + 'config-version': 2, + 'macro-paths': ['macros'], + 'seeds': { + 'test': { + 
'quote_columns': True, + } + } + } + + def _symlink_test_folders(self): + # dbt's normal symlink behavior breaks this test. Copy the files + # so we can freely modify them. + for entry in os.listdir(self.test_original_source_path): + src = os.path.join(self.test_original_source_path, entry) + tst = os.path.join(self.test_root_dir, entry) + if entry in {'models', 'data', 'macros'}: + shutil.copytree(src, tst) + elif os.path.isdir(entry) or entry.endswith('.sql'): + os.symlink(src, tst) + + def copy_state(self): + assert not os.path.exists('state') + os.makedirs('state') + shutil.copyfile('target/manifest.json', 'state/manifest.json') + + def setUp(self): + super().setUp() + self.run_dbt(['seed']) + self.run_dbt(['run']) + self.copy_state() + + @use_profile('postgres') + def test_postgres_changed_seed_contents_state(self): + results = self.run_dbt(['ls', '--resource-type', 'seed', '--select', 'state:modified', '--state', './state'], strict=False, expect_pass=False) + assert len(results) == 0 + with open('data/seed.csv') as fp: + fp.readline() + newline = fp.newlines + with open('data/seed.csv', 'a') as fp: + fp.write(f'3,carl{newline}') + + results = self.run_dbt(['ls', '--resource-type', 'seed', '--select', 'state:modified', '--state', './state']) + assert len(results) == 1 + assert results[0] == 'test.seed' + + results = self.run_dbt(['ls', '--select', 'state:modified', '--state', './state']) + assert len(results) == 1 + assert results[0] == 'test.seed' + + results = self.run_dbt(['ls', '--select', 'state:modified+', '--state', './state']) + assert len(results) == 6 + assert set(results) == {'test.seed', 'test.table_model', 'test.view_model', 'test.ephemeral_model', 'test.schema_test.not_null_view_model_id', 'test.schema_test.unique_view_model_id'} + + shutil.rmtree('./state') + self.copy_state() + + with open('data/seed.csv', 'a') as fp: + # assume each line is ~2 bytes + len(name) + target_size = 1*1024*1024 + line_size = 64 + + num_lines = target_size // 
line_size + + maxlines = num_lines + 4 + + for idx in range(4, maxlines): + value = ''.join(random.choices(string.ascii_letters, k=62)) + fp.write(f'{idx},{value}{newline}') + + # now if we run again, we should get a warning + results = self.run_dbt(['ls', '--resource-type', 'seed', '--select', 'state:modified', '--state', './state'], strict=False) + assert len(results) == 1 + assert results[0] == 'test.seed' + + with pytest.raises(CompilationException) as exc: + self.run_dbt(['ls', '--resource-type', 'seed', '--select', 'state:modified', '--state', './state'], strict=True) + assert '>1MB' in str(exc.value) + + shutil.rmtree('./state') + self.copy_state() + + # once it's in path mode, we don't mark it as modified if it changes + with open('data/seed.csv', 'a') as fp: + fp.write(f'{random},test{newline}') + + results = self.run_dbt(['ls', '--resource-type', 'seed', '--select', 'state:modified', '--state', './state'], strict=False, expect_pass=False) + assert len(results) == 0 + + @use_profile('postgres') + def test_postgres_changed_seed_config(self): + results = self.run_dbt(['ls', '--resource-type', 'seed', '--select', 'state:modified', '--state', './state'], strict=False, expect_pass=False) + assert len(results) == 0 + + self.use_default_project({'seeds': {'test': {'quote_columns': False}}}) + + # quoting change -> seed changed + results = self.run_dbt(['ls', '--resource-type', 'seed', '--select', 'state:modified', '--state', './state']) + assert len(results) == 1 + assert results[0] == 'test.seed' + + @use_profile('postgres') + def test_postgres_changed_model_contents(self): + results = self.run_dbt(['run', '--models', 'state:modified', '--state', './state'], strict=False) + assert len(results) == 0 + + with open('models/table_model.sql') as fp: + fp.readline() + newline = fp.newlines + + with open('models/table_model.sql', 'w') as fp: + fp.write("{{ config(materialized='table') }}") + fp.write(newline) + fp.write("select * from {{ ref('seed') }}") + 
fp.write(newline) + + results = self.run_dbt(['run', '--models', 'state:modified', '--state', './state']) + assert len(results) == 1 + assert results[0].node.name == 'table_model' + + @use_profile('postgres') + def test_postgres_new_macro(self): + with open('macros/macros.sql') as fp: + fp.readline() + newline = fp.newlines + + new_macro = '{% macro my_other_macro() %}{% endmacro %}' + newline + + # add a new macro to a new file + with open('macros/second_macro.sql', 'w') as fp: + fp.write(new_macro) + + results, stdout = self.run_dbt_and_capture(['run', '--models', 'state:modified', '--state', './state'], strict=False) + assert len(results) == 0 + assert 'detected a change in macros' in stdout + + os.remove('macros/second_macro.sql') + # add a new macro to the existing file + with open('macros/macros.sql', 'a') as fp: + fp.write(new_macro) + + results, stdout = self.run_dbt_and_capture(['run', '--models', 'state:modified', '--state', './state'], strict=False) + assert len(results) == 0 + assert 'detected a change in macros' in stdout + + @use_profile('postgres') + def test_postgres_changed_macro_contents(self): + with open('macros/macros.sql') as fp: + fp.readline() + newline = fp.newlines + + # modify an existing macro + with open('macros/macros.sql', 'w') as fp: + fp.write("{% macro my_macro() %}") + fp.write(newline) + fp.write(" {% do log('in a macro', info=True) %}") + fp.write(newline) + fp.write('{% endmacro %}') + fp.write(newline) + + results, stdout = self.run_dbt_and_capture(['run', '--models', 'state:modified', '--state', './state'], strict=False) + assert len(results) == 0 + assert 'detected a change in macros' in stdout diff --git a/test/integration/base.py b/test/integration/base.py index c912133cf82..d9355b50d69 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -67,6 +67,8 @@ def __init__(self): self.exclude = None self.single_threaded = False self.selector_name = None + self.state = None + self.defer = None class TestArgs: 
diff --git a/test/unit/test_compiler.py b/test/unit/test_compiler.py index 94c51b0f47b..f7cec475158 100644 --- a/test/unit/test_compiler.py +++ b/test/unit/test_compiler.py @@ -3,6 +3,7 @@ import dbt.flags import dbt.compilation +from dbt.contracts.files import FileHash from dbt.contracts.graph.manifest import Manifest from dbt.contracts.graph.parsed import NodeConfig, DependsOn, ParsedModelNode from dbt.contracts.graph.compiled import CompiledModelNode, InjectedCTE @@ -80,7 +81,8 @@ def test__prepend_ctes__already_has_cte(self): injected_sql='', compiled_sql=( 'with cte as (select * from something_else) ' - 'select * from __dbt__CTE__ephemeral') + 'select * from __dbt__CTE__ephemeral'), + checksum=FileHash.from_contents(''), ), 'model.root.ephemeral': CompiledModelNode( name='ephemeral', @@ -104,7 +106,8 @@ def test__prepend_ctes__already_has_cte(self): compiled_sql='select * from source_table', extra_ctes_injected=False, extra_ctes=[], - injected_sql='' + injected_sql='', + checksum=FileHash.from_contents(''), ), }, sources={}, @@ -163,7 +166,8 @@ def test__prepend_ctes__no_ctes(self): extra_ctes=[], injected_sql='', compiled_sql=('with cte as (select * from something_else) ' - 'select * from source_table') + 'select * from source_table'), + checksum=FileHash.from_contents(''), ), 'model.root.view_no_cte': CompiledModelNode( name='view_no_cte', @@ -187,7 +191,8 @@ def test__prepend_ctes__no_ctes(self): extra_ctes_injected=False, extra_ctes=[], injected_sql='', - compiled_sql=('select * from source_table') + compiled_sql=('select * from source_table'), + checksum=FileHash.from_contents(''), ), }, sources={}, @@ -254,7 +259,8 @@ def test__prepend_ctes(self): extra_ctes_injected=False, extra_ctes=[InjectedCTE(id='model.root.ephemeral', sql='select * from source_table')], injected_sql='', - compiled_sql='select * from __dbt__CTE__ephemeral' + compiled_sql='select * from __dbt__CTE__ephemeral', + checksum=FileHash.from_contents(''), ), 'model.root.ephemeral': 
CompiledModelNode( name='ephemeral', @@ -278,7 +284,8 @@ def test__prepend_ctes(self): extra_ctes_injected=False, extra_ctes=[], injected_sql='', - compiled_sql='select * from source_table' + compiled_sql='select * from source_table', + checksum=FileHash.from_contents(''), ), }, sources={}, @@ -328,6 +335,7 @@ def test__prepend_ctes__cte_not_compiled(self): path='ephemeral.sql', original_file_path='ephemeral.sql', raw_sql='select * from source_table', + checksum=FileHash.from_contents(''), ) compiled_ephemeral = CompiledModelNode( name='ephemeral', @@ -352,6 +360,7 @@ def test__prepend_ctes__cte_not_compiled(self): injected_sql='select * from source_table', extra_ctes_injected=True, extra_ctes=[], + checksum=FileHash.from_contents(''), ) manifest = Manifest( macros={}, @@ -378,7 +387,8 @@ def test__prepend_ctes__cte_not_compiled(self): extra_ctes_injected=False, extra_ctes=[InjectedCTE(id='model.root.ephemeral', sql='select * from source_table')], injected_sql='', - compiled_sql='select * from __dbt__CTE__ephemeral' + compiled_sql='select * from __dbt__CTE__ephemeral', + checksum=FileHash.from_contents(''), ), 'model.root.ephemeral': parsed_ephemeral, }, @@ -442,7 +452,9 @@ def test__prepend_ctes__multiple_levels(self): extra_ctes_injected=False, extra_ctes=[InjectedCTE(id='model.root.ephemeral', sql=None)], injected_sql=None, - compiled_sql='select * from __dbt__CTE__ephemeral' + compiled_sql='select * from __dbt__CTE__ephemeral', + checksum=FileHash.from_contents(''), + ), 'model.root.ephemeral': ParsedModelNode( name='ephemeral', @@ -462,6 +474,7 @@ def test__prepend_ctes__multiple_levels(self): path='ephemeral.sql', original_file_path='ephemeral.sql', raw_sql='select * from {{ref("ephemeral_level_two")}}', + checksum=FileHash.from_contents(''), ), 'model.root.ephemeral_level_two': ParsedModelNode( name='ephemeral_level_two', @@ -481,6 +494,7 @@ def test__prepend_ctes__multiple_levels(self): path='ephemeral_level_two.sql', 
original_file_path='ephemeral_level_two.sql', raw_sql='select * from source_table', + checksum=FileHash.from_contents(''), ), }, sources={}, diff --git a/test/unit/test_context.py b/test/unit/test_context.py index aaa83d7159a..0e545cdf3da 100644 --- a/test/unit/test_context.py +++ b/test/unit/test_context.py @@ -16,6 +16,7 @@ ) from dbt.config.project import V1VarProvider from dbt.context import base, target, configured, providers, docs, manifest, macros +from dbt.contracts.files import FileHash from dbt.node_types import NodeType import dbt.exceptions from .utils import profile_from_dict, config_from_parts_or_dicts, inject_adapter, clear_plugin @@ -53,7 +54,8 @@ def setUp(self): path='model_one.sql', raw_sql='', description='', - columns={} + columns={}, + checksum=FileHash.from_contents(''), ) self.context = mock.MagicMock() self.provider = V1VarProvider({}, {}, {}) diff --git a/test/unit/test_contracts_graph_compiled.py b/test/unit/test_contracts_graph_compiled.py index b40aa563ee7..43a6f82c455 100644 --- a/test/unit/test_contracts_graph_compiled.py +++ b/test/unit/test_contracts_graph_compiled.py @@ -1,384 +1,557 @@ import pickle +import pytest +from dbt.contracts.files import FileHash from dbt.contracts.graph.compiled import ( CompiledModelNode, InjectedCTE, CompiledSchemaTestNode ) from dbt.contracts.graph.parsed import ( - DependsOn, NodeConfig, TestConfig, TestMetadata + DependsOn, NodeConfig, TestConfig, TestMetadata, ColumnInfo ) from dbt.node_types import NodeType -from .utils import ContractTestCase +from .utils import ( + assert_from_dict, + assert_symmetric, + assert_fails_validation, +) -class TestCompiledModelNode(ContractTestCase): - ContractType = CompiledModelNode +@pytest.fixture +def basic_uncompiled_model(): + return CompiledModelNode( + package_name='test', + root_path='/root/', + path='/root/models/foo.sql', + original_file_path='models/foo.sql', + raw_sql='select * from {{ ref("other") }}', + name='foo', + resource_type=NodeType.Model, + 
unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + deferred=False, + description='', + database='test_db', + schema='test_schema', + alias='bar', + tags=[], + config=NodeConfig(), + meta={}, + compiled=False, + extra_ctes=[], + extra_ctes_injected=False, + checksum=FileHash.from_contents(''), + ) - def _minimum(self): - return { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Model), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'database': 'test_db', - 'schema': 'test_schema', - 'alias': 'bar', - 'compiled': False, - } - - def test_basic_uncompiled(self): - node_dict = { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Model), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': []}, - 'database': 'test_db', - 'deferred': False, - 'description': '', - 'schema': 'test_schema', - 'alias': 'bar', + +@pytest.fixture +def basic_compiled_model(): + return CompiledModelNode( + package_name='test', + root_path='/root/', + path='/root/models/foo.sql', + original_file_path='models/foo.sql', + raw_sql='select * from {{ ref("other") }}', + name='foo', + resource_type=NodeType.Model, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + deferred=True, + description='', + database='test_db', + schema='test_schema', + alias='bar', + tags=[], + config=NodeConfig(), + meta={}, + compiled=True, + compiled_sql='select * from whatever', + extra_ctes=[InjectedCTE('whatever', 'select * from other')], + extra_ctes_injected=True, + 
injected_sql='with whatever as (select * from other) select * from whatever', + checksum=FileHash.from_contents(''), + ) + + +@pytest.fixture +def minimal_uncompiled_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Model), + 'path': '/root/models/foo.sql', + 'original_file_path': 'models/foo.sql', + 'package_name': 'test', + 'raw_sql': 'select * from {{ ref("other") }}', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'database': 'test_db', + 'schema': 'test_schema', + 'alias': 'bar', + 'compiled': False, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def basic_uncompiled_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Model), + 'path': '/root/models/foo.sql', + 'original_file_path': 'models/foo.sql', + 'package_name': 'test', + 'raw_sql': 'select * from {{ ref("other") }}', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': []}, + 'database': 'test_db', + 'deferred': False, + 'description': '', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': [], + 'config': { + 'column_types': {}, + 'enabled': True, + 'materialized': 'view', + 'persist_docs': {}, + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, 'tags': [], - 'config': { - 'column_types': {}, - 'enabled': True, - 'materialized': 'view', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, - 'tags': [], - 'vars': {}, - }, - 'docs': {'show': True}, - 'columns': {}, - 'meta': {}, - 'compiled': False, - 'extra_ctes': [], - 'extra_ctes_injected': False, - } - node = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Model, - 
unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - deferred=False, - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - config=NodeConfig(), - meta={}, - compiled=False, - extra_ctes=[], - extra_ctes_injected=False, - ) - self.assert_symmetric(node, node_dict) - self.assertFalse(node.empty) - self.assertTrue(node.is_refable) - self.assertFalse(node.is_ephemeral) - self.assertEqual(node.local_vars(), {}) - - minimum = self._minimum() - self.assert_from_dict(node, minimum) - pickle.loads(pickle.dumps(node)) - - def test_basic_compiled(self): - node_dict = { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Model), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from {{ ref("other") }}', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': []}, - 'database': 'test_db', - 'deferred': True, - 'description': '', - 'schema': 'test_schema', - 'alias': 'bar', + 'vars': {}, + }, + 'docs': {'show': True}, + 'columns': {}, + 'meta': {}, + 'compiled': False, + 'extra_ctes': [], + 'extra_ctes_injected': False, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def basic_compiled_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Model), + 'path': '/root/models/foo.sql', + 'original_file_path': 'models/foo.sql', + 'package_name': 'test', + 'raw_sql': 'select * from {{ ref("other") }}', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': []}, + 'database': 'test_db', + 'deferred': True, + 'description': '', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': [], + 'config': { + 
'column_types': {}, + 'enabled': True, + 'materialized': 'view', + 'persist_docs': {}, + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, 'tags': [], - 'config': { - 'column_types': {}, - 'enabled': True, - 'materialized': 'view', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, - 'tags': [], - 'vars': {}, - }, - 'docs': {'show': True}, - 'columns': {}, - 'meta': {}, - 'compiled': True, - 'compiled_sql': 'select * from whatever', - 'extra_ctes': [{'id': 'whatever', 'sql': 'select * from other'}], - 'extra_ctes_injected': True, - 'injected_sql': 'with whatever as (select * from other) select * from whatever', - } - node = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from {{ ref("other") }}', - name='foo', - resource_type=NodeType.Model, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - deferred=True, - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - config=NodeConfig(), - meta={}, - compiled=True, - compiled_sql='select * from whatever', - extra_ctes=[InjectedCTE('whatever', 'select * from other')], - extra_ctes_injected=True, - injected_sql='with whatever as (select * from other) select * from whatever', - ) - self.assert_symmetric(node, node_dict) - self.assertFalse(node.empty) - self.assertTrue(node.is_refable) - self.assertFalse(node.is_ephemeral) - self.assertEqual(node.local_vars(), {}) - - def test_invalid_extra_fields(self): - bad_extra = self._minimum() - bad_extra['notvalid'] = 'nope' - self.assert_fails_validation(bad_extra) - - def test_invalid_bad_type(self): - bad_type = self._minimum() - bad_type['resource_type'] = str(NodeType.Macro) - self.assert_fails_validation(bad_type) - - -class TestCompiledSchemaTestNode(ContractTestCase): - ContractType = CompiledSchemaTestNode - - def _minimum(self): - return { - 
'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Test), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'database': 'test_db', - 'schema': 'test_schema', - 'alias': 'bar', - 'test_metadata': { - 'name': 'foo', - 'kwargs': {}, - }, - 'compiled': False, - } - - def test_basic_uncompiled(self): - node_dict = { + 'vars': {}, + }, + 'docs': {'show': True}, + 'columns': {}, + 'meta': {}, + 'compiled': True, + 'compiled_sql': 'select * from whatever', + 'extra_ctes': [{'id': 'whatever', 'sql': 'select * from other'}], + 'extra_ctes_injected': True, + 'injected_sql': 'with whatever as (select * from other) select * from whatever', + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +def test_basic_uncompiled_model(minimal_uncompiled_dict, basic_uncompiled_dict, basic_uncompiled_model): + node_dict = basic_uncompiled_dict + node = basic_uncompiled_model + assert_symmetric(node, node_dict, CompiledModelNode) + assert node.empty is False + assert node.is_refable is True + assert node.is_ephemeral is False + assert node.local_vars() == {} + + assert_from_dict(node, minimal_uncompiled_dict, CompiledModelNode) + pickle.loads(pickle.dumps(node)) + + +def test_basic_compiled_model(basic_compiled_dict, basic_compiled_model): + node_dict = basic_compiled_dict + node = basic_compiled_model + assert_symmetric(node, node_dict, CompiledModelNode) + assert node.empty is False + assert node.is_refable is True + assert node.is_ephemeral is False + assert node.local_vars() == {} + + +def test_invalid_extra_fields_model(minimal_uncompiled_dict): + bad_extra = minimal_uncompiled_dict + bad_extra['notvalid'] = 'nope' + assert_fails_validation(bad_extra, CompiledModelNode) + + +def test_invalid_bad_type_model(minimal_uncompiled_dict): + bad_type 
= minimal_uncompiled_dict + bad_type['resource_type'] = str(NodeType.Macro) + assert_fails_validation(bad_type, CompiledModelNode) + + +unchanged_compiled_models = [ + lambda u: (u, u.replace(description='a description')), + lambda u: (u, u.replace(tags=['mytag'])), + lambda u: (u, u.replace(meta={'cool_key': 'cool value'})), + # alias configs are ignored, we only care about the final value + lambda u: (u, u.replace(config=u.config.replace(alias='nope'))), + lambda u: (u, u.replace(config=u.config.replace(database='nope'))), + lambda u: (u, u.replace(config=u.config.replace(schema='nope'))), + + # None -> False is a config change even though it's pretty much the same + lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': False})), u.replace(config=u.config.replace(persist_docs={'relation': False}))), + lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': False})), u.replace(config=u.config.replace(persist_docs={'columns': False}))), + # True -> True + lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(persist_docs={'relation': True}))), + lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(config=u.config.replace(persist_docs={'columns': True}))), + + # only columns docs enabled, but description changed + lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(config=u.config.replace(persist_docs={'columns': True}), description='a model description')), + # only relation docs enabled, but columns changed + lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(persist_docs={'relation': True}), columns={'a': ColumnInfo(name='a', description='a column description')})) +] + + +changed_compiled_models = [ + lambda u: (u, None), + lambda u: (u, u.replace(raw_sql='select * from wherever')), + lambda u: (u, u.replace(database='other_db')), + lambda 
u: (u, u.replace(schema='other_schema')), + lambda u: (u, u.replace(alias='foo')), + lambda u: (u, u.replace(fqn=['test', 'models', 'subdir', 'foo'], original_file_path='models/subdir/foo.sql', path='/root/models/subdir/foo.sql')), + lambda u: (u, u.replace(config=u.config.replace(full_refresh=True))), + lambda u: (u, u.replace(config=u.config.replace(post_hook=['select 1 as id']))), + lambda u: (u, u.replace(config=u.config.replace(pre_hook=['select 1 as id']))), + lambda u: (u, u.replace(config=u.config.replace(quoting={'database': True, 'schema': False, 'identifier': False}))), + # we changed persist_docs values + lambda u: (u, u.replace(config=u.config.replace(persist_docs={'relation': True}))), + lambda u: (u, u.replace(config=u.config.replace(persist_docs={'columns': True}))), + lambda u: (u, u.replace(config=u.config.replace(persist_docs={'columns': True, 'relation': True}))), + + # None -> False is a config change even though it's pretty much the same + lambda u: (u, u.replace(config=u.config.replace(persist_docs={'relation': False}))), + lambda u: (u, u.replace(config=u.config.replace(persist_docs={'columns': False}))), + # persist docs was true for the relation and we changed the model description + lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(persist_docs={'relation': True}), description='a model description')), + # persist docs was true for columns and we changed the model description + lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(config=u.config.replace(persist_docs={'columns': True}), columns={'a': ColumnInfo(name='a', description='a column description')})), +] + + +@pytest.mark.parametrize('func', unchanged_compiled_models) +def test_compare_unchanged_model(func, basic_uncompiled_model): + node, compare = func(basic_uncompiled_model) + assert node.same_contents(compare) + + +@pytest.mark.parametrize('func', changed_compiled_models) +def 
test_compare_changed_model(func, basic_uncompiled_model): + node, compare = func(basic_uncompiled_model) + assert not node.same_contents(compare) + + +@pytest.fixture +def minimal_schema_test_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Test), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from {{ ref("other") }}', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'database': 'test_db', + 'schema': 'test_schema', + 'alias': 'bar', + 'test_metadata': { 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Test), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': []}, - 'database': 'test_db', - 'description': '', - 'schema': 'test_schema', - 'alias': 'bar', + 'kwargs': {}, + }, + 'compiled': False, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def basic_uncompiled_schema_test_node(): + return CompiledSchemaTestNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from {{ ref("other") }}', + name='foo', + resource_type=NodeType.Test, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + deferred=False, + depends_on=DependsOn(), + description='', + database='test_db', + schema='test_schema', + alias='bar', + tags=[], + config=TestConfig(), + meta={}, + compiled=False, + extra_ctes=[], + extra_ctes_injected=False, + test_metadata=TestMetadata(namespace=None, name='foo', kwargs={}), + checksum=FileHash.from_contents(''), + ) + + +@pytest.fixture +def 
basic_compiled_schema_test_node(): + return CompiledSchemaTestNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from {{ ref("other") }}', + name='foo', + resource_type=NodeType.Test, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + deferred=False, + description='', + database='test_db', + schema='test_schema', + alias='bar', + tags=[], + config=TestConfig(severity='warn'), + meta={}, + compiled=True, + compiled_sql='select * from whatever', + extra_ctes=[InjectedCTE('whatever', 'select * from other')], + extra_ctes_injected=True, + injected_sql='with whatever as (select * from other) select * from whatever', + column_name='id', + test_metadata=TestMetadata(namespace=None, name='foo', kwargs={}), + checksum=FileHash.from_contents(''), + ) + + +@pytest.fixture +def basic_uncompiled_schema_test_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Test), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from {{ ref("other") }}', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': []}, + 'database': 'test_db', + 'description': '', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': [], + 'config': { + 'column_types': {}, + 'enabled': True, + 'materialized': 'view', + 'persist_docs': {}, + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, 'tags': [], - 'config': { - 'column_types': {}, - 'enabled': True, - 'materialized': 'view', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, - 'tags': [], - 'vars': {}, - 'severity': 'ERROR', - }, - 'deferred': False, - 'docs': {'show': True}, - 'columns': {}, - 'meta': {}, - 'compiled': False, - 'extra_ctes': [], - 'extra_ctes_injected': False, - 
'test_metadata': { - 'name': 'foo', - 'kwargs': {}, - }, - } - node = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Test, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - deferred=False, - depends_on=DependsOn(), - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - config=TestConfig(), - meta={}, - compiled=False, - extra_ctes=[], - extra_ctes_injected=False, - test_metadata=TestMetadata(namespace=None, name='foo', kwargs={}), - ) - self.assert_symmetric(node, node_dict) - self.assertFalse(node.empty) - self.assertFalse(node.is_refable) - self.assertFalse(node.is_ephemeral) - self.assertEqual(node.local_vars(), {}) - - minimum = self._minimum() - self.assert_from_dict(node, minimum) - pickle.loads(pickle.dumps(node)) - - def test_basic_compiled(self): - node_dict = { + 'vars': {}, + 'severity': 'ERROR', + }, + 'deferred': False, + 'docs': {'show': True}, + 'columns': {}, + 'meta': {}, + 'compiled': False, + 'extra_ctes': [], + 'extra_ctes_injected': False, + 'test_metadata': { 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Test), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from {{ ref("other") }}', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': []}, - 'deferred': False, - 'database': 'test_db', - 'description': '', - 'schema': 'test_schema', - 'alias': 'bar', + 'kwargs': {}, + }, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def basic_compiled_schema_test_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': 
str(NodeType.Test), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from {{ ref("other") }}', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': []}, + 'deferred': False, + 'database': 'test_db', + 'description': '', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': [], + 'config': { + 'column_types': {}, + 'enabled': True, + 'materialized': 'view', + 'persist_docs': {}, + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, 'tags': [], - 'config': { - 'column_types': {}, - 'enabled': True, - 'materialized': 'view', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, - 'tags': [], - 'vars': {}, - 'severity': 'warn', - }, - - 'docs': {'show': True}, - 'columns': {}, - 'meta': {}, - 'compiled': True, - 'compiled_sql': 'select * from whatever', - 'extra_ctes': [{'id': 'whatever', 'sql': 'select * from other'}], - 'extra_ctes_injected': True, - 'injected_sql': 'with whatever as (select * from other) select * from whatever', - 'column_name': 'id', - 'test_metadata': { - 'name': 'foo', - 'kwargs': {}, - }, - } - node = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from {{ ref("other") }}', - name='foo', - resource_type=NodeType.Test, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - deferred=False, - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - config=TestConfig(severity='warn'), - meta={}, - compiled=True, - compiled_sql='select * from whatever', - extra_ctes=[InjectedCTE('whatever', 'select * from other')], - extra_ctes_injected=True, - injected_sql='with whatever as (select * from other) select * from whatever', - column_name='id', - 
test_metadata=TestMetadata(namespace=None, name='foo', kwargs={}), - ) - self.assert_symmetric(node, node_dict) - self.assertFalse(node.empty) - self.assertFalse(node.is_refable) - self.assertFalse(node.is_ephemeral) - self.assertEqual(node.local_vars(), {}) - - def test_invalid_extra_fields(self): - bad_extra = self._minimum() - bad_extra['extra'] = 'extra value' - self.assert_fails_validation(bad_extra) - - def test_invalid_resource_type(self): - bad_type = self._minimum() - bad_type['resource_type'] = str(NodeType.Model) - self.assert_fails_validation(bad_type) + 'vars': {}, + 'severity': 'warn', + }, + + 'docs': {'show': True}, + 'columns': {}, + 'meta': {}, + 'compiled': True, + 'compiled_sql': 'select * from whatever', + 'extra_ctes': [{'id': 'whatever', 'sql': 'select * from other'}], + 'extra_ctes_injected': True, + 'injected_sql': 'with whatever as (select * from other) select * from whatever', + 'column_name': 'id', + 'test_metadata': { + 'name': 'foo', + 'kwargs': {}, + }, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +def test_basic_uncompiled_schema_test(basic_uncompiled_schema_test_node, basic_uncompiled_schema_test_dict, minimal_schema_test_dict): + node = basic_uncompiled_schema_test_node + node_dict = basic_uncompiled_schema_test_dict + minimum = minimal_schema_test_dict + + assert_symmetric(node, node_dict, CompiledSchemaTestNode) + assert node.empty is False + assert node.is_refable is False + assert node.is_ephemeral is False + assert node.local_vars() == {} + + assert_from_dict(node, minimum, CompiledSchemaTestNode) + + +def test_basic_compiled_schema_test(basic_compiled_schema_test_node, basic_compiled_schema_test_dict): + node = basic_compiled_schema_test_node + node_dict = basic_compiled_schema_test_dict + + assert_symmetric(node, node_dict, CompiledSchemaTestNode) + assert node.empty is False + assert node.is_refable is False + assert node.is_ephemeral is False + 
assert node.local_vars() == {} + + +def test_invalid_extra_schema_test_fields(minimal_schema_test_dict): + bad_extra = minimal_schema_test_dict + bad_extra['extra'] = 'extra value' + assert_fails_validation(bad_extra, CompiledSchemaTestNode) + + +def test_invalid_resource_type_schema_test(minimal_schema_test_dict): + bad_type = minimal_schema_test_dict + bad_type['resource_type'] = str(NodeType.Model) + assert_fails_validation(bad_type, CompiledSchemaTestNode) + + +unchanged_schema_tests = [ + # for tests, raw_sql isn't a change (because it's always the same for a given test macro) + lambda u: u.replace(raw_sql='select * from wherever'), + lambda u: u.replace(description='a description'), + lambda u: u.replace(tags=['mytag']), + lambda u: u.replace(meta={'cool_key': 'cool value'}), + # alias configs are ignored, we only care about the final value + lambda u: u.replace(config=u.config.replace(alias='nope')), + lambda u: u.replace(config=u.config.replace(database='nope')), + lambda u: u.replace(config=u.config.replace(schema='nope')), +] + + +changed_schema_tests = [ + lambda u: None, + lambda u: u.replace(database='other_db'), + lambda u: u.replace(schema='other_schema'), + lambda u: u.replace(alias='foo'), + lambda u: u.replace(fqn=['test', 'models', 'subdir', 'foo'], original_file_path='models/subdir/foo.sql', path='/root/models/subdir/foo.sql'), + lambda u: u.replace(config=u.config.replace(full_refresh=True)), + lambda u: u.replace(config=u.config.replace(post_hook=['select 1 as id'])), + lambda u: u.replace(config=u.config.replace(pre_hook=['select 1 as id'])), + lambda u: u.replace(config=u.config.replace(severity='warn')), + lambda u: u.replace(config=u.config.replace(quoting={'database': True, 'schema': False, 'identifier': False})), + lambda u: u.replace(test_metadata=u.test_metadata.replace(namespace='something')), + lambda u: u.replace(test_metadata=u.test_metadata.replace(name='bar')), + lambda u: 
u.replace(test_metadata=u.test_metadata.replace(kwargs={'arg': 'value'})), +] + + +@pytest.mark.parametrize('func', unchanged_schema_tests) +def test_compare_unchanged_schema_test(func, basic_uncompiled_schema_test_node): + value = func(basic_uncompiled_schema_test_node) + assert basic_uncompiled_schema_test_node.same_contents(value) + + +@pytest.mark.parametrize('func', changed_schema_tests) +def test_compare_changed_schema_test(func, basic_uncompiled_schema_test_node): + value = func(basic_uncompiled_schema_test_node) + assert not basic_uncompiled_schema_test_node.same_contents(value) + + +def test_compare_to_compiled(basic_uncompiled_schema_test_node, basic_compiled_schema_test_node): + # if you fix the severity, they should be the "same". + uncompiled = basic_uncompiled_schema_test_node + compiled = basic_compiled_schema_test_node + assert not uncompiled.same_contents(compiled) + fixed_config = compiled.config.replace(severity=uncompiled.config.severity) + fixed_compiled = compiled.replace(config=fixed_config) + assert uncompiled.same_contents(fixed_compiled) diff --git a/test/unit/test_contracts_graph_parsed.py b/test/unit/test_contracts_graph_parsed.py index 7716512b541..5d1e023d8d9 100644 --- a/test/unit/test_contracts_graph_parsed.py +++ b/test/unit/test_contracts_graph_parsed.py @@ -1,9 +1,12 @@ import pickle +import pytest from dbt.node_types import NodeType +from dbt.contracts.files import FileHash from dbt.contracts.graph.model_config import ( All, NodeConfig, + SeedConfig, TestConfig, TimestampSnapshotConfig, CheckSnapshotConfig, @@ -21,6 +24,7 @@ IntermediateSnapshotNode, ParsedNodePatch, ParsedMacro, + ParsedSeedNode, Docs, MacroDependsOn, ParsedSourceDefinition, @@ -29,16 +33,102 @@ TestMetadata, ) from dbt.contracts.graph.unparsed import Quoting +from dbt import flags from hologram import ValidationError -from .utils import ContractTestCase - - -class TestNodeConfig(ContractTestCase): - ContractType = NodeConfig - - def test_basics(self): - 
cfg_dict = { +from .utils import ContractTestCase, assert_symmetric, assert_from_dict, assert_fails_validation + + +@pytest.fixture(autouse=True) +def strict_mode(): + flags.STRICT_MODE = True + yield + flags.STRICT_MODE = False + + +@pytest.fixture +def populated_node_config_object(): + result = NodeConfig( + column_types={'a': 'text'}, + materialized='table', + post_hook=[Hook(sql='insert into blah(a, b) select "1", 1')] + ) + result._extra['extra'] = 'even more' + return result + + +@pytest.fixture +def populated_node_config_dict(): + return { + 'column_types': {'a': 'text'}, + 'enabled': True, + 'materialized': 'table', + 'persist_docs': {}, + 'post-hook': [{'sql': 'insert into blah(a, b) select "1", 1', 'transaction': True}], + 'pre-hook': [], + 'quoting': {}, + 'tags': [], + 'vars': {}, + 'extra': 'even more', + } + + +def test_config_populated(populated_node_config_object, populated_node_config_dict): + assert_symmetric(populated_node_config_object, populated_node_config_dict, NodeConfig) + pickle.loads(pickle.dumps(populated_node_config_object)) + + +different_node_configs = [ + lambda c: c.replace(post_hook=[]), + lambda c: c.replace(materialized='view'), + lambda c: c.replace(quoting={'database': True}), + lambda c: c.replace(extra='different extra'), + lambda c: c.replace(column_types={'a': 'varchar(256)'}), +] + + +same_node_configs = [ + lambda c: c.replace(tags=['mytag']), + lambda c: c.replace(alias='changed'), + lambda c: c.replace(schema='changed'), + lambda c: c.replace(database='changed'), +] + + +@pytest.mark.parametrize('func', different_node_configs) +def test_config_different(populated_node_config_object, func): + value = func(populated_node_config_object) + assert not populated_node_config_object.same_contents(value) + + +@pytest.mark.parametrize('func', same_node_configs) +def test_config_same(populated_node_config_object, func): + value = func(populated_node_config_object) + assert populated_node_config_object != value + assert 
populated_node_config_object.same_contents(value) + + +@pytest.fixture +def base_parsed_model_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Model), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from wherever', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': []}, + 'database': 'test_db', + 'description': '', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': [], + 'config': { 'column_types': {}, 'enabled': True, 'materialized': 'view', @@ -48,799 +138,1142 @@ def test_basics(self): 'quoting': {}, 'tags': [], 'vars': {}, - } - cfg = self.ContractType() - self.assert_symmetric(cfg, cfg_dict) - - def test_populated(self): - cfg_dict = { + }, + 'deferred': False, + 'docs': {'show': True}, + 'columns': {}, + 'meta': {}, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def basic_parsed_model_object(): + return ParsedModelNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from wherever', + name='foo', + resource_type=NodeType.Model, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + description='', + database='test_db', + schema='test_schema', + alias='bar', + tags=[], + config=NodeConfig(), + meta={}, + checksum=FileHash.from_contents(''), + ) + + +@pytest.fixture +def minimal_parsed_model_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Model), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from wherever', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'database': 'test_db', + 
'schema': 'test_schema', + 'alias': 'bar', + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def complex_parsed_model_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Model), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from {{ ref("bar") }}', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': ['model.test.bar']}, + 'database': 'test_db', + 'deferred': True, + 'description': 'My parsed node', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': ['tag'], + 'meta': {}, + 'config': { 'column_types': {'a': 'text'}, 'enabled': True, - 'materialized': 'table', + 'materialized': 'ephemeral', 'persist_docs': {}, 'post-hook': [{'sql': 'insert into blah(a, b) select "1", 1', 'transaction': True}], 'pre-hook': [], 'quoting': {}, 'tags': [], - 'vars': {}, - 'extra': 'even more', - } - cfg = self.ContractType( - column_types={'a': 'text'}, - materialized='table', - post_hook=[Hook(sql='insert into blah(a, b) select "1", 1')] - ) - cfg._extra['extra'] = 'even more' - - self.assert_symmetric(cfg, cfg_dict) - pickle.loads(pickle.dumps(cfg)) - - -class TestParsedModelNode(ContractTestCase): - ContractType = ParsedModelNode - - def _model_ok(self): - return { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Model), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': []}, - 'database': 'test_db', - 'description': '', - 'schema': 'test_schema', - 'alias': 'bar', - 'tags': [], - 'config': { - 'column_types': {}, - 'enabled': True, - 
'materialized': 'view', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, - 'tags': [], - 'vars': {}, - }, - 'deferred': False, - 'docs': {'show': True}, - 'columns': {}, - 'meta': {}, - } - - - def test_ok(self): - node_dict = self._model_ok() - node = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Model, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - config=NodeConfig(), - meta={}, - ) - self.assert_symmetric(node, node_dict) - self.assertFalse(node.empty) - self.assertTrue(node.is_refable) - self.assertFalse(node.is_ephemeral) - self.assertEqual(node.local_vars(), {}) - - minimum = { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Model), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'database': 'test_db', - 'schema': 'test_schema', - 'alias': 'bar', - } - self.assert_from_dict(node, minimum) - pickle.loads(pickle.dumps(node)) - - def test_complex(self): - node_dict = { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Model), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from {{ ref("bar") }}', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': ['model.test.bar']}, - 'database': 'test_db', - 'deferred': True, - 'description': 'My parsed node', - 'schema': 'test_schema', - 'alias': 'bar', - 'tags': ['tag'], - 'meta': {}, - 'config': { - 'column_types': 
{'a': 'text'}, - 'enabled': True, - 'materialized': 'ephemeral', - 'persist_docs': {}, - 'post-hook': [{'sql': 'insert into blah(a, b) select "1", 1', 'transaction': True}], - 'pre-hook': [], - 'quoting': {}, + 'vars': {'foo': 100}, + }, + 'docs': {'show': True}, + 'columns': { + 'a': { + 'name': 'a', + 'description': 'a text field', + 'meta': {}, 'tags': [], - 'vars': {'foo': 100}, }, - 'docs': {'show': True}, - 'columns': { - 'a': { - 'name': 'a', - 'description': 'a text field', - 'meta': {}, - 'tags': [], - }, - }, - } - - node = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from {{ ref("bar") }}', - name='foo', - resource_type=NodeType.Model, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(nodes=['model.test.bar']), - deferred=True, - description='My parsed node', - database='test_db', - schema='test_schema', - alias='bar', - tags=['tag'], - meta={}, - config=NodeConfig( - column_types={'a': 'text'}, - materialized='ephemeral', - post_hook=[Hook(sql='insert into blah(a, b) select "1", 1')], - vars={'foo': 100}, - ), - columns={'a': ColumnInfo('a', 'a text field', {})}, - ) - self.assert_symmetric(node, node_dict) - self.assertFalse(node.empty) - self.assertTrue(node.is_refable) - self.assertTrue(node.is_ephemeral) - self.assertEqual(node.local_vars(), {'foo': 100}) - - def test_invalid_bad_tags(self): - # bad top-level field - bad_tags = self._model_ok() - bad_tags['tags'] = 100 - self.assert_fails_validation(bad_tags) - - def test_invalid_bad_materialized(self): - # bad nested field - bad_materialized = self._model_ok() - bad_materialized['config']['materialized'] = None - self.assert_fails_validation(bad_materialized) - - def test_patch_ok(self): - initial = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - 
raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Model, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - meta={}, - config=NodeConfig(), - ) - patch = ParsedNodePatch( - name='foo', - yaml_key='models', - package_name='test', - description='The foo model', - original_file_path='/path/to/schema.yml', - columns={'a': ColumnInfo(name='a', description='a text field', meta={})}, - docs=Docs(), - meta={}, - ) - - initial.patch(patch) - - expected_dict = { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Model), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': []}, - 'database': 'test_db', - 'deferred': False, - 'description': 'The foo model', - 'schema': 'test_schema', - 'alias': 'bar', + }, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def complex_parsed_model_object(): + return ParsedModelNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from {{ ref("bar") }}', + name='foo', + resource_type=NodeType.Model, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(nodes=['model.test.bar']), + deferred=True, + description='My parsed node', + database='test_db', + schema='test_schema', + alias='bar', + tags=['tag'], + meta={}, + config=NodeConfig( + column_types={'a': 'text'}, + materialized='ephemeral', + post_hook=[Hook(sql='insert into blah(a, b) select "1", 1')], + vars={'foo': 100}, + ), + columns={'a': 
ColumnInfo('a', 'a text field', {})}, + checksum=FileHash.from_contents(''), + ) + + +def test_model_basic(basic_parsed_model_object, base_parsed_model_dict, minimal_parsed_model_dict): + node = basic_parsed_model_object + node_dict = base_parsed_model_dict + assert_symmetric(node, node_dict) + assert node.empty is False + assert node.is_refable is True + assert node.is_ephemeral is False + assert node.local_vars() == {} + + minimum = minimal_parsed_model_dict + assert_from_dict(node, minimum) + pickle.loads(pickle.dumps(node)) + + +def test_model_complex(complex_parsed_model_object, complex_parsed_model_dict): + node = complex_parsed_model_object + node_dict = complex_parsed_model_dict + assert_symmetric(node, node_dict) + assert node.empty is False + assert node.is_refable is True + assert node.is_ephemeral is True + assert node.local_vars() == {'foo': 100} + + +def test_invalid_bad_tags(base_parsed_model_dict): + # bad top-level field + bad_tags = base_parsed_model_dict + bad_tags['tags'] = 100 + assert_fails_validation(bad_tags, ParsedModelNode) + + +def test_invalid_bad_materialized(base_parsed_model_dict): + # bad nested field + bad_materialized = base_parsed_model_dict + bad_materialized['config']['materialized'] = None + assert_fails_validation(bad_materialized, ParsedModelNode) + + +unchanged_nodes = [ + lambda u: (u, u.replace(tags=['mytag'])), + lambda u: (u, u.replace(meta={'something': 1000})), + # True -> True + lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(persist_docs={'relation': True}))), + lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(config=u.config.replace(persist_docs={'columns': True}))), + + # only columns docs enabled, but description changed + lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(config=u.config.replace(persist_docs={'columns': True}), description='a model description')), + # 
only relation docs eanbled, but columns changed + lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(persist_docs={'relation': True}), columns={'a': ColumnInfo(name='a', description='a column description')})) + +] + + +changed_nodes = [ + lambda u: (u, u.replace(alias='other')), + lambda u: (u, u.replace(schema='other')), + lambda u: (u, u.replace(database='other')), + lambda u: (u, u.replace(fqn=['test', 'models', 'subdir', 'foo'], original_file_path='models/subdir/foo.sql', path='/root/models/subdir/foo.sql')), + + # None -> False is a config change even though it's pretty much the same + lambda u: (u, u.replace(config=u.config.replace(persist_docs={'relation': False}))), + lambda u: (u, u.replace(config=u.config.replace(persist_docs={'columns': False}))), + + # persist docs was true for the relation and we changed the model description + lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(persist_docs={'relation': True}), description='a model description')), + # persist docs was true for columns and we changed the model description + lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(config=u.config.replace(persist_docs={'columns': True}), columns={'a': ColumnInfo(name='a', description='a column description')})), +] + + +@pytest.mark.parametrize('func', unchanged_nodes) +def test_compare_unchanged_parsed_model(func, basic_parsed_model_object): + node, compare = func(basic_parsed_model_object) + assert node.same_contents(compare) + + +@pytest.mark.parametrize('func', changed_nodes) +def test_compare_changed_model(func, basic_parsed_model_object): + node, compare = func(basic_parsed_model_object) + assert not node.same_contents(compare) + + +@pytest.fixture +def basic_parsed_seed_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Seed), + 'path': 
'/root/seeds/seed.csv', + 'original_file_path': 'seeds/seed.csv', + 'package_name': 'test', + 'raw_sql': '', + 'unique_id': 'seed.test.foo', + 'fqn': ['test', 'seeds', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': []}, + 'database': 'test_db', + 'description': '', + 'schema': 'test_schema', + 'tags': [], + 'alias': 'foo', + 'config': { + 'column_types': {}, + 'enabled': True, + 'materialized': 'seed', + 'persist_docs': {}, + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, 'tags': [], - 'meta': {}, - 'config': { - 'column_types': {}, - 'enabled': True, - 'materialized': 'view', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, - 'tags': [], - 'vars': {}, - }, - 'patch_path': '/path/to/schema.yml', - 'columns': { - 'a': { - 'name': 'a', - 'description': 'a text field', - 'meta': {}, - 'tags': [], - }, - }, - 'docs': {'show': True}, - } - - expected = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Model, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - description='The foo model', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - meta={}, - config=NodeConfig(), - patch_path='/path/to/schema.yml', - columns={'a': ColumnInfo(name='a', description='a text field', meta={})}, - docs=Docs(), - ) - self.assert_symmetric(expected, expected_dict) # sanity check - self.assertEqual(initial, expected) - self.assert_symmetric(initial, expected_dict) - - def patch_invalid(self): - initial = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Model, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - 
sources=[], - depends_on=DependsOn(), - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - config=NodeConfig(), - ) - # invalid patch: description can't be None - patch = ParsedNodePatch( - name='foo', - yaml_key='models', - package_name='test', - description=None, - original_file_path='/path/to/schema.yml', - columns={}, - docs=Docs(), - ) - with self.assertRaises(ValidationError): - initial.patch(patch) - - -class TestParsedHookNode(ContractTestCase): - ContractType = ParsedHookNode - - def _hook_ok(self): - return { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Operation), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': []}, - 'database': 'test_db', - 'deferred': False, - 'description': '', - 'schema': 'test_schema', - 'alias': 'bar', + 'vars': {}, + }, + 'deferred': False, + 'docs': {'show': True}, + 'columns': {}, + 'meta': {}, + 'checksum': {'name': 'path', 'checksum': '/root/seeds/seed.csv'}, + } + + +@pytest.fixture +def basic_parsed_seed_object(): + return ParsedSeedNode( + name='foo', + root_path='/root/', + resource_type=NodeType.Seed, + path='/root/seeds/seed.csv', + original_file_path='seeds/seed.csv', + package_name='test', + raw_sql='', + unique_id='seed.test.foo', + fqn=['test', 'seeds', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + database='test_db', + description='', + schema='test_schema', + tags=[], + alias='foo', + config=SeedConfig(), + # config=SeedConfig(quote_columns=True), + deferred=False, + docs=Docs(show=True), + columns={}, + meta={}, + checksum=FileHash(name='path', checksum='/root/seeds/seed.csv'), + ) + + +@pytest.fixture +def minimal_parsed_seed_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': 
str(NodeType.Seed), + 'path': '/root/seeds/seed.csv', + 'original_file_path': 'seeds/seed.csv', + 'package_name': 'test', + 'raw_sql': '', + 'unique_id': 'seed.test.foo', + 'fqn': ['test', 'seeds', 'foo'], + 'database': 'test_db', + 'schema': 'test_schema', + 'alias': 'foo', + 'checksum': {'name': 'path', 'checksum': '/root/seeds/seed.csv'}, + } + + +@pytest.fixture +def complex_parsed_seed_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Seed), + 'path': '/root/seeds/seed.csv', + 'original_file_path': 'seeds/seed.csv', + 'package_name': 'test', + 'raw_sql': '', + 'unique_id': 'seed.test.foo', + 'fqn': ['test', 'seeds', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': []}, + 'database': 'test_db', + 'description': 'a description', + 'schema': 'test_schema', + 'tags': ['mytag'], + 'alias': 'foo', + 'config': { + 'column_types': {}, + 'enabled': True, + 'materialized': 'seed', + 'persist_docs': {'relation': True, 'columns': True}, + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, 'tags': [], - 'config': { - 'column_types': {}, - 'enabled': True, - 'materialized': 'view', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, - 'tags': [], - 'vars': {}, - }, - 'docs': {'show': True}, - 'columns': {}, - 'meta': {}, - 'index': 10, - } - - def test_ok(self): - node_dict = self._hook_ok() - node = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Operation, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - description='', - deferred=False, - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - config=NodeConfig(), - index=10, - ) - self.assert_symmetric(node, node_dict) - self.assertFalse(node.empty) - self.assertFalse(node.is_refable) - 
self.assertEqual(node.get_materialization(), 'view') - - node.index = None - minimum = { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Operation), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'database': 'test_db', - 'schema': 'test_schema', - 'alias': 'bar', - } - self.assert_from_dict(node, minimum) - pickle.loads(pickle.dumps(node)) - - def test_complex(self): - node_dict = { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Operation), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from {{ ref("bar") }}', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': ['model.test.bar']}, - 'deferred': False, - 'database': 'test_db', - 'description': 'My parsed node', - 'schema': 'test_schema', - 'alias': 'bar', - 'tags': ['tag'], - 'meta': {}, - 'config': { - 'column_types': {'a': 'text'}, - 'enabled': True, - 'materialized': 'table', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, - 'tags': [], - 'vars': {}, - }, - 'docs': {'show': True}, - 'columns': { - 'a': { - 'name': 'a', - 'description': 'a text field', - 'meta': {}, - 'tags': [], - }, - }, - 'index': 13, - } - - node = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from {{ ref("bar") }}', - name='foo', - resource_type=NodeType.Operation, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(nodes=['model.test.bar']), - description='My parsed node', - deferred=False, - database='test_db', - schema='test_schema', - alias='bar', - tags=['tag'], - meta={}, - 
config=NodeConfig( - column_types={'a': 'text'}, - materialized='table', - post_hook=[] - ), - columns={'a': ColumnInfo('a', 'a text field', {})}, - index=13, - ) - self.assert_symmetric(node, node_dict) - self.assertFalse(node.empty) - self.assertFalse(node.is_refable) - self.assertEqual(node.get_materialization(), 'table') - - def test_invalid_index_type(self): - # bad top-level field - bad_index = self._hook_ok() - bad_index['index'] = 'a string!?' - self.assert_fails_validation(bad_index) - - -class TestParsedSchemaTestNode(ContractTestCase): - ContractType = ParsedSchemaTestNode - - def _minimum(self): - return { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Test), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'test.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'database': 'test_db', - 'schema': 'test_schema', - 'alias': 'bar', - 'meta': {}, - 'test_metadata': { - 'name': 'foo', - 'kwargs': {}, - }, - } - - def _complex(self): - return { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Test), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from {{ ref("bar") }}', - 'unique_id': 'test.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': ['model.test.bar']}, - 'database': 'test_db', - 'deferred': False, - 'description': 'My parsed node', - 'schema': 'test_schema', - 'alias': 'bar', - 'tags': ['tag'], - 'meta': {}, - 'config': { - 'column_types': {'a': 'text'}, - 'enabled': True, - 'materialized': 'table', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, + 'vars': {}, + 'quote_columns': True, + }, + 'deferred': False, + 'docs': {'show': True}, + 'columns': {'a': {'name': 'a', 'description': 'a column description', 'meta': {}, 'tags': []}}, + 
'meta': {'foo': 1000}, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def complex_parsed_seed_object(): + return ParsedSeedNode( + name='foo', + root_path='/root/', + resource_type=NodeType.Seed, + path='/root/seeds/seed.csv', + original_file_path='seeds/seed.csv', + package_name='test', + raw_sql='', + unique_id='seed.test.foo', + fqn=['test', 'seeds', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + database='test_db', + description='a description', + schema='test_schema', + tags=['mytag'], + alias='foo', + config=SeedConfig( + quote_columns=True, + persist_docs={'relation': True, 'columns': True}, + ), + deferred=False, + docs=Docs(show=True), + columns={'a': ColumnInfo(name='a', description='a column description')}, + meta={'foo': 1000}, + checksum=FileHash.from_contents(''), + ) + + +def test_seed_basic(basic_parsed_seed_dict, basic_parsed_seed_object, minimal_parsed_seed_dict): + assert_symmetric(basic_parsed_seed_object, basic_parsed_seed_dict) + assert basic_parsed_seed_object.get_materialization() == 'seed' + + assert_from_dict(basic_parsed_seed_object, minimal_parsed_seed_dict, ParsedSeedNode) + + +def test_seed_complex(complex_parsed_seed_dict, complex_parsed_seed_object): + assert_symmetric(complex_parsed_seed_object, complex_parsed_seed_dict) + assert complex_parsed_seed_object.get_materialization() == 'seed' + + +unchanged_seeds = [ + lambda u: (u, u.replace(tags=['mytag'])), + lambda u: (u, u.replace(meta={'something': 1000})), + # True -> True + lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(persist_docs={'relation': True}))), + lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(config=u.config.replace(persist_docs={'columns': True}))), + + # only columns docs enabled, but description changed + lambda u: 
(u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(config=u.config.replace(persist_docs={'columns': True}), description='a model description')),
+    # only relation docs enabled, but columns changed
+    lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(persist_docs={'relation': True}), columns={'a': ColumnInfo(name='a', description='a column description')}))
+]
+
+
+changed_seeds = [
+    lambda u: (u, u.replace(alias='other')),
+    lambda u: (u, u.replace(schema='other')),
+    lambda u: (u, u.replace(database='other')),
+    lambda u: (u, u.replace(fqn=['test', 'models', 'subdir', 'foo'], original_file_path='models/subdir/foo.sql', path='/root/models/subdir/foo.sql')),
+
+    # None -> False is a config change even though it's pretty much the same
+    lambda u: (u, u.replace(config=u.config.replace(persist_docs={'relation': False}))),
+    lambda u: (u, u.replace(config=u.config.replace(persist_docs={'columns': False}))),
+
+    # persist docs was true for the relation and we changed the model description
+    lambda u: (u.replace(config=u.config.replace(persist_docs={'relation': True})), u.replace(config=u.config.replace(persist_docs={'relation': True}), description='a model description')),
+    # persist docs was true for columns and we changed the column description
+    lambda u: (u.replace(config=u.config.replace(persist_docs={'columns': True})), u.replace(config=u.config.replace(persist_docs={'columns': True}), columns={'a': ColumnInfo(name='a', description='a column description')})),
+]
+
+
+@pytest.mark.parametrize('func', unchanged_seeds)
+def test_compare_unchanged_parsed_seed(func, basic_parsed_seed_object):
+    node, compare = func(basic_parsed_seed_object)
+    assert node.same_contents(compare)
+
+
+@pytest.mark.parametrize('func', changed_seeds)
+def test_compare_changed_seed(func, basic_parsed_seed_object):
+    node, compare = func(basic_parsed_seed_object)
+    assert not node.same_contents(compare)
+
+ + +@pytest.fixture +def basic_parsed_model_patch_dict(): + return { + 'name': 'foo', + 'description': 'The foo model', + 'original_file_path': '/path/to/schema.yml', + 'docs': {'show': True}, + 'meta': {}, + 'yaml_key': 'models', + 'package_name': 'test', + 'columns': { + 'a': { + 'name': 'a', + 'description': 'a text field', + 'meta': {}, 'tags': [], - 'vars': {}, - 'severity': 'WARN', - 'extra_key': 'extra value' - }, - 'docs': {'show': False}, - 'columns': { - 'a': { - 'name': 'a', - 'description': 'a text field', - 'meta': {}, - 'tags': [], - }, }, - 'column_name': 'id', - 'test_metadata': { - 'name': 'foo', - 'kwargs': {}, - }, - } - - def test_ok(self): - node_dict = { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Test), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'test.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': []}, - 'deferred': False, - 'database': 'test_db', - 'description': '', - 'schema': 'test_schema', - 'alias': 'bar', + }, + } + + +@pytest.fixture +def basic_parsed_model_patch_object(): + return ParsedNodePatch( + name='foo', + yaml_key='models', + package_name='test', + description='The foo model', + original_file_path='/path/to/schema.yml', + columns={'a': ColumnInfo(name='a', description='a text field', meta={})}, + docs=Docs(), + meta={}, + ) + + +@pytest.fixture +def patched_model_object(): + return ParsedModelNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from wherever', + name='foo', + resource_type=NodeType.Model, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + description='The foo model', + database='test_db', + schema='test_schema', + alias='bar', + tags=[], + meta={}, + 
config=NodeConfig(), + patch_path='/path/to/schema.yml', + columns={'a': ColumnInfo(name='a', description='a text field', meta={})}, + docs=Docs(), + checksum=FileHash.from_contents(''), + ) + + +def test_patch_parsed_model(basic_parsed_model_object, basic_parsed_model_patch_object, patched_model_object): + pre_patch = basic_parsed_model_object + pre_patch.patch(basic_parsed_model_patch_object) + assert patched_model_object == pre_patch + + +def test_patch_parsed_model_invalid(basic_parsed_model_object, basic_parsed_model_patch_object): + pre_patch = basic_parsed_model_object + patch = basic_parsed_model_patch_object.replace(description=None) + with pytest.raises(ValidationError): + pre_patch.patch(patch) + + +@pytest.fixture +def minimal_parsed_hook_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Operation), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from wherever', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'database': 'test_db', + 'schema': 'test_schema', + 'alias': 'bar', + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def base_parsed_hook_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Operation), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from wherever', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': []}, + 'database': 'test_db', + 'deferred': False, + 'description': '', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': [], + 'config': { + 'column_types': {}, + 'enabled': True, + 'materialized': 'view', + 'persist_docs': {}, + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, 'tags': [], - 'meta': {}, 
- 'config': { - 'column_types': {}, - 'enabled': True, - 'materialized': 'view', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, + 'vars': {}, + }, + 'docs': {'show': True}, + 'columns': {}, + 'meta': {}, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def base_parsed_hook_object(): + return ParsedHookNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from wherever', + name='foo', + resource_type=NodeType.Operation, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + description='', + deferred=False, + database='test_db', + schema='test_schema', + alias='bar', + tags=[], + config=NodeConfig(), + index=None, + checksum=FileHash.from_contents(''), + ) + + +@pytest.fixture +def complex_parsed_hook_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Operation), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from {{ ref("bar") }}', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': ['model.test.bar']}, + 'deferred': False, + 'database': 'test_db', + 'description': 'My parsed node', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': ['tag'], + 'meta': {}, + 'config': { + 'column_types': {'a': 'text'}, + 'enabled': True, + 'materialized': 'table', + 'persist_docs': {}, + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, + 'tags': [], + 'vars': {}, + }, + 'docs': {'show': True}, + 'columns': { + 'a': { + 'name': 'a', + 'description': 'a text field', + 'meta': {}, 'tags': [], - 'vars': {}, - 'severity': 'ERROR', - }, - 'docs': {'show': True}, - 'columns': {}, - 'test_metadata': { - 'name': 'foo', - 
'kwargs': {}, }, - } - node = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Test, - unique_id='test.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - meta={}, - config=TestConfig(), - test_metadata=TestMetadata(namespace=None, name='foo', kwargs={}), - ) - self.assert_symmetric(node, node_dict) - self.assertFalse(node.empty) - self.assertFalse(node.is_ephemeral) - self.assertFalse(node.is_refable) - self.assertEqual(node.get_materialization(), 'view') - - minimum = self._minimum() - self.assert_from_dict(node, minimum) - pickle.loads(pickle.dumps(node)) - - def test_complex(self): - node_dict = self._complex() - - cfg = TestConfig( + }, + 'index': 13, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def complex_parsed_hook_object(): + return ParsedHookNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from {{ ref("bar") }}', + name='foo', + resource_type=NodeType.Operation, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(nodes=['model.test.bar']), + description='My parsed node', + deferred=False, + database='test_db', + schema='test_schema', + alias='bar', + tags=['tag'], + meta={}, + config=NodeConfig( column_types={'a': 'text'}, materialized='table', - severity='WARN' - ) - cfg._extra.update({'extra_key': 'extra value'}) - - node = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from {{ ref("bar") }}', - name='foo', - 
resource_type=NodeType.Test, - unique_id='test.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(nodes=['model.test.bar']), - description='My parsed node', - database='test_db', - schema='test_schema', - alias='bar', - tags=['tag'], - meta={}, - config=cfg, - columns={'a': ColumnInfo('a', 'a text field',{})}, - column_name='id', - docs=Docs(show=False), - test_metadata=TestMetadata(namespace=None, name='foo', kwargs={}), - ) - self.assert_symmetric(node, node_dict) - self.assertFalse(node.empty) - - def test_invalid_column_name_type(self): - # bad top-level field - bad_column_name = self._complex() - bad_column_name['column_name'] = {} - self.assert_fails_validation(bad_column_name) - - def test_invalid_severity(self): - invalid_config_value = self._complex() - invalid_config_value['config']['severity'] = 'WERROR' - self.assert_fails_validation(invalid_config_value) - - -class TestTimestampSnapshotConfig(ContractTestCase): - ContractType = TimestampSnapshotConfig - - def _cfg_basic(self): - return { + post_hook=[] + ), + columns={'a': ColumnInfo('a', 'a text field', {})}, + index=13, + checksum=FileHash.from_contents(''), + ) + + +def test_basic_parsed_hook(minimal_parsed_hook_dict, base_parsed_hook_dict, base_parsed_hook_object): + node = base_parsed_hook_object + node_dict = base_parsed_hook_dict + minimum = minimal_parsed_hook_dict + + assert_symmetric(node, node_dict, ParsedHookNode) + assert node.empty is False + assert node.is_refable is False + assert node.get_materialization() == 'view' + assert_from_dict(node, minimum, ParsedHookNode) + pickle.loads(pickle.dumps(node)) + + +def test_complex_parsed_hook(complex_parsed_hook_dict, complex_parsed_hook_object): + node = complex_parsed_hook_object + node_dict = complex_parsed_hook_dict + assert_symmetric(node, node_dict) + assert node.empty is False + assert node.is_refable is False + assert node.get_materialization() == 'table' + + +def 
test_invalid_hook_index_type(base_parsed_hook_dict): + bad_index = base_parsed_hook_dict + bad_index['index'] = 'a string!?' + assert_fails_validation(bad_index, ParsedHookNode) + + +@pytest.fixture +def minimal_parsed_schema_test_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Test), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from wherever', + 'unique_id': 'test.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'database': 'test_db', + 'schema': 'test_schema', + 'alias': 'bar', + 'meta': {}, + 'test_metadata': { + 'name': 'foo', + 'kwargs': {}, + }, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def basic_parsed_schema_test_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Test), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from wherever', + 'unique_id': 'test.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': []}, + 'deferred': False, + 'database': 'test_db', + 'description': '', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': [], + 'meta': {}, + 'config': { 'column_types': {}, 'enabled': True, - 'materialized': 'snapshot', + 'materialized': 'view', 'persist_docs': {}, 'post-hook': [], 'pre-hook': [], 'quoting': {}, 'tags': [], 'vars': {}, - 'unique_key': 'id', - 'strategy': 'timestamp', - 'updated_at': 'last_update', - 'target_database': 'some_snapshot_db', - 'target_schema': 'some_snapshot_schema', - } - - def test_basics(self): - cfg_dict = self._cfg_basic() - cfg = self.ContractType( - strategy=SnapshotStrategy.Timestamp, - updated_at='last_update', - unique_key='id', - target_database='some_snapshot_db', - target_schema='some_snapshot_schema', - ) - 
self.assert_symmetric(cfg, cfg_dict) - pickle.loads(pickle.dumps(cfg)) - - def test_populated(self): - cfg_dict = { + 'severity': 'ERROR', + }, + 'docs': {'show': True}, + 'columns': {}, + 'test_metadata': { + 'name': 'foo', + 'kwargs': {}, + }, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def basic_parsed_schema_test_object(): + return ParsedSchemaTestNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from wherever', + name='foo', + resource_type=NodeType.Test, + unique_id='test.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + description='', + database='test_db', + schema='test_schema', + alias='bar', + tags=[], + meta={}, + config=TestConfig(), + test_metadata=TestMetadata(namespace=None, name='foo', kwargs={}), + checksum=FileHash.from_contents(''), + ) + + +@pytest.fixture +def complex_parsed_schema_test_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Test), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from {{ ref("bar") }}', + 'unique_id': 'test.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': ['model.test.bar']}, + 'database': 'test_db', + 'deferred': False, + 'description': 'My parsed node', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': ['tag'], + 'meta': {}, + 'config': { 'column_types': {'a': 'text'}, 'enabled': True, - 'materialized': 'snapshot', + 'materialized': 'table', 'persist_docs': {}, - 'post-hook': [{'sql': 'insert into blah(a, b) select "1", 1', 'transaction': True}], + 'post-hook': [], 'pre-hook': [], 'quoting': {}, 'tags': [], 'vars': {}, - 'target_database': 'some_snapshot_db', - 'target_schema': 'some_snapshot_schema', - 
'unique_key': 'id', - 'extra': 'even more', - 'strategy': 'timestamp', - 'updated_at': 'last_update', - } - cfg = self.ContractType( - column_types={'a': 'text'}, - materialized='snapshot', - post_hook=[Hook(sql='insert into blah(a, b) select "1", 1')], - strategy=SnapshotStrategy.Timestamp, - target_database='some_snapshot_db', - target_schema='some_snapshot_schema', - updated_at='last_update', - unique_key='id', - ) - cfg._extra['extra'] = 'even more' - - self.assert_symmetric(cfg, cfg_dict) - - def test_invalid_wrong_strategy(self): - bad_type = self._cfg_basic() - bad_type['strategy'] = 'check' - self.assert_fails_validation(bad_type) - - def test_invalid_missing_updated_at(self): - bad_fields = self._cfg_basic() - del bad_fields['updated_at'] - bad_fields['check_cols'] = 'all' - self.assert_fails_validation(bad_fields) - - -class TestCheckSnapshotConfig(ContractTestCase): - ContractType = CheckSnapshotConfig - - def _cfg_ok(self): - return { + 'severity': 'WARN', + 'extra_key': 'extra value' + }, + 'docs': {'show': False}, + 'columns': { + 'a': { + 'name': 'a', + 'description': 'a text field', + 'meta': {}, + 'tags': [], + }, + }, + 'column_name': 'id', + 'test_metadata': { + 'name': 'foo', + 'kwargs': {}, + }, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def complex_parsed_schema_test_object(): + cfg = TestConfig( + column_types={'a': 'text'}, + materialized='table', + severity='WARN' + ) + cfg._extra.update({'extra_key': 'extra value'}) + return ParsedSchemaTestNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from {{ ref("bar") }}', + name='foo', + resource_type=NodeType.Test, + unique_id='test.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(nodes=['model.test.bar']), + description='My parsed node', + database='test_db', + 
schema='test_schema', + alias='bar', + tags=['tag'], + meta={}, + config=cfg, + columns={'a': ColumnInfo('a', 'a text field',{})}, + column_name='id', + docs=Docs(show=False), + test_metadata=TestMetadata(namespace=None, name='foo', kwargs={}), + checksum=FileHash.from_contents(''), + ) + + +def test_basic_schema_test_node(minimal_parsed_schema_test_dict, basic_parsed_schema_test_dict, basic_parsed_schema_test_object): + node = basic_parsed_schema_test_object + node_dict = basic_parsed_schema_test_dict + minimum = minimal_parsed_schema_test_dict + assert_symmetric(node, node_dict, ParsedSchemaTestNode) + + assert node.empty is False + assert node.is_ephemeral is False + assert node.is_refable is False + assert node.get_materialization() == 'view' + + assert_from_dict(node, minimum, ParsedSchemaTestNode) + pickle.loads(pickle.dumps(node)) + + +def test_complex_schema_test_node(complex_parsed_schema_test_dict, complex_parsed_schema_test_object): + node = complex_parsed_schema_test_object + node_dict = complex_parsed_schema_test_dict + assert_symmetric(node, node_dict) + assert node.empty is False + + +def test_invalid_column_name_type(complex_parsed_schema_test_dict): + # bad top-level field + bad_column_name = complex_parsed_schema_test_dict + bad_column_name['column_name'] = {} + assert_fails_validation(bad_column_name, ParsedSchemaTestNode) + + +def test_invalid_severity(complex_parsed_schema_test_dict): + invalid_config_value = complex_parsed_schema_test_dict + invalid_config_value['config']['severity'] = 'WERROR' + assert_fails_validation(invalid_config_value, ParsedSchemaTestNode) + + +@pytest.fixture +def basic_timestamp_snapshot_config_dict(): + return { + 'column_types': {}, + 'enabled': True, + 'materialized': 'snapshot', + 'persist_docs': {}, + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, + 'tags': [], + 'vars': {}, + 'unique_key': 'id', + 'strategy': 'timestamp', + 'updated_at': 'last_update', + 'target_database': 'some_snapshot_db', + 
'target_schema': 'some_snapshot_schema', + } + + +@pytest.fixture +def basic_timestamp_snapshot_config_object(): + return TimestampSnapshotConfig( + strategy=SnapshotStrategy.Timestamp, + updated_at='last_update', + unique_key='id', + target_database='some_snapshot_db', + target_schema='some_snapshot_schema', + ) + + +@pytest.fixture +def complex_timestamp_snapshot_config_dict(): + return { + 'column_types': {'a': 'text'}, + 'enabled': True, + 'materialized': 'snapshot', + 'persist_docs': {}, + 'post-hook': [{'sql': 'insert into blah(a, b) select "1", 1', 'transaction': True}], + 'pre-hook': [], + 'quoting': {}, + 'tags': [], + 'vars': {}, + 'target_database': 'some_snapshot_db', + 'target_schema': 'some_snapshot_schema', + 'unique_key': 'id', + 'extra': 'even more', + 'strategy': 'timestamp', + 'updated_at': 'last_update', + } + + +@pytest.fixture +def complex_timestamp_snapshot_config_object(): + cfg = TimestampSnapshotConfig( + column_types={'a': 'text'}, + materialized='snapshot', + post_hook=[Hook(sql='insert into blah(a, b) select "1", 1')], + strategy=SnapshotStrategy.Timestamp, + target_database='some_snapshot_db', + target_schema='some_snapshot_schema', + updated_at='last_update', + unique_key='id', + ) + cfg._extra['extra'] = 'even more' + return cfg + + +def test_basic_timestamp_snapshot_config(basic_timestamp_snapshot_config_dict, basic_timestamp_snapshot_config_object): + cfg = basic_timestamp_snapshot_config_object + cfg_dict = basic_timestamp_snapshot_config_dict + assert_symmetric(cfg, cfg_dict) + pickle.loads(pickle.dumps(cfg)) + + +def test_complex_timestamp_snapshot_config(complex_timestamp_snapshot_config_dict, complex_timestamp_snapshot_config_object): + cfg = complex_timestamp_snapshot_config_object + cfg_dict = complex_timestamp_snapshot_config_dict + assert_symmetric(cfg, cfg_dict, TimestampSnapshotConfig) + + +def test_invalid_wrong_strategy(basic_timestamp_snapshot_config_dict): + bad_type = basic_timestamp_snapshot_config_dict + 
bad_type['strategy'] = 'check' + assert_fails_validation(bad_type, TimestampSnapshotConfig) + + +def test_invalid_missing_updated_at(basic_timestamp_snapshot_config_dict): + bad_fields = basic_timestamp_snapshot_config_dict + del bad_fields['updated_at'] + bad_fields['check_cols'] = 'all' + assert_fails_validation(bad_fields, TimestampSnapshotConfig) + + +@pytest.fixture +def basic_check_snapshot_config_dict(): + return { + 'column_types': {}, + 'enabled': True, + 'materialized': 'snapshot', + 'persist_docs': {}, + 'post-hook': [], + 'pre-hook': [], + 'quoting': {}, + 'tags': [], + 'vars': {}, + 'target_database': 'some_snapshot_db', + 'target_schema': 'some_snapshot_schema', + 'unique_key': 'id', + 'strategy': 'check', + 'check_cols': 'all', + } + + +@pytest.fixture +def basic_check_snapshot_config_object(): + return CheckSnapshotConfig( + strategy=SnapshotStrategy.Check, + check_cols=All.All, + unique_key='id', + target_database='some_snapshot_db', + target_schema='some_snapshot_schema', + ) + + +@pytest.fixture +def complex_set_snapshot_config_dict(): + return { + 'column_types': {'a': 'text'}, + 'enabled': True, + 'materialized': 'snapshot', + 'persist_docs': {}, + 'post-hook': [{'sql': 'insert into blah(a, b) select "1", 1', 'transaction': True}], + 'pre-hook': [], + 'quoting': {}, + 'tags': [], + 'vars': {}, + 'target_database': 'some_snapshot_db', + 'target_schema': 'some_snapshot_schema', + 'unique_key': 'id', + 'extra': 'even more', + 'strategy': 'check', + 'check_cols': ['a', 'b'], + } + + +@pytest.fixture +def complex_set_snapshot_config_object(): + cfg = CheckSnapshotConfig( + column_types={'a': 'text'}, + materialized='snapshot', + post_hook=[Hook(sql='insert into blah(a, b) select "1", 1')], + strategy=SnapshotStrategy.Check, + check_cols=['a', 'b'], + target_database='some_snapshot_db', + target_schema='some_snapshot_schema', + unique_key='id', + ) + cfg._extra['extra'] = 'even more' + return cfg + + +def 
test_basic_snapshot_config(basic_check_snapshot_config_dict, basic_check_snapshot_config_object): + cfg_dict = basic_check_snapshot_config_dict + cfg = basic_check_snapshot_config_object + assert_symmetric(cfg, cfg_dict, CheckSnapshotConfig) + pickle.loads(pickle.dumps(cfg)) + + +def test_complex_snapshot_config(complex_set_snapshot_config_dict, complex_set_snapshot_config_object): + cfg_dict = complex_set_snapshot_config_dict + cfg = complex_set_snapshot_config_object + assert_symmetric(cfg, cfg_dict) + pickle.loads(pickle.dumps(cfg)) + + +def test_invalid_check_wrong_strategy(basic_check_snapshot_config_dict): + wrong_strategy = basic_check_snapshot_config_dict + wrong_strategy['strategy'] = 'timestamp' + assert_fails_validation(wrong_strategy, CheckSnapshotConfig) + + +def test_invalid_missing_check_cols(basic_check_snapshot_config_dict): + wrong_fields = basic_check_snapshot_config_dict + del wrong_fields['check_cols'] + assert_fails_validation(wrong_fields, CheckSnapshotConfig) + + +def test_invalid_check_value(basic_check_snapshot_config_dict): + invalid_check_type = basic_check_snapshot_config_dict + invalid_check_type['check_cols'] = 'some' + assert_fails_validation(invalid_check_type, CheckSnapshotConfig) + + +@pytest.fixture +def basic_timestamp_snapshot_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Snapshot), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from wherever', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': []}, + 'deferred': False, + 'database': 'test_db', + 'description': '', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': [], + 'config': { 'column_types': {}, 'enabled': True, 'materialized': 'snapshot', @@ -853,29 +1286,108 @@ def _cfg_ok(self): 'target_database': 'some_snapshot_db', 'target_schema': 'some_snapshot_schema', 
'unique_key': 'id', - 'strategy': 'check', - 'check_cols': 'all', - } - - def test_basics(self): - cfg_dict = self._cfg_ok() - cfg = self.ContractType( - strategy=SnapshotStrategy.Check, - check_cols=All.All, + 'strategy': 'timestamp', + 'updated_at': 'last_update', + }, + 'docs': {'show': True}, + 'columns': {}, + 'meta': {}, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def basic_timestamp_snapshot_object(): + return ParsedSnapshotNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from wherever', + name='foo', + resource_type=NodeType.Snapshot, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + description='', + database='test_db', + schema='test_schema', + alias='bar', + tags=[], + config=TimestampSnapshotConfig( + strategy=SnapshotStrategy.Timestamp, unique_key='id', + updated_at='last_update', target_database='some_snapshot_db', target_schema='some_snapshot_schema', - ) - self.assert_symmetric(cfg, cfg_dict) - pickle.loads(pickle.dumps(cfg)) - - def test_populated(self): - cfg_dict = { - 'column_types': {'a': 'text'}, + ), + checksum=FileHash.from_contents(''), + ) + + +@pytest.fixture +def basic_intermedaite_timestamp_snapshot_object(): + cfg = EmptySnapshotConfig() + cfg._extra.update({ + 'strategy': 'timestamp', + 'unique_key': 'id', + 'updated_at': 'last_update', + 'target_database': 'some_snapshot_db', + 'target_schema': 'some_snapshot_schema', + }) + + return IntermediateSnapshotNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from wherever', + name='foo', + resource_type=NodeType.Snapshot, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + description='', + 
database='test_db', + schema='test_schema', + alias='bar', + tags=[], + config=cfg, + checksum=FileHash.from_contents(''), + ) + + +@pytest.fixture +def basic_check_snapshot_dict(): + return { + 'name': 'foo', + 'root_path': '/root/', + 'resource_type': str(NodeType.Snapshot), + 'path': '/root/x/path.sql', + 'original_file_path': '/root/path.sql', + 'package_name': 'test', + 'raw_sql': 'select * from wherever', + 'unique_id': 'model.test.foo', + 'fqn': ['test', 'models', 'foo'], + 'refs': [], + 'sources': [], + 'depends_on': {'macros': [], 'nodes': []}, + 'database': 'test_db', + 'deferred': False, + 'description': '', + 'schema': 'test_schema', + 'alias': 'bar', + 'tags': [], + 'config': { + 'column_types': {}, 'enabled': True, 'materialized': 'snapshot', 'persist_docs': {}, - 'post-hook': [{'sql': 'insert into blah(a, b) select "1", 1', 'transaction': True}], + 'post-hook': [], 'pre-hook': [], 'quoting': {}, 'tags': [], @@ -883,320 +1395,154 @@ def test_populated(self): 'target_database': 'some_snapshot_db', 'target_schema': 'some_snapshot_schema', 'unique_key': 'id', - 'extra': 'even more', 'strategy': 'check', - 'check_cols': ['a', 'b'], - } - cfg = self.ContractType( - column_types={'a': 'text'}, - materialized='snapshot', - post_hook=[Hook(sql='insert into blah(a, b) select "1", 1')], + 'check_cols': 'all', + }, + 'docs': {'show': True}, + 'columns': {}, + 'meta': {}, + 'checksum': {'name': 'sha256', 'checksum': 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'}, + } + + +@pytest.fixture +def basic_check_snapshot_object(): + return ParsedSnapshotNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from wherever', + name='foo', + resource_type=NodeType.Snapshot, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + description='', + database='test_db', + schema='test_schema', + alias='bar', + 
tags=[], + config=CheckSnapshotConfig( strategy=SnapshotStrategy.Check, - check_cols=['a', 'b'], + unique_key='id', + check_cols=All.All, target_database='some_snapshot_db', target_schema='some_snapshot_schema', - unique_key='id', - ) - cfg._extra['extra'] = 'even more' - - self.assert_symmetric(cfg, cfg_dict) - - def test_invalid_wrong_strategy(self): - wrong_strategy = self._cfg_ok() - wrong_strategy['strategy'] = 'timestamp' - self.assert_fails_validation(wrong_strategy) - - def test_invalid_missing_check_cols(self): - wrong_fields = self._cfg_ok() - del wrong_fields['check_cols'] - self.assert_fails_validation(wrong_fields) - - def test_invalid_check_value(self): - invalid_check_type = self._cfg_ok() - invalid_check_type['check_cols'] = 'some' - self.assert_fails_validation(invalid_check_type) - - -class TestParsedSnapshotNode(ContractTestCase): - ContractType = ParsedSnapshotNode - - def _ts_ok(self): - return { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Snapshot), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': []}, - 'deferred': False, - 'database': 'test_db', - 'description': '', - 'schema': 'test_schema', - 'alias': 'bar', - 'tags': [], - 'config': { - 'column_types': {}, - 'enabled': True, - 'materialized': 'snapshot', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, - 'tags': [], - 'vars': {}, - 'target_database': 'some_snapshot_db', - 'target_schema': 'some_snapshot_schema', - 'unique_key': 'id', - 'strategy': 'timestamp', - 'updated_at': 'last_update', - }, - 'docs': {'show': True}, - 'columns': {}, - 'meta': {}, - } - - def test_timestamp_ok(self): - node_dict = self._ts_ok() - - node = self.ContractType( - package_name='test', - root_path='/root/', - 
path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Snapshot, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - config=TimestampSnapshotConfig( - strategy=SnapshotStrategy.Timestamp, - unique_key='id', - updated_at='last_update', - target_database='some_snapshot_db', - target_schema='some_snapshot_schema', - ), - ) - - cfg = EmptySnapshotConfig() - cfg._extra.update({ - 'strategy': 'timestamp', - 'unique_key': 'id', - 'updated_at': 'last_update', - 'target_database': 'some_snapshot_db', - 'target_schema': 'some_snapshot_schema', - }) - - inter = IntermediateSnapshotNode( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Snapshot, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - config=cfg, - ) - self.assert_symmetric(node, node_dict) - self.assert_symmetric(inter, node_dict, cls=IntermediateSnapshotNode) - self.assertEqual( - self.ContractType.from_dict(inter.to_dict()), - node - ) - self.assertTrue(node.is_refable) - self.assertFalse(node.is_ephemeral) - pickle.loads(pickle.dumps(node)) - - def test_check_ok(self): - node_dict = { - 'name': 'foo', - 'root_path': '/root/', - 'resource_type': str(NodeType.Snapshot), - 'path': '/root/x/path.sql', - 'original_file_path': '/root/path.sql', - 'package_name': 'test', - 'raw_sql': 'select * from wherever', - 'unique_id': 'model.test.foo', - 'fqn': ['test', 'models', 'foo'], - 'refs': [], - 'sources': [], - 'depends_on': {'macros': [], 'nodes': []}, - 'database': 'test_db', - 'deferred': False, - 
'description': '', - 'schema': 'test_schema', - 'alias': 'bar', - 'tags': [], - 'config': { - 'column_types': {}, - 'enabled': True, - 'materialized': 'snapshot', - 'persist_docs': {}, - 'post-hook': [], - 'pre-hook': [], - 'quoting': {}, + ), + checksum=FileHash.from_contents(''), + ) + + +@pytest.fixture +def basic_intermedaite_check_snapshot_object(): + cfg = EmptySnapshotConfig() + cfg._extra.update({ + 'unique_key': 'id', + 'strategy': 'check', + 'check_cols': 'all', + 'target_database': 'some_snapshot_db', + 'target_schema': 'some_snapshot_schema', + }) + + return IntermediateSnapshotNode( + package_name='test', + root_path='/root/', + path='/root/x/path.sql', + original_file_path='/root/path.sql', + raw_sql='select * from wherever', + name='foo', + resource_type=NodeType.Snapshot, + unique_id='model.test.foo', + fqn=['test', 'models', 'foo'], + refs=[], + sources=[], + depends_on=DependsOn(), + description='', + database='test_db', + schema='test_schema', + alias='bar', + tags=[], + config=cfg, + checksum=FileHash.from_contents(''), + ) + + +def test_timestamp_snapshot_ok(basic_timestamp_snapshot_dict, basic_timestamp_snapshot_object, basic_intermedaite_timestamp_snapshot_object): + node_dict = basic_timestamp_snapshot_dict + node = basic_timestamp_snapshot_object + inter = basic_intermedaite_timestamp_snapshot_object + + assert_symmetric(node, node_dict, ParsedSnapshotNode) + assert_symmetric(inter, node_dict, IntermediateSnapshotNode) + assert ParsedSnapshotNode.from_dict(inter.to_dict()) == node + assert node.is_refable is True + assert node.is_ephemeral is False + pickle.loads(pickle.dumps(node)) + + +def test_check_snapshot_ok(basic_check_snapshot_dict, basic_check_snapshot_object, basic_intermedaite_check_snapshot_object): + node_dict = basic_check_snapshot_dict + node = basic_check_snapshot_object + inter = basic_intermedaite_check_snapshot_object + + assert_symmetric(node, node_dict, ParsedSnapshotNode) + assert_symmetric(inter, node_dict, 
IntermediateSnapshotNode) + assert ParsedSnapshotNode.from_dict(inter.to_dict()) == node + assert node.is_refable is True + assert node.is_ephemeral is False + pickle.loads(pickle.dumps(node)) + + +def test_invalid_snapshot_bad_resource_type(basic_timestamp_snapshot_dict): + bad_resource_type = basic_timestamp_snapshot_dict + bad_resource_type['resource_type'] = str(NodeType.Model) + assert_fails_validation(bad_resource_type, ParsedSnapshotNode) + + +def test_basic_parsed_node_patch(basic_parsed_model_patch_object, basic_parsed_model_patch_dict): + assert_symmetric(basic_parsed_model_patch_object, basic_parsed_model_patch_dict) + + +@pytest.fixture +def populated_parsed_node_patch_dict(): + return { + 'name': 'foo', + 'description': 'The foo model', + 'original_file_path': '/path/to/schema.yml', + 'columns': { + 'a': { + 'name': 'a', + 'description': 'a text field', + 'meta': {}, 'tags': [], - 'vars': {}, - 'target_database': 'some_snapshot_db', - 'target_schema': 'some_snapshot_schema', - 'unique_key': 'id', - 'strategy': 'check', - 'check_cols': 'all', - }, - 'docs': {'show': True}, - 'columns': {}, - 'meta': {}, - } - - node = self.ContractType( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Snapshot, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - config=CheckSnapshotConfig( - strategy=SnapshotStrategy.Check, - unique_key='id', - check_cols=All.All, - target_database='some_snapshot_db', - target_schema='some_snapshot_schema', - ), - ) - cfg = EmptySnapshotConfig() - cfg._extra.update({ - 'unique_key': 'id', - 'strategy': 'check', - 'check_cols': 'all', - 'target_database': 'some_snapshot_db', - 'target_schema': 'some_snapshot_schema', - }) - - inter = 
IntermediateSnapshotNode( - package_name='test', - root_path='/root/', - path='/root/x/path.sql', - original_file_path='/root/path.sql', - raw_sql='select * from wherever', - name='foo', - resource_type=NodeType.Snapshot, - unique_id='model.test.foo', - fqn=['test', 'models', 'foo'], - refs=[], - sources=[], - depends_on=DependsOn(), - description='', - database='test_db', - schema='test_schema', - alias='bar', - tags=[], - config=cfg, - ) - self.assert_symmetric(node, node_dict) - self.assert_symmetric(inter, node_dict, cls=IntermediateSnapshotNode) - self.assertEqual( - self.ContractType.from_dict(inter.to_dict()), - node - ) - self.assertTrue(node.is_refable) - self.assertFalse(node.is_ephemeral) - - def test_invalid_bad_resource_type(self): - bad_resource_type = self._ts_ok() - bad_resource_type['resource_type'] = str(NodeType.Model) - self.assert_fails_validation(bad_resource_type) - - -class TestParsedNodePatch(ContractTestCase): - ContractType = ParsedNodePatch - - def test_empty(self): - dct = { - 'name': 'foo', - 'description': 'The foo model', - 'original_file_path': '/path/to/schema.yml', - 'columns': {}, - 'docs': {'show': True}, - 'meta': {}, - 'yaml_key': 'models', - 'package_name': 'test', - } - patch = self.ContractType( - name='foo', - description='The foo model', - yaml_key='models', - package_name='test', - original_file_path='/path/to/schema.yml', - columns={}, - docs=Docs(), - meta={}, - ) - self.assert_symmetric(patch, dct) - - def test_populated(self): - dct = { - 'name': 'foo', - 'description': 'The foo model', - 'original_file_path': '/path/to/schema.yml', - 'columns': { - 'a': { - 'name': 'a', - 'description': 'a text field', - 'meta': {}, - 'tags': [], - }, }, - 'docs': {'show': False}, - 'meta': {'key': ['value']}, - 'yaml_key': 'models', - 'package_name': 'test', - } - patch = self.ContractType( - name='foo', - description='The foo model', - original_file_path='/path/to/schema.yml', - columns={'a': ColumnInfo(name='a', description='a 
text field', meta={})}, - meta={'key': ['value']}, - yaml_key='models', - package_name='test', - docs=Docs(show=False), - ) - self.assert_symmetric(patch, dct) - pickle.loads(pickle.dumps(patch)) + }, + 'docs': {'show': False}, + 'meta': {'key': ['value']}, + 'yaml_key': 'models', + 'package_name': 'test', + } + + +@pytest.fixture +def populated_parsed_node_patch_object(): + return ParsedNodePatch( + name='foo', + description='The foo model', + original_file_path='/path/to/schema.yml', + columns={'a': ColumnInfo(name='a', description='a text field', meta={})}, + meta={'key': ['value']}, + yaml_key='models', + package_name='test', + docs=Docs(show=False), + ) + + +def test_populated_parsed_node_patch(populated_parsed_node_patch_dict, populated_parsed_node_patch_object): + assert_symmetric(populated_parsed_node_patch_object, populated_parsed_node_patch_dict) class TestParsedMacro(ContractTestCase): diff --git a/test/unit/test_docs_blocks.py b/test/unit/test_docs_blocks.py index 8cbf720ba52..d0596dbec5d 100644 --- a/test/unit/test_docs_blocks.py +++ b/test/unit/test_docs_blocks.py @@ -1,7 +1,8 @@ import os import unittest -from dbt.contracts.graph.manifest import SourceFile, FileHash, FilePath, Manifest +from dbt.contracts.files import SourceFile, FileHash, FilePath +from dbt.contracts.graph.manifest import Manifest from dbt.contracts.graph.parsed import ParsedDocumentation from dbt.node_types import NodeType from dbt.parser import docs diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py index 6c444768df2..86a2ce2baae 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -12,7 +12,8 @@ import dbt.config import dbt.utils import dbt.parser.manifest -from dbt.contracts.graph.manifest import FilePath, SourceFile, FileHash, Manifest +from dbt.contracts.files import SourceFile, FileHash, FilePath +from dbt.contracts.graph.manifest import Manifest from dbt.parser.results import ParseResult from dbt.parser.base import BaseParser from dbt.graph 
import NodeSelector, parse_difference diff --git a/test/unit/test_graph_selector_methods.py b/test/unit/test_graph_selector_methods.py index fd3d5c6c0c7..dcee2bd9796 100644 --- a/test/unit/test_graph_selector_methods.py +++ b/test/unit/test_graph_selector_methods.py @@ -1,7 +1,11 @@ +import copy import pytest +from unittest import mock from datetime import datetime +from pathlib import Path +from dbt.contracts.files import FileHash from dbt.contracts.graph.parsed import ( DependsOn, NodeConfig, @@ -13,8 +17,10 @@ ParsedSourceDefinition, TestConfig, TestMetadata, + ColumnInfo, ) from dbt.contracts.graph.manifest import Manifest +from dbt.contracts.state import PreviousState from dbt.node_types import NodeType from dbt.graph.selector_methods import ( MethodManager, @@ -26,7 +32,10 @@ ConfigSelectorMethod, TestNameSelectorMethod, TestTypeSelectorMethod, + StateSelectorMethod, ) +import dbt.exceptions +import dbt.contracts.graph.parsed def make_model(pkg, name, sql, refs=None, sources=None, tags=None, path=None, alias=None, config_kwargs=None, fqn_extras=None): @@ -76,10 +85,11 @@ def make_model(pkg, name, sql, refs=None, sources=None, tags=None, path=None, al sources=source_values, depends_on=DependsOn(nodes=depends_on_nodes), resource_type=NodeType.Model, + checksum=FileHash.from_contents(''), ) -def make_seed(pkg, name, path=None, loader=None, alias=None, tags=None, fqn_extras=None): +def make_seed(pkg, name, path=None, loader=None, alias=None, tags=None, fqn_extras=None, checksum=None): if alias is None: alias = name if tags is None: @@ -90,6 +100,9 @@ def make_seed(pkg, name, path=None, loader=None, alias=None, tags=None, fqn_extr if fqn_extras is None: fqn_extras = [] + if checksum is None: + checksum = FileHash.from_contents('') + fqn = [pkg] + fqn_extras + [name] return ParsedSeedNode( raw_sql='', @@ -105,6 +118,7 @@ def make_seed(pkg, name, path=None, loader=None, alias=None, tags=None, fqn_extr original_file_path=f'data/{path}', tags=tags, 
resource_type=NodeType.Seed, + checksum=checksum, ) @@ -222,9 +236,10 @@ def make_schema_test(pkg, test_name, test_model, test_kwargs, path=None, refs=No sources=[], depends_on=DependsOn( macros=[macro_depends], - nodes=['model.minimal.view_model'] + nodes=depends_on_nodes ), column_name=column_name, + checksum=FileHash.from_contents(''), ) @@ -272,6 +287,7 @@ def make_data_test(pkg, name, sql, refs=None, sources=None, tags=None, path=None sources=source_values, depends_on=DependsOn(nodes=depends_on_nodes), resource_type=NodeType.Test, + checksum=FileHash.from_contents(''), ) @@ -438,7 +454,7 @@ def search_manifest_using_method(manifest, method, selection): def test_select_fqn(manifest): - methods = MethodManager(manifest) + methods = MethodManager(manifest, None) method = methods.get_method('fqn', []) assert isinstance(method, QualifiedNameSelectorMethod) assert method.arguments == [] @@ -451,7 +467,7 @@ def test_select_fqn(manifest): def test_select_tag(manifest): - methods = MethodManager(manifest) + methods = MethodManager(manifest, None) method = methods.get_method('tag', []) assert isinstance(method, TagSelectorMethod) assert method.arguments == [] @@ -461,7 +477,7 @@ def test_select_tag(manifest): def test_select_source(manifest): - methods = MethodManager(manifest) + methods = MethodManager(manifest, None) method = methods.get_method('source', []) assert isinstance(method, SourceSelectorMethod) assert method.arguments == [] @@ -486,7 +502,7 @@ def test_select_source(manifest): # TODO: this requires writing out files @pytest.mark.skip('TODO: write manifest files to disk') def test_select_path(manifest): - methods = MethodManager(manifest) + methods = MethodManager(manifest, None) method = methods.get_method('path', []) assert isinstance(method, PathSelectorMethod) assert method.arguments == [] @@ -500,7 +516,7 @@ def test_select_path(manifest): def test_select_package(manifest): - methods =
MethodManager(manifest, None) method = methods.get_method('package', []) assert isinstance(method, PackageSelectorMethod) assert method.arguments == [] @@ -512,7 +528,7 @@ def test_select_package(manifest): def test_select_config_materialized(manifest): - methods = MethodManager(manifest) + methods = MethodManager(manifest, None) method = methods.get_method('config', ['materialized']) assert isinstance(method, ConfigSelectorMethod) assert method.arguments == ['materialized'] @@ -523,7 +539,7 @@ def test_select_config_materialized(manifest): def test_select_test_name(manifest): - methods = MethodManager(manifest) + methods = MethodManager(manifest, None) method = methods.get_method('test_name', []) assert isinstance(method, TestNameSelectorMethod) assert method.arguments == [] @@ -534,10 +550,198 @@ def test_select_test_name(manifest): def test_select_test_type(manifest): - methods = MethodManager(manifest) + methods = MethodManager(manifest, None) method = methods.get_method('test_type', []) assert isinstance(method, TestTypeSelectorMethod) assert method.arguments == [] assert search_manifest_using_method(manifest, method, 'schema') == {'unique_table_model_id', 'not_null_table_model_id', 'unique_view_model_id', 'unique_ext_raw_ext_source_id'} assert search_manifest_using_method(manifest, method, 'data') == {'view_test_nothing'} + +@pytest.fixture +def previous_state(manifest): + writable = copy.deepcopy(manifest).writable_manifest() + state = PreviousState(Path('/path/does/not/exist')) + state.manifest = writable + return state + + +def add_node(manifest, node): + manifest.nodes[node.unique_id] = node + + +def change_node(manifest, node, change=None): + if change is not None: + node = change(node) + manifest.nodes[node.unique_id] = node + + +def statemethod(manifest, previous_state): + methods = MethodManager(manifest, previous_state) + method = methods.get_method('state', []) + assert isinstance(method, StateSelectorMethod) + assert method.arguments == [] + return 
method + + +def test_select_state_no_change(manifest, previous_state): + method = statemethod(manifest, previous_state) + assert not search_manifest_using_method(manifest, method, 'modified') + assert not search_manifest_using_method(manifest, method, 'new') + + +def test_select_state_nothing(manifest, previous_state): + previous_state.manifest = None + method = statemethod(manifest, previous_state) + with pytest.raises(dbt.exceptions.RuntimeException) as exc: + search_manifest_using_method(manifest, method, 'modified') + assert 'no deferred manifest' in str(exc.value) + + with pytest.raises(dbt.exceptions.RuntimeException) as exc: + search_manifest_using_method(manifest, method, 'new') + assert 'no deferred manifest' in str(exc.value) + + +def test_select_state_added_model(manifest, previous_state): + add_node(manifest, make_model('pkg', 'another_model', 'select 1 as id')) + method = statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'another_model'} + assert search_manifest_using_method(manifest, method, 'new') == {'another_model'} + + +def test_select_state_changed_model_sql(manifest, previous_state, view_model): + change_node(manifest, view_model.replace(raw_sql='select 1 as id')) + method = statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'view_model'} + assert not search_manifest_using_method(manifest, method, 'new') + + +def test_select_state_changed_model_fqn(manifest, previous_state, view_model): + change_node(manifest, view_model.replace(fqn=view_model.fqn[:-1]+['nested']+view_model.fqn[-1:])) + method = statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'view_model'} + assert not search_manifest_using_method(manifest, method, 'new') + + +def test_select_state_added_seed(manifest, previous_state): + add_node(manifest, make_seed('pkg', 'another_seed')) + method = 
statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'another_seed'} + assert search_manifest_using_method(manifest, method, 'new') == {'another_seed'} + + +def test_select_state_changed_seed_checksum_sha_to_sha(manifest, previous_state, seed): + change_node(manifest, seed.replace(checksum=FileHash.from_contents('changed'))) + method = statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'seed'} + assert not search_manifest_using_method(manifest, method, 'new') + + +def test_select_state_changed_seed_checksum_path_to_path(manifest, previous_state, seed): + change_node(previous_state.manifest, seed.replace(checksum=FileHash(name='path', checksum=seed.original_file_path))) + change_node(manifest, seed.replace(checksum=FileHash(name='path', checksum=seed.original_file_path))) + method = statemethod(manifest, previous_state) + with mock.patch('dbt.contracts.graph.parsed.warn_or_error') as warn_or_error_patch: + assert not search_manifest_using_method(manifest, method, 'modified') + warn_or_error_patch.assert_called_once() + msg = warn_or_error_patch.call_args[0][0] + assert msg.startswith('Found a seed >1MB in size') + with mock.patch('dbt.contracts.graph.parsed.warn_or_error') as warn_or_error_patch: + assert not search_manifest_using_method(manifest, method, 'new') + warn_or_error_patch.assert_not_called() + + +def test_select_state_changed_seed_checksum_sha_to_path(manifest, previous_state, seed): + change_node(manifest, seed.replace(checksum=FileHash(name='path', checksum=seed.original_file_path))) + method = statemethod(manifest, previous_state) + with mock.patch('dbt.contracts.graph.parsed.warn_or_error') as warn_or_error_patch: + assert search_manifest_using_method(manifest, method, 'modified') == {'seed'} + warn_or_error_patch.assert_called_once() + msg = warn_or_error_patch.call_args[0][0] + assert msg.startswith('Found a seed >1MB in size') + with 
mock.patch('dbt.contracts.graph.parsed.warn_or_error') as warn_or_error_patch: + assert not search_manifest_using_method(manifest, method, 'new') + warn_or_error_patch.assert_not_called() + + +def test_select_state_changed_seed_checksum_path_to_sha(manifest, previous_state, seed): + change_node(previous_state.manifest, seed.replace(checksum=FileHash(name='path', checksum=seed.original_file_path))) + method = statemethod(manifest, previous_state) + with mock.patch('dbt.contracts.graph.parsed.warn_or_error') as warn_or_error_patch: + assert search_manifest_using_method(manifest, method, 'modified') == {'seed'} + warn_or_error_patch.assert_not_called() + with mock.patch('dbt.contracts.graph.parsed.warn_or_error') as warn_or_error_patch: + assert not search_manifest_using_method(manifest, method, 'new') + warn_or_error_patch.assert_not_called() + + +def test_select_state_changed_seed_fqn(manifest, previous_state, seed): + change_node(manifest, seed.replace(fqn=seed.fqn[:-1]+['nested']+seed.fqn[-1:])) + method = statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'seed'} + assert not search_manifest_using_method(manifest, method, 'new') + + +def test_select_state_changed_seed_relation_documented(manifest, previous_state, seed): + seed_doc_relation = seed.replace(config=seed.config.replace(persist_docs={'relation': True})) + change_node(manifest, seed_doc_relation) + method = statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'seed'} + assert not search_manifest_using_method(manifest, method, 'new') + + +def test_select_state_changed_seed_relation_documented_nodocs(manifest, previous_state, seed): + seed_doc_relation = seed.replace(config=seed.config.replace(persist_docs={'relation': True})) + seed_doc_relation_documented = seed_doc_relation.replace(description='a description') + change_node(previous_state.manifest, seed_doc_relation) + 
change_node(manifest, seed_doc_relation_documented) + method = statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'seed'} + assert not search_manifest_using_method(manifest, method, 'new') + + +def test_select_state_changed_seed_relation_documented_withdocs(manifest, previous_state, seed): + seed_doc_relation = seed.replace(config=seed.config.replace(persist_docs={'relation': True})) + seed_doc_relation_documented = seed_doc_relation.replace(description='a description') + change_node(previous_state.manifest, seed_doc_relation_documented) + change_node(manifest, seed_doc_relation) + method = statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'seed'} + assert not search_manifest_using_method(manifest, method, 'new') + + +def test_select_state_changed_seed_columns_documented(manifest, previous_state, seed): + # changing persist_docs, even without changing the description -> changed + seed_doc_columns = seed.replace(config=seed.config.replace(persist_docs={'columns': True})) + change_node(manifest, seed_doc_columns) + method = statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'seed'} + assert not search_manifest_using_method(manifest, method, 'new') + + +def test_select_state_changed_seed_columns_documented_nodocs(manifest, previous_state, seed): + seed_doc_columns = seed.replace(config=seed.config.replace(persist_docs={'columns': True})) + seed_doc_columns_documented_columns = seed_doc_columns.replace( + columns={'a': ColumnInfo(name='a', description='a description')}, + ) + + change_node(previous_state.manifest, seed_doc_columns) + change_node(manifest, seed_doc_columns_documented_columns) + + method = statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'seed'} + assert not search_manifest_using_method(manifest, method, 'new') + + +def 
test_select_state_changed_seed_columns_documented_withdocs(manifest, previous_state, seed): + seed_doc_columns = seed.replace(config=seed.config.replace(persist_docs={'columns': True})) + seed_doc_columns_documented_columns = seed_doc_columns.replace( + columns={'a': ColumnInfo(name='a', description='a description')}, + ) + + change_node(manifest, seed_doc_columns) + change_node(previous_state.manifest, seed_doc_columns_documented_columns) + + method = statemethod(manifest, previous_state) + assert search_manifest_using_method(manifest, method, 'modified') == {'seed'} + assert not search_manifest_using_method(manifest, method, 'new') diff --git a/test/unit/test_manifest.py b/test/unit/test_manifest.py index 9acc26a9edf..9549df1f32c 100644 --- a/test/unit/test_manifest.py +++ b/test/unit/test_manifest.py @@ -10,6 +10,7 @@ import dbt.flags from dbt import tracking +from dbt.contracts.files import FileHash from dbt.contracts.graph.manifest import Manifest, ManifestMetadata from dbt.contracts.graph.parsed import ( ParsedModelNode, @@ -30,7 +31,7 @@ 'depends_on', 'database', 'schema', 'name', 'resource_type', 'package_name', 'root_path', 'path', 'original_file_path', 'raw_sql', 'description', 'columns', 'fqn', 'build_path', 'patch_path', 'docs', - 'deferred', + 'deferred', 'checksum', }) REQUIRED_COMPILED_NODE_KEYS = frozenset(REQUIRED_PARSED_NODE_KEYS | { @@ -76,7 +77,8 @@ def setUp(self): original_file_path='events.sql', root_path='', meta={}, - raw_sql='does not matter' + raw_sql='does not matter', + checksum=FileHash.empty(), ), 'model.root.events': ParsedModelNode( name='events', @@ -96,7 +98,8 @@ def setUp(self): original_file_path='events.sql', root_path='', meta={}, - raw_sql='does not matter' + raw_sql='does not matter', + checksum=FileHash.empty(), ), 'model.root.dep': ParsedModelNode( name='dep', @@ -116,7 +119,8 @@ def setUp(self): original_file_path='multi.sql', root_path='', meta={}, - raw_sql='does not matter' + raw_sql='does not matter', + 
checksum=FileHash.empty(), ), 'model.root.nested': ParsedModelNode( name='nested', @@ -136,7 +140,8 @@ def setUp(self): original_file_path='multi.sql', root_path='', meta={}, - raw_sql='does not matter' + raw_sql='does not matter', + checksum=FileHash.empty(), ), 'model.root.sibling': ParsedModelNode( name='sibling', @@ -156,7 +161,8 @@ def setUp(self): original_file_path='multi.sql', root_path='', meta={}, - raw_sql='does not matter' + raw_sql='does not matter', + checksum=FileHash.empty(), ), 'model.root.multi': ParsedModelNode( name='multi', @@ -176,7 +182,8 @@ def setUp(self): original_file_path='multi.sql', root_path='', meta={}, - raw_sql='does not matter' + raw_sql='does not matter', + checksum=FileHash.empty(), ), } @@ -382,6 +389,7 @@ def test_get_resource_fqns(self): original_file_path='seed.csv', root_path='', raw_sql='-- csv --', + checksum=FileHash.empty(), ) manifest = Manifest(nodes=nodes, sources=self.sources, macros={}, docs={}, generated_at=datetime.utcnow(), disabled=[], @@ -448,7 +456,8 @@ def setUp(self): compiled_sql='also does not matter', extra_ctes_injected=True, injected_sql=None, - extra_ctes=[] + extra_ctes=[], + checksum=FileHash.empty(), ), 'model.root.events': CompiledModelNode( name='events', @@ -473,7 +482,8 @@ def setUp(self): compiled_sql='also does not matter', extra_ctes_injected=True, injected_sql='and this also does not matter', - extra_ctes=[] + extra_ctes=[], + checksum=FileHash.empty(), ), 'model.root.dep': ParsedModelNode( name='dep', @@ -493,7 +503,8 @@ def setUp(self): original_file_path='multi.sql', root_path='', meta={}, - raw_sql='does not matter' + raw_sql='does not matter', + checksum=FileHash.empty(), ), 'model.root.nested': ParsedModelNode( name='nested', @@ -513,7 +524,8 @@ def setUp(self): original_file_path='multi.sql', root_path='', meta={}, - raw_sql='does not matter' + raw_sql='does not matter', + checksum=FileHash.empty(), ), 'model.root.sibling': ParsedModelNode( name='sibling', @@ -533,7 +545,8 @@ def 
setUp(self): original_file_path='multi.sql', root_path='', meta={}, - raw_sql='does not matter' + raw_sql='does not matter', + checksum=FileHash.empty(), ), 'model.root.multi': ParsedModelNode( name='multi', @@ -553,7 +566,8 @@ def setUp(self): original_file_path='multi.sql', root_path='', meta={}, - raw_sql='does not matter' + raw_sql='does not matter', + checksum=FileHash.empty(), ), } diff --git a/test/unit/test_parse_manifest.py b/test/unit/test_parse_manifest.py index 0e5fb6f1b5f..de258f7de70 100644 --- a/test/unit/test_parse_manifest.py +++ b/test/unit/test_parse_manifest.py @@ -3,7 +3,7 @@ from .utils import config_from_parts_or_dicts, normalize -from dbt.contracts.graph.manifest import FileHash, FilePath, SourceFile +from dbt.contracts.files import SourceFile, FileHash, FilePath from dbt.parser import ParseResult from dbt.parser.search import FileBlock from dbt.parser import manifest diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py index 29dbdcbfdd3..071f4b7eb67 100644 --- a/test/unit/test_parser.py +++ b/test/unit/test_parser.py @@ -19,9 +19,8 @@ from dbt.parser.manifest import process_docs, process_sources, process_refs from dbt.node_types import NodeType -from dbt.contracts.graph.manifest import ( - Manifest, FilePath, SourceFile, FileHash -) +from dbt.contracts.files import SourceFile, FileHash, FilePath +from dbt.contracts.graph.manifest import Manifest from dbt.contracts.graph.model_config import ( NodeConfig, TestConfig, TimestampSnapshotConfig, SnapshotStrategy, ) @@ -483,6 +482,7 @@ def test_basic(self): config=NodeConfig(materialized='table'), path=normalize('nested/model_1.sql'), raw_sql=raw_sql, + checksum=block.file.checksum, ) self.assertEqual(node, expected) path = get_abs_os_path('./dbt_modules/snowplow/models/nested/model_1.sql') @@ -551,6 +551,7 @@ def test_single_block(self): ), path=normalize('nested/snap_1.sql'), raw_sql=raw_sql, + checksum=block.file.checksum, ) self.assertEqual(node, expected) path = 
get_abs_os_path('./dbt_modules/snowplow/snapshots/nested/snap_1.sql') @@ -603,6 +604,7 @@ def test_multi_block(self): ), path=normalize('nested/snap_1.sql'), raw_sql=raw_1, + checksum=block.file.checksum, ) expect_bar = ParsedSnapshotNode( alias='bar', @@ -625,6 +627,7 @@ def test_multi_block(self): ), path=normalize('nested/snap_1.sql'), raw_sql=raw_2, + checksum=block.file.checksum, ) self.assertEqual(nodes[0], expect_bar) self.assertEqual(nodes[1], expect_foo) @@ -736,6 +739,7 @@ def test_basic(self): tags=['data'], path=normalize('data_test/test_1.sql'), raw_sql=raw_sql, + checksum=block.file.checksum, ) self.assertEqual(node, expected) path = get_abs_os_path('./dbt_modules/snowplow/tests/test_1.sql') @@ -777,6 +781,7 @@ def test_basic(self): config=NodeConfig(), path=normalize('analysis/nested/analysis_1.sql'), raw_sql=raw_sql, + checksum=block.file.checksum, ) self.assertEqual(node, expected) path = get_abs_os_path('./dbt_modules/snowplow/analyses/nested/analysis_1.sql') diff --git a/test/unit/utils.py b/test/unit/utils.py index 4900a77b9e0..03701695e52 100644 --- a/test/unit/utils.py +++ b/test/unit/utils.py @@ -9,6 +9,7 @@ from unittest import TestCase import agate +import pytest from hologram import ValidationError @@ -155,6 +156,26 @@ def assert_fails_validation(self, dct, cls=None): cls.from_dict(dct) +def assert_to_dict(obj, dct): + assert obj.to_dict() == dct + + +def assert_from_dict(obj, dct, cls=None): + if cls is None: + cls = obj.__class__ + assert cls.from_dict(dct) == obj + + +def assert_symmetric(obj, dct, cls=None): + assert_to_dict(obj, dct) + assert_from_dict(obj, dct, cls) + + +def assert_fails_validation(dct, cls): + with pytest.raises(ValidationError): + cls.from_dict(dct) + + def generate_name_macros(package): from dbt.contracts.graph.parsed import ParsedMacro from dbt.node_types import NodeType