Skip to content

Commit

Permalink
Merge pull request #2695 from fishtown-analytics/feature/state-modifi…
Browse files Browse the repository at this point in the history
…ed-selector

Add state:modified and state:new selectors
  • Loading branch information
beckjake authored Aug 14, 2020
2 parents 1bd82d4 + c8453d8 commit d554835
Show file tree
Hide file tree
Showing 44 changed files with 3,753 additions and 1,913 deletions.
11 changes: 4 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
## dbt 0.18.0 (Release TBD)


### Features
- Add support for impersonating a service account using `impersonate_service_account` in the BigQuery profile configuration ([#2677](https://github.com/fishtown-analytics/dbt/issues/2677)) ([docs](https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile#service-account-impersonation))
- Macros in the current project can override internal dbt macros that are called through `execute_macros`. ([#2301](https://github.com/fishtown-analytics/dbt/issues/2301), [#2686](https://github.com/fishtown-analytics/dbt/pull/2686))
- Add better retry support when using the BigQuery adapter ([#2694](https://github.com/fishtown-analytics/dbt/pull/2694), follow-up to [#1963](https://github.com/fishtown-analytics/dbt/pull/1963))


### Breaking changes
- `adapter_macro` is no longer a macro; instead, it is a builtin context method. Any custom macros that intercepted it by going through `context['dbt']` will need to instead access it via `context['builtins']` ([#2302](https://github.com/fishtown-analytics/dbt/issues/2302), [#2673](https://github.com/fishtown-analytics/dbt/pull/2673))
- `adapter_macro` is now deprecated. Use `adapter.dispatch` instead.

### Features
- Add better retry support when using the BigQuery adapter ([#2694](https://github.com/fishtown-analytics/dbt/pull/2694), follow-up to [#1963](https://github.com/fishtown-analytics/dbt/pull/1963))
- Added a `dispatch` method to the context adapter and deprecated `adapter_macro`. ([#2302](https://github.com/fishtown-analytics/dbt/issues/2302), [#2679](https://github.com/fishtown-analytics/dbt/pull/2679))
- The built-in schema tests now use `adapter.dispatch`, so they can be overridden for adapter plugins ([#2415](https://github.com/fishtown-analytics/dbt/issues/2415), [#2684](https://github.com/fishtown-analytics/dbt/pull/2684))
- Add support for impersonating a service account using `impersonate_service_account` in the BigQuery profile configuration ([#2677](https://github.com/fishtown-analytics/dbt/issues/2677)) ([docs](https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile#service-account-impersonation))
- Macros in the current project can override internal dbt macros that are called through `execute_macros`. ([#2301](https://github.com/fishtown-analytics/dbt/issues/2301), [#2686](https://github.com/fishtown-analytics/dbt/pull/2686))
- Add state:modified and state:new selectors ([#2641](https://github.com/fishtown-analytics/dbt/issues/2641), [#2695](https://github.com/fishtown-analytics/dbt/pull/2695))

Contributors:
- [@bbhoss](https://github.com/bbhoss) ([#2677](https://github.com/fishtown-analytics/dbt/pull/2677))
Expand Down
166 changes: 166 additions & 0 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import hashlib
import os
from dataclasses import dataclass, field
from typing import List, Optional, Union

from hologram import JsonSchemaMixin

from dbt.exceptions import InternalException

from .util import MacroKey, SourceKey


# Seed size limit in bytes (1 MiB), and the human-readable label for that
# limit. The two values are independent literals, so keep them in sync.
MAXIMUM_SEED_SIZE = 1024 * 1024
MAXIMUM_SEED_SIZE_NAME = '1MB'


# Describes where a file lives as three parts: the project root, the
# directory that was searched inside the project, and the file's path
# relative to that searched directory.
@dataclass
class FilePath(JsonSchemaMixin):
    searched_path: str
    relative_path: str
    project_root: str

    @property
    def search_key(self) -> str:
        """Return the key used to look this file up (its absolute path)."""
        # TODO: should this be project name + path relative to project root?
        return self.absolute_path

    @property
    def full_path(self) -> str:
        """Return project root, searched path and relative path joined
        together, without normalizing the result.
        """
        # left un-normalized on purpose: useful for symlink preservation
        parts = (self.project_root, self.searched_path, self.relative_path)
        return os.path.join(*parts)

    @property
    def absolute_path(self) -> str:
        """Return ``full_path`` passed through ``os.path.abspath``."""
        joined = self.full_path
        return os.path.abspath(joined)

    @property
    def original_file_path(self) -> str:
        """Return the searched path joined with the relative path."""
        # this is mostly used for reporting errors. It doesn't show the
        # project name, should it?
        return os.path.join(self.searched_path, self.relative_path)

    def seed_too_large(self) -> bool:
        """Return True when the file at ``full_path`` is larger than
        MAXIMUM_SEED_SIZE bytes, as reported by ``os.stat``.
        """
        size_in_bytes = os.stat(self.full_path).st_size
        return size_in_bytes > MAXIMUM_SEED_SIZE


# A checksum tagged with the name of the algorithm that produced it. Two
# special names are used besides real hashlib algorithms: 'none' (no usable
# checksum; never equal to anything) and 'path' (the "checksum" is a path).
@dataclass
class FileHash(JsonSchemaMixin):
    name: str  # the hash type name
    checksum: str  # the hashlib.hash_type().hexdigest() of the file contents

    @classmethod
    def empty(cls) -> 'FileHash':
        """Return a hash named 'none' with an empty checksum."""
        return FileHash(name='none', checksum='')

    @classmethod
    def path(cls, path: str) -> 'FileHash':
        """Return a hash named 'path' whose checksum is the given path."""
        return FileHash(name='path', checksum=path)

    def __eq__(self, other):
        """Compare two hashes by name and checksum.

        Returns NotImplemented for non-FileHash operands. A hash named
        'none' is never equal to anything, including another 'none' hash.
        """
        if not isinstance(other, FileHash):
            return NotImplemented

        if self.name == 'none' or self.name != other.name:
            return False

        return self.checksum == other.checksum

    def compare(self, contents: str) -> bool:
        """Return whether hashing ``contents`` with this hash's algorithm
        yields this hash's checksum.

        Always False for a 'none' hash. For any other name that hashlib does
        not recognize (such as 'path'), ``hashlib.new`` raises ValueError.
        """
        if self.name == 'none':
            return False

        # Compare checksum strings directly. Comparing the FileHash returned
        # by from_contents() against the ``self.checksum`` string could never
        # be true, because __eq__ returns NotImplemented for non-FileHash
        # operands and Python then falls back to an identity comparison.
        recomputed = self.from_contents(contents, name=self.name)
        return recomputed.checksum == self.checksum

    @classmethod
    def from_contents(cls, contents: str, name='sha256') -> 'FileHash':
        """Create a file hash from the given file contents. The hash is always
        the utf-8 encoding of the contents given, because dbt only reads files
        as utf-8.

        Raises ValueError (from ``hashlib.new``) if ``name`` is not a hash
        algorithm hashlib supports.
        """
        data = contents.encode('utf-8')
        checksum = hashlib.new(name, data).hexdigest()
        return cls(name=name, checksum=checksum)


# Stand-in for a path when the file did not come from disk. It has no
# fields; every path-like property returns the same fixed placeholder text.
@dataclass
class RemoteFile(JsonSchemaMixin):
    @property
    def searched_path(self) -> str:
        """Return the fixed placeholder text."""
        return 'from remote system'

    @property
    def relative_path(self) -> str:
        """Return the fixed placeholder text."""
        return 'from remote system'

    @property
    def absolute_path(self) -> str:
        """Return the fixed placeholder text."""
        return 'from remote system'

    @property
    def original_file_path(self) -> str:
        """Return the fixed placeholder text."""
        # annotated ``-> str`` to match the sibling properties
        return 'from remote system'


@dataclass
class SourceFile(JsonSchemaMixin):
    """Define a source file in dbt"""
    # where the file came from: an on-disk path or a remote placeholder
    path: Union[FilePath, RemoteFile]
    checksum: FileHash
    # raw file contents; we don't want to serialize this
    _contents: Optional[str] = None
    # unique IDs of the things defined in this file
    nodes: List[str] = field(default_factory=list)
    docs: List[str] = field(default_factory=list)
    macros: List[str] = field(default_factory=list)
    sources: List[str] = field(default_factory=list)
    # node patches in this file. The entries are names, not unique ids!
    patches: List[str] = field(default_factory=list)
    # macro patches in this file. The entries are package, name pairs.
    macro_patches: List[MacroKey] = field(default_factory=list)
    # source patches in this file. The entries are package, name pairs
    source_patches: List[SourceKey] = field(default_factory=list)

    @property
    def search_key(self) -> Optional[str]:
        """Return the path's search key, or None when the path is a
        RemoteFile or the checksum is named 'none'.
        """
        has_no_key = (
            isinstance(self.path, RemoteFile) or
            self.checksum.name == 'none'
        )
        if has_no_key:
            return None
        return self.path.search_key

    @property
    def contents(self) -> str:
        """Return the stored contents.

        Raises InternalException if no contents have been set.
        """
        stored = self._contents
        if stored is None:
            raise InternalException('SourceFile has no contents!')
        return stored

    @contents.setter
    def contents(self, value):
        self._contents = value

    @classmethod
    def empty(cls, path: FilePath) -> 'SourceFile':
        """Build a source file for ``path`` with an empty ('none') checksum
        and empty-string contents.
        """
        source_file = cls(path=path, checksum=FileHash.empty())
        source_file.contents = ''
        return source_file

    @classmethod
    def big_seed(cls, path: FilePath) -> 'SourceFile':
        """Parse seeds over the size limit with just the path: the checksum
        is a 'path' hash of the absolute path and the contents are empty.
        """
        path_checksum = FileHash.path(path.absolute_path)
        source_file = cls(path=path, checksum=path_checksum)
        source_file.contents = ''
        return source_file

    @classmethod
    def remote(cls, contents: str) -> 'SourceFile':
        """Build a source file with a RemoteFile path, an empty ('none')
        checksum, and the given contents.
        """
        source_file = cls(path=RemoteFile(), checksum=FileHash.empty())
        source_file.contents = contents
        return source_file
22 changes: 22 additions & 0 deletions core/dbt/contracts/graph/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
ParsedSourceDefinition,
SeedConfig,
TestConfig,
same_seeds,
)
from dbt.node_types import NodeType
from dbt.contracts.util import Replaceable
Expand Down Expand Up @@ -94,6 +95,7 @@ class CompiledRPCNode(CompiledNode):

@dataclass
class CompiledSeedNode(CompiledNode):
# keep this in sync with ParsedSeedNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Seed]})
config: SeedConfig = field(default_factory=SeedConfig)

Expand All @@ -102,6 +104,9 @@ def empty(self):
""" Seeds are never empty"""
return False

def same_body(self, other) -> bool:
    """Return the result of ``same_seeds(self, other)``."""
    seeds_match = same_seeds(self, other)
    return seeds_match


@dataclass
class CompiledSnapshotNode(CompiledNode):
Expand All @@ -116,10 +121,27 @@ class CompiledDataTestNode(CompiledNode):

@dataclass
class CompiledSchemaTestNode(CompiledNode, HasTestMetadata):
    # keep this in sync with ParsedSchemaTestNode!
    resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
    column_name: Optional[str] = None
    config: TestConfig = field(default_factory=TestConfig)

    def same_config(self, other) -> bool:
        """Return whether both nodes have the same ``config.severity``."""
        own_severity = self.config.severity
        return own_severity == other.config.severity

    def same_column_name(self, other) -> bool:
        """Return whether both nodes have the same ``column_name``."""
        own_column = self.column_name
        return own_column == other.column_name

    def same_contents(self, other) -> bool:
        """Return whether ``other`` is not None and matches this node on
        ``same_config`` and ``same_fqn``.
        """
        # NOTE(review): same_column_name is not consulted here -- confirm
        # that is intended.
        if other is None:
            return False
        if not self.same_config(other):
            return False
        return bool(self.same_fqn(other))


CompiledTestNode = Union[CompiledDataTestNode, CompiledSchemaTestNode]

Expand Down
Loading

0 comments on commit d554835

Please sign in to comment.