Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add state:modified and state:new selectors #2695

Merged
merged 6 commits into from
Aug 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
## dbt 0.18.0 (Release TBD)


### Features
- Add support for impersonating a service account using `impersonate_service_account` in the BigQuery profile configuration ([#2677](https://github.com/fishtown-analytics/dbt/issues/2677)) ([docs](https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile#service-account-impersonation))
- Macros in the current project can override internal dbt macros that are called through `execute_macros`. ([#2301](https://github.com/fishtown-analytics/dbt/issues/2301), [#2686](https://github.com/fishtown-analytics/dbt/pull/2686))
- Add better retry support when using the BigQuery adapter ([#2694](https://github.com/fishtown-analytics/dbt/pull/2694), follow-up to [#1963](https://github.com/fishtown-analytics/dbt/pull/1963))


### Breaking changes
- `adapter_macro` is no longer a macro; instead, it is a builtin context method. Any custom macros that intercepted it by going through `context['dbt']` will need to access it via `context['builtins']` instead ([#2302](https://github.com/fishtown-analytics/dbt/issues/2302), [#2673](https://github.com/fishtown-analytics/dbt/pull/2673))
- `adapter_macro` is now deprecated. Use `adapter.dispatch` instead.

### Features
- Add better retry support when using the BigQuery adapter ([#2694](https://github.com/fishtown-analytics/dbt/pull/2694), follow-up to [#1963](https://github.com/fishtown-analytics/dbt/pull/1963))
- Added a `dispatch` method to the context adapter and deprecated `adapter_macro`. ([#2302](https://github.com/fishtown-analytics/dbt/issues/2302), [#2679](https://github.com/fishtown-analytics/dbt/pull/2679))
- The built-in schema tests now use `adapter.dispatch`, so they can be overridden for adapter plugins ([#2415](https://github.com/fishtown-analytics/dbt/issues/2415), [#2684](https://github.com/fishtown-analytics/dbt/pull/2684))
- Add support for impersonating a service account using `impersonate_service_account` in the BigQuery profile configuration ([#2677](https://github.com/fishtown-analytics/dbt/issues/2677)) ([docs](https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile#service-account-impersonation))
- Macros in the current project can override internal dbt macros that are called through `execute_macros`. ([#2301](https://github.com/fishtown-analytics/dbt/issues/2301), [#2686](https://github.com/fishtown-analytics/dbt/pull/2686))
- Add state:modified and state:new selectors ([#2641](https://github.com/fishtown-analytics/dbt/issues/2641), [#2695](https://github.com/fishtown-analytics/dbt/pull/2695))

Contributors:
- [@bbhoss](https://github.com/bbhoss) ([#2677](https://github.com/fishtown-analytics/dbt/pull/2677))
Expand Down
166 changes: 166 additions & 0 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import hashlib
import os
from dataclasses import dataclass, field
from typing import List, Optional, Union

from hologram import JsonSchemaMixin

from dbt.exceptions import InternalException

from .util import MacroKey, SourceKey


# Size threshold, in bytes, that FilePath.seed_too_large() compares a file's
# on-disk size against (1024 * 1024 bytes).
MAXIMUM_SEED_SIZE = 1024 * 1024
# Short label for the limit above; keep the two in sync when changing either.
MAXIMUM_SEED_SIZE_NAME = '1MB'


@dataclass
class FilePath(JsonSchemaMixin):
    # A file location stored as three pieces: the project root, the path that
    # was searched within it, and the file's path relative to the searched
    # path. The properties below join those pieces in different combinations.
    searched_path: str
    relative_path: str
    project_root: str

    @property
    def search_key(self) -> str:
        """Return the key used to look this file up: its absolute path."""
        # TODO: should this be project name + path relative to project root?
        return self.absolute_path

    @property
    def full_path(self) -> str:
        """Return project root, searched path and relative path joined
        together, without any normalization.
        """
        # useful for symlink preservation
        parts = (self.project_root, self.searched_path, self.relative_path)
        return os.path.join(*parts)

    @property
    def absolute_path(self) -> str:
        """Return ``full_path`` passed through ``os.path.abspath``."""
        joined = self.full_path
        return os.path.abspath(joined)

    @property
    def original_file_path(self) -> str:
        """Return the searched path joined with the relative path (the
        project root is left out).
        """
        # this is mostly used for reporting errors. It doesn't show the project
        # name, should it?
        return os.path.join(self.searched_path, self.relative_path)

    def seed_too_large(self) -> bool:
        """Return whether the file this represents is over the seed size limit
        """
        # stat the un-normalized full path; raises OSError if it is missing
        size_in_bytes = os.stat(self.full_path).st_size
        return size_in_bytes > MAXIMUM_SEED_SIZE


@dataclass
class FileHash(JsonSchemaMixin):
    # A checksum tagged with the name of the scheme that produced it. Besides
    # real hashlib algorithm names, two special names are used in this class:
    # 'none' (placeholder, never equal to anything) and 'path' (the checksum
    # field holds a path string instead of a digest).
    name: str  # the hash type name
    checksum: str  # the hashlib.hash_type().hexdigest() of the file contents

    @classmethod
    def empty(cls):
        """Return the placeholder hash: name 'none' with an empty checksum."""
        return FileHash(name='none', checksum='')

    @classmethod
    def path(cls, path: str):
        """Return a hash named 'path' whose checksum is the given path."""
        return FileHash(name='path', checksum=path)

    def __eq__(self, other):
        """Compare two hashes.

        Returns NotImplemented for non-FileHash operands. A hash named 'none'
        is never equal to anything (including another 'none' hash), and
        hashes with different names are never equal; otherwise the checksum
        strings decide.
        """
        if not isinstance(other, FileHash):
            return NotImplemented

        if self.name == 'none' or self.name != other.name:
            return False

        return self.checksum == other.checksum

    def compare(self, contents: str) -> bool:
        """Compare the file contents with the given hash

        :param contents: the file contents to hash with this hash's algorithm
        :return: True if hashing ``contents`` with ``self.name`` yields this
            hash's checksum; always False for a 'none' hash.
        """
        if self.name == 'none':
            return False

        # from_contents returns a FileHash, not a str: compare the checksum
        # strings. Comparing the FileHash object against self.checksum can
        # never be equal, because __eq__ rejects non-FileHash operands.
        recomputed = self.from_contents(contents, name=self.name)
        return recomputed.checksum == self.checksum

    @classmethod
    def from_contents(cls, contents: str, name='sha256') -> 'FileHash':
        """Create a file hash from the given file contents. The hash is always
        the utf-8 encoding of the contents given, because dbt only reads files
        as utf-8.

        :param contents: the text to hash
        :param name: algorithm name, passed straight to ``hashlib.new``
        :return: a new hash carrying ``name`` and the hex digest
        """
        data = contents.encode('utf-8')
        checksum = hashlib.new(name, data).hexdigest()
        return cls(name=name, checksum=checksum)


@dataclass
class RemoteFile(JsonSchemaMixin):
    # Path stand-in with no fields of its own: every path-like property
    # returns the same fixed placeholder text.

    # un-annotated on purpose, so the dataclass does not treat it as a field
    _PLACEHOLDER = 'from remote system'

    @property
    def searched_path(self) -> str:
        """Return the fixed placeholder text."""
        return self._PLACEHOLDER

    @property
    def relative_path(self) -> str:
        """Return the fixed placeholder text."""
        return self._PLACEHOLDER

    @property
    def absolute_path(self) -> str:
        """Return the fixed placeholder text."""
        return self._PLACEHOLDER

    @property
    def original_file_path(self) -> str:
        """Return the fixed placeholder text."""
        return self._PLACEHOLDER


@dataclass
class SourceFile(JsonSchemaMixin):
    """Define a source file in dbt"""
    path: Union[FilePath, RemoteFile]  # the path information
    checksum: FileHash
    # we don't want to serialize this
    _contents: Optional[str] = None
    # the unique IDs contained in this file
    nodes: List[str] = field(default_factory=list)
    docs: List[str] = field(default_factory=list)
    macros: List[str] = field(default_factory=list)
    sources: List[str] = field(default_factory=list)
    # any node patches in this file. The entries are names, not unique ids!
    patches: List[str] = field(default_factory=list)
    # any macro patches in this file. The entries are package, name pairs.
    macro_patches: List[MacroKey] = field(default_factory=list)
    # any source patches in this file. The entries are package, name pairs
    source_patches: List[SourceKey] = field(default_factory=list)

    @property
    def search_key(self) -> Optional[str]:
        """Return the path's search key, or None when the file has a remote
        path or a 'none' checksum.
        """
        is_remote = isinstance(self.path, RemoteFile)
        if is_remote or self.checksum.name == 'none':
            return None
        return self.path.search_key

    @property
    def contents(self) -> str:
        """Return the stored contents.

        Raises InternalException if no contents have been set yet.
        """
        if self._contents is not None:
            return self._contents
        raise InternalException('SourceFile has no contents!')

    @contents.setter
    def contents(self, value):
        self._contents = value

    @classmethod
    def empty(cls, path: FilePath) -> 'SourceFile':
        """Build a file at ``path`` with empty contents and the placeholder
        checksum from ``FileHash.empty()``.
        """
        source_file = cls(path=path, checksum=FileHash.empty())
        source_file.contents = ''
        return source_file

    @classmethod
    def big_seed(cls, path: FilePath) -> 'SourceFile':
        """Parse seeds over the size limit with just the path"""
        # the checksum is derived from the absolute path, not file contents
        path_checksum = FileHash.path(path.absolute_path)
        source_file = cls(path=path, checksum=path_checksum)
        source_file.contents = ''
        return source_file

    @classmethod
    def remote(cls, contents: str) -> 'SourceFile':
        """Build a file with a RemoteFile path, the placeholder checksum and
        the given contents.
        """
        source_file = cls(path=RemoteFile(), checksum=FileHash.empty())
        source_file.contents = contents
        return source_file
22 changes: 22 additions & 0 deletions core/dbt/contracts/graph/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
ParsedSourceDefinition,
SeedConfig,
TestConfig,
same_seeds,
)
from dbt.node_types import NodeType
from dbt.contracts.util import Replaceable
Expand Down Expand Up @@ -94,6 +95,7 @@ class CompiledRPCNode(CompiledNode):

@dataclass
class CompiledSeedNode(CompiledNode):
# keep this in sync with ParsedSeedNode!
resource_type: NodeType = field(metadata={'restrict': [NodeType.Seed]})
config: SeedConfig = field(default_factory=SeedConfig)

Expand All @@ -102,6 +104,9 @@ def empty(self):
""" Seeds are never empty"""
return False

def same_body(self, other) -> bool:
return same_seeds(self, other)


@dataclass
class CompiledSnapshotNode(CompiledNode):
Expand All @@ -116,10 +121,27 @@ class CompiledDataTestNode(CompiledNode):

@dataclass
class CompiledSchemaTestNode(CompiledNode, HasTestMetadata):
    # keep this in sync with ParsedSchemaTestNode!
    resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
    column_name: Optional[str] = None
    config: TestConfig = field(default_factory=TestConfig)

    def same_config(self, other) -> bool:
        """Return whether both nodes have the same configured severity."""
        own_severity = self.config.severity
        other_severity = other.config.severity
        return own_severity == other_severity

    def same_column_name(self, other) -> bool:
        """Return whether both nodes have the same column name."""
        own_column = self.column_name
        return own_column == other.column_name

    def same_contents(self, other) -> bool:
        """Return whether ``other`` exists and matches this node on config
        and fqn.
        """
        if other is None:
            return False

        # NOTE(review): same_column_name() is not consulted here - confirm
        # that leaving it out of the comparison is intentional.
        unchanged = self.same_config(other) and self.same_fqn(other)
        return unchanged and True


CompiledTestNode = Union[CompiledDataTestNode, CompiledSchemaTestNode]

Expand Down
Loading