Skip to content

Commit

Permalink
Add state:modified and state:new selectors
Browse files Browse the repository at this point in the history
  • Loading branch information
Jacob Beck committed Aug 11, 2020
1 parent fb8065d commit 2a4fe40
Show file tree
Hide file tree
Showing 42 changed files with 3,202 additions and 1,832 deletions.
9 changes: 3 additions & 6 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
## dbt 0.18.0 (Release TBD)


### Features
- Add support for impersonating a service account using `impersonate_service_account` in the BigQuery profile configuration ([#2677](https://github.com/fishtown-analytics/dbt/issues/2677)) ([docs](https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile#service-account-impersonation))
- Macros in the current project can override internal dbt macros that are called through `execute_macros`. ([#2301](https://github.com/fishtown-analytics/dbt/issues/2301), [#2686](https://github.com/fishtown-analytics/dbt/pull/2686))


### Breaking changes
- `adapter_macro` is no longer a macro; instead, it is a builtin context method. Any custom macros that intercepted it by going through `context['dbt']` will need to access it via `context['builtins']` instead ([#2302](https://github.com/fishtown-analytics/dbt/issues/2302), [#2673](https://github.com/fishtown-analytics/dbt/pull/2673))
- `adapter_macro` is now deprecated. Use `adapter.dispatch` instead.

### Features
- Added a `dispatch` method to the context adapter and deprecated `adapter_macro`. ([#2302](https://github.com/fishtown-analytics/dbt/issues/2302), [#2679](https://github.com/fishtown-analytics/dbt/pull/2679))
- The built-in schema tests now use `adapter.dispatch`, so they can be overridden for adapter plugins ([#2415](https://github.com/fishtown-analytics/dbt/issues/2415), [#2684](https://github.com/fishtown-analytics/dbt/pull/2684))
- Add support for impersonating a service account using `impersonate_service_account` in the BigQuery profile configuration ([#2677](https://github.com/fishtown-analytics/dbt/issues/2677)) ([docs](https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile#service-account-impersonation))
- Macros in the current project can override internal dbt macros that are called through `execute_macros`. ([#2301](https://github.com/fishtown-analytics/dbt/issues/2301), [#2686](https://github.com/fishtown-analytics/dbt/pull/2686))
- Add state:modified and state:new selectors ([#2641](https://github.com/fishtown-analytics/dbt/issues/2641), [#2695](https://github.com/fishtown-analytics/dbt/pull/2695))

Contributors:
- [@bbhoss](https://github.com/bbhoss) ([#2677](https://github.com/fishtown-analytics/dbt/pull/2677))
Expand Down
166 changes: 166 additions & 0 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import hashlib
import os
from dataclasses import dataclass, field
from typing import List, Optional, Union

from hologram import JsonSchemaMixin

from dbt.exceptions import InternalException

from .util import MacroKey, SourceKey


# Size threshold, in bytes, that FilePath.seed_too_large() compares a file's
# on-disk size against (1024 * 1024 bytes).
MAXIMUM_SEED_SIZE = 1024 ** 2
# Label for the threshold above -- presumably used in user-facing messages;
# keep the two values in sync.
MAXIMUM_SEED_SIZE_NAME = '1MB'


# Describes a file by three path components, which are joined in the order
# project_root / searched_path / relative_path to locate it.
@dataclass
class FilePath(JsonSchemaMixin):
    searched_path: str
    relative_path: str
    project_root: str

    @property
    def search_key(self) -> str:
        """Return the key used to look this file up (its absolute path)."""
        # TODO: should this be project name + path relative to project root?
        return self.absolute_path

    @property
    def full_path(self) -> str:
        """Return the three components joined, without normalization."""
        # left un-normalized on purpose: useful for symlink preservation
        components = (
            self.project_root,
            self.searched_path,
            self.relative_path,
        )
        return os.path.join(*components)

    @property
    def absolute_path(self) -> str:
        """Return ``full_path`` passed through ``os.path.abspath``."""
        joined = self.full_path
        return os.path.abspath(joined)

    @property
    def original_file_path(self) -> str:
        """Return the searched path joined with the relative path."""
        # this is mostly used for reporting errors. It doesn't show the
        # project name, should it?
        return os.path.join(self.searched_path, self.relative_path)

    def seed_too_large(self) -> bool:
        """Return whether the file this represents is over the seed size limit
        """
        size_in_bytes = os.stat(self.full_path).st_size
        return size_in_bytes > MAXIMUM_SEED_SIZE


# A named checksum: ``name`` says how ``checksum`` was produced. Besides real
# hashlib algorithm names, two special names are created by this class:
# 'none' (see empty()) and 'path' (see path()).
@dataclass
class FileHash(JsonSchemaMixin):
    name: str  # the hash type name
    checksum: str  # the hashlib.hash_type().hexdigest() of the file contents

    @classmethod
    def empty(cls):
        """Return a placeholder hash named 'none' with an empty checksum.

        Such a hash never compares equal to anything (see ``__eq__``).
        """
        return FileHash(name='none', checksum='')

    @classmethod
    def path(cls, path: str):
        """Return a hash named 'path' whose checksum is the given path."""
        return FileHash(name='path', checksum=path)

    def __eq__(self, other):
        """Compare two hashes by name and checksum.

        Returns ``NotImplemented`` for non-``FileHash`` operands. A hash
        named 'none' is never equal to anything, including itself, and
        hashes with different names are never equal.
        """
        if not isinstance(other, FileHash):
            return NotImplemented

        if self.name == 'none' or self.name != other.name:
            return False

        return self.checksum == other.checksum

    def compare(self, contents: str) -> bool:
        """Compare the file contents with the given hash.

        :param contents: The text to hash with this hash's algorithm.
        :return: True if hashing ``contents`` with ``self.name`` yields
            ``self.checksum``. Always False for a 'none' hash.
        :raises ValueError: If ``self.name`` is not an algorithm that
            ``hashlib.new`` supports (for example the special 'path' name).
        """
        if self.name == 'none':
            return False

        # Compare checksum strings. Comparing the FileHash returned by
        # from_contents() directly against the ``self.checksum`` string can
        # never be true, because __eq__ rejects non-FileHash operands.
        computed = self.from_contents(contents, name=self.name)
        return computed.checksum == self.checksum

    @classmethod
    def from_contents(cls, contents: str, name='sha256') -> 'FileHash':
        """Create a file hash from the given file contents. The hash is always
        the utf-8 encoding of the contents given, because dbt only reads files
        as utf-8.

        :param contents: The text to hash.
        :param name: The hashlib algorithm name to use.
        :return: A new hash holding ``name`` and the hex digest.
        :raises ValueError: If ``hashlib.new`` does not support ``name``.
        """
        data = contents.encode('utf-8')
        checksum = hashlib.new(name, data).hexdigest()
        return cls(name=name, checksum=checksum)


# Placeholder text returned by every path-like property of RemoteFile.
_REMOTE_FILE_PLACEHOLDER = 'from remote system'


# Stand-in for a path object: it has no fields, and each path-like property
# returns the same fixed placeholder string.
@dataclass
class RemoteFile(JsonSchemaMixin):
    @property
    def searched_path(self) -> str:
        """Return the fixed placeholder string."""
        return _REMOTE_FILE_PLACEHOLDER

    @property
    def relative_path(self) -> str:
        """Return the fixed placeholder string."""
        return _REMOTE_FILE_PLACEHOLDER

    @property
    def absolute_path(self) -> str:
        """Return the fixed placeholder string."""
        return _REMOTE_FILE_PLACEHOLDER

    @property
    def original_file_path(self):
        """Return the fixed placeholder string."""
        return _REMOTE_FILE_PLACEHOLDER


@dataclass
class SourceFile(JsonSchemaMixin):
    """Define a source file in dbt"""
    # where the file lives (or a RemoteFile placeholder)
    path: Union[FilePath, RemoteFile]
    checksum: FileHash
    # backing store for the `contents` property; not meant to be serialized
    _contents: Optional[str] = None
    # unique IDs of the things defined in this file
    nodes: List[str] = field(default_factory=list)
    docs: List[str] = field(default_factory=list)
    macros: List[str] = field(default_factory=list)
    sources: List[str] = field(default_factory=list)
    # node patches found in this file: these are names, not unique ids!
    patches: List[str] = field(default_factory=list)
    # macro patches found in this file, as (package, name) pairs
    macro_patches: List[MacroKey] = field(default_factory=list)
    # source patches found in this file, as (package, name) pairs
    source_patches: List[SourceKey] = field(default_factory=list)

    @property
    def search_key(self) -> Optional[str]:
        """Return the path's search key, or None when there is none.

        None is returned for remote files and for files whose checksum is
        named 'none'.
        """
        is_remote = isinstance(self.path, RemoteFile)
        if is_remote or self.checksum.name == 'none':
            return None
        return self.path.search_key

    @property
    def contents(self) -> str:
        """Return the stored contents.

        Raises InternalException if no contents have been set.
        """
        if self._contents is not None:
            return self._contents
        raise InternalException('SourceFile has no contents!')

    @contents.setter
    def contents(self, value):
        self._contents = value

    @classmethod
    def empty(cls, path: FilePath) -> 'SourceFile':
        """Build a source file with empty contents and an empty checksum."""
        source_file = cls(path=path, checksum=FileHash.empty())
        source_file.contents = ''
        return source_file

    @classmethod
    def big_seed(cls, path: FilePath) -> 'SourceFile':
        """Parse seeds over the size limit with just the path"""
        # the checksum is derived from the absolute path, not the contents,
        # and the contents are left empty
        path_checksum = FileHash.path(path.absolute_path)
        source_file = cls(path=path, checksum=path_checksum)
        source_file.contents = ''
        return source_file

    @classmethod
    def remote(cls, contents: str) -> 'SourceFile':
        """Build a source file from contents that have no local path."""
        source_file = cls(path=RemoteFile(), checksum=FileHash.empty())
        source_file.contents = contents
        return source_file
19 changes: 6 additions & 13 deletions core/dbt/contracts/graph/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
ParsedSeedNode,
ParsedSnapshotNode,
ParsedSourceDefinition,
SeedConfig,
SchemaTestMixin,
SeedMixin,
TestConfig,
)
from dbt.node_types import NodeType
Expand Down Expand Up @@ -93,14 +94,8 @@ class CompiledRPCNode(CompiledNode):


@dataclass
class CompiledSeedNode(CompiledNode):
resource_type: NodeType = field(metadata={'restrict': [NodeType.Seed]})
config: SeedConfig = field(default_factory=SeedConfig)

@property
def empty(self):
""" Seeds are never empty"""
return False
class CompiledSeedNode(SeedMixin, CompiledNode):
pass


@dataclass
Expand All @@ -115,10 +110,8 @@ class CompiledDataTestNode(CompiledNode):


@dataclass
class CompiledSchemaTestNode(CompiledNode, HasTestMetadata):
resource_type: NodeType = field(metadata={'restrict': [NodeType.Test]})
column_name: Optional[str] = None
config: TestConfig = field(default_factory=TestConfig)
class CompiledSchemaTestNode(SchemaTestMixin, CompiledNode, HasTestMetadata):
pass


CompiledTestNode = Union[CompiledDataTestNode, CompiledSchemaTestNode]
Expand Down
Loading

0 comments on commit 2a4fe40

Please sign in to comment.