Skip to content

Commit

Permalink
Move manifest nodes to dbt/artifacts (#9538)
Browse files Browse the repository at this point in the history
  • Loading branch information
gshank authored and Teresa Martyny committed Feb 22, 2024
1 parent 20f9049 commit f8798c0
Show file tree
Hide file tree
Showing 31 changed files with 736 additions and 572 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Under the Hood-20240207-122342.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Under the Hood
body: Move manifest nodes to artifacts
time: 2024-02-07T12:23:42.909049-05:00
custom:
Author: gshank
Issue: "9388"
9 changes: 9 additions & 0 deletions .github/ISSUE_TEMPLATE/implementation-ticket.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@ body:
Example: Backport to 1.6.latest, 1.5.latest and 1.4.latest. Since 1.4 isn't using click, the backport may be complicated. The `backport 1.6.latest`, `backport 1.5.latest` and `backport 1.4.latest` labels have been added.
validations:
required: true
- type: textarea
attributes:
label: Are there any security concerns with these changes?
description: |
When in doubt, run it by the security team.
placeholder: |
Example: Logging sensitive data
validations:
required: true
- type: textarea
attributes:
label: Context
Expand Down
37 changes: 30 additions & 7 deletions core/dbt/artifacts/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
from dbt.artifacts.resources.base import BaseResource, GraphResource
from dbt.artifacts.resources.base import BaseResource, GraphResource, FileHash, Docs

# alias to latest resource definitions
from dbt.artifacts.resources.v1.components import (
ColumnInfo,
DependsOn,
FreshnessThreshold,
HasRelationMetadata,
NodeVersion,
Quoting,
RefArgs,
HasRelationMetadata,
ParsedResourceMandatory,
ParsedResource,
ColumnInfo,
CompiledResource,
InjectedCTE,
Contract,
DeferRelation,
FreshnessThreshold,
Quoting,
Time,
)
from dbt.artifacts.resources.v1.analysis import Analysis
from dbt.artifacts.resources.v1.hook import HookNode
from dbt.artifacts.resources.v1.model import Model, ModelConfig
from dbt.artifacts.resources.v1.sql_operation import SqlOperation
from dbt.artifacts.resources.v1.seed import Seed, SeedConfig
from dbt.artifacts.resources.v1.singular_test import SingularTest
from dbt.artifacts.resources.v1.generic_test import GenericTest, TestMetadata
from dbt.artifacts.resources.v1.snapshot import Snapshot, SnapshotConfig


from dbt.artifacts.resources.v1.documentation import Documentation
from dbt.artifacts.resources.v1.exposure import (
Exposure,
Expand All @@ -19,7 +35,6 @@
MaturityType,
)
from dbt.artifacts.resources.v1.macro import Macro, MacroDependsOn, MacroArgument
from dbt.artifacts.resources.v1.docs import Docs
from dbt.artifacts.resources.v1.group import Group
from dbt.artifacts.resources.v1.metric import (
ConstantPropertyInput,
Expand Down Expand Up @@ -59,10 +74,18 @@
SemanticModel,
SemanticModelConfig,
)

from dbt.artifacts.resources.v1.config import (
NodeAndTestConfig,
NodeConfig,
TestConfig,
Hook,
)

from dbt.artifacts.resources.v1.source_definition import (
SourceConfig,
ExternalPartition,
ExternalTable,
SourceDefinition,
ParsedSourceMandatory,
SourceConfig,
)
49 changes: 48 additions & 1 deletion core/dbt/artifacts/resources/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import dataclass
from dbt_common.dataclass_schema import dbtClassMixin
from typing import List
from typing import List, Optional
import hashlib

from dbt.artifacts.resources.types import NodeType

Expand All @@ -18,3 +19,49 @@ class BaseResource(dbtClassMixin):
@dataclass
class GraphResource(BaseResource):
    """A ``BaseResource`` that additionally carries an ``fqn``."""

    # List of string segments; presumably the fully-qualified name of the
    # resource -- TODO confirm against callers.
    fqn: List[str]


@dataclass
class FileHash(dbtClassMixin):
    """A checksum tagged with the name of the algorithm that produced it.

    Besides real hashlib algorithm names, two special ``name`` values are
    produced by the alternate constructors on this class: ``"none"``
    (see ``empty``) and ``"path"`` (see ``path``).
    """

    name: str  # the hash type name
    checksum: str  # the hashlib.hash_type().hexdigest() of the file contents

    @classmethod
    def empty(cls):
        """Return a placeholder hash with name ``"none"`` and an empty checksum."""
        return FileHash(name="none", checksum="")

    @classmethod
    def path(cls, path: str):
        """Return a hash named ``"path"`` whose checksum is the given path string."""
        return FileHash(name="path", checksum=path)

    def __eq__(self, other):
        """Compare two hashes by name and checksum.

        A hash named ``"none"`` never compares equal to anything (including
        another ``"none"`` hash), and hashes with different names are never
        equal. Returns ``NotImplemented`` for non-``FileHash`` operands so
        Python can try the reflected comparison.
        """
        if not isinstance(other, FileHash):
            return NotImplemented

        if self.name == "none" or self.name != other.name:
            return False

        return self.checksum == other.checksum

    def compare(self, contents: str) -> bool:
        """Compare the file contents with the given hash.

        Returns True when hashing ``contents`` with this hash's algorithm
        yields this hash's checksum. A ``"none"`` hash never matches.

        Raises:
            ValueError: if ``self.name`` is not an algorithm known to
                ``hashlib.new`` (this includes the special ``"path"`` name).
        """
        if self.name == "none":
            return False

        # Compare checksum strings. The previous code compared the FileHash
        # returned by from_contents() against the ``str`` checksum, which
        # can never be equal, so compare() always returned False.
        recomputed = self.from_contents(contents, name=self.name)
        return recomputed.checksum == self.checksum

    @classmethod
    def from_contents(cls, contents: str, name="sha256") -> "FileHash":
        """Create a file hash from the given file contents. The hash is always
        the utf-8 encoding of the contents given, because dbt only reads files
        as utf-8.
        """
        data = contents.encode("utf-8")
        checksum = hashlib.new(name, data).hexdigest()
        return cls(name=name, checksum=checksum)


@dataclass
class Docs(dbtClassMixin):
    """Documentation settings: a ``show`` flag and an optional ``node_color``."""

    # Defaults to True.
    show: bool = True
    # No colour is set unless one is supplied.
    node_color: Optional[str] = None
5 changes: 5 additions & 0 deletions core/dbt/artifacts/resources/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ class ModelLanguage(StrEnum):
sql = "sql"


class ModelHookType(StrEnum):
PreHook = "pre-hook"
PostHook = "post-hook"


class TimePeriod(StrEnum):
minute = "minute"
hour = "hour"
Expand Down
9 changes: 9 additions & 0 deletions core/dbt/artifacts/resources/v1/analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from dbt.artifacts.resources.v1.components import CompiledResource
from typing import Literal
from dataclasses import dataclass
from dbt.artifacts.resources.types import NodeType


@dataclass
class Analysis(CompiledResource):
    """A ``CompiledResource`` whose ``resource_type`` is fixed to ``NodeType.Analysis``."""

    # Required field: the Literal type admits only NodeType.Analysis.
    resource_type: Literal[NodeType.Analysis]
95 changes: 90 additions & 5 deletions core/dbt/artifacts/resources/v1/components.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,29 @@
import time
from dataclasses import dataclass, field
from datetime import timedelta
from dbt.artifacts.resources.types import TimePeriod
from dbt.artifacts.resources.v1.macro import MacroDependsOn
from dbt.artifacts.resources.base import GraphResource, FileHash, Docs
from dbt.artifacts.resources.v1.config import NodeConfig
from dbt_common.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin
from dbt_common.contracts.config.properties import AdditionalPropertiesMixin
from dbt_common.contracts.constraints import ColumnLevelConstraint
from typing import Dict, List, Optional, Union, Any
from datetime import timedelta
from dbt.artifacts.resources.types import TimePeriod
from dbt_common.contracts.util import Mergeable
from dbt_common.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin
from typing import Any, Dict, List, Optional, Union


NodeVersion = Union[str, float]


@dataclass
class MacroDependsOn(dbtClassMixin):
    """Holds the list of macros that something depends on, without duplicates
    when entries are added through ``add_macro``."""

    macros: List[str] = field(default_factory=list)

    def add_macro(self, value: str):
        """Append ``value`` to ``macros`` unless it is already present."""
        # Membership tests on a list are linear, so adding n macros this way
        # costs O(n^2) overall.
        if value in self.macros:
            return
        self.macros.append(value)


@dataclass
class DependsOn(MacroDependsOn):
nodes: List[str] = field(default_factory=list)
Expand Down Expand Up @@ -56,6 +68,21 @@ class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin):
_extra: Dict[str, Any] = field(default_factory=dict)


@dataclass
class InjectedCTE(dbtClassMixin):
    """An (id, sql) pair used by CompiledNodes while processing ephemeral models."""

    id: str
    sql: str


@dataclass
class Contract(dbtClassMixin):
    """Contract settings for a resource; every field has a default."""

    # Off by default.
    enforced: bool = False
    # On by default.
    alias_types: bool = True
    # Unset until a checksum is supplied; how it is computed is not visible here.
    checksum: Optional[str] = None


@dataclass
class Quoting(dbtClassMixin, Mergeable):
database: Optional[bool] = None
Expand Down Expand Up @@ -121,3 +148,61 @@ def quoting_dict(self) -> Dict[str, bool]:
return self.quoting.to_dict(omit_none=True)
else:
return {}


@dataclass
class DeferRelation(HasRelationMetadata):
    """Relation metadata extended with an ``alias`` and a ``relation_name``."""

    alias: str
    # Required field (no default), but may be None.
    relation_name: Optional[str]

    @property
    def identifier(self):
        """The identifier of this relation, which is simply its ``alias``."""
        return self.alias


@dataclass
class ParsedResourceMandatory(GraphResource, HasRelationMetadata):
    """Graph resource with relation metadata plus an alias, a file checksum
    and a node config (a fresh ``NodeConfig`` per instance by default)."""

    alias: str
    checksum: FileHash
    config: NodeConfig = field(default_factory=NodeConfig)

    @property
    def identifier(self):
        """The identifier of this resource, which is simply its ``alias``."""
        return self.alias


@dataclass
class ParsedResource(ParsedResourceMandatory):
    """``ParsedResourceMandatory`` extended with optional fields, all of which
    have defaults."""

    tags: List[str] = field(default_factory=list)
    description: str = ""
    columns: Dict[str, ColumnInfo] = field(default_factory=dict)
    meta: Dict[str, Any] = field(default_factory=dict)
    group: Optional[str] = None
    docs: Docs = field(default_factory=Docs)
    patch_path: Optional[str] = None
    build_path: Optional[str] = None
    deferred: bool = False
    unrendered_config: Dict[str, Any] = field(default_factory=dict)
    # Defaults to the wall-clock time at instantiation. The lambda is kept so
    # that ``time.time`` is looked up each time an instance is created.
    created_at: float = field(default_factory=lambda: time.time())
    config_call_dict: Dict[str, Any] = field(default_factory=dict)
    relation_name: Optional[str] = None
    raw_code: str = field(default="")


@dataclass
class CompiledResource(ParsedResource):
    """Attributes needed by SQL files and by nodes that have refs, sources,
    etc. -- that is, every ManifestNode except SeedNode."""

    language: str = "sql"

    # References collected from the node.
    refs: List[RefArgs] = field(default_factory=list)
    sources: List[List[str]] = field(default_factory=list)
    metrics: List[List[str]] = field(default_factory=list)
    depends_on: DependsOn = field(default_factory=DependsOn)

    # Compilation state; the defaults describe a node that is not compiled.
    compiled_path: Optional[str] = None
    compiled: bool = False
    compiled_code: Optional[str] = None

    # CTE injection state.
    extra_ctes_injected: bool = False
    extra_ctes: List[InjectedCTE] = field(default_factory=list)
    _pre_injected_sql: Optional[str] = None

    contract: Contract = field(default_factory=Contract)
Loading

0 comments on commit f8798c0

Please sign in to comment.