From 7df747ae04a8c99dbd745085f42cd4647ec71d80 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 21 Feb 2024 11:16:06 -0500 Subject: [PATCH] Move manifest nodes to dbt/artifacts (#9538) --- .../Under the Hood-20240207-122342.yaml | 6 + core/dbt/artifacts/resources/__init__.py | 37 +- core/dbt/artifacts/resources/base.py | 49 ++- core/dbt/artifacts/resources/types.py | 5 + core/dbt/artifacts/resources/v1/analysis.py | 9 + core/dbt/artifacts/resources/v1/components.py | 95 ++++- core/dbt/artifacts/resources/v1/config.py | 255 ++++++++++++++ core/dbt/artifacts/resources/v1/docs.py | 9 - .../artifacts/resources/v1/generic_test.py | 30 ++ core/dbt/artifacts/resources/v1/hook.py | 10 + core/dbt/artifacts/resources/v1/macro.py | 14 +- core/dbt/artifacts/resources/v1/model.py | 28 ++ core/dbt/artifacts/resources/v1/seed.py | 30 ++ .../artifacts/resources/v1/singular_test.py | 13 + core/dbt/artifacts/resources/v1/snapshot.py | 66 ++++ .../resources/v1/source_definition.py | 12 +- .../artifacts/resources/v1/sql_operation.py | 9 + core/dbt/contracts/files.py | 42 +-- core/dbt/contracts/graph/manifest.py | 3 +- core/dbt/contracts/graph/model_config.py | 325 +----------------- core/dbt/contracts/graph/nodes.py | 211 +++--------- core/dbt/parser/base.py | 3 +- core/dbt/parser/manifest.py | 4 +- core/dbt/parser/schemas.py | 8 + core/dbt/parser/unit_tests.py | 3 +- core/dbt/task/run.py | 2 +- tests/unit/test_contracts_graph_compiled.py | 4 +- tests/unit/test_contracts_graph_parsed.py | 8 +- tests/unit/test_graph_selector_methods.py | 6 +- tests/unit/test_parser.py | 3 +- 30 files changed, 727 insertions(+), 572 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20240207-122342.yaml create mode 100644 core/dbt/artifacts/resources/v1/analysis.py create mode 100644 core/dbt/artifacts/resources/v1/config.py delete mode 100644 core/dbt/artifacts/resources/v1/docs.py create mode 100644 core/dbt/artifacts/resources/v1/generic_test.py create mode 100644 core/dbt/artifacts/resources/v1/hook.py create mode 100644 core/dbt/artifacts/resources/v1/model.py create mode 100644 core/dbt/artifacts/resources/v1/seed.py create mode 100644 core/dbt/artifacts/resources/v1/singular_test.py create mode 100644 core/dbt/artifacts/resources/v1/snapshot.py create mode 100644 core/dbt/artifacts/resources/v1/sql_operation.py diff --git a/.changes/unreleased/Under the Hood-20240207-122342.yaml b/.changes/unreleased/Under the Hood-20240207-122342.yaml new file mode 100644 index 00000000000..f2e4a0ed3fe --- /dev/null +++ b/.changes/unreleased/Under the Hood-20240207-122342.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Move manifest nodes to artifacts +time: 2024-02-07T12:23:42.909049-05:00 +custom: + Author: gshank + Issue: "9388" diff --git a/core/dbt/artifacts/resources/__init__.py b/core/dbt/artifacts/resources/__init__.py index 7618a3bc12d..57e44e3eb5b 100644 --- a/core/dbt/artifacts/resources/__init__.py +++ b/core/dbt/artifacts/resources/__init__.py @@ -1,16 +1,32 @@ -from dbt.artifacts.resources.base import BaseResource, GraphResource +from dbt.artifacts.resources.base import BaseResource, GraphResource, FileHash, Docs # alias to latest resource definitions from dbt.artifacts.resources.v1.components import ( - ColumnInfo, DependsOn, - FreshnessThreshold, - HasRelationMetadata, NodeVersion, - Quoting, RefArgs, + HasRelationMetadata, + ParsedResourceMandatory, + ParsedResource, + ColumnInfo, + CompiledResource, + InjectedCTE, + Contract, + DeferRelation, + FreshnessThreshold, + Quoting, Time, ) +from 
dbt.artifacts.resources.v1.analysis import Analysis +from dbt.artifacts.resources.v1.hook import HookNode +from dbt.artifacts.resources.v1.model import Model, ModelConfig +from dbt.artifacts.resources.v1.sql_operation import SqlOperation +from dbt.artifacts.resources.v1.seed import Seed, SeedConfig +from dbt.artifacts.resources.v1.singular_test import SingularTest +from dbt.artifacts.resources.v1.generic_test import GenericTest, TestMetadata +from dbt.artifacts.resources.v1.snapshot import Snapshot, SnapshotConfig + + from dbt.artifacts.resources.v1.documentation import Documentation from dbt.artifacts.resources.v1.exposure import ( Exposure, @@ -19,7 +35,6 @@ MaturityType, ) from dbt.artifacts.resources.v1.macro import Macro, MacroDependsOn, MacroArgument -from dbt.artifacts.resources.v1.docs import Docs from dbt.artifacts.resources.v1.group import Group from dbt.artifacts.resources.v1.metric import ( ConstantPropertyInput, @@ -59,10 +74,18 @@ SemanticModel, SemanticModelConfig, ) + +from dbt.artifacts.resources.v1.config import ( + NodeAndTestConfig, + NodeConfig, + TestConfig, + Hook, +) + from dbt.artifacts.resources.v1.source_definition import ( + SourceConfig, ExternalPartition, ExternalTable, SourceDefinition, ParsedSourceMandatory, - SourceConfig, ) diff --git a/core/dbt/artifacts/resources/base.py b/core/dbt/artifacts/resources/base.py index 06fecf57b26..dd66aa97d72 100644 --- a/core/dbt/artifacts/resources/base.py +++ b/core/dbt/artifacts/resources/base.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from dbt_common.dataclass_schema import dbtClassMixin -from typing import List +from typing import List, Optional +import hashlib from dbt.artifacts.resources.types import NodeType @@ -18,3 +19,49 @@ class BaseResource(dbtClassMixin): @dataclass class GraphResource(BaseResource): fqn: List[str] + + +@dataclass +class FileHash(dbtClassMixin): + name: str # the hash type name + checksum: str # the hashlib.hash_type().hexdigest() of the file contents + + @classmethod + def empty(cls): + return FileHash(name="none", checksum="") + + @classmethod + def path(cls, path: str): + return FileHash(name="path", checksum=path) + + def __eq__(self, other): + if not isinstance(other, FileHash): + return NotImplemented + + if self.name == "none" or self.name != other.name: + return False + + return self.checksum == other.checksum + + def compare(self, contents: str) -> bool: + """Compare the file contents with the given hash""" + if self.name == "none": + return False + + return self.from_contents(contents, name=self.name) == self.checksum + + @classmethod + def from_contents(cls, contents: str, name="sha256") -> "FileHash": + """Create a file hash from the given file contents. The hash is always + the utf-8 encoding of the contents given, because dbt only reads files + as utf-8. 
+ """ + data = contents.encode("utf-8") + checksum = hashlib.new(name, data).hexdigest() + return cls(name=name, checksum=checksum) + + +@dataclass +class Docs(dbtClassMixin): + show: bool = True + node_color: Optional[str] = None diff --git a/core/dbt/artifacts/resources/types.py b/core/dbt/artifacts/resources/types.py index af1383e834a..c0ab5341e4c 100644 --- a/core/dbt/artifacts/resources/types.py +++ b/core/dbt/artifacts/resources/types.py @@ -56,6 +56,11 @@ class ModelLanguage(StrEnum): sql = "sql" +class ModelHookType(StrEnum): + PreHook = "pre-hook" + PostHook = "post-hook" + + class TimePeriod(StrEnum): minute = "minute" hour = "hour" diff --git a/core/dbt/artifacts/resources/v1/analysis.py b/core/dbt/artifacts/resources/v1/analysis.py new file mode 100644 index 00000000000..60f90e61576 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/analysis.py @@ -0,0 +1,9 @@ +from dbt.artifacts.resources.v1.components import CompiledResource +from typing import Literal +from dataclasses import dataclass +from dbt.artifacts.resources.types import NodeType + + +@dataclass +class Analysis(CompiledResource): + resource_type: Literal[NodeType.Analysis] diff --git a/core/dbt/artifacts/resources/v1/components.py b/core/dbt/artifacts/resources/v1/components.py index 1aa29086680..6a131ef761d 100644 --- a/core/dbt/artifacts/resources/v1/components.py +++ b/core/dbt/artifacts/resources/v1/components.py @@ -1,17 +1,29 @@ +import time from dataclasses import dataclass, field -from datetime import timedelta -from dbt.artifacts.resources.types import TimePeriod -from dbt.artifacts.resources.v1.macro import MacroDependsOn +from dbt.artifacts.resources.base import GraphResource, FileHash, Docs +from dbt.artifacts.resources.v1.config import NodeConfig +from dbt_common.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin from dbt_common.contracts.config.properties import AdditionalPropertiesMixin from dbt_common.contracts.constraints import ColumnLevelConstraint +from typing import Dict, List, Optional, Union, Any +from datetime import timedelta +from dbt.artifacts.resources.types import TimePeriod from dbt_common.contracts.util import Mergeable -from dbt_common.dataclass_schema import dbtClassMixin, ExtensibleDbtClassMixin -from typing import Any, Dict, List, Optional, Union NodeVersion = Union[str, float] +@dataclass +class MacroDependsOn(dbtClassMixin): + macros: List[str] = field(default_factory=list) + + # 'in' on lists is O(n) so this is O(n^2) for # of macros + def add_macro(self, value: str): + if value not in self.macros: + self.macros.append(value) + + @dataclass class DependsOn(MacroDependsOn): nodes: List[str] = field(default_factory=list) @@ -56,6 +68,21 @@ class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin): _extra: Dict[str, Any] = field(default_factory=dict) +@dataclass +class InjectedCTE(dbtClassMixin): + """Used in CompiledNodes as part of ephemeral model processing""" + + id: str + sql: str + + +@dataclass +class Contract(dbtClassMixin): + enforced: bool = False + alias_types: bool = True + checksum: Optional[str] = None + + @dataclass class Quoting(dbtClassMixin, Mergeable): database: Optional[bool] = None @@ -121,3 +148,61 @@ def quoting_dict(self) -> Dict[str, bool]: return self.quoting.to_dict(omit_none=True) else: return {} + + +@dataclass +class DeferRelation(HasRelationMetadata): + alias: str + relation_name: Optional[str] + + @property + def identifier(self): + return self.alias + + +@dataclass +class ParsedResourceMandatory(GraphResource, 
HasRelationMetadata): + alias: str + checksum: FileHash + config: NodeConfig = field(default_factory=NodeConfig) + + @property + def identifier(self): + return self.alias + + +@dataclass +class ParsedResource(ParsedResourceMandatory): + tags: List[str] = field(default_factory=list) + description: str = field(default="") + columns: Dict[str, ColumnInfo] = field(default_factory=dict) + meta: Dict[str, Any] = field(default_factory=dict) + group: Optional[str] = None + docs: Docs = field(default_factory=Docs) + patch_path: Optional[str] = None + build_path: Optional[str] = None + deferred: bool = False + unrendered_config: Dict[str, Any] = field(default_factory=dict) + created_at: float = field(default_factory=lambda: time.time()) + config_call_dict: Dict[str, Any] = field(default_factory=dict) + relation_name: Optional[str] = None + raw_code: str = "" + + +@dataclass +class CompiledResource(ParsedResource): + """Contains attributes necessary for SQL files and nodes with refs, sources, etc, + so all ManifestNodes except SeedNode.""" + + language: str = "sql" + refs: List[RefArgs] = field(default_factory=list) + sources: List[List[str]] = field(default_factory=list) + metrics: List[List[str]] = field(default_factory=list) + depends_on: DependsOn = field(default_factory=DependsOn) + compiled_path: Optional[str] = None + compiled: bool = False + compiled_code: Optional[str] = None + extra_ctes_injected: bool = False + extra_ctes: List[InjectedCTE] = field(default_factory=list) + _pre_injected_sql: Optional[str] = None + contract: Contract = field(default_factory=Contract) diff --git a/core/dbt/artifacts/resources/v1/config.py b/core/dbt/artifacts/resources/v1/config.py new file mode 100644 index 00000000000..d58d2ff4c5d --- /dev/null +++ b/core/dbt/artifacts/resources/v1/config.py @@ -0,0 +1,255 @@ +from dbt_common.dataclass_schema import dbtClassMixin, ValidationError +from typing import Optional, List, Any, Dict, Union +from typing_extensions import Annotated +from dataclasses import dataclass, field +from dbt_common.contracts.config.base import ( + BaseConfig, + CompareBehavior, + MergeBehavior, +) +from dbt_common.contracts.config.metadata import Metadata, ShowBehavior +from dbt_common.contracts.config.materialization import OnConfigurationChangeOption +from dbt.artifacts.resources.base import Docs +from dbt.artifacts.resources.types import ModelHookType +from dbt.contracts.graph.utils import validate_color +from dbt import hooks +from mashumaro.jsonschema.annotations import Pattern + + +def list_str() -> List[str]: + return [] + + +class Severity(str): + pass + + +def metas(*metas: Metadata) -> Dict[str, Any]: + existing: Dict[str, Any] = {} + for m in metas: + existing = m.meta(existing) + return existing + + +@dataclass +class ContractConfig(dbtClassMixin): + enforced: bool = False + alias_types: bool = True + + +@dataclass +class Hook(dbtClassMixin): + sql: str + transaction: bool = True + index: Optional[int] = None + + +@dataclass +class NodeAndTestConfig(BaseConfig): + enabled: bool = True + # these fields are included in serialized output, but are not part of + # config comparison (they are part of database_representation) + alias: Optional[str] = field( + default=None, + metadata=CompareBehavior.Exclude.meta(), + ) + schema: Optional[str] = field( + default=None, + metadata=CompareBehavior.Exclude.meta(), + ) + database: Optional[str] = field( + default=None, + metadata=CompareBehavior.Exclude.meta(), + ) + tags: Union[List[str], str] = field( + default_factory=list_str, + 
metadata=metas(ShowBehavior.Hide, MergeBehavior.Append, CompareBehavior.Exclude), + ) + meta: Dict[str, Any] = field( + default_factory=dict, + metadata=MergeBehavior.Update.meta(), + ) + group: Optional[str] = field( + default=None, + metadata=CompareBehavior.Exclude.meta(), + ) + + +@dataclass +class NodeConfig(NodeAndTestConfig): + # Note: if any new fields are added with MergeBehavior, also update the + # 'mergebehavior' dictionary + materialized: str = "view" + incremental_strategy: Optional[str] = None + persist_docs: Dict[str, Any] = field(default_factory=dict) + post_hook: List[Hook] = field( + default_factory=list, + metadata={"merge": MergeBehavior.Append, "alias": "post-hook"}, + ) + pre_hook: List[Hook] = field( + default_factory=list, + metadata={"merge": MergeBehavior.Append, "alias": "pre-hook"}, + ) + quoting: Dict[str, Any] = field( + default_factory=dict, + metadata=MergeBehavior.Update.meta(), + ) + # This is actually only used by seeds. Should it be available to others? + # That would be a breaking change! + column_types: Dict[str, Any] = field( + default_factory=dict, + metadata=MergeBehavior.Update.meta(), + ) + full_refresh: Optional[bool] = None + # 'unique_key' doesn't use 'Optional' because typing.get_type_hints was + # sometimes getting the Union order wrong, causing serialization failures. + unique_key: Union[str, List[str], None] = None + on_schema_change: Optional[str] = "ignore" + on_configuration_change: OnConfigurationChangeOption = field( + default_factory=OnConfigurationChangeOption.default + ) + grants: Dict[str, Any] = field( + default_factory=dict, metadata=MergeBehavior.DictKeyAppend.meta() + ) + packages: List[str] = field( + default_factory=list, + metadata=MergeBehavior.Append.meta(), + ) + docs: Docs = field( + default_factory=Docs, + metadata=MergeBehavior.Update.meta(), + ) + contract: ContractConfig = field( + default_factory=ContractConfig, + metadata=MergeBehavior.Update.meta(), + ) + + def __post_init__(self): + # we validate that node_color has a suitable value to prevent dbt-docs from crashing + if self.docs.node_color: + node_color = self.docs.node_color + if not validate_color(node_color): + raise ValidationError( + f"Invalid color name for docs.node_color: {node_color}. " + "It is neither a valid HTML color name nor a valid HEX code." + ) + + if ( + self.contract.enforced + and self.materialized == "incremental" + and self.on_schema_change not in ("append_new_columns", "fail") + ): + raise ValidationError( + f"Invalid value for on_schema_change: {self.on_schema_change}. 
Models "
+                "materialized as incremental with contracts enabled must set "
+                "on_schema_change to 'append_new_columns' or 'fail'"
+            )
+
+    @classmethod
+    def __pre_deserialize__(cls, data):
+        data = super().__pre_deserialize__(data)
+        for key in ModelHookType:
+            if key in data:
+                data[key] = [hooks.get_hook_dict(h) for h in data[key]]
+        return data
+
+
+SEVERITY_PATTERN = r"^([Ww][Aa][Rr][Nn]|[Ee][Rr][Rr][Oo][Rr])$"
+
+
+@dataclass
+class TestConfig(NodeAndTestConfig):
+    __test__ = False
+
+    # this is repeated because of a different default
+    schema: Optional[str] = field(
+        default="dbt_test__audit",
+        metadata=CompareBehavior.Exclude.meta(),
+    )
+    materialized: str = "test"
+    # Annotated is used by mashumaro for jsonschema generation
+    severity: Annotated[Severity, Pattern(SEVERITY_PATTERN)] = Severity("ERROR")
+    store_failures: Optional[bool] = None
+    store_failures_as: Optional[str] = None
+    where: Optional[str] = None
+    limit: Optional[int] = None
+    fail_calc: str = "count(*)"
+    warn_if: str = "!= 0"
+    error_if: str = "!= 0"
+
+    def __post_init__(self):
+        """
+        The presence of a setting for `store_failures_as` overrides any existing setting for `store_failures`,
+        regardless of level of granularity. If `store_failures_as` is not set, then `store_failures` takes effect.
+        At the time of implementation, `store_failures = True` would always create a table; the user could not
+        configure this. Hence, if `store_failures = True` and `store_failures_as` is not specified, then it
+        should be set to "table" to mimic the existing functionality.
+
+        A side effect of this overriding functionality is that `store_failures_as="view"` at the project
+        level cannot be turned off at the model level without setting both `store_failures_as` and
+        `store_failures`. The former would cascade down and override `store_failures=False`. The proposal
+        is to include "ephemeral" as a value for `store_failures_as`, which effectively sets
+        `store_failures=False`.
+
+        The exception handling for this is tricky. If we raise an exception here, the entire run fails at
+        parse time. We would rather well-formed models run successfully, leaving only exceptions to be rerun
+        if necessary. Hence, the exception needs to be raised in the test materialization. In order to do so,
+        we need to make sure that we go down the `store_failures = True` route with the invalid setting for
+        `store_failures_as`. This results in the `.get()` defaulting to `True` below, instead of a normal
+        dictionary lookup as is done in the `if` block. Refer to the test materialization for the
+        exception that is raised as a result of an invalid value.
+
+        The intention of this block is to behave as if `store_failures_as` is the only setting,
+        but still allow for backwards compatibility for `store_failures`.
+        See https://github.com/dbt-labs/dbt-core/issues/6914 for more information. 
+        """
+
+        # if `store_failures_as` is not set, it gets set by `store_failures`
+        # the settings below mimic existing behavior prior to `store_failures_as`
+        get_store_failures_as_map = {
+            True: "table",
+            False: "ephemeral",
+            None: None,
+        }
+
+        # if `store_failures_as` is set, it dictates what `store_failures` gets set to
+        # the settings below override whatever `store_failures` is set to by the user
+        get_store_failures_map = {
+            "ephemeral": False,
+            "table": True,
+            "view": True,
+        }
+
+        if self.store_failures_as is None:
+            self.store_failures_as = get_store_failures_as_map[self.store_failures]
+        else:
+            self.store_failures = get_store_failures_map.get(self.store_failures_as, True)
+
+    @classmethod
+    def same_contents(cls, unrendered: Dict[str, Any], other: Dict[str, Any]) -> bool:
+        """This is like __eq__, except it explicitly checks certain fields."""
+        modifiers = [
+            "severity",
+            "where",
+            "limit",
+            "fail_calc",
+            "warn_if",
+            "error_if",
+            "store_failures",
+            "store_failures_as",
+        ]
+
+        seen = set()
+        for _, target_name in cls._get_fields():
+            key = target_name
+            seen.add(key)
+            if key in modifiers:
+                if not cls.compare_key(unrendered, other, key):
+                    return False
+        return True
+
+    @classmethod
+    def validate(cls, data):
+        super().validate(data)
+        if data.get("materialized") and data.get("materialized") != "test":
+            raise ValidationError("A test must have a materialized value of 'test'")
diff --git a/core/dbt/artifacts/resources/v1/docs.py b/core/dbt/artifacts/resources/v1/docs.py
deleted file mode 100644
index b016320a249..00000000000
--- a/core/dbt/artifacts/resources/v1/docs.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from dataclasses import dataclass
-from dbt_common.dataclass_schema import dbtClassMixin
-from typing import Optional
-
-
-@dataclass
-class Docs(dbtClassMixin):
-    show: bool = True
-    node_color: Optional[str] = None
diff --git a/core/dbt/artifacts/resources/v1/generic_test.py b/core/dbt/artifacts/resources/v1/generic_test.py
new file mode 100644
index 00000000000..b24be584b3a
--- /dev/null
+++ b/core/dbt/artifacts/resources/v1/generic_test.py
@@ -0,0 +1,30 @@
+from dataclasses import dataclass, field
+from typing import Optional, Any, Dict, Literal
+from dbt_common.dataclass_schema import dbtClassMixin
+from dbt.artifacts.resources.types import NodeType
+from dbt.artifacts.resources.v1.config import TestConfig
+from dbt.artifacts.resources.v1.components import CompiledResource
+
+
+@dataclass
+class TestMetadata(dbtClassMixin):
+    __test__ = False
+
+    name: str = "test"  # dummy default to allow default in GenericTestNode. Should always be set.
+    # kwargs are the args that are left in the test builder after
+    # removing configs. They are set from the test builder when
+    # the test node is created.
+    kwargs: Dict[str, Any] = field(default_factory=dict)
+    namespace: Optional[str] = None
+
+
+@dataclass
+class GenericTest(CompiledResource):
+    resource_type: Literal[NodeType.Test]
+    column_name: Optional[str] = None
+    file_key_name: Optional[str] = None
+    # Was not able to make mypy happy and keep the code working. We need to
+    # refactor the various configs. 
+ config: TestConfig = field(default_factory=TestConfig) # type: ignore + attached_node: Optional[str] = None + test_metadata: TestMetadata = field(default_factory=TestMetadata) diff --git a/core/dbt/artifacts/resources/v1/hook.py b/core/dbt/artifacts/resources/v1/hook.py new file mode 100644 index 00000000000..dcfb4684c68 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/hook.py @@ -0,0 +1,10 @@ +from dataclasses import dataclass +from typing import Optional, Literal +from dbt.artifacts.resources.v1.components import CompiledResource +from dbt.artifacts.resources.types import NodeType + + +@dataclass +class HookNode(CompiledResource): + resource_type: Literal[NodeType.Operation] + index: Optional[int] = None diff --git a/core/dbt/artifacts/resources/v1/macro.py b/core/dbt/artifacts/resources/v1/macro.py index f52255933b2..be02d529ee1 100644 --- a/core/dbt/artifacts/resources/v1/macro.py +++ b/core/dbt/artifacts/resources/v1/macro.py @@ -3,9 +3,9 @@ from typing import Literal, List, Dict, Optional, Any from dbt_common.dataclass_schema import dbtClassMixin -from dbt.artifacts.resources.base import BaseResource +from dbt.artifacts.resources.base import BaseResource, Docs from dbt.artifacts.resources.types import NodeType, ModelLanguage -from dbt.artifacts.resources.v1.docs import Docs +from dbt.artifacts.resources.v1.components import MacroDependsOn @dataclass @@ -15,16 +15,6 @@ class MacroArgument(dbtClassMixin): description: str = "" -@dataclass -class MacroDependsOn(dbtClassMixin): - macros: List[str] = field(default_factory=list) - - # 'in' on lists is O(n) so this is O(n^2) for # of macros - def add_macro(self, value: str): - if value not in self.macros: - self.macros.append(value) - - @dataclass class Macro(BaseResource): macro_sql: str diff --git a/core/dbt/artifacts/resources/v1/model.py b/core/dbt/artifacts/resources/v1/model.py new file mode 100644 index 00000000000..afb5edaad54 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/model.py @@ -0,0 +1,28 @@ +from dataclasses import dataclass, field +from typing import Literal, Optional, List +from datetime import datetime +from dbt_common.contracts.config.base import MergeBehavior +from dbt_common.contracts.constraints import ModelLevelConstraint +from dbt.artifacts.resources.v1.config import NodeConfig +from dbt.artifacts.resources.types import AccessType, NodeType +from dbt.artifacts.resources.v1.components import DeferRelation, NodeVersion, CompiledResource + + +@dataclass +class ModelConfig(NodeConfig): + access: AccessType = field( + default=AccessType.Protected, + metadata=MergeBehavior.Clobber.meta(), + ) + + +@dataclass +class Model(CompiledResource): + resource_type: Literal[NodeType.Model] + access: AccessType = AccessType.Protected + config: ModelConfig = field(default_factory=ModelConfig) + constraints: List[ModelLevelConstraint] = field(default_factory=list) + version: Optional[NodeVersion] = None + latest_version: Optional[NodeVersion] = None + deprecation_date: Optional[datetime] = None + defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/artifacts/resources/v1/seed.py b/core/dbt/artifacts/resources/v1/seed.py new file mode 100644 index 00000000000..47a16352cf2 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/seed.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass, field +from typing import Optional, Literal +from dbt_common.dataclass_schema import ValidationError +from dbt.artifacts.resources.types import NodeType +from dbt.artifacts.resources.v1.components import MacroDependsOn, 
DeferRelation, ParsedResource +from dbt.artifacts.resources.v1.config import NodeConfig + + +@dataclass +class SeedConfig(NodeConfig): + materialized: str = "seed" + delimiter: str = "," + quote_columns: Optional[bool] = None + + @classmethod + def validate(cls, data): + super().validate(data) + if data.get("materialized") and data.get("materialized") != "seed": + raise ValidationError("A seed must have a materialized value of 'seed'") + + +@dataclass +class Seed(ParsedResource): # No SQLDefaults! + resource_type: Literal[NodeType.Seed] + config: SeedConfig = field(default_factory=SeedConfig) + # seeds need the root_path because the contents are not loaded initially + # and we need the root_path to load the seed later + root_path: Optional[str] = None + depends_on: MacroDependsOn = field(default_factory=MacroDependsOn) + defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/artifacts/resources/v1/singular_test.py b/core/dbt/artifacts/resources/v1/singular_test.py new file mode 100644 index 00000000000..76b47183c51 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/singular_test.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass, field +from typing import Literal +from dbt.artifacts.resources.types import NodeType +from dbt.artifacts.resources.v1.components import CompiledResource +from dbt.artifacts.resources.v1.config import TestConfig + + +@dataclass +class SingularTest(CompiledResource): + resource_type: Literal[NodeType.Test] + # Was not able to make mypy happy and keep the code working. We need to + # refactor the various configs. + config: TestConfig = field(default_factory=TestConfig) # type: ignore diff --git a/core/dbt/artifacts/resources/v1/snapshot.py b/core/dbt/artifacts/resources/v1/snapshot.py new file mode 100644 index 00000000000..3eceb9bb1d2 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/snapshot.py @@ -0,0 +1,66 @@ +from typing import Union, List, Optional, Literal +from dataclasses import dataclass +from dbt_common.dataclass_schema import ValidationError +from dbt.artifacts.resources.types import NodeType +from dbt.artifacts.resources.v1.components import CompiledResource, DeferRelation +from dbt.artifacts.resources.v1.config import NodeConfig + + +@dataclass +class SnapshotConfig(NodeConfig): + materialized: str = "snapshot" + strategy: Optional[str] = None + unique_key: Optional[str] = None + target_schema: Optional[str] = None + target_database: Optional[str] = None + updated_at: Optional[str] = None + # Not using Optional because of serialization issues with a Union of str and List[str] + check_cols: Union[str, List[str], None] = None + + @classmethod + def validate(cls, data): + super().validate(data) + # Note: currently you can't just set these keys in schema.yml because this validation + # will fail when parsing the snapshot node. + if not data.get("strategy") or not data.get("unique_key") or not data.get("target_schema"): + raise ValidationError( + "Snapshots must be configured with a 'strategy', 'unique_key', " + "and 'target_schema'." + ) + if data.get("strategy") == "check": + if not data.get("check_cols"): + raise ValidationError( + "A snapshot configured with the check strategy must " + "specify a check_cols configuration." + ) + if isinstance(data["check_cols"], str) and data["check_cols"] != "all": + raise ValidationError( + f"Invalid value for 'check_cols': {data['check_cols']}. " + "Expected 'all' or a list of strings." 
+ ) + elif data.get("strategy") == "timestamp": + if not data.get("updated_at"): + raise ValidationError( + "A snapshot configured with the timestamp strategy " + "must specify an updated_at configuration." + ) + if data.get("check_cols"): + raise ValidationError("A 'timestamp' snapshot should not have 'check_cols'") + # If the strategy is not 'check' or 'timestamp' it's a custom strategy, + # formerly supported with GenericSnapshotConfig + + if data.get("materialized") and data.get("materialized") != "snapshot": + raise ValidationError("A snapshot must have a materialized value of 'snapshot'") + + # Called by "calculate_node_config_dict" in ContextConfigGenerator + def finalize_and_validate(self): + data = self.to_dict(omit_none=True) + self.validate(data) + return self.from_dict(data) + + +@dataclass +class Snapshot(CompiledResource): + resource_type: Literal[NodeType.Snapshot] + config: SnapshotConfig + defer_relation: Optional[DeferRelation] = None diff --git a/core/dbt/artifacts/resources/v1/source_definition.py b/core/dbt/artifacts/resources/v1/source_definition.py index 9d3a87b0bd6..e5a9ab1d98e 100644 --- a/core/dbt/artifacts/resources/v1/source_definition.py +++ b/core/dbt/artifacts/resources/v1/source_definition.py @@ -9,13 +9,18 @@ HasRelationMetadata, Quoting, ) -from dbt_common.contracts.config.base import BaseConfig +from dbt.artifacts.resources.v1.config import BaseConfig from dbt_common.contracts.config.properties import AdditionalPropertiesAllowed from dbt_common.contracts.util import Mergeable from dbt_common.exceptions import CompilationError from typing import Any, Dict, List, Literal, Optional, Union +@dataclass +class SourceConfig(BaseConfig): + enabled: bool = True + + @dataclass class ExternalPartition(AdditionalPropertiesAllowed): name: str = "" @@ -40,11 +45,6 @@ def __bool__(self): return self.location is not None -@dataclass -class SourceConfig(BaseConfig): - enabled: bool = True - - @dataclass class ParsedSourceMandatory(GraphResource, HasRelationMetadata): source_name: str diff --git a/core/dbt/artifacts/resources/v1/sql_operation.py b/core/dbt/artifacts/resources/v1/sql_operation.py new file mode 100644 index 00000000000..fd8e79b21a1 --- /dev/null +++ b/core/dbt/artifacts/resources/v1/sql_operation.py @@ -0,0 +1,9 @@ +from dataclasses import dataclass +from typing import Literal +from dbt.artifacts.resources.types import NodeType +from dbt.artifacts.resources.v1.components import CompiledResource + + +@dataclass +class SqlOperation(CompiledResource): + resource_type: Literal[NodeType.SqlOperation] diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py index fe5f91d265e..714782161cc 100644 --- a/core/dbt/contracts/files.py +++ b/core/dbt/contracts/files.py @@ -1,4 +1,3 @@ -import hashlib import os from dataclasses import dataclass, field @@ -7,6 +6,7 @@ from dbt.constants import MAXIMUM_SEED_SIZE from dbt_common.dataclass_schema import dbtClassMixin, StrEnum +from dbt.artifacts.resources.base import FileHash from .util import SourceKey @@ -70,46 +70,6 @@ def seed_too_large(self) -> bool: return os.stat(self.full_path).st_size > MAXIMUM_SEED_SIZE -@dataclass -class FileHash(dbtClassMixin): - name: str # the hash type name - checksum: str # the hashlib.hash_type().hexdigest() of the file contents - - @classmethod - def empty(cls): - return FileHash(name="none", checksum="") - - @classmethod - def path(cls, path: str): - return FileHash(name="path", checksum=path) - - def __eq__(self, other): - if not isinstance(other, FileHash): - return 
NotImplemented - - if self.name == "none" or self.name != other.name: - return False - - return self.checksum == other.checksum - - def compare(self, contents: str) -> bool: - """Compare the file contents with the given hash""" - if self.name == "none": - return False - - return self.from_contents(contents, name=self.name) == self.checksum - - @classmethod - def from_contents(cls, contents: str, name="sha256") -> "FileHash": - """Create a file hash from the given file contents. The hash is always - the utf-8 encoding of the contents given, because dbt only reads files - as utf-8. - """ - data = contents.encode("utf-8") - checksum = hashlib.new(name, data).hexdigest() - return cls(name=name, checksum=checksum) - - @dataclass class RemoteFile(dbtClassMixin): def __init__(self, language) -> None: diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index 4e9f9411f23..bab2f7d9336 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -34,7 +34,6 @@ ManifestNode, Metric, ModelNode, - DeferRelation, ResultNode, SavedQuery, SemanticModel, @@ -46,7 +45,7 @@ from dbt.contracts.graph.unparsed import SourcePatch, UnparsedVersion # to preserve import paths -from dbt.artifacts.resources import NodeVersion +from dbt.artifacts.resources import NodeVersion, DeferRelation from dbt.artifacts.schemas.manifest import WritableManifest, ManifestMetadata, UniqueID from dbt.contracts.files import ( SourceFile, diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index 35c8bbca9ce..12753794859 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -1,27 +1,22 @@ from dataclasses import field, dataclass from typing import Any, List, Optional, Dict, Union, Type -from typing_extensions import Annotated from dbt.artifacts.resources import ( ExposureConfig, MetricConfig, SavedQueryConfig, SemanticModelConfig, + NodeConfig, + SeedConfig, + TestConfig, + SnapshotConfig, SourceConfig, + ModelConfig, ) from dbt_common.contracts.config.base import BaseConfig, MergeBehavior, CompareBehavior -from dbt_common.contracts.config.materialization import OnConfigurationChangeOption from dbt_common.contracts.config.metadata import Metadata, ShowBehavior -from dbt_common.dataclass_schema import ( - dbtClassMixin, - ValidationError, -) -from dbt.contracts.graph.unparsed import Docs -from dbt.contracts.graph.utils import validate_color from dbt.contracts.util import list_str -from dbt import hooks -from dbt.node_types import NodeType, AccessType -from mashumaro.jsonschema.annotations import Pattern +from dbt.node_types import NodeType def metas(*metas: Metadata) -> Dict[str, Any]: @@ -38,323 +33,17 @@ def insensitive_patterns(*patterns: str): return "^({})$".format("|".join(lowercased)) -class Severity(str): - pass - - -@dataclass -class ContractConfig(dbtClassMixin): - enforced: bool = False - alias_types: bool = True - - -@dataclass -class Hook(dbtClassMixin): - sql: str - transaction: bool = True - index: Optional[int] = None - - -@dataclass -class NodeAndTestConfig(BaseConfig): - enabled: bool = True - # these fields are included in serialized output, but are not part of - # config comparison (they are part of database_representation) - alias: Optional[str] = field( - default=None, - metadata=CompareBehavior.Exclude.meta(), - ) - schema: Optional[str] = field( - default=None, - metadata=CompareBehavior.Exclude.meta(), - ) - database: Optional[str] = field( - 
default=None, - metadata=CompareBehavior.Exclude.meta(), - ) - tags: Union[List[str], str] = field( - default_factory=list_str, - metadata=metas(ShowBehavior.Hide, MergeBehavior.Append, CompareBehavior.Exclude), - ) - meta: Dict[str, Any] = field( - default_factory=dict, - metadata=MergeBehavior.Update.meta(), - ) - group: Optional[str] = field( - default=None, - metadata=CompareBehavior.Exclude.meta(), - ) - - -@dataclass -class NodeConfig(NodeAndTestConfig): - # Note: if any new fields are added with MergeBehavior, also update the - # 'mergebehavior' dictionary - materialized: str = "view" - incremental_strategy: Optional[str] = None - persist_docs: Dict[str, Any] = field(default_factory=dict) - post_hook: List[Hook] = field( - default_factory=list, - metadata={"merge": MergeBehavior.Append, "alias": "post-hook"}, - ) - pre_hook: List[Hook] = field( - default_factory=list, - metadata={"merge": MergeBehavior.Append, "alias": "pre-hook"}, - ) - quoting: Dict[str, Any] = field( - default_factory=dict, - metadata=MergeBehavior.Update.meta(), - ) - # This is actually only used by seeds. Should it be available to others? - # That would be a breaking change! - column_types: Dict[str, Any] = field( - default_factory=dict, - metadata=MergeBehavior.Update.meta(), - ) - full_refresh: Optional[bool] = None - # 'unique_key' doesn't use 'Optional' because typing.get_type_hints was - # sometimes getting the Union order wrong, causing serialization failures. - unique_key: Union[str, List[str], None] = None - on_schema_change: Optional[str] = "ignore" - on_configuration_change: OnConfigurationChangeOption = field( - default_factory=OnConfigurationChangeOption.default - ) - grants: Dict[str, Any] = field( - default_factory=dict, metadata=MergeBehavior.DictKeyAppend.meta() - ) - packages: List[str] = field( - default_factory=list, - metadata=MergeBehavior.Append.meta(), - ) - docs: Docs = field( - default_factory=Docs, - metadata=MergeBehavior.Update.meta(), - ) - contract: ContractConfig = field( - default_factory=ContractConfig, - metadata=MergeBehavior.Update.meta(), - ) - - def __post_init__(self): - # we validate that node_color has a suitable value to prevent dbt-docs from crashing - if self.docs.node_color: - node_color = self.docs.node_color - if not validate_color(node_color): - raise ValidationError( - f"Invalid color name for docs.node_color: {node_color}. " - "It is neither a valid HTML color name nor a valid HEX code." - ) - - if ( - self.contract.enforced - and self.materialized == "incremental" - and self.on_schema_change not in ("append_new_columns", "fail") - ): - raise ValidationError( - f"Invalid value for on_schema_change: {self.on_schema_change}. 
Models " - "materialized as incremental with contracts enabled must set " - "on_schema_change to 'append_new_columns' or 'fail'" - ) - - @classmethod - def __pre_deserialize__(cls, data): - data = super().__pre_deserialize__(data) - for key in hooks.ModelHookType: - if key in data: - data[key] = [hooks.get_hook_dict(h) for h in data[key]] - return data - - # this is still used by jsonschema validation - @classmethod - def field_mapping(cls): - return {"post_hook": "post-hook", "pre_hook": "pre-hook"} - - -@dataclass -class ModelConfig(NodeConfig): - access: AccessType = field( - default=AccessType.Protected, - metadata=MergeBehavior.Update.meta(), - ) - - @dataclass class UnitTestNodeConfig(NodeConfig): expected_rows: List[Dict[str, Any]] = field(default_factory=list) -@dataclass -class SeedConfig(NodeConfig): - materialized: str = "seed" - delimiter: str = "," - quote_columns: Optional[bool] = None - - @classmethod - def validate(cls, data): - super().validate(data) - if data.get("materialized") and data.get("materialized") != "seed": - raise ValidationError("A seed must have a materialized value of 'seed'") - - -SEVERITY_PATTERN = r"^([Ww][Aa][Rr][Nn]|[Ee][Rr][Rr][Oo][Rr])$" - - -@dataclass -class TestConfig(NodeAndTestConfig): - __test__ = False - - # this is repeated because of a different default - schema: Optional[str] = field( - default="dbt_test__audit", - metadata=CompareBehavior.Exclude.meta(), - ) - materialized: str = "test" - # Annotated is used by mashumaro for jsonschema generation - severity: Annotated[Severity, Pattern(SEVERITY_PATTERN)] = Severity("ERROR") - store_failures: Optional[bool] = None - store_failures_as: Optional[str] = None - where: Optional[str] = None - limit: Optional[int] = None - fail_calc: str = "count(*)" - warn_if: str = "!= 0" - error_if: str = "!= 0" - - def __post_init__(self): - """ - The presence of a setting for `store_failures_as` overrides any existing setting for `store_failures`, - regardless of level of granularity. If `store_failures_as` is not set, then `store_failures` takes effect. - At the time of implementation, `store_failures = True` would always create a table; the user could not - configure this. Hence, if `store_failures = True` and `store_failures_as` is not specified, then it - should be set to "table" to mimic the existing functionality. - - A side effect of this overriding functionality is that `store_failures_as="view"` at the project - level cannot be turned off at the model level without setting both `store_failures_as` and - `store_failures`. The former would cascade down and override `store_failures=False`. The proposal - is to include "ephemeral" as a value for `store_failures_as`, which effectively sets - `store_failures=False`. - - The exception handling for this is tricky. If we raise an exception here, the entire run fails at - parse time. We would rather well-formed models run successfully, leaving only exceptions to be rerun - if necessary. Hence, the exception needs to be raised in the test materialization. In order to do so, - we need to make sure that we go down the `store_failures = True` route with the invalid setting for - `store_failures_as`. This results in the `.get()` defaulted to `True` below, instead of a normal - dictionary lookup as is done in the `if` block. Refer to the test materialization for the - exception that is raise as a result of an invalid value. 
- - The intention of this block is to behave as if `store_failures_as` is the only setting, - but still allow for backwards compatibility for `store_failures`. - See https://github.com/dbt-labs/dbt-core/issues/6914 for more information. - """ - - # if `store_failures_as` is not set, it gets set by `store_failures` - # the settings below mimic existing behavior prior to `store_failures_as` - get_store_failures_as_map = { - True: "table", - False: "ephemeral", - None: None, - } - - # if `store_failures_as` is set, it dictates what `store_failures` gets set to - # the settings below overrides whatever `store_failures` is set to by the user - get_store_failures_map = { - "ephemeral": False, - "table": True, - "view": True, - } - - if self.store_failures_as is None: - self.store_failures_as = get_store_failures_as_map[self.store_failures] - else: - self.store_failures = get_store_failures_map.get(self.store_failures_as, True) - - @classmethod - def same_contents(cls, unrendered: Dict[str, Any], other: Dict[str, Any]) -> bool: - """This is like __eq__, except it explicitly checks certain fields.""" - modifiers = [ - "severity", - "where", - "limit", - "fail_calc", - "warn_if", - "error_if", - "store_failures", - "store_failures_as", - ] - - seen = set() - for _, target_name in cls._get_fields(): - key = target_name - seen.add(key) - if key in modifiers: - if not cls.compare_key(unrendered, other, key): - return False - return True - - @classmethod - def validate(cls, data): - super().validate(data) - if data.get("materialized") and data.get("materialized") != "test": - raise ValidationError("A test must have a materialized value of 'test'") - - @dataclass class EmptySnapshotConfig(NodeConfig): materialized: str = "snapshot" unique_key: Optional[str] = None # override NodeConfig unique_key definition -@dataclass -class SnapshotConfig(EmptySnapshotConfig): - strategy: Optional[str] = None - unique_key: Optional[str] = None - target_schema: Optional[str] = None - target_database: Optional[str] = None - updated_at: Optional[str] = None - # Not using Optional because of serialization issues with a Union of str and List[str] - check_cols: Union[str, List[str], None] = None - - @classmethod - def validate(cls, data): - super().validate(data) - # Note: currently you can't just set these keys in schema.yml because this validation - # will fail when parsing the snapshot node. - if not data.get("strategy") or not data.get("unique_key") or not data.get("target_schema"): - raise ValidationError( - "Snapshots must be configured with a 'strategy', 'unique_key', " - "and 'target_schema'." - ) - if data.get("strategy") == "check": - if not data.get("check_cols"): - raise ValidationError( - "A snapshot configured with the check strategy must " - "specify a check_cols configuration." - ) - if isinstance(data["check_cols"], str) and data["check_cols"] != "all": - raise ValidationError( - f"Invalid value for 'check_cols': {data['check_cols']}. " - "Expected 'all' or a list of strings." - ) - elif data.get("strategy") == "timestamp": - if not data.get("updated_at"): - raise ValidationError( - "A snapshot configured with the timestamp strategy " - "must specify an updated_at configuration." 
- ) - if data.get("check_cols"): - raise ValidationError("A 'timestamp' snapshot should not have 'check_cols'") - # If the strategy is not 'check' or 'timestamp' it's a custom strategy, - # formerly supported with GenericSnapshotConfig - - if data.get("materialized") and data.get("materialized") != "snapshot": - raise ValidationError("A snapshot must have a materialized value of 'snapshot'") - - # Called by "calculate_node_config_dict" in ContextConfigGenerator - def finalize_and_validate(self): - data = self.to_dict(omit_none=True) - self.validate(data) - return self.from_dict(data) - - @dataclass class UnitTestConfig(BaseConfig): tags: Union[str, List[str]] = field( @@ -375,7 +64,7 @@ class UnitTestConfig(BaseConfig): NodeType.Source: SourceConfig, NodeType.Seed: SeedConfig, NodeType.Test: TestConfig, - NodeType.Model: NodeConfig, + NodeType.Model: ModelConfig, NodeType.Snapshot: SnapshotConfig, NodeType.Unit: UnitTestConfig, } diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index e5df173db2f..146f54f1a91 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -19,14 +19,9 @@ ) from dbt import deprecations -from dbt_common.contracts.constraints import ( - ConstraintType, - ModelLevelConstraint, -) -from dbt_common.dataclass_schema import dbtClassMixin +from dbt_common.contracts.constraints import ConstraintType from dbt_common.clients.system import write_file -from dbt.contracts.files import FileHash from dbt.contracts.graph.unparsed import ( HasYamlMetadata, TestDef, @@ -38,6 +33,11 @@ UnitTestOutputFixture, UnitTestNodeVersions, ) +from dbt.contracts.graph.model_config import ( + UnitTestNodeConfig, + UnitTestConfig, + EmptySnapshotConfig, +) from dbt.contracts.graph.node_args import ModelNodeArgs from dbt_common.events.functions import warn_or_error from dbt.exceptions import ParsingError, ContractBreakingChangeError, ValidationError @@ -57,25 +57,12 @@ VERSIONED_NODE_TYPES, ) -from .model_config import ( - NodeConfig, - ModelConfig, - SeedConfig, - TestConfig, - EmptySnapshotConfig, - SnapshotConfig, - UnitTestConfig, - UnitTestNodeConfig, -) from dbt.artifacts.resources import ( BaseResource, - ColumnInfo as ColumnInfoResource, DependsOn, Docs, Exposure as ExposureResource, - HasRelationMetadata as HasRelationMetadataResource, - MacroDependsOn, MacroArgument, Documentation as DocumentationResource, Macro as MacroResource, @@ -83,10 +70,26 @@ NodeVersion, Group as GroupResource, GraphResource, - Quoting as QuotingResource, - RefArgs as RefArgsResource, SavedQuery as SavedQueryResource, SemanticModel as SemanticModelResource, + ParsedResourceMandatory, + ParsedResource, + CompiledResource, + HasRelationMetadata as HasRelationMetadataResource, + FileHash, + NodeConfig, + ColumnInfo, + InjectedCTE, + Analysis as AnalysisResource, + HookNode as HookNodeResource, + Model as ModelResource, + ModelConfig, + SqlOperation as SqlOperationResource, + Seed as SeedResource, + SingularTest as SingularTestResource, + GenericTest as GenericTestResource, + Snapshot as SnapshotResource, + Quoting as QuotingResource, SourceDefinition as SourceDefinitionResource, ) @@ -180,35 +183,30 @@ def same_fqn(self, other) -> bool: @dataclass -class Contract(dbtClassMixin): - enforced: bool = False - alias_types: bool = True - checksum: Optional[str] = None - - -@dataclass -class DeferRelation(HasRelationMetadataResource): - alias: str - relation_name: Optional[str] +class HasRelationMetadata(HasRelationMetadataResource): + @classmethod + def 
__pre_deserialize__(cls, data): + data = super().__pre_deserialize__(data) + if "database" not in data: + data["database"] = None + return data @property - def identifier(self): - return self.alias + def quoting_dict(self) -> Dict[str, bool]: + if hasattr(self, "quoting"): + return self.quoting.to_dict(omit_none=True) + else: + return {} @dataclass -class ParsedNodeMandatory(GraphNode, HasRelationMetadataResource): - alias: str - checksum: FileHash - config: NodeConfig = field(default_factory=NodeConfig) - - @property - def identifier(self): - return self.alias +class ParsedNodeMandatory(ParsedResourceMandatory, GraphNode, HasRelationMetadata): + pass # This needs to be in all ManifestNodes and also in SourceDefinition, -# because of "source freshness" +# because of "source freshness". Should not be in artifacts, because we +# don't write out _event_status. @dataclass class NodeInfoMixin: _event_status: Dict[str, Any] = field(default_factory=dict) @@ -244,22 +242,7 @@ def clear_event_status(self): @dataclass -class ParsedNode(NodeInfoMixin, ParsedNodeMandatory, SerializableType): - tags: List[str] = field(default_factory=list) - description: str = field(default="") - columns: Dict[str, ColumnInfoResource] = field(default_factory=dict) - meta: Dict[str, Any] = field(default_factory=dict) - group: Optional[str] = None - docs: Docs = field(default_factory=Docs) - patch_path: Optional[str] = None - build_path: Optional[str] = None - deferred: bool = False - unrendered_config: Dict[str, Any] = field(default_factory=dict) - created_at: float = field(default_factory=lambda: time.time()) - config_call_dict: Dict[str, Any] = field(default_factory=dict) - relation_name: Optional[str] = None - raw_code: str = "" - +class ParsedNode(ParsedResource, NodeInfoMixin, ParsedNodeMandatory, SerializableType): def get_target_write_path(self, target_path: str, subdirectory: str): # This is called for both the "compiled" subdirectory of "target" and the "run" subdirectory if os.path.basename(self.path) == os.path.basename(self.original_file_path): @@ -299,8 +282,6 @@ def _deserialize(cls, dct: Dict[str, int]): return AnalysisNode.from_dict(dct) elif resource_type == "seed": return SeedNode.from_dict(dct) - elif resource_type == "rpc": - return RPCNode.from_dict(dct) elif resource_type == "sql": return SqlNode.from_dict(dct) elif resource_type == "test": @@ -398,31 +379,10 @@ def is_external_node(self): @dataclass -class InjectedCTE(dbtClassMixin): - """Used in CompiledNodes as part of ephemeral model processing""" - - id: str - sql: str - - -@dataclass -class CompiledNode(ParsedNode): +class CompiledNode(CompiledResource, ParsedNode): """Contains attributes necessary for SQL files and nodes with refs, sources, etc, so all ManifestNodes except SeedNode.""" - language: str = "sql" - refs: List[RefArgsResource] = field(default_factory=list) - sources: List[List[str]] = field(default_factory=list) - metrics: List[List[str]] = field(default_factory=list) - depends_on: DependsOn = field(default_factory=DependsOn) - compiled_path: Optional[str] = None - compiled: bool = False - compiled_code: Optional[str] = None - extra_ctes_injected: bool = False - extra_ctes: List[InjectedCTE] = field(default_factory=list) - _pre_injected_sql: Optional[str] = None - contract: Contract = field(default_factory=Contract) - @property def empty(self): return not self.raw_code.strip() @@ -469,27 +429,17 @@ def depends_on_macros(self): @dataclass -class AnalysisNode(CompiledNode): - resource_type: Literal[NodeType.Analysis] +class 
AnalysisNode(AnalysisResource, CompiledNode): + pass @dataclass -class HookNode(CompiledNode): - resource_type: Literal[NodeType.Operation] - index: Optional[int] = None +class HookNode(HookNodeResource, CompiledNode): + pass @dataclass -class ModelNode(CompiledNode): - resource_type: Literal[NodeType.Model] - access: AccessType = AccessType.Protected - config: ModelConfig = field(default_factory=ModelConfig) - constraints: List[ModelLevelConstraint] = field(default_factory=list) - version: Optional[NodeVersion] = None - latest_version: Optional[NodeVersion] = None - deprecation_date: Optional[datetime] = None - defer_relation: Optional[DeferRelation] = None - +class ModelNode(ModelResource, CompiledNode): @classmethod def from_args(cls, args: ModelNodeArgs) -> "ModelNode": unique_id = args.unique_id @@ -766,15 +716,9 @@ def same_contract(self, old, adapter_type=None) -> bool: return False -# TODO: rm? -@dataclass -class RPCNode(CompiledNode): - resource_type: Literal[NodeType.RPCCall] - - @dataclass -class SqlNode(CompiledNode): - resource_type: Literal[NodeType.SqlOperation] +class SqlNode(SqlOperationResource, CompiledNode): + pass # ==================================== @@ -783,15 +727,7 @@ class SqlNode(CompiledNode): @dataclass -class SeedNode(ParsedNode): # No SQLDefaults! - resource_type: Literal[NodeType.Seed] - config: SeedConfig = field(default_factory=SeedConfig) - # seeds need the root_path because the contents are not loaded initially - # and we need the root_path to load the seed later - root_path: Optional[str] = None - depends_on: MacroDependsOn = field(default_factory=MacroDependsOn) - defer_relation: Optional[DeferRelation] = None - +class SeedNode(SeedResource, ParsedNode): # No SQLDefaults! def same_seeds(self, other: "SeedNode") -> bool: # for seeds, we check the hashes. If the hashes are different types, # no match. If the hashes are both the same 'path', log a warning and @@ -909,12 +845,7 @@ def is_relational(self): @dataclass -class SingularTestNode(TestShouldStoreFailures, CompiledNode): - resource_type: Literal[NodeType.Test] - # Was not able to make mypy happy and keep the code working. We need to - # refactor the various configs. - config: TestConfig = field(default_factory=TestConfig) # type: ignore - +class SingularTestNode(SingularTestResource, TestShouldStoreFailures, CompiledNode): @property def test_node_type(self): return "singular" @@ -926,34 +857,7 @@ def test_node_type(self): @dataclass -class TestMetadata(dbtClassMixin): - __test__ = False - - name: str - # kwargs are the args that are left in the test builder after - # removing configs. They are set from the test builder when - # the test node is created. - kwargs: Dict[str, Any] = field(default_factory=dict) - namespace: Optional[str] = None - - -# This has to be separated out because it has no default and so -# has to be included as a superclass, not an attribute -@dataclass -class HasTestMetadata(dbtClassMixin): - test_metadata: TestMetadata - - -@dataclass -class GenericTestNode(TestShouldStoreFailures, CompiledNode, HasTestMetadata): - resource_type: Literal[NodeType.Test] - column_name: Optional[str] = None - file_key_name: Optional[str] = None - # Was not able to make mypy happy and keep the code working. We need to - # refactor the various configs. 
- config: TestConfig = field(default_factory=TestConfig) # type: ignore - attached_node: Optional[str] = None - +class GenericTestNode(GenericTestResource, TestShouldStoreFailures, CompiledNode): def same_contents(self, other, adapter_type: Optional[str]) -> bool: if other is None: return False @@ -1072,10 +976,8 @@ class IntermediateSnapshotNode(CompiledNode): @dataclass -class SnapshotNode(CompiledNode): - resource_type: Literal[NodeType.Snapshot] - config: SnapshotConfig - defer_relation: Optional[DeferRelation] = None +class SnapshotNode(SnapshotResource, CompiledNode): + pass # ==================================== @@ -1220,7 +1122,7 @@ class SourceDefinition( NodeInfoMixin, GraphNode, SourceDefinitionResource, - HasRelationMetadataResource, + HasRelationMetadata, ): @classmethod def resource_class(cls) -> Type[SourceDefinitionResource]: @@ -1618,7 +1520,7 @@ class ParsedPatch(HasYamlMetadata): # may be empty. @dataclass class ParsedNodePatch(ParsedPatch): - columns: Dict[str, ColumnInfoResource] + columns: Dict[str, ColumnInfo] access: Optional[str] version: Optional[NodeVersion] latest_version: Optional[NodeVersion] @@ -1643,7 +1545,6 @@ class ParsedMacroPatch(ParsedPatch): SingularTestNode, HookNode, ModelNode, - RPCNode, SqlNode, GenericTestNode, SnapshotNode, diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index 8dbfc4cb3a4..61e34237e5c 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -12,11 +12,12 @@ generate_generate_name_macro_context, ) from dbt.adapters.factory import get_adapter # noqa: F401 +from dbt.artifacts.resources import Contract from dbt.clients.jinja import get_rendered from dbt.config import Project, RuntimeConfig from dbt.context.context_config import ContextConfig from dbt.contracts.graph.manifest import Manifest -from dbt.contracts.graph.nodes import Contract, BaseNode, ManifestNode +from dbt.contracts.graph.nodes import BaseNode, ManifestNode from dbt.contracts.graph.unparsed import Docs, UnparsedNode from dbt.exceptions import ( DbtInternalError, diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 66cccfbcac0..eb6ff5b5702 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -79,7 +79,7 @@ from dbt.context.macro_resolver import MacroResolver, TestMacroNamespace from dbt.context.configured import generate_macro_context from dbt.context.providers import ParseProvider, generate_runtime_macro_context -from dbt.contracts.files import FileHash, ParseFileType, SchemaSourceFile +from dbt.contracts.files import ParseFileType, SchemaSourceFile from dbt.parser.read_files import ( ReadFilesFromFileSystem, load_source_file, @@ -107,7 +107,7 @@ ResultNode, ModelNode, ) -from dbt.artifacts.resources import NodeRelation, NodeVersion +from dbt.artifacts.resources import NodeRelation, NodeVersion, FileHash from dbt.artifacts.schemas.base import Writable from dbt.exceptions import ( TargetNotFoundError, diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 9c67cfff665..2a4896bd2cf 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -562,6 +562,14 @@ def validate_and_rename(data): validate_and_rename(column) def patch_node_config(self, node, patch): + if "access" in patch.config: + if AccessType.is_valid(patch.config["access"]): + patch.config["access"] = AccessType(patch.config["access"]) + else: + raise InvalidAccessTypeError( + unique_id=node.unique_id, + field_value=patch.config["access"], + ) # Get the ContextConfig that's used in calculating the 
config # This must match the model resource_type that's being patched config = ContextConfig( diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index bb98fdb6878..918c4c85c78 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -14,7 +14,8 @@ from dbt.context.providers import generate_parse_exposure, get_rendered from dbt.contracts.files import FileHash, SchemaSourceFile from dbt.contracts.graph.manifest import Manifest -from dbt.contracts.graph.model_config import UnitTestNodeConfig, ModelConfig +from dbt.contracts.graph.model_config import UnitTestNodeConfig +from dbt.artifacts.resources import ModelConfig from dbt.contracts.graph.nodes import ( ModelNode, UnitTestNode, diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py index aa8407694c1..83163c4a74e 100644 --- a/core/dbt/task/run.py +++ b/core/dbt/task/run.py @@ -17,10 +17,10 @@ from dbt.adapters.base import BaseRelation from dbt.clients.jinja import MacroGenerator from dbt.context.providers import generate_runtime_model_context -from dbt.contracts.graph.model_config import Hook from dbt.contracts.graph.nodes import HookNode, ResultNode from dbt.artifacts.schemas.results import NodeStatus, RunStatus, RunningStatus, BaseResult from dbt.artifacts.schemas.run import RunResult +from dbt.artifacts.resources import Hook from dbt.exceptions import ( CompilationError, DbtInternalError, diff --git a/tests/unit/test_contracts_graph_compiled.py b/tests/unit/test_contracts_graph_compiled.py index 5343cdd5246..8c454d6a68a 100644 --- a/tests/unit/test_contracts_graph_compiled.py +++ b/tests/unit/test_contracts_graph_compiled.py @@ -11,10 +11,8 @@ InjectedCTE, ModelNode, ModelConfig, - TestConfig, - TestMetadata, - Contract, ) +from dbt.artifacts.resources import Contract, TestConfig, TestMetadata from dbt.node_types import NodeType from .utils import ( diff --git a/tests/unit/test_contracts_graph_parsed.py b/tests/unit/test_contracts_graph_parsed.py index 7fb2b4afa99..0e9a1c523f7 100644 --- a/tests/unit/test_contracts_graph_parsed.py +++ b/tests/unit/test_contracts_graph_parsed.py @@ -19,20 +19,22 @@ Owner, Quoting, RefArgs, + MacroDependsOn, + TestMetadata, SourceConfig, Time, + Hook, ) from dbt.artifacts.resources.types import TimePeriod from dbt.node_types import NodeType, AccessType from dbt.contracts.files import FileHash from dbt.contracts.graph.model_config import ( - ModelConfig, NodeConfig, SeedConfig, TestConfig, SnapshotConfig, EmptySnapshotConfig, - Hook, + ModelConfig, ) from dbt.contracts.graph.nodes import ( ModelNode, @@ -45,11 +47,9 @@ Metric, SeedNode, Docs, - MacroDependsOn, SourceDefinition, Documentation, HookNode, - TestMetadata, SemanticModel, ) from dbt.artifacts.resources import SourceDefinition as SourceDefinitionResource diff --git a/tests/unit/test_graph_selector_methods.py b/tests/unit/test_graph_selector_methods.py index 0fd9c96fbc9..af1dc6fde3e 100644 --- a/tests/unit/test_graph_selector_methods.py +++ b/tests/unit/test_graph_selector_methods.py @@ -8,7 +8,6 @@ from dbt.contracts.files import FileHash from dbt.contracts.graph.nodes import ( DependsOn, - MacroDependsOn, NodeConfig, Macro, ModelNode, @@ -21,8 +20,6 @@ SingularTestNode, GenericTestNode, SourceDefinition, - TestConfig, - TestMetadata, AccessType, UnitTestDefinition, ) @@ -35,6 +32,9 @@ NodeRelation, Owner, QueryParams, + MacroDependsOn, + TestConfig, + TestMetadata, ) from dbt.contracts.graph.unparsed import ( UnitTestInputFixture, diff --git a/tests/unit/test_parser.py b/tests/unit/test_parser.py 
index c949756eba8..89b2ca27de0 100644 --- a/tests/unit/test_parser.py +++ b/tests/unit/test_parser.py @@ -12,7 +12,8 @@ from dbt.context.context_config import ContextConfig from dbt.contracts.files import SourceFile, FileHash, FilePath, SchemaSourceFile from dbt.contracts.graph.manifest import Manifest -from dbt.contracts.graph.model_config import NodeConfig, TestConfig, SnapshotConfig, ModelConfig +from dbt.contracts.graph.model_config import NodeConfig, TestConfig, SnapshotConfig +from dbt.artifacts.resources import ModelConfig from dbt.contracts.graph.nodes import ( ModelNode, Macro,
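
---

Notes for reviewers. The practical effect of this patch on import paths: the
resource dataclasses below now live under core/dbt/artifacts/resources/ and are
re-exported from its __init__.py, while core/dbt/contracts/files.py keeps
FileHash importable for existing callers. A minimal sketch of the new import
surface, based only on the hunks above:

    # Python: new canonical locations after this patch
    from dbt.artifacts.resources import (
        FileHash,        # moved from core/dbt/contracts/files.py into resources/base.py
        Docs,            # moved from resources/v1/docs.py (now deleted) into resources/base.py
        MacroDependsOn,  # moved from resources/v1/macro.py into resources/v1/components.py
        Contract,        # moved from core/dbt/contracts/graph/nodes.py
        TestMetadata,    # moved from core/dbt/contracts/graph/nodes.py
        Hook,            # moved from core/dbt/contracts/graph/model_config.py
        NodeConfig,
        ModelConfig,
        SeedConfig,
        SnapshotConfig,
        TestConfig,
    )

    # still valid: files.py now re-imports FileHash from dbt.artifacts.resources.base
    from dbt.contracts.files import FileHash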
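
FileHash moves verbatim: equality requires matching algorithm names, and a
checksum named "none" never compares equal, even to itself, so callers treat
such files as always changed. A quick illustration, assuming a dbt-core
checkout with this patch applied:

    import hashlib
    from dbt.artifacts.resources import FileHash

    h1 = FileHash.from_contents("select 1 as id")
    h2 = FileHash.from_contents("select 1 as id")
    assert h1.name == "sha256"  # default algorithm
    assert h1.checksum == hashlib.sha256("select 1 as id".encode("utf-8")).hexdigest()
    assert h1 == h2                              # same algorithm and checksum
    assert FileHash.empty() != FileHash.empty()  # "none" hashes never match anything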
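
NodeConfig.__post_init__ carries over two parse-time checks: docs.node_color
must be a valid HTML color name or HEX code, and an enforced contract on an
incremental model requires on_schema_change to be 'append_new_columns' or
'fail'. A sketch of the second check; constructing configs directly like this
is for illustration only (dbt builds them during parsing), and ContractConfig
is imported from the v1 module because __init__.py does not re-export it:

    from dbt.artifacts.resources import NodeConfig
    from dbt.artifacts.resources.v1.config import ContractConfig
    from dbt_common.dataclass_schema import ValidationError

    try:
        NodeConfig(
            materialized="incremental",
            on_schema_change="ignore",  # the default, invalid once a contract is enforced
            contract=ContractConfig(enforced=True),
        )
    except ValidationError as err:
        print(err)  # Invalid value for on_schema_change: ignore. ...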
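
The long TestConfig.__post_init__ docstring reduces to three concrete cases.
Under the same illustration-only caveat:

    from dbt.artifacts.resources import TestConfig

    # store_failures alone is mapped onto store_failures_as
    assert TestConfig(store_failures=True).store_failures_as == "table"
    assert TestConfig(store_failures=False).store_failures_as == "ephemeral"

    # when both are set, store_failures_as wins, even over an explicit False
    assert TestConfig(store_failures=False, store_failures_as="view").store_failures is True

    # an unrecognized value falls through .get(..., True), deferring the
    # error to the test materialization instead of failing the whole parse
    assert TestConfig(store_failures_as="bogus").store_failures is True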
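
SnapshotConfig keeps its validate()/finalize_and_validate() pair, and the
checks run on the dict form of the config, which is why these keys still
cannot be set in schema.yml alone without failing at snapshot parse time. A
sketch, with the error text taken from the hunks above:

    from dbt.artifacts.resources import SnapshotConfig
    from dbt_common.dataclass_schema import ValidationError

    ok = SnapshotConfig(
        strategy="timestamp",
        unique_key="id",
        target_schema="snapshots",
        updated_at="updated_at",
    )
    ok.finalize_and_validate()  # round-trips through to_dict(), validate(), from_dict()

    try:
        SnapshotConfig(
            strategy="check", unique_key="id", target_schema="snapshots"
        ).finalize_and_validate()
    except ValidationError as err:
        print(err)  # "A snapshot configured with the check strategy must
                    #  specify a check_cols configuration."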