Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow configuration of snapshot column names #10608

Merged
merged 30 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
39253aa
Add various _column_name configs to SnapshotConfig
gshank Aug 26, 2024
a1b4e77
Use dbt-adapters branch
gshank Aug 26, 2024
408cb0b
Update list test
gshank Aug 27, 2024
049ea88
Update expected_manifest and snapshot_config in artifacts tests
gshank Aug 27, 2024
ac3a347
Changie
gshank Aug 27, 2024
868366f
Add seed_cn.sql
gshank Aug 29, 2024
935cab7
Add snapshot_table_column_names property
gshank Aug 29, 2024
ac63301
Actually add test file
gshank Aug 29, 2024
5346199
Use dbt-postgres snapshot_column_names branch
gshank Sep 3, 2024
8158ad1
Merge branch 'main' into snapshot_column_names
gshank Sep 3, 2024
71c7c4c
changie
gshank Sep 3, 2024
41a013d
Formatting
gshank Sep 3, 2024
6e9a9c2
Use dbt-common object_mergebehavior
gshank Sep 12, 2024
102f067
Update columns names object
gshank Sep 12, 2024
8d24642
Merge branch 'main' into snapshot_column_names
gshank Sep 12, 2024
c6b3e41
Update test case
gshank Sep 13, 2024
e5d9838
Update to use merge_config_dicts from dbt-common
gshank Sep 13, 2024
e1c4a49
Fix unit test
gshank Sep 13, 2024
ed3486b
Fix artifacts test
gshank Sep 13, 2024
befaea0
Merge branch 'main' into snapshot_column_names
gshank Sep 13, 2024
220107b
Update schemas
gshank Sep 16, 2024
3c03f29
Merge branch 'main' into snapshot_column_names
gshank Sep 16, 2024
c608621
Test for setting column names in dbt_project
gshank Sep 16, 2024
090208a
Add test for missing column names
gshank Sep 16, 2024
52c8ace
Merge branch 'main' into snapshot_column_names
gshank Sep 18, 2024
ed6390d
Remove duplicate changelog
gshank Sep 18, 2024
12c96bc
Use dbt-common main
gshank Sep 18, 2024
3406e60
Merge branch 'main' into snapshot_column_names
gshank Sep 20, 2024
733f208
Remove branches from dev-requirements.txt, bump versions of dbt-adapt…
gshank Sep 20, 2024
c71f9d5
Tweak test to work on Windows
gshank Sep 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20240903-154133.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Allow configuring snapshot column names
gshank marked this conversation as resolved.
Show resolved Hide resolved
time: 2024-09-03T15:41:33.167097-04:00
custom:
Author: gshank
Issue: "10185"
24 changes: 22 additions & 2 deletions core/dbt/artifacts/resources/v1/snapshot.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Dict, List, Literal, Optional, Union

from dbt.artifacts.resources.types import NodeType
from dbt.artifacts.resources.v1.components import CompiledResource, DeferRelation
from dbt.artifacts.resources.v1.config import NodeConfig
from dbt_common.dataclass_schema import ValidationError
from dbt_common.dataclass_schema import ValidationError, dbtClassMixin


@dataclass
class SnapshotMetaColumnNames(dbtClassMixin):
dbt_valid_to: Optional[str] = None
dbt_valid_from: Optional[str] = None
dbt_scd_id: Optional[str] = None
dbt_updated_at: Optional[str] = None


@dataclass
Expand All @@ -17,6 +25,18 @@ class SnapshotConfig(NodeConfig):
updated_at: Optional[str] = None
# Not using Optional because of serialization issues with a Union of str and List[str]
check_cols: Union[str, List[str], None] = None
snapshot_meta_column_names: SnapshotMetaColumnNames = field(
default_factory=SnapshotMetaColumnNames
)

@property
def snapshot_table_column_names(self):
return {
"dbt_valid_from": self.snapshot_meta_column_names.dbt_valid_from or "dbt_valid_from",
"dbt_valid_to": self.snapshot_meta_column_names.dbt_valid_to or "dbt_valid_to",
"dbt_scd_id": self.snapshot_meta_column_names.dbt_scd_id or "dbt_scd_id",
"dbt_updated_at": self.snapshot_meta_column_names.dbt_updated_at or "dbt_updated_at",
}

def final_validate(self):
if not self.strategy or not self.unique_key:
Expand Down
52 changes: 2 additions & 50 deletions core/dbt/context/context_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dbt.contracts.graph.model_config import get_config_for
from dbt.node_types import NodeType
from dbt.utils import fqn_search
from dbt_common.contracts.config.base import BaseConfig, _listify
from dbt_common.contracts.config.base import BaseConfig, merge_config_dicts
from dbt_common.exceptions import DbtInternalError


Expand Down Expand Up @@ -293,55 +293,7 @@ def __init__(

def add_config_call(self, opts: Dict[str, Any]) -> None:
dct = self._config_call_dict
self._add_config_call(dct, opts)

@classmethod
def _add_config_call(cls, config_call_dict, opts: Dict[str, Any]) -> None:
# config_call_dict is already encountered configs, opts is new
# This mirrors code in _merge_field_value in model_config.py which is similar but
# operates on config objects.
for k, v in opts.items():
# MergeBehavior for post-hook and pre-hook is to collect all
# values, instead of overwriting
if k in BaseConfig.mergebehavior["append"]:
if not isinstance(v, list):
v = [v]
if k in config_call_dict: # should always be a list here
config_call_dict[k].extend(v)
else:
config_call_dict[k] = v

elif k in BaseConfig.mergebehavior["update"]:
if not isinstance(v, dict):
raise DbtInternalError(f"expected dict, got {v}")
if k in config_call_dict and isinstance(config_call_dict[k], dict):
config_call_dict[k].update(v)
else:
config_call_dict[k] = v
elif k in BaseConfig.mergebehavior["dict_key_append"]:
if not isinstance(v, dict):
raise DbtInternalError(f"expected dict, got {v}")
if k in config_call_dict: # should always be a dict
for key, value in v.items():
extend = False
# This might start with a +, to indicate we should extend the list
# instead of just clobbering it
if key.startswith("+"):
extend = True
if key in config_call_dict[k] and extend:
# extend the list
config_call_dict[k][key].extend(_listify(value))
else:
# clobber the list
config_call_dict[k][key] = _listify(value)
else:
# This is always a dictionary
config_call_dict[k] = v
# listify everything
for key, value in config_call_dict[k].items():
config_call_dict[k][key] = _listify(value)
else:
config_call_dict[k] = v
merge_config_dicts(dct, opts)

def build_config_dict(
self,
Expand Down
3 changes: 2 additions & 1 deletion core/dbt/parser/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from dbt.node_types import ModelLanguage, NodeType
from dbt.parser.base import SimpleSQLParser
from dbt.parser.search import FileBlock
from dbt_common.contracts.config.base import merge_config_dicts
from dbt_common.dataclass_schema import ValidationError
from dbt_common.exceptions.macros import UndefinedMacroError
from dbt_extractor import ExtractionError, py_extract_from_source # type: ignore
Expand Down Expand Up @@ -467,7 +468,7 @@ def _get_config_call_dict(static_parser_result: Dict[str, Any]) -> Dict[str, Any
config_call_dict: Dict[str, Any] = {}

for c in static_parser_result["configs"]:
ContextConfig._add_config_call(config_call_dict, {c[0]: c[1]})
merge_config_dicts(config_call_dict, {c[0]: c[1]})

return config_call_dict

Expand Down
6 changes: 3 additions & 3 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
git+https://github.com/dbt-labs/dbt-adapters.git@main
git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter
git+https://github.com/dbt-labs/dbt-adapters.git@snapshot_column_names
git+https://github.com/dbt-labs/dbt-adapters.git@snapshot_column_names#subdirectory=dbt-tests-adapter
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know it's obvious, but I'm just leaving a reminder that these changes will need to be left out of the merge.

git+https://github.com/dbt-labs/dbt-common.git@main
git+https://github.com/dbt-labs/dbt-postgres.git@main
git+https://github.com/dbt-labs/dbt-postgres.git@snapshot_column_names
# black must match what's in .pre-commit-config.yaml to be sure local env matches CI
black==24.3.0
bumpversion
Expand Down
102 changes: 102 additions & 0 deletions schemas/dbt/manifest/v12.json
Original file line number Diff line number Diff line change
Expand Up @@ -6610,6 +6610,57 @@
}
],
"default": null
},
"snapshot_meta_column_names": {
"type": "object",
"title": "SnapshotMetaColumnNames",
"properties": {
"dbt_valid_to": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_valid_from": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_scd_id": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_updated_at": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
}
},
"additionalProperties": false
}
},
"additionalProperties": true
Expand Down Expand Up @@ -16336,6 +16387,57 @@
}
],
"default": null
},
"snapshot_meta_column_names": {
"type": "object",
"title": "SnapshotMetaColumnNames",
"properties": {
"dbt_valid_to": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_valid_from": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_scd_id": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
},
"dbt_updated_at": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null
}
},
"additionalProperties": false
}
},
"additionalProperties": true
Expand Down
6 changes: 6 additions & 0 deletions tests/functional/artifacts/expected_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ def get_rendered_snapshot_config(**updates):
"post-hook": [],
"column_types": {},
"quoting": {},
"snapshot_meta_column_names": {
"dbt_valid_to": None,
"dbt_valid_from": None,
"dbt_updated_at": None,
"dbt_scd_id": None,
},
"tags": [],
"persist_docs": {},
"full_refresh": None,
Expand Down
6 changes: 6 additions & 0 deletions tests/functional/list/test_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ def expect_snapshot_output(self, happy_path_project): # noqa: F811
"persist_docs": {},
"target_database": happy_path_project.database,
"target_schema": happy_path_project.test_schema,
"snapshot_meta_column_names": {
"dbt_scd_id": None,
"dbt_updated_at": None,
"dbt_valid_from": None,
"dbt_valid_to": None,
},
"unique_key": "id",
"strategy": "timestamp",
"updated_at": "updated_at",
Expand Down
82 changes: 82 additions & 0 deletions tests/functional/snapshots/data/seed_cn.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
create table {database}.{schema}.seed (
id INTEGER,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(50),
gender VARCHAR(50),
ip_address VARCHAR(20),
updated_at TIMESTAMP WITHOUT TIME ZONE
);

create table {database}.{schema}.snapshot_expected (
id INTEGER,
first_name VARCHAR(50),
last_name VARCHAR(50),
email VARCHAR(50),
gender VARCHAR(50),
ip_address VARCHAR(20),

-- snapshotting fields
updated_at TIMESTAMP WITHOUT TIME ZONE,
test_valid_from TIMESTAMP WITHOUT TIME ZONE,
test_valid_to TIMESTAMP WITHOUT TIME ZONE,
test_scd_id TEXT,
test_updated_at TIMESTAMP WITHOUT TIME ZONE
);


-- seed inserts
-- use the same email for two users to verify that duplicated check_cols values
-- are handled appropriately
insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values
(1, 'Judith', 'Kennedy', '(not provided)', 'Female', '54.60.24.128', '2015-12-24 12:19:28'),
(2, 'Arthur', 'Kelly', '(not provided)', 'Male', '62.56.24.215', '2015-10-28 16:22:15'),
(3, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'),
(4, 'Ralph', 'Turner', 'rturner3@hp.com', 'Male', '157.83.76.114', '2016-08-08 00:06:51'),
(5, 'Laura', 'Gonzales', 'lgonzales4@howstuffworks.com', 'Female', '30.54.105.168', '2016-09-01 08:25:38'),
(6, 'Katherine', 'Lopez', 'klopez5@yahoo.co.jp', 'Female', '169.138.46.89', '2016-08-30 18:52:11'),
(7, 'Jeremy', 'Hamilton', 'jhamilton6@mozilla.org', 'Male', '231.189.13.133', '2016-07-17 02:09:46'),
(8, 'Heather', 'Rose', 'hrose7@goodreads.com', 'Female', '87.165.201.65', '2015-12-29 22:03:56'),
(9, 'Gregory', 'Kelly', 'gkelly8@trellian.com', 'Male', '154.209.99.7', '2016-03-24 21:18:16'),
(10, 'Rachel', 'Lopez', 'rlopez9@themeforest.net', 'Female', '237.165.82.71', '2016-08-20 15:44:49'),
(11, 'Donna', 'Welch', 'dwelcha@shutterfly.com', 'Female', '103.33.110.138', '2016-02-27 01:41:48'),
(12, 'Russell', 'Lawrence', 'rlawrenceb@qq.com', 'Male', '189.115.73.4', '2016-06-11 03:07:09'),
(13, 'Michelle', 'Montgomery', 'mmontgomeryc@scientificamerican.com', 'Female', '243.220.95.82', '2016-06-18 16:27:19'),
(14, 'Walter', 'Castillo', 'wcastillod@pagesperso-orange.fr', 'Male', '71.159.238.196', '2016-10-06 01:55:44'),
(15, 'Robin', 'Mills', 'rmillse@vkontakte.ru', 'Female', '172.190.5.50', '2016-10-31 11:41:21'),
(16, 'Raymond', 'Holmes', 'rholmesf@usgs.gov', 'Male', '148.153.166.95', '2016-10-03 08:16:38'),
(17, 'Gary', 'Bishop', 'gbishopg@plala.or.jp', 'Male', '161.108.182.13', '2016-08-29 19:35:20'),
(18, 'Anna', 'Riley', 'arileyh@nasa.gov', 'Female', '253.31.108.22', '2015-12-11 04:34:27'),
(19, 'Sarah', 'Knight', 'sknighti@foxnews.com', 'Female', '222.220.3.177', '2016-09-26 00:49:06'),
(20, 'Phyllis', 'Fox', null, 'Female', '163.191.232.95', '2016-08-21 10:35:19');


-- populate snapshot table
insert into {database}.{schema}.snapshot_expected (
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
test_valid_from,
test_valid_to,
test_updated_at,
test_scd_id
)

select
id,
first_name,
last_name,
email,
gender,
ip_address,
updated_at,
-- fields added by snapshotting
updated_at as test_valid_from,
null::timestamp as test_valid_to,
updated_at as test_updated_at,
md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id
from {database}.{schema}.seed;
Loading
Loading