From 2c71baa01ffafaf843e6036f893439410c679281 Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Wed, 30 Mar 2022 13:59:38 +0200 Subject: [PATCH 1/7] Update gitignore for latest version of dbt --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index dad33a4..bd45744 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ target/ dbt_modules/ +dbt_packages/ logs/ From 966aee13c996d98a8e0ebeb366b10d5d9ce2407d Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Wed, 30 Mar 2022 14:00:05 +0200 Subject: [PATCH 2/7] Add macros to retrieve info from graph --- macros/generate_model_yaml.sql | 5 +++-- macros/helpers/helpers.sql | 29 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 macros/helpers/helpers.sql diff --git a/macros/generate_model_yaml.sql b/macros/generate_model_yaml.sql index 81809f8..12d55ed 100644 --- a/macros/generate_model_yaml.sql +++ b/macros/generate_model_yaml.sql @@ -1,6 +1,7 @@ -{% macro generate_model_yaml(model_name) %} +{% macro generate_model_yaml(model_name, upstream_descriptions=False) %} {% set model_yaml=[] %} +{% set column_desc_dict = codegen.build_dict_column_descriptions(model_name) if upstream_descriptions else {} %} {% do model_yaml.append('version: 2') %} {% do model_yaml.append('') %} @@ -14,7 +15,7 @@ {% for column in columns %} {% do model_yaml.append(' - name: ' ~ column.name | lower ) %} - {% do model_yaml.append(' description: ""') %} + {% do model_yaml.append(' description: "' ~ column_desc_dict.get(column.name | lower,'') ~ '"') %} {% do model_yaml.append('') %} {% endfor %} diff --git a/macros/helpers/helpers.sql b/macros/helpers/helpers.sql new file mode 100644 index 0000000..bfdb1ba --- /dev/null +++ b/macros/helpers/helpers.sql @@ -0,0 +1,29 @@ +{# retrieve directly upstream models from a given model #} +{% macro get_model_dependencies(model_name) %} + {% for node in graph.nodes.values() | selectattr('name', "equalto", model_name) %} + {{ return(node.depends_on.nodes) }} + {% endfor %} +{% endmacro %} + + +{# add to an input dictionnary entries containing all the column descriptions of a given model #} +{% macro add_model_column_descriptions_to_dict(model_name,dict_with_descriptions={}) %} + {% for node in graph.nodes.values() | selectattr('name', "equalto", model_name) %} + {% for col_name, col_values in node.columns.items() %} + {% do dict_with_descriptions.update( {col_name: col_values.description} ) %} + {% endfor %} + {% endfor %} + {{ return(dict_with_descriptions) }} +{% endmacro %} + +{# build a global dictionnary looping through all the direct parents models #} +{# if the same column name exists with different descriptions it is overwritten at each loop #} +{% macro build_dict_column_descriptions(model_name) %} + {% if execute %} + {% set glob_dict = {} %} + {% for full_model in codegen.get_model_dependencies(model_name) %} + {% do codegen.add_model_column_descriptions_to_dict(full_model.split('.')[-1],glob_dict) %} + {% endfor %} + {{ return(glob_dict) }} + {% endif %} +{% endmacro %} \ No newline at end of file From 7a19e872857f90b605891c6cc72e99298dda14db Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Wed, 30 Mar 2022 14:00:47 +0200 Subject: [PATCH 3/7] Update README with new parameter for generate_model_yaml --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d53f75d..0c96c22 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,7 @@ schema.yml file. ### Arguments: * `model_name` (required): The model you wish to generate YAML for. +* `upstream_descriptions` (optional, default=False): Whether you want to include descriptions for identical column names from upstream models. ### Usage: 1. Create a model. From 3ab1082ed8c156c388b9b9d83cf97364e6c942af Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Wed, 30 Mar 2022 14:01:12 +0200 Subject: [PATCH 4/7] Add integration test for the new feature --- integration_tests/models/child_model.sql | 3 +++ integration_tests/models/model_data_a.sql | 3 +++ integration_tests/models/schema.yml | 7 ++++++ ...erate_model_yaml_upstream_descriptions.sql | 22 +++++++++++++++++++ 4 files changed, 35 insertions(+) create mode 100644 integration_tests/models/child_model.sql create mode 100644 integration_tests/models/model_data_a.sql create mode 100644 integration_tests/models/schema.yml create mode 100644 integration_tests/tests/test_generate_model_yaml_upstream_descriptions.sql diff --git a/integration_tests/models/child_model.sql b/integration_tests/models/child_model.sql new file mode 100644 index 0000000..2492ce0 --- /dev/null +++ b/integration_tests/models/child_model.sql @@ -0,0 +1,3 @@ +select + * +from {{ ref('model_data_a') }} diff --git a/integration_tests/models/model_data_a.sql b/integration_tests/models/model_data_a.sql new file mode 100644 index 0000000..87b1545 --- /dev/null +++ b/integration_tests/models/model_data_a.sql @@ -0,0 +1,3 @@ +select + * +from {{ ref('data__a_relation') }} diff --git a/integration_tests/models/schema.yml b/integration_tests/models/schema.yml new file mode 100644 index 0000000..aa3f980 --- /dev/null +++ b/integration_tests/models/schema.yml @@ -0,0 +1,7 @@ +version: 2 + +models: + - name: model_data_a + columns: + - name: col_a + description: description column a \ No newline at end of file diff --git a/integration_tests/tests/test_generate_model_yaml_upstream_descriptions.sql b/integration_tests/tests/test_generate_model_yaml_upstream_descriptions.sql new file mode 100644 index 0000000..3bc9804 --- /dev/null +++ b/integration_tests/tests/test_generate_model_yaml_upstream_descriptions.sql @@ -0,0 +1,22 @@ +{% set actual_model_yaml = codegen.generate_model_yaml( + model_name='child_model', + upstream_descriptions=True + ) +%} + +{% set expected_model_yaml %} +version: 2 + +models: + - name: child_model + description: "" + columns: + - name: col_a + description: "description column a" + + - name: col_b + description: "" + +{% endset %} + +{{ assert_equal (actual_model_yaml | trim, expected_model_yaml | trim) }} From 06995589fcd4f66def1250ec420b503c33bb280c Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Fri, 13 May 2022 16:18:16 +0200 Subject: [PATCH 5/7] Update CHANGELOG with upstream_descriptions flag --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53f6c97..7abd809 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +# Unreleased +## New features +- Add support for importing descriptions from columns with the same names in upstream models. It is available by setting the parameter `upstream_descriptions` to `True` in `generate_model_yaml` ([#61](https://github.com/dbt-labs/dbt-codegen/pull/61)) + # dbt-codegen v0.6.0 This release creates breaking changes to the `generate_source.sql` macro. From e6a14aa5c45ef430868949f758ffd0ffcb115ead Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Fri, 13 May 2022 16:18:27 +0200 Subject: [PATCH 6/7] Fix typos --- macros/helpers/helpers.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/macros/helpers/helpers.sql b/macros/helpers/helpers.sql index bfdb1ba..262c7d9 100644 --- a/macros/helpers/helpers.sql +++ b/macros/helpers/helpers.sql @@ -1,4 +1,4 @@ -{# retrieve directly upstream models from a given model #} +{# retrieve models directly upstream from a given model #} {% macro get_model_dependencies(model_name) %} {% for node in graph.nodes.values() | selectattr('name', "equalto", model_name) %} {{ return(node.depends_on.nodes) }} @@ -6,7 +6,7 @@ {% endmacro %} -{# add to an input dictionnary entries containing all the column descriptions of a given model #} +{# add to an input dictionary entries containing all the column descriptions of a given model #} {% macro add_model_column_descriptions_to_dict(model_name,dict_with_descriptions={}) %} {% for node in graph.nodes.values() | selectattr('name', "equalto", model_name) %} {% for col_name, col_values in node.columns.items() %} @@ -16,7 +16,7 @@ {{ return(dict_with_descriptions) }} {% endmacro %} -{# build a global dictionnary looping through all the direct parents models #} +{# build a global dictionary looping through all the direct parents models #} {# if the same column name exists with different descriptions it is overwritten at each loop #} {% macro build_dict_column_descriptions(model_name) %} {% if execute %} From 9c279f3709a0aaa33ff323fb2d96061b5768f5e1 Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Mon, 16 May 2022 18:54:52 +0200 Subject: [PATCH 7/7] Update CI steps to run the models before testing --- .circleci/config.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index f4034f6..8b89a03 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -44,6 +44,7 @@ jobs: dbt --warn-error deps --target postgres dbt --warn-error run-operation create_source_table --target postgres dbt --warn-error seed --target postgres --full-refresh + dbt --warn-error run --target postgres dbt --warn-error test --target postgres - run: @@ -55,6 +56,7 @@ jobs: dbt --warn-error deps --target redshift dbt --warn-error run-operation create_source_table --target redshift dbt --warn-error seed --target redshift --full-refresh + dbt --warn-error run --target redshift dbt --warn-error test --target redshift - run: @@ -66,6 +68,7 @@ jobs: dbt --warn-error deps --target snowflake dbt --warn-error run-operation create_source_table --target snowflake dbt --warn-error seed --target snowflake --full-refresh + dbt --warn-error run --target snowflake dbt --warn-error test --target snowflake - run: @@ -80,6 +83,7 @@ jobs: dbt --warn-error deps --target bigquery dbt --warn-error run-operation create_source_table --target bigquery dbt --warn-error seed --target bigquery --full-refresh + dbt --warn-error run --target bigquery dbt --warn-error test --target bigquery