dbt-labs · joellabes · Aug 26, 2022 · Aug 8, 2022 · Aug 8, 2022 · Aug 8, 2022
diff --git a/README.md b/README.md
@@ -92,6 +92,8 @@ models:
 
 ```
 
+This test supports the `group_by_columns` parameter; see [Grouping in tests](#grouping-in-tests) for details.
+
 #### fewer_rows_than ([source](macros/generic_tests/fewer_rows_than.sql))
 Asserts that the respective model has fewer rows than the model being compared.
 
@@ -106,6 +108,8 @@ models:
           compare_model: ref('other_table_name')
 ```
 
+This test supports the `group_by_columns` parameter; see [Grouping in tests](#grouping-in-tests) for details.
+
 #### equality ([source](macros/generic_tests/equality.sql))
 Asserts the equality of two relations. Optionally specify a subset of columns to compare.
 
@@ -191,6 +195,7 @@ models:
           field: created_at
           interval: 1
 ```
+This test supports the `group_by_columns` parameter; see [Grouping in tests](#grouping-in-tests) for details.
 
 #### at_least_one ([source](macros/generic_tests/at_least_one.sql))
 Asserts that a column has at least one value.
@@ -207,6 +212,8 @@ models:
           - dbt_utils.at_least_one
 ```
 
+This test supports the `group_by_columns` parameter; see [Grouping in tests](#grouping-in-tests) for details.
+
 #### not_constant ([source](macros/generic_tests/not_constant.sql))
 Asserts that a column does not have the same value in all rows.
 
@@ -222,6 +229,8 @@ models:
           - dbt_utils.not_constant
 ```
 
+This test supports the `group_by_columns` parameter; see [Grouping in tests](#grouping-in-tests) for details.
+
 #### cardinality_equality ([source](macros/generic_tests/cardinality_equality.sql))
 Asserts that values in a given column have exactly the same cardinality as values from a different column in a different model.
 
@@ -291,6 +300,8 @@ models:
               at_least: 0.95
 ```
 
+This test supports the `group_by_columns` parameter; see [Grouping in tests](#grouping-in-tests) for details.
+
 #### not_accepted_values ([source](macros/generic_tests/not_accepted_values.sql))
 Asserts that there are no rows that match the given values.
 
@@ -470,6 +481,8 @@ seeds:
 * `interval` (default=1): The gap between two sequential values
 * `datepart` (default=None): Used when the gaps are a unit of time. If omitted, the test will check for a numeric gap.
 
+This test supports the `group_by_columns` parameter; see [Grouping in tests](#grouping-in-tests) for details.
+
 #### unique_combination_of_columns ([source](macros/generic_tests/unique_combination_of_columns.sql))
 Asserts that the combination of columns is unique. For example, the
 combination of month and product is unique, however neither column is unique
@@ -546,6 +559,34 @@ models:
 
 ----
 
+#### Grouping in tests
+
+Certain tests support the optional `group_by_columns` argument to provide more granularity in performing tests. This can be useful when:
+
+- Some data checks can only be expressed within a group (e.g. ID values should be unique within a group but can be repeated between groups)
+- Some data checks are more precise when done by group (e.g. not only should table rowcounts be equal but the counts within each group should be equal)
+
+This feature is currently available for the following tests:
+
+- equal_rowcount()
+- fewer_rows_than()
+- recency()
+- at_least_one()
+- not_constant()
+- sequential_values()
+- non_null_proportion()
+
+To use this feature, pass the names of the grouping columns as a list. For example, to test for at least one valid value within each group, the `group_by_columns` argument could be used as follows:
+
+```
+  - name: data_test_at_least_one
+    columns:
+      - name: field
+        tests:
+          - dbt_utils.at_least_one:
+              group_by_columns: ['group_var']
+```
+
 ## Macros
 
 ### Introspective macros

diff --git a/macros/generic_tests/equal_rowcount.sql b/macros/generic_tests/equal_rowcount.sql
@@ -13,21 +13,26 @@
 {% endif %}
 
 {% if group_by_columns|length() > 0 %}
-  {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %}
+  {% set select_gb_cols = group_by_columns|join(', ') + ', ' %}
   {% set join_gb_cols %}
     {% for c in group_by_columns %}
       and a.{{c}} = b.{{c}}
     {% endfor %}
   {% endset %}
+  {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}
 {% endif %}
-{% set group_by_columns = ['id'] + group_by_columns %}
+
+{#-- We must add a fake join key in case additional grouping variables are not provided --#}
+{#-- Redshift does not allow for dynamically created join conditions (e.g. full join on 1 = 1) --#}
+{#-- The same logic is used in fewer_rows_than. In case of changes, maintain consistent logic --#}
+{% set group_by_columns = ['id_dbtutils_test_equal_rowcount'] + group_by_columns %}
 {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}
 
 with a as (
 
     select 
       {{select_gb_cols}}
-      1 as id, 
+      1 as id_dbtutils_test_equal_rowcount,
       count(*) as count_a 
     from {{ model }}
     {{groupby_gb_cols}}
@@ -38,7 +43,7 @@ b as (
 
     select 
       {{select_gb_cols}}
-      1 as id, 
+      1 as id_dbtutils_test_equal_rowcount,
       count(*) as count_b 
     from {{ compare_model }}
     {{groupby_gb_cols}}
@@ -60,7 +65,7 @@ final as (
     from a
     full join b
     on
-    a.id = b.id 
+    a.id_dbtutils_test_equal_rowcount = b.id_dbtutils_test_equal_rowcount
     {{join_gb_cols}}
 
 

diff --git a/macros/generic_tests/fewer_rows_than.sql b/macros/generic_tests/fewer_rows_than.sql
@@ -13,15 +13,21 @@
       and a.{{c}} = b.{{c}}
     {% endfor %}
   {% endset %}
+  {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}
 {% endif %}
-{% set group_by_columns = ['id'] + group_by_columns %}
+
+{#-- We must add a fake join key in case additional grouping variables are not provided --#}
+{#-- Redshift does not allow for dynamically created join conditions (e.g. full join on 1 = 1) --#}
+{#-- The same logic is used in equal_rowcount. In case of changes, maintain consistent logic --#}
+{% set group_by_columns = ['id_dbtutils_test_fewer_rows_than'] + group_by_columns %}
 {% set groupby_gb_cols = 'group by ' + group_by_columns|join(',') %}
 
+
 with a as (
 
     select 
       {{select_gb_cols}}
-      1 as id,
+      1 as id_dbtutils_test_fewer_rows_than,
       count(*) as count_our_model 
     from {{ model }}
     {{ groupby_gb_cols }}
@@ -31,7 +37,7 @@ b as (
 
     select 
       {{select_gb_cols}}
-      1 as id,
+      1 as id_dbtutils_test_fewer_rows_than,
       count(*) as count_comparison_model 
     from {{ compare_model }}
     {{ groupby_gb_cols }}
@@ -49,8 +55,8 @@ counts as (
         count_our_model,
         count_comparison_model
     from a
-    full join b on
-    a.id = b.id 
+    full join b on 
+    a.id_dbtutils_test_fewer_rows_than = b.id_dbtutils_test_fewer_rows_than
     {{ join_gb_cols }}
 
 ),

diff --git a/macros/generic_tests/sequential_values.sql b/macros/generic_tests/sequential_values.sql
@@ -9,7 +9,7 @@
 {% set previous_column_name = "previous_" ~ dbt_utils.slugify(column_name) %}
 
 {% if group_by_columns|length() > 0 %}
-  {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %}
+  {% set select_gb_cols = group_by_columns|join(',') + ', ' %}
   {% set partition_gb_cols = 'partition by ' + group_by_columns|join(',') %}
 {% endif %}