diff --git a/CHANGELOG.md b/CHANGELOG.md index 46e03eb2c..9bc34c805 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * Add new schema test, `sequential_values` ([#318](https://github.com/fishtown-analytics/dbt-utils/pull/318), inspired by [@hundredwatt](https://github.com/hundredwatt)) * Support `quarter` in the `postgres__last_day` macro ([#333](https://github.com/fishtown-analytics/dbt-utils/pull/333/files), [@seunghanhong](https://github.com/seunghanhong)) * Add new argument, `unit`, to `haversine_distance` [#340](https://github.com/fishtown-analytics/dbt-utils/pull/340) [@bastienboutonnet](https://github.com/bastienboutonnet) +* Add new schema test, `fewer_rows_than` (code originally in [#221](https://github.com/fishtown-analytics/dbt-utils/pull/230/) from [@dmarts](https://github.com/dmarts), merged via [#343](https://github.com/fishtown-analytics/dbt-utils/pull/343)) ## Fixes diff --git a/README.md b/README.md index 744e96b67..1ac2b7724 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,20 @@ models: ``` +#### fewer_rows_than ([source](macros/schema_tests/fewer_rows_than.sql)) +This schema test asserts that this model has fewer rows than the referenced model. + +Usage: +```yaml +version: 2 + +models: + - name: model_name + tests: + - dbt_utils.fewer_rows_than: + compare_model: ref('other_table_name') +``` + #### equality ([source](macros/schema_tests/equality.sql)) This schema test asserts the equality of two relations. Optionally specify a subset of columns to compare. 
diff --git a/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_1.csv b/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_1.csv
new file mode 100644
index 000000000..ba0e5ec60
--- /dev/null
+++ b/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_1.csv
@@ -0,0 +1,4 @@
+field
+1
+2
+3
diff --git a/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_2.csv b/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_2.csv
new file mode 100644
index 000000000..eb0f035ae
--- /dev/null
+++ b/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_2.csv
@@ -0,0 +1,5 @@
+field
+1
+2
+3
+4
diff --git a/integration_tests/models/schema_tests/test_fewer_rows_than.sql b/integration_tests/models/schema_tests/test_fewer_rows_than.sql
new file mode 100644
index 000000000..c2ad4cbda
--- /dev/null
+++ b/integration_tests/models/schema_tests/test_fewer_rows_than.sql
@@ -0,0 +1,9 @@
+with data as (
+
+    select * from {{ ref('data_test_fewer_rows_than_table_1') }}
+
+)
+
+select
+    field
+from data
\ No newline at end of file
diff --git a/macros/schema_tests/fewer_rows_than.sql b/macros/schema_tests/fewer_rows_than.sql
new file mode 100644
index 000000000..328c0ea0a
--- /dev/null
+++ b/macros/schema_tests/fewer_rows_than.sql
@@ -0,0 +1,51 @@
+{#
+    Schema test asserting that `model` has strictly fewer rows than the relation
+    passed as the `compare_model` test argument (the `arg` kwarg is the fallback).
+
+    Returns one row with `row_count_delta`: 0 when the test passes, a positive
+    number when `model` has as many rows as, or more rows than, the comparison.
+#}
+{% macro test_fewer_rows_than(model) %}
+  {# forward all test kwargs (e.g. compare_model) to the dispatched implementation #}
+  {{ return(adapter.dispatch('test_fewer_rows_than', packages = dbt_utils._get_utils_namespaces())(model, **kwargs)) }}
+{% endmacro %}
+
+{% macro default__test_fewer_rows_than(model) %}
+
+{% set compare_model = kwargs.get('compare_model', kwargs.get('arg')) %}
+
+with a as (
+
+    select count(*) as count_ourmodel from {{ model }}
+
+),
+b as (
+
+    select count(*) as count_comparisonmodel from {{ compare_model }}
+
+),
+counts as (
+
+    select
+        (select count_ourmodel from a) as count_model_with_fewer_rows,
+        (select count_comparisonmodel from b) as count_model_with_more_rows
+
+),
+final as (
+
+    select
+        case
+            -- fail the test if we have more rows than the reference model and return the row count delta
+            when count_model_with_fewer_rows > count_model_with_more_rows then (count_model_with_fewer_rows - count_model_with_more_rows)
+            -- fail the test if both models have the same number of rows
+            when count_model_with_fewer_rows = count_model_with_more_rows then 1
+            -- pass the test (return 0) when our model has strictly fewer rows
+            else 0
+        end as row_count_delta
+    from counts
+
+)
+
+select row_count_delta from final
+
+{% endmacro %}