From a45a1d012b40f678a659d417eed50f76fcd4021d Mon Sep 17 00:00:00 2001 From: William Ridgeway <10358980+wrridgeway@users.noreply.github.com> Date: Tue, 21 May 2024 17:09:07 -0500 Subject: [PATCH] Add `expression_is_false` dbt test macro (#464) * Add new dbt test * Use new test for nonlivable chars test * Update dbt generic test readme --- .../schema/default.vw_pin_condo_char.yml | 6 ++--- dbt/tests/generic/README.md | 18 +++++++++++-- .../generic/test_expression_is_false.sql | 27 +++++++++++++++++++ 3 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 dbt/tests/generic/test_expression_is_false.sql diff --git a/dbt/models/default/schema/default.vw_pin_condo_char.yml b/dbt/models/default/schema/default.vw_pin_condo_char.yml index 4a455db34..36be5e2be 100644 --- a/dbt/models/default/schema/default.vw_pin_condo_char.yml +++ b/dbt/models/default/schema/default.vw_pin_condo_char.yml @@ -98,11 +98,11 @@ models: - not_null: name: default_vw_pin_condo_char_bldg_is_mixed_use_not_null column_name: bldg_is_mixed_use - - expression_is_true: + - expression_is_false: name: default_vw_pin_condo_char_nonlivable_no_chars expression: | - (NOT is_common_area AND NOT is_parking_space) - OR (char_bedrooms IS NULL AND char_full_baths IS NULL) + (is_common_area OR is_parking_space) + AND (char_bedrooms IS NOT NULL OR char_full_baths IS NOT NULL) config: error_if: ">48080" # as of 2024-05-21 # TODO: Non-liveable unit heuristics diff --git a/dbt/tests/generic/README.md b/dbt/tests/generic/README.md index c3bf4bf15..6a7f8dfb2 100644 --- a/dbt/tests/generic/README.md +++ b/dbt/tests/generic/README.md @@ -12,6 +12,7 @@ to define our test suite. 
- [`test_column_length`](#test_column_length) - [`test_columns_match`](#test_columns_match) - [`test_count_is_consistent`](#test_count_is_consistent) +- [`test_expression_is_false`](#test_expression_is_false) - [`test_expression_is_true`](#test_expression_is_true) - [`test_is_null`](#test_is_null) - [`test_no_extra_whitespace`](#test_no_extra_whitespace) @@ -110,6 +111,19 @@ the grouping column and a column called `count` with the count of rows for that * `group_column` (required string): The column to use for grouping. +### `test_expression_is_false` + +Asserts that a valid SQL expression is false for all rows. In other words, filters for +rows where a given `expression` is true. Often useful for idiosyncratic comparisons +across columns that are not easily generalized into generic tests. + +**Parameters**: + +* `expression` (required string): A valid SQL expression to apply to the column or table. +* `additional_select_columns` (optional list of strings): Additional columns to select for + failure output. The column the test is defined on will always be selected regardless + of this value. + ### `test_expression_is_true` Asserts that a valid SQL expression is true for all rows. In other words, filters for @@ -221,7 +235,7 @@ using `parid` and `taxyr`; as a result, it filters out mixed-use parcels as well `taxyr`. * `alias` (optional string): The name of the column to use for output. Necessary because aggregation functions as represented by `agg_func` require aliases in SQL. Defaults to - `_`. + `_`. ### `test_row_count` @@ -275,7 +289,7 @@ the test. columns specified in the `group_by` parameter. * `alias` (optional string): The name of the column to use for output. Necessary because aggregation functions as represented by `agg_func` require aliases in SQL. Defaults to - `_`. + `_`. 
### `test_sequential_values`
 
diff --git a/dbt/tests/generic/test_expression_is_false.sql b/dbt/tests/generic/test_expression_is_false.sql
new file mode 100644
index 000000000..858d0c65d
--- /dev/null
+++ b/dbt/tests/generic/test_expression_is_false.sql
@@ -0,0 +1,41 @@
+-- Asserts that `expression` is false for every row of `model` by returning
+-- the rows where it evaluates to true; each returned row is a test failure.
+--
+-- Adapted from our own `expression_is_true`.
+--
+-- Arguments:
+--   * `column_name` (optional): when the test is attached to a column, the
+--     name is selected for failing rows and prepended to `expression` in
+--     the filter.
+--   * `expression` (required): SQL fragment used as the filter predicate.
+--   * `additional_select_columns` (optional): array of extra columns to
+--     select for failing rows in addition to `column_name`.
+--
+-- If neither `column_name` nor `additional_select_columns` yields a column,
+-- falls back to selecting `1 AS fail` for failing rows.
+{% test expression_is_false(
+    model, column_name, expression, additional_select_columns=[]
+) %}
+    {%- set select_columns_csv = format_additional_select_columns(
+        additional_select_columns
+    ) -%}
+    {%- if column_name -%}
+        {%- set columns_csv = column_name -%}
+        {%- if select_columns_csv -%}
+            {%- set columns_csv = columns_csv ~ ", " ~ select_columns_csv -%}
+        {%- endif -%}
+    {%- elif select_columns_csv -%} {%- set columns_csv = select_columns_csv -%}
+    {%- else -%} {%- set columns_csv = "1 AS fail" -%}
+    {%- endif -%}
+
+    {#- Prefix the predicate with `column_name` only when one was supplied, so
+        a missing or null `column_name` never leaks into the rendered SQL. -#}
+    {%- if column_name -%}
+        {%- set predicate = column_name ~ " " ~ expression -%}
+    {%- else -%} {%- set predicate = expression -%}
+    {%- endif -%}
+
+    select {{ columns_csv }}
+    from {{ model }}
+    where ({{ predicate }})
+{% endtest %}