From b3d2e12582ae33f0587ea7b63e4cc51b8b45f7d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Guiselin?= <9251353+Jrmyy@users.noreply.github.com>
Date: Mon, 20 May 2024 14:23:07 +0200
Subject: [PATCH] fix: test_not_null_proportion support on athena (#19)

---
 .../generic_tests/not_null_proportion.sql     | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 macros/dbt_utils/generic_tests/not_null_proportion.sql

diff --git a/macros/dbt_utils/generic_tests/not_null_proportion.sql b/macros/dbt_utils/generic_tests/not_null_proportion.sql
new file mode 100644
index 0000000..3f9f459
--- /dev/null
+++ b/macros/dbt_utils/generic_tests/not_null_proportion.sql
@@ -0,0 +1,30 @@
+{% macro athena__test_not_null_proportion(model, group_by_columns) %}
+{# Athena variant of the not_null_proportion test. Selects each group (or the whole model when no grouping
+   columns are given) whose share of non-null values in the tested column is below at_least or above at_most.
+   column_name, at_least and at_most come from kwargs, each falling back to kwargs['arg']; at_most then to 1. #}
+{% set target_column = kwargs.get('column_name', kwargs.get('arg')) %}
+{% set lower_bound = kwargs.get('at_least', kwargs.get('arg')) %}
+{% set upper_bound = kwargs.get('at_most', kwargs.get('arg', 1)) %}
+{% set has_groups = group_by_columns | length > 0 %}
+{% set group_select_prefix = ((group_by_columns | join(' ,')) ~ ', ') if has_groups else '' %}
+{% set group_by_clause = ('group by ' ~ (group_by_columns | join(','))) if has_groups else '' %}
+{# The row count is cast to double, presumably so the ratio is not truncated by integer division. #}
+with validation as (
+  select
+    {{ group_select_prefix }}
+    sum(case when {{ target_column }} is null then 0 else 1 end) / cast(count(*) as double) as not_null_proportion
+  from {{ model }}
+  {{ group_by_clause }}
+),
+validation_errors as (
+  select
+    {{ group_select_prefix }}
+    not_null_proportion
+  from validation
+  where not_null_proportion < {{ lower_bound }} or not_null_proportion > {{ upper_bound }}
+)
+select
+  *
+from validation_errors
+
+{% endmacro %}