Skip to content

Commit

Permalink
Merge pull request #11 from avohq/integration-tests-snowflake
Browse files Browse the repository at this point in the history
Integration tests snowflake
  • Loading branch information
bjornj12 authored Nov 29, 2021
2 parents 67ab356 + 6613d49 commit fa04d7c
Show file tree
Hide file tree
Showing 10 changed files with 187,708 additions and 187,506 deletions.
30 changes: 29 additions & 1 deletion .github/workflows/integration_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,32 @@ jobs:
run: |
dbt seed --target bigquery
dbt run --selector avo_audit_integration_tests --target bigquery
dbt test --selector avo_audit_integration_tests --target bigquery
dbt test --selector avo_audit_integration_tests --target bigquery
# This workflow contains a single job called "build"
integration_snowflake:
name: integration-snowflake
runs-on: ubuntu-latest
container: python:3.8-buster
env:
SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
SNOWFLAKE_ROLE: ${{ secrets.SNOWFLAKE_ROLE }}
SNOWFLAKE_DATABASE: ${{ secrets.SNOWFLAKE_DATABASE }}
SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }}
SNOWFLAKE_SCHEMA: ${{ secrets.SNOWFLAKE_SCHEMA }}
steps:
- uses: actions/checkout@v2
- name: Install dependencies
run: |
pip install dbt==${DBT_VERSION}
dbt deps
dbt --version
- name: Test database connection
run: |
dbt debug --target snowflake
- name: Run tests
run: |
dbt seed --target snowflake
dbt run --selector avo_audit_integration_tests --target snowflake
dbt test --selector avo_audit_integration_tests --target snowflake
187,446 changes: 93,723 additions & 93,723 deletions data/avo_audit_experiment_data.csv

Large diffs are not rendered by default.

187,446 changes: 93,723 additions & 93,723 deletions integration_tests/data/avo_audit_test_data.csv

Large diffs are not rendered by default.

13 changes: 11 additions & 2 deletions integration_tests/profiles.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
integration_tests:
target: bigquery
target: snowflake
outputs:
bigquery:
type: bigquery
Expand All @@ -20,4 +20,13 @@ integration_tests:
token_uri: "{{ env_var('BIGQUERY_TOKEN_URI') }}"
auth_provider_x509_cert_url: "{{ env_var('BIGQUERY_AUTH_PROVIDER_X509_CERT_URL') }}"
client_x509_cert_url: "{{ env_var('BIGQUERY_CLIENT_X509_CERT_URL') }}"

snowflake:
type: "snowflake"
account: "{{ env_var('SNOWFLAKE_ACCOUNT') }}"
user: "{{ env_var('SNOWFLAKE_USER') }}"
password: "{{ env_var('SNOWFLAKE_PASSWORD') }}"
role: "{{ env_var('SNOWFLAKE_ROLE') }}"
database: "{{ env_var('SNOWFLAKE_DATABASE')}}"
warehouse: "{{ env_var('SNOWFLAKE_WAREHOUSE') }}"
schema: "{{ env_var('SNOWFLAKE_SCHEMA') }}"
threads: 1
101 changes: 46 additions & 55 deletions macros/audit_event_volume.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,48 +13,48 @@
{%- set total_days = days_back + days_lag -%}
{% set threshold = 2.5 %}

with union_query as (
-- Big Union query that runs ties queries together for each day in the time period selected
-- And unions the days together

{% for i in range(total_days) %}

{%- set d = dbt_date.n_days_ago(i, end_date) -%}

with all_events_query as (
with generate_dates as (
{{ avo_audit.generate_dates_table(end_date, total_days) }}
),
all_events_query as (
-- Find all event/source combos for the date range given to ensure there will be no nulls for each day.

select {{event_name_column}}, {{event_source_column}}
from {{ volume_relation }}
where DATE({{event_date_column}}) >= {{ d }}
where DATE({{event_date_column}}) >= {{ dbt_date.n_days_ago(total_days, end_date) }}
and DATE({{event_date_column}}) <= {{end_date}}

), total_events_query as (
-- Count all events for each source on each day.

select count({{event_source_column}}) as total_source_events
from {{ volume_relation }}
where DATE({{event_date_column}}) = {{ d }}

), events_dates_combo as (
),
events_dates_combo as (
-- create an event/source/day combo for all event/source pairs from all_events_query
-- So all combos exist on each day in the following queries.

select
all_events.{{event_name_column}} as event_name,
all_events.{{event_source_column}} as source,
day
from all_events_query all_events,
UNNEST(GENERATE_DATE_ARRAY(
{{dbt_date.n_days_ago(total_days, end_date)}},
{{end_date}},
INTERVAL 1 day)) as day
group by
e.{{event_name_column}} as event_name,
e.{{event_source_column}} as source,
g.day as day
from all_events_query as e cross join generate_dates as g
group by
event_name,
source,
day

), all_event_dates as (
)
, union_query as (
-- Big Union query that ties queries together for each day in the time period selected
-- And unions the days together

{% for i in range(total_days) %}

{%- set d = dbt_date.n_days_ago(i, end_date) -%}

with total_events_query_{{i}} as (
-- Count all events for each source on each day.

select count({{event_source_column}}) as total_source_events
from {{ volume_relation }}
where DATE({{event_date_column}}) = {{ d }}

), all_event_dates_{{i}} as (
-- Makes event_name/source/day combo for all event_name/source combos in 'all_events_query'
-- Counts the number of event_name in the combo
-- adds total source events for that day with as 'total_source_events'
Expand All @@ -69,15 +69,15 @@ with union_query as (
combo.day as day,
count(volume.{{event_name_column}}) as event_count,
(
select total_source_events from total_events_query
select total_source_events from total_events_query_{{i}}
) as event_source_count
from events_dates_combo combo
left join {{volume_relation}} volume
on volume.{{event_name_column}} = combo.event_name
and volume.{{event_source_column}} = combo.source
and DATE(volume.{{event_date_column}}) = combo.day
where combo.day = {{d}}
and (select total_source_events from total_events_query) > 0
and (select total_source_events from total_events_query_{{i}}) > 0
group by
combo.day,
combo.event_name,
Expand All @@ -93,7 +93,7 @@ with union_query as (
event_count,
event_source_count,
ABS(event_count / event_source_count) * 100 as percentage
from all_event_dates
from all_event_dates_{{i}}
group by
event_name,
source,
Expand All @@ -107,22 +107,22 @@ with union_query as (
union all (
{% endif %}
{% endfor %}

),

avarage as (
daily_percentage as (
select event_name, source, day, percentage
from union_query
GROUP BY event_name, source, day, percentage
), avarage as (
-- Get the average and standard deviation of percentages over the time period for all event_name source combinations.
-- This is to be able to check each percentage whether it's out of its normal bounds.

select
event_name,
source,
AVG(p.percentage) as avg_percentage,
STDDEV(p.percentage) as std_percentage,
from
(
select event_name, source, day, percentage from union_query GROUP BY event_name, source, day, percentage
) as p
AVG(percentage) as avg_percentage,
STDDEV(percentage) as std_percentage
from daily_percentage
group by
event_name,
source
Expand Down Expand Up @@ -168,26 +168,17 @@ avarage as (
source,
MAX(avg_percentage) as avg_percentage,
MAX(std_percentage) as std_percentage,
ARRAY_AGG(day order by day ASC) as days,
ARRAY_AGG(event_count order by day ASC) as event_counts,
ARRAY_AGG(total_source_count order by day ASC) as total_events_on_source,
ARRAY_AGG(percentage order by day ASC) as percentages,
ARRAY_AGG(signal order by day ASC) as signals,
{{ avo_audit.array_agg_ordered('day', 'day', 'asc', 'days') }},
{{ avo_audit.array_agg_ordered('event_count', 'day', 'asc', 'event_counts') }},
{{ avo_audit.array_agg_ordered('total_source_count', 'day', 'asc', 'total_events_on_source') }},
{{ avo_audit.array_agg_ordered('percentage', 'day', 'asc', 'percentages') }},
{{ avo_audit.array_agg_ordered('signal', 'day', 'asc', 'signals') }}
from calculate_signal
group by
event_name,
source
)


-- Only return event_name/source combinations that have spiked or dropped for at least 1 day.
-- Disregard all combinations that were inside the norm for the whole time period to reduce noise.
select
*
from aggregate_by_day_asc
where
(select signal FROM UNNEST(signals) AS signal where signal = 1 GROUP BY signal) = 1
OR (select signal from UNNEST(signals) AS signal where signal = -1 GROUP BY signal) = -1
{{avo_audit.find_signals('aggregate_by_day_asc')}}


{% endmacro %}
23 changes: 23 additions & 0 deletions macros/cross_db_utils/array_agg_ordered.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{# Cross-database ordered array aggregation.
   Renders `ARRAY_AGG(column_name ... ) as new_name`, sorted by `sort_column`
   in direction `sort` ('asc'/'desc'), using the syntax of the active adapter.
   Dispatched via dbt's adapter.dispatch within the 'avo_audit' namespace. #}
{% macro array_agg_ordered(column_name, sort_column, sort, new_name) %}
{{ return(adapter.dispatch('array_agg_ordered', 'avo_audit')(column_name, sort_column, sort, new_name)) }}
{% endmacro %}


{# Default: inline ORDER BY inside ARRAY_AGG (BigQuery-style syntax). #}
{% macro default__array_agg_ordered(column_name, sort_column, sort, new_name) %}

ARRAY_AGG({{column_name}} order by {{sort_column}} {{sort}}) as {{new_name}}

{% endmacro %}


{# BigQuery: identical to the default; kept explicit for clarity. #}
{% macro bigquery__array_agg_ordered(column_name, sort_column, sort, new_name) %}

ARRAY_AGG({{column_name}} order by {{sort_column}} {{sort}}) as {{new_name}}

{% endmacro %}

{# Snowflake: ordering must be expressed with WITHIN GROUP, not inside ARRAY_AGG. #}
{% macro snowflake__array_agg_ordered(column_name, sort_column, sort, new_name) %}

ARRAY_AGG({{column_name}}) within group (order by {{sort_column}} {{sort}}) as {{new_name}}

{% endmacro %}
37 changes: 37 additions & 0 deletions macros/cross_db_utils/find_signals.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{# Cross-database filter: keep only rows of `relation` whose `signals` array
   contains at least one spike (1) or drop (-1) value.
   Dispatched via dbt's adapter.dispatch within the 'avo_audit' namespace. #}
{% macro find_signals(relation) %}
{{ return(adapter.dispatch('find_signals', 'avo_audit')(relation)) }}
{% endmacro %}


{# Default: scalar subqueries over UNNEST(signals) detect a 1 or -1 element.
   GROUP BY collapses duplicates so the subquery stays scalar. #}
{% macro default__find_signals(relation) %}

select
*
from {{relation}}
where
(select signal FROM UNNEST(signals) AS signal where signal = 1 GROUP BY signal) = 1
OR (select signal from UNNEST(signals) AS signal where signal = -1 GROUP BY signal) = -1

{% endmacro %}


{# BigQuery: same UNNEST-based check as the default implementation. #}
{% macro bigquery__find_signals(relation) %}

select
*
from {{relation}}
where
(select signal FROM UNNEST(signals) AS signal where signal = 1 GROUP BY signal) = 1
OR (select signal from UNNEST(signals) AS signal where signal = -1 GROUP BY signal) = -1

{% endmacro %}

{# Snowflake: LATERAL FLATTEN explodes the signals array.
   NOTE(review): FLATTEN emits one output row per matching array element, so a
   row with several non-zero signals is returned multiple times, and an extra
   `signal` column is appended — unlike the BigQuery/default variants, which
   return each input row at most once and unchanged. Confirm this difference
   is acceptable to downstream consumers.
   NOTE(review): referencing the select alias `signal` in WHERE relies on
   Snowflake's support for lateral column aliases — verify on target version. #}
{% macro snowflake__find_signals(relation) %}
select
*,
X.value::INTEGER as signal
from {{relation}}, LATERAL FLATTEN({{relation}}.signals) X
where signal = 1 OR signal = -1


{% endmacro %}
34 changes: 34 additions & 0 deletions macros/cross_db_utils/generate_dates_table.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{# Cross-database date-spine generator: renders a query producing one `day`
   column of consecutive dates ending at `end_date`.
   Dispatched via dbt's adapter.dispatch within the 'avo_audit' namespace. #}
{% macro generate_dates_table(end_date, total_days) %}
{{ return(adapter.dispatch('generate_dates_table', 'avo_audit')(end_date, total_days)) }}
{% endmacro %}


{# Default: BigQuery-style GENERATE_DATE_ARRAY.
   NOTE(review): the array is inclusive of both endpoints, so this yields
   total_days + 1 dates (end_date - total_days .. end_date), one more than
   the Snowflake variant below — confirm which count callers expect. #}
{% macro default__generate_dates_table(end_date, total_days) %}

select day from UNNEST(GENERATE_DATE_ARRAY(
{{dbt_date.n_days_ago(total_days, end_date)}},
{{end_date}},
INTERVAL 1 day)) as day

{% endmacro %}


{# BigQuery: same GENERATE_DATE_ARRAY implementation as the default. #}
{% macro bigquery__generate_dates_table(end_date, total_days) %}

select day from UNNEST(GENERATE_DATE_ARRAY(
{{dbt_date.n_days_ago(total_days, end_date)}},
{{end_date}},
INTERVAL 1 day)) as day

{% endmacro %}

{# Snowflake: GENERATOR emits total_days rows numbered 1..total_days; each row
   is offset 1 - n days from end_date, producing the descending range
   end_date .. end_date - total_days + 1.
   Fix: pass DATEADD a numeric offset directly instead of the original
   string-built values ('-' || row_number(), '+1'), which relied on implicit
   string-to-number coercion. 1 - n is algebraically identical to the old
   dateadd(-n, dateadd(+1, end_date)) nesting. #}
{% macro snowflake__generate_dates_table(end_date, total_days) %}
select
dateadd(
day,
1 - row_number() over (order by null),
{{end_date}}
) as day
from table (generator(rowcount => {{total_days}}))

{% endmacro %}
8 changes: 6 additions & 2 deletions models/example/avo_audit_volume_issues.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@


{{ config(materialized='table', sort='timestamp', dist='event') }}
{%- set endDate = dbt_date.n_days_ago(1) -%}
{% set end_date_script = modules.datetime.date(2021, 11, 03) %}
{% set today = modules.datetime.date.today() %}
{% set delta = today - end_date_script %}

{% set end_date = dbt_date.n_days_ago(delta.days) %}
{%- set days_back = 10 -%}
{%- set days_lag = 5 -%}
{%- set event_name_column = 'event' -%}
Expand All @@ -10,5 +14,5 @@


{{
audit_event_volume(ref('avo_audit_experiment_data'), endDate, days_back, days_lag, event_name_column, event_date_column, event_source_column)
audit_event_volume(ref('avo_audit_experiment_data'), end_date, days_back, days_lag, event_name_column, event_date_column, event_source_column)
}}
Loading

0 comments on commit fa04d7c

Please sign in to comment.