diff --git a/.buildkite/scripts/run_models.sh b/.buildkite/scripts/run_models.sh index 5192f94..e90265c 100644 --- a/.buildkite/scripts/run_models.sh +++ b/.buildkite/scripts/run_models.sh @@ -18,8 +18,10 @@ cd integration_tests dbt deps dbt seed --target "$db" --full-refresh dbt run --target "$db" --full-refresh +dbt run --target "$db" dbt test --target "$db" dbt run --vars '{zendesk__unstructured_enabled: true, using_schedules: false, using_domain_names: false, using_user_tags: false, using_ticket_form_history: false, using_organization_tags: false}' --target "$db" --full-refresh +dbt run --vars '{zendesk__unstructured_enabled: true, using_schedules: false, using_domain_names: false, using_user_tags: false, using_ticket_form_history: false, using_organization_tags: false}' --target "$db" dbt test --target "$db" # dbt run-operation fivetran_utils.drop_schemas_automation --target "$db" \ No newline at end of file diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 66c3d09..8cd5429 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -7,6 +7,8 @@ profile: 'integration_tests' vars: zendesk_schema: zendesk_integration_tests_50 + using_schedule_histories: true + using_schedules: true zendesk_source: zendesk_organization_identifier: "organization_data" zendesk_schedule_identifier: "schedule_data" @@ -45,8 +47,6 @@ models: seeds: +quote_columns: "{{ true if target.type == 'redshift' else false }}" zendesk_integration_tests: - +column_types: - _fivetran_synced: timestamp +column_types: _fivetran_synced: timestamp group_data: diff --git a/macros/clean_schedule.sql b/macros/clean_schedule.sql index 3445f72..f382a04 100644 --- a/macros/clean_schedule.sql +++ b/macros/clean_schedule.sql @@ -3,5 +3,5 @@ {%- endmacro %} {% macro default__clean_schedule(column_name) -%} - replace(replace(replace(replace({{ column_name }}, '{', ''), '}', ''), '"', ''), ' ', '') + replace(replace(replace(replace(cast({{ 
column_name }} as {{ dbt.type_string() }}), '{', ''), '}', ''), '"', ''), ' ', '') {%- endmacro %} \ No newline at end of file diff --git a/macros/regex_extract.sql b/macros/regex_extract.sql index bb7f83a..e367f72 100644 --- a/macros/regex_extract.sql +++ b/macros/regex_extract.sql @@ -1,42 +1,45 @@ -{% macro regex_extract(string, regex) -%} +{% macro regex_extract(string, day) -%} -{{ adapter.dispatch('regex_extract', 'zendesk') (string, regex) }} +{{ adapter.dispatch('regex_extract', 'zendesk') (string, day) }} {%- endmacro %} -{% macro default__regex_extract(string, regex) %} - +{% macro default__regex_extract(string, day) %} + {% set regex = "'.*?" ~ day ~ ".*?({.*?})'" %} regexp_extract({{ string }}, {{ regex }} ) {%- endmacro %} -{% macro bigquery__regex_extract(string, regex) %} - +{% macro bigquery__regex_extract(string, day) %} + {% set regex = "'.*?" ~ day ~ ".*?({.*?})'" %} regexp_extract({{ string }}, {{ regex }} ) {%- endmacro %} -{% macro snowflake__regex_extract(string, regex) %} +{% macro snowflake__regex_extract(string, day) %} + {% set regex = "'.*?" ~ day ~ ".*?({.*?})'" %} REGEXP_SUBSTR({{ string }}, {{ regex }}, 1, 1, 'e', 1 ) {%- endmacro %} -{% macro postgres__regex_extract(string, regex) %} +{% macro postgres__regex_extract(string, day) %} + {% set regex = "'.*?" 
~ day ~ ".*?({.*?})'" %} (regexp_matches({{ string }}, {{ regex }}))[1] {%- endmacro %} -{% macro redshift__regex_extract(string, regex) %} +{% macro redshift__regex_extract(string, day) %} + + {% set regex = '"' ~ day ~ '"' ~ ':\\\{([^\\\}]*)\\\}' -%} - {% set reformatted_regex = regex | replace(".*?", ".*") | replace("{", "\\\{") | replace("}", "\\\}") -%} - REGEXP_SUBSTR({{ string }}, {{ reformatted_regex }}, 1, 1, 'e') + '{' || REGEXP_SUBSTR({{ string }}, '{{ regex }}', 1, 1, 'e') || '}' {%- endmacro %} -{% macro spark__regex_extract(string, regex) %} - {% set reformatted_regex = regex | replace("{", "\\\{") | replace("}", "\\\}") -%} - regexp_extract({{ string }}, {{ reformatted_regex }}, 1) +{% macro spark__regex_extract(string, day) %} + {% set regex = "'.*?" ~ day ~ ".*?({.*?})'" | replace("{", "\\\{") | replace("}", "\\\}") %} + regexp_extract({{ string }}, {{ regex }}, 1) {%- endmacro %} \ No newline at end of file diff --git a/models/history/int_zendesk__schedule_history.sql b/models/history/int_zendesk__schedule_history.sql index 66e7a56..cf7a720 100644 --- a/models/history/int_zendesk__schedule_history.sql +++ b/models/history/int_zendesk__schedule_history.sql @@ -9,15 +9,16 @@ with audit_logs as ( where lower(change_description) like '%workweek changed from%' ), audit_logs_enhanced as ( - select + select schedule_id, row_number() over (partition by schedule_id order by created_at) as schedule_id_index, created_at, - replace(replace(replace(replace(change_description, + -- Clean up the change_description, sometimes has random html stuff in it + replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(change_description, 'workweek changed from', ''), '&quot;', '"'), 'amp;', ''), - '=>', ':') + '=>', ':'), ':mon:', '"mon":'), ':tue:', '"tue":'), ':wed:', '"wed":'), ':thu:', '"thu":'), ':fri:', '"fri":'), ':sat:', '"sat":'), ':sun:', '"sun":') as change_description_cleaned from audit_logs @@ -101,47 +102,74 @@ with audit_logs as 
( schedule_change, '{{ day }}' as day_of_week, cast('{{ day_number }}' as {{ dbt.type_int() }}) as day_of_week_number, - {{ zendesk.regex_extract('schedule_change', "'.*?" ~ day ~ ".*?({.*?})'") }} as day_of_week_schedule - from consolidate_actual_changes + {{ zendesk.regex_extract('schedule_change', day) }} as day_of_week_schedule + from consolidate_same_day_changes + {% if not loop.last %}union all{% endif %} {% endfor %} +{% if target.type == 'redshift' %} +-- using PartiQL syntax to work with redshift's SUPER types, which requires an extra CTE +), redshift_parse_schedule as ( + -- Redshift requires another CTE for unnesting + select + schedule_id, + valid_from, + valid_until, + schedule_change, + day_of_week, + day_of_week_number, + day_of_week_schedule, + json_parse('[' || replace(replace(day_of_week_schedule, ', ', ','), ',', '},{') || ']') as json_schedule + + from split_days + where day_of_week_schedule != '{}' + +), unnested_schedules as ( + select + schedule_id, + valid_from, + valid_until, + schedule_change, + day_of_week, + day_of_week_number, + -- go back to strings + cast(day_of_week_schedule as {{ dbt.type_string() }}) as day_of_week_schedule, + {{ clean_schedule('JSON_SERIALIZE(unnested_schedule)') }} as cleaned_unnested_schedule + + from redshift_parse_schedule as schedules, schedules.json_schedule as unnested_schedule + +{% else %} ), unnested_schedules as ( select split_days.*, -{%- if target.type == 'bigquery' %} + {%- if target.type == 'bigquery' %} {{ clean_schedule('unnested_schedule') }} as cleaned_unnested_schedule from split_days cross join unnest(json_extract_array('[' || replace(day_of_week_schedule, ',', '},{') || ']', '$')) as unnested_schedule -{%- elif target.type == 'snowflake' %} + {%- elif target.type == 'snowflake' %} unnested_schedule.key || ':' || unnested_schedule.value as cleaned_unnested_schedule from split_days cross join lateral flatten(input => parse_json(replace(replace(day_of_week_schedule, '\}\}', '\}'), '\{\{', 
'\{'))) as unnested_schedule -{%- elif target.type == 'postgres' %} + {%- elif target.type == 'postgres' %} {{ clean_schedule('unnested_schedule::text') }} as cleaned_unnested_schedule from split_days cross join lateral jsonb_array_elements(('[' || replace(day_of_week_schedule, ',', '},{') || ']')::jsonb) as unnested_schedule -{%- elif target.type in ('databricks', 'spark') %} + {%- elif target.type in ('databricks', 'spark') %} {{ clean_schedule('unnested_schedule') }} as cleaned_unnested_schedule from split_days lateral view explode(from_json(concat('[', replace(day_of_week_schedule, ',', '},{'), ']'), 'array<string>')) as unnested_schedule -{%- elif target.type == 'redshift' %} - {# json_parse('[' || replace(replace(day_of_week_schedule, '\}\}', '\}'), '\{\{', '\{') || ']') as json_schedule - from split_days #} - {# cross join lateral json_parse(replace(replace(day_of_week_schedule, '\}\}', '\}'), '\{\{', '\{')) as element #} - + {% else %} cast(null as {{ dbt.type_string() }}) as cleaned_unnested_schedule from split_days + {%- endif %} -{% else %} - cast(null as {{ dbt.type_string() }}) as cleaned_unnested_schedule - from split_days -{%- endif %} +{% endif %} ), split_times as ( select