Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

spike for adding schedule history using audit_log #157

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .buildkite/scripts/run_models.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ dbt test --target "$db"
dbt run --vars '{zendesk__unstructured_enabled: true, using_schedules: false, using_domain_names: false, using_user_tags: false, using_ticket_form_history: false, using_organization_tags: false}' --target "$db" --full-refresh
dbt test --target "$db"

dbt run-operation fivetran_utils.drop_schemas_automation --target "$db"
# dbt run-operation fivetran_utils.drop_schemas_automation --target "$db"
1 change: 1 addition & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ vars:
user: "{{ ref('stg_zendesk__user') }}"
daylight_time: "{{ ref('stg_zendesk__daylight_time') }}"
time_zone: "{{ ref('stg_zendesk__time_zone') }}"
audit_log: "{{ ref('stg_zendesk__audit_log') }}"
using_schedules: true
using_domain_names: true
using_user_tags: true
Expand Down
1 change: 1 addition & 0 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ vars:
zendesk_organization_tag_identifier: "organization_tag_data"
zendesk_user_identifier: "user_data"
zendesk_user_tag_identifier: "user_tag_data"
zendesk_audit_log_identifier: "audit_log_data"

## Uncomment for docs generation
# zendesk__unstructured_enabled: True
Expand Down
6 changes: 6 additions & 0 deletions integration_tests/seeds/audit_log_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id,_fivetran_synced,action,actor_id,change_description,created_at,source_id,source_label,source_type
579796,2024-05-28 21:53:06.793000,update,37253,"Workweek changed from {:sun=>{""01:45""=>""02:45""}, :mon=>{""09:00""=>""20:00""}, :tue=>{""09:00""=>""20:00""}, :wed=>{""08:00""=>""20:00""}, :thu=>{""08:00""=>""20:00""}, :fri=>{""08:00""=>""20:00""}} to {:sun=>{""03:00""=>""04:00""}, :mon=>{""08:00""=>""20:00""}, :tue=>{""08:00""=>""20:00""}, :wed=>{""07:15""=>""20:00""}, :thu=>{""07:15""=>""20:00""}, :fri=>{""07:15""=>""20:00""}}",2024-05-28 21:51:37.000000,18542,Workweek: Central US Schedule,zendesk/business_hours/workweek
2679952,2024-05-28 16:18:58.471000,update,37253,"Workweek changed from {:thu=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}, :mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :wed=>{""09:00""=>""17:00""}} to {:mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :wed=>{""09:00""=>""17:00""}, :thu=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}}",2024-05-21 11:20:29.000000,267996,Workweek: New schedule here,zendesk/business_hours/workweek
293556,2024-05-28 16:18:58.471000,update,37253,"Workweek changed from {} to {:mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :wed=>{""09:00""=>""17:00""}, :thu=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}}",2024-05-21 11:20:28.000000,267996,Workweek: New schedule here,zendesk/business_hours/workweek
4441364,2024-05-28 16:18:58.471000,update,37253,"Workweek changed from {:wed=>{""09:00""=>""17:00""}, :thu=>{""09:00""=>""17:00""}, :mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}} to {:mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :wed=>{""09:00""=>""17:00""}, :thu=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}}",2024-05-21 11:20:10.000000,267996,Workweek: New schedule 2,zendesk/business_hours/workweek
70900,2024-05-28 16:18:58.471000,update,37253,"Workweek changed from {} to {:mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :wed=>{""09:00""=>""17:00""}, :thu=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}}",2024-05-21 11:20:09.000000,267996,Workweek: New schedule 2,zendesk/business_hours/workweek
41 changes: 41 additions & 0 deletions macros/json_parse_nonscalar.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{#
    json_parse_nonscalar(string, string_path)

    Cross-warehouse helper that extracts the (possibly non-scalar) JSON value
    located at `string_path` from the JSON text in the column/expression
    `string`. `string_path` is a list of keys (or, on Snowflake/Spark, numeric
    indexes) traversed in order. Dispatched per adapter because every
    warehouse exposes a different JSON-path syntax.
#}
{% macro json_parse_nonscalar(string, string_path) -%}

{{ adapter.dispatch('json_parse_nonscalar', 'zendesk') (string, string_path) }}

{%- endmacro %}

{# Default: json_extract_path_text(col, 'k1', 'k2', ...) #}
{% macro default__json_parse_nonscalar(string, string_path) %}

json_extract_path_text({{string}}, {%- for s in string_path -%}'{{ s }}'{%- if not loop.last -%},{%- endif -%}{%- endfor -%} )

{% endmacro %}

{# Redshift: identical to the default implementation #}
{% macro redshift__json_parse_nonscalar(string, string_path) %}

json_extract_path_text({{string}}, {%- for s in string_path -%}'{{ s }}'{%- if not loop.last -%},{%- endif -%}{%- endfor -%} )

{% endmacro %}

{# BigQuery: JSONPath string, e.g. json_extract(col, '$.k1.k2') #}
{% macro bigquery__json_parse_nonscalar(string, string_path) %}

json_extract({{string}}, '$.{%- for s in string_path -%}{{ s }}{%- if not loop.last -%}.{%- endif -%}{%- endfor -%} ')

{% endmacro %}

{# Postgres: #>> path operator with a '{k1,k2}' text-array path, returning text #}
{% macro postgres__json_parse_nonscalar(string, string_path) %}

{{string}}::json #>> '{ {%- for s in string_path -%}{{ s }}{%- if not loop.last -%},{%- endif -%}{%- endfor -%} }'

{% endmacro %}

{# Snowflake: parse_json() then bracket navigation; numeric path parts become array indexes #}
{% macro snowflake__json_parse_nonscalar(string, string_path) %}

parse_json( {{string}} ) {%- for s in string_path -%}{% if s is number %}[{{ s }}]{% else %}['{{ s }}']{% endif %}{%- endfor -%}

{% endmacro %}

{# Spark: colon JSON-path operator; numeric path parts become array indexes #}
{% macro spark__json_parse_nonscalar(string, string_path) %}

{{string}} : {%- for s in string_path -%}{% if s is number %}[{{ s }}]{% else %}['{{ s }}']{% endif %}{%- endfor -%}

{% endmacro %}
41 changes: 41 additions & 0 deletions macros/regex_extract.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{#
    regex_extract(string, start_or_end)

    Cross-warehouse helper that pulls either the first quoted token
    (start_or_end = 'start') or the quoted token following a colon
    (start_or_end = 'end') out of a single-pair JSON-ish string such as
    '{"09:00":"17:00"}'. Used downstream to split a schedule window into its
    start and end times. Dispatched per adapter because regex functions and
    capture-group support differ across warehouses.

    NOTE(review): the Postgres pattern uses '": "' (with a space) where the
    other adapters use '":"'. This is presumably because Postgres's #>>
    operator preserves the '": "' spacing produced upstream while other
    engines normalize the JSON text — confirm against each adapter's output.
#}
{% macro regex_extract(string, start_or_end) -%}

{{ adapter.dispatch('regex_extract', 'zendesk') (string, start_or_end) }}

{%- endmacro %}

{# Default: REGEXP_EXTRACT with a raw-string pattern, returning capture group 1 #}
{% macro default__regex_extract(string, start_or_end) %}

REGEXP_EXTRACT({{ string }}, {%- if start_or_end == 'start' %} r'{"([^"]+)"' {% else %} r'":"([^"]+)"}' {% endif -%} )

{% endmacro %}

{# BigQuery: identical to the default implementation #}
{% macro bigquery__regex_extract(string, start_or_end) %}

REGEXP_EXTRACT({{ string }}, {%- if start_or_end == 'start' %} r'{"([^"]+)"' {% else %} r'":"([^"]+)"}' {% endif -%} )

{% endmacro %}

{# Snowflake: REGEXP_SUBSTR with 'e' (extract) flag and group_num = 1 #}
{% macro snowflake__regex_extract(string, start_or_end) %}

REGEXP_SUBSTR({{ string }}, {%- if start_or_end == 'start' %} '"([^"]+)"' {% else %} '":"([^"]+)"' {% endif -%}, 1, 1, 'e', 1 )

{% endmacro %}

{# Postgres: regexp_matches returns a text[]; [1] takes the first capture group #}
{% macro postgres__regex_extract(string, start_or_end) %}

(regexp_matches({{ string }}, {%- if start_or_end == 'start' %} '"([^"]+)":' {% else %} '": "([^"]+)' {% endif -%} ))[1]

{% endmacro %}

{# Redshift: REGEXP_SUBSTR with 'e' flag (returns the first capture group) #}
{% macro redshift__regex_extract(string, start_or_end) %}

REGEXP_SUBSTR({{ string }}, {%- if start_or_end == 'start' %} '"([^"]+)"' {% else %} '":"([^"]+)"' {% endif -%}, 1, 1, 'e')

{% endmacro %}

{# Spark: regexp_extract with explicit group index 1 #}
{% macro spark__regex_extract(string, start_or_end) %}

regexp_extract({{ string }}, {%- if start_or_end == 'start' %} '"([^"]+)":' {% else %} '":"([^"]+)"' {% endif -%}, 1)

{% endmacro %}
137 changes: 137 additions & 0 deletions models/intermediate/int_zendesk__schedule_history.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
-- Spike: reconstruct schedule history from Zendesk audit_log entries whose
-- change_description reads "Workweek changed from {...} to {...}".
-- Output grain: one row per schedule_id + day-of-week + schedule version,
-- with start/end expressed as minutes from the start of the week and a
-- valid_from/valid_to window for the version.

with reformat as (

    select
        _fivetran_synced,
        created_at,
        source_id as schedule_id,
        change_description as original,
        -- The raw description is a Ruby hash ({:mon=>{"09:00"=>"17:00"}, ...}).
        -- Rewrite it into JSON-ish text so the json/regex macros downstream can
        -- parse it: '=>' becomes ': ' and each :day symbol becomes a quoted key.
        replace(replace(replace(replace(replace(replace(replace(replace(lower(change_description), '=>', ': '),
            ':mon', '"mon"'), ':tue', '"tue"'), ':wed', '"wed"'), ':thu', '"thu"'), ':fri', '"fri"'), ':sat', '"sat"'), ':sun', '"sun"') as change_description

    from {{ ref('stg_zendesk__audit_log') }}
    where lower(change_description) like '%workweek%'
    -- fix: removed `order by created_at desc` here — an ORDER BY inside a CTE
    -- without a LIMIT does not affect the result and only adds sort cost
),

jsonify as (

    -- Strip the leading "workweek changed from " prefix, leaving
    -- "{from-hash} to {to-hash}".
    select
        _fivetran_synced,
        created_at,
        schedule_id,
        original,
        {{ dbt.split_part('change_description', "'workweek changed from '", 2) }} as change_description

    from reformat
),

split_up as (

    -- Separate the before/after snapshots around the literal ' to '.
    select
        _fivetran_synced,
        created_at,
        schedule_id,
        original,
        {{ dbt.split_part('change_description', "' to '", 1) }} as from_schedule,
        {{ dbt.split_part('change_description', "' to '", 2) }} as to_schedule

    from jsonify
),

split_days as (

    -- Pivot each snapshot into one start/end column pair per weekday.
    -- NOTE(review): the regex extraction captures a single time pair per day;
    -- confirm whether Zendesk schedules can hold multiple intervals per day,
    -- which would be truncated here.
    select
        _fivetran_synced,
        created_at,
        schedule_id,
        original
        {%- for day in ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] -%}
        , {{ regex_extract(json_parse_nonscalar('from_schedule', [day]), 'start') }} as from_{{ day }}_start
        , {{ regex_extract(json_parse_nonscalar('from_schedule', [day]), 'end') }} as from_{{ day }}_end
        , {{ regex_extract(json_parse_nonscalar('to_schedule', [day]), 'start') }} as to_{{ day }}_start
        , {{ regex_extract(json_parse_nonscalar('to_schedule', [day]), 'end') }} as to_{{ day }}_end
        {% endfor %}

    from split_up
),

verticalize as (

    -- Unpivot to one row per (schedule, day, version). "from" snapshots are
    -- shifted back one change via lag() so each row carries the time at which
    -- that version became active; "to" snapshots use the change time itself.
    -- union distinct (rather than union all) collapses the overlap between
    -- consecutive audit entries describing the same version.
    {%- for day in ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] -%}
    select
        _fivetran_synced,
        lag(created_at) over (partition by schedule_id order by created_at) as created_at,
        schedule_id,
        original,
        '{{ day }}' as dow,
        from_{{ day }}_start as start_time,
        from_{{ day }}_end as end_time

    from split_days
    where from_{{ day }}_start is not null and from_{{ day }}_end is not null

    union distinct

    select
        _fivetran_synced,
        created_at,
        schedule_id,
        original,
        '{{ day }}' as dow,
        to_{{ day }}_start as start_time,
        to_{{ day }}_end as end_time

    from split_days
    where to_{{ day }}_start is not null and to_{{ day }}_end is not null

    {% if not loop.last %}union distinct{% endif %}

    {% endfor %}
),

split_times as (

    -- Break the "HH:MM" strings apart and build each version's validity
    -- window: valid until the next change for the same schedule/day, or the
    -- current timestamp for the live version.
    select
        schedule_id,
        cast(nullif({{ dbt.split_part('start_time', "':'", 1) }}, ' ') as {{ dbt.type_int() }}) as start_time_hh,
        cast(nullif({{ dbt.split_part('start_time', "':'", 2) }}, ' ') as {{ dbt.type_int() }}) as start_time_mm,
        cast(nullif({{ dbt.split_part('end_time', "':'", 1) }}, ' ') as {{ dbt.type_int() }}) as end_time_hh,
        cast(nullif({{ dbt.split_part('end_time', "':'", 2) }}, ' ') as {{ dbt.type_int() }}) as end_time_mm,
        start_time,
        end_time,
        dow,
        _fivetran_synced,
        created_at as valid_from,
        coalesce(lead(created_at) over (partition by schedule_id, dow order by created_at), {{ dbt.current_timestamp_backcompat() }}) as valid_to

    from verticalize
),

final as (

    -- Express start/end as minutes from the start of the week (sun = day 0),
    -- matching the schedule representation used downstream. Rows whose
    -- valid_from was nulled by the lag() above (the oldest known version of a
    -- schedule) fall back to the epoch.
    select
        schedule_id,
        start_time_hh * 60 + start_time_mm + 24 * 60 * case
            when dow = 'mon' then 1
            when dow = 'tue' then 2
            when dow = 'wed' then 3
            when dow = 'thu' then 4
            when dow = 'fri' then 5
            when dow = 'sat' then 6
            else 0 end as start_time,
        end_time_hh * 60 + end_time_mm + 24 * 60 * case
            when dow = 'mon' then 1
            when dow = 'tue' then 2
            when dow = 'wed' then 3
            when dow = 'thu' then 4
            when dow = 'fri' then 5
            when dow = 'sat' then 6
            else 0 end as end_time,
        coalesce(valid_from, '1970-01-01') as valid_from,
        valid_to,
        _fivetran_synced,
        dow

    from split_times
)

select *
from final
1 change: 1 addition & 0 deletions models/intermediate/int_zendesk__schedule_spine.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ with timezone as (
select *
from {{ var('daylight_time') }}

-- TODO: BRING SCHEDULE HISTORY INTO THIS MODEL LIKELY
), schedule as (

select *
Expand Down
94 changes: 94 additions & 0 deletions models/intermediate/int_zendesk__timezones_w_dt.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
{{ config(enabled=var('using_schedules', True)) }}

/*
The purpose of this model is to create a spine of appropriate timezone offsets to use for schedules, as offsets may change due to Daylight Savings.
End result will include `valid_from` and `valid_until` columns which we will use downstream to determine which schedule-offset to associate with each ticket (ie standard time vs daylight time)

Output grain: one row per (time_zone, offset) validity window. A timezone with
daylight-savings records contributes alternating standard/DT windows; a
timezone without any contributes a single open-ended standard window.
*/

with timezone as (

select *
from {{ var('time_zone') }}

), daylight_time as (

select *
from {{ var('daylight_time') }}

), timezone_with_dt as (

-- Left join fans out: one row per daylight-savings record for the timezone,
-- or a single row with null DT columns when the timezone has none.
select
timezone.*,
daylight_time.daylight_start_utc,
daylight_time.daylight_end_utc,
daylight_time.daylight_offset_minutes

from timezone
left join daylight_time
on timezone.time_zone = daylight_time.time_zone

), order_timezone_dt as (

select
*,
-- will be null for timezones without any daylight savings records (and the first entry)
-- we will coalesce the first entry date with .... the X years ago
lag(daylight_end_utc, 1) over (partition by time_zone order by daylight_end_utc asc) as last_daylight_end_utc,
-- will be null for timezones without any daylight savings records (and the last entry)
-- we will coalesce the last entry date with the current date
lead(daylight_start_utc, 1) over (partition by time_zone order by daylight_start_utc asc) as next_daylight_start_utc

from timezone_with_dt

), split_timezones as (

-- standard (includes timezones without DT)
-- starts: when the last Daylight Savings ended
-- ends: when the next Daylight Savings starts
select
time_zone,
standard_offset_minutes as offset_minutes,

-- last_daylight_end_utc is null for the first record of the time_zone's daylight time, or if the TZ doesn't use DT
coalesce(last_daylight_end_utc, cast('1970-01-01' as date)) as valid_from,

-- daylight_start_utc is null for timezones that don't use DT
coalesce(daylight_start_utc, cast( {{ dbt.dateadd('year', 1, dbt.current_timestamp_backcompat()) }} as date)) as valid_until

from order_timezone_dt

union all

-- DT (excludes timezones without it)
-- starts: when this Daylight Savings started
-- ends: when this Daylight Savings ends
select
time_zone,
-- Pacific Time is -8h during standard time and -7h during DT
standard_offset_minutes + daylight_offset_minutes as offset_minutes,
daylight_start_utc as valid_from,
daylight_end_utc as valid_until

from order_timezone_dt
where daylight_offset_minutes is not null

union all

-- Trailing standard window: covers the period after a timezone's final
-- daylight-savings record, which neither branch above produces.
select
time_zone,
standard_offset_minutes as offset_minutes,

-- Get the latest daylight_end_utc time and set that as the valid_from
max(daylight_end_utc) as valid_from,

-- If the latest_daylight_end_time_utc is less than todays timestamp, that means DST has ended. Therefore, we will make the valid_until in the future.
cast( {{ dbt.dateadd('year', 1, dbt.current_timestamp_backcompat()) }} as date) as valid_until

from order_timezone_dt
group by 1, 2
-- We only want to apply this logic to time_zone's that had daylight saving time and it ended at a point. For example, Hong Kong ended DST in 1979.
having cast(max(daylight_end_utc) as date) < cast({{ dbt.current_timestamp_backcompat() }} as date)
)

select *
from split_timezones
7 changes: 5 additions & 2 deletions packages.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
packages:
- package: fivetran/zendesk_source
version: [">=0.12.0", "<0.13.0"]
# - package: fivetran/zendesk_source
# version: [">=0.12.0", "<0.13.0"]
- git: https://github.com/fivetran/dbt_zendesk_source.git
revision: explore/audit-log-spike
warn-unpinned: false
- package: calogica/dbt_date
version: [">=0.9.0", "<1.0.0"]