Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

spike for adding schedule history using audit_log #157

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .buildkite/scripts/run_models.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ dbt test --target "$db"
dbt run --vars '{zendesk__unstructured_enabled: true, using_schedules: false, using_domain_names: false, using_user_tags: false, using_ticket_form_history: false, using_organization_tags: false}' --target "$db" --full-refresh
dbt test --target "$db"

dbt run-operation fivetran_utils.drop_schemas_automation --target "$db"
# dbt run-operation fivetran_utils.drop_schemas_automation --target "$db"
1 change: 1 addition & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ vars:
user: "{{ ref('stg_zendesk__user') }}"
daylight_time: "{{ ref('stg_zendesk__daylight_time') }}"
time_zone: "{{ ref('stg_zendesk__time_zone') }}"
audit_log: "{{ ref('stg_zendesk__audit_log') }}"
using_schedules: true
using_domain_names: true
using_user_tags: true
Expand Down
1 change: 1 addition & 0 deletions integration_tests/dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ vars:
zendesk_organization_tag_identifier: "organization_tag_data"
zendesk_user_identifier: "user_data"
zendesk_user_tag_identifier: "user_tag_data"
zendesk_audit_log_identifier: "audit_log_data"

## Uncomment for docs generation
# zendesk__unstructured_enabled: True
Expand Down
6 changes: 6 additions & 0 deletions integration_tests/seeds/audit_log_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
id,_fivetran_synced,action,actor_id,change_description,created_at,source_id,source_label,source_type
579796,2024-05-28 21:53:06.793000,update,37253,"Workweek changed from {:sun=>{""01:45""=>""02:45""}, :mon=>{""09:00""=>""20:00""}, :tue=>{""09:00""=>""20:00""}, :wed=>{""08:00""=>""20:00""}, :thu=>{""08:00""=>""20:00""}, :fri=>{""08:00""=>""20:00""}} to {:sun=>{""03:00""=>""04:00""}, :mon=>{""08:00""=>""20:00""}, :tue=>{""08:00""=>""20:00""}, :wed=>{""07:15""=>""20:00""}, :thu=>{""07:15""=>""20:00""}, :fri=>{""07:15""=>""20:00""}}",2024-05-28 21:51:37.000000,18542,Workweek: Central US Schedule,zendesk/business_hours/workweek
2679952,2024-05-28 16:18:58.471000,update,37253,"Workweek changed from {:thu=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}, :mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :wed=>{""09:00""=>""17:00""}} to {:mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :wed=>{""09:00""=>""17:00""}, :thu=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}}",2024-05-21 11:20:29.000000,267996,Workweek: New schedule here,zendesk/business_hours/workweek
293556,2024-05-28 16:18:58.471000,update,37253,"Workweek changed from {} to {:mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :wed=>{""09:00""=>""17:00""}, :thu=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}}",2024-05-21 11:20:28.000000,267996,Workweek: New schedule here,zendesk/business_hours/workweek
4441364,2024-05-28 16:18:58.471000,update,37253,"Workweek changed from {:wed=>{""09:00""=>""17:00""}, :thu=>{""09:00""=>""17:00""}, :mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}} to {:mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :wed=>{""09:00""=>""17:00""}, :thu=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}}",2024-05-21 11:20:10.000000,267996,Workweek: New schedule 2,zendesk/business_hours/workweek
70900,2024-05-28 16:18:58.471000,update,37253,"Workweek changed from {} to {:mon=>{""09:00""=>""17:00""}, :tue=>{""09:00""=>""17:00""}, :wed=>{""09:00""=>""17:00""}, :thu=>{""09:00""=>""17:00""}, :fri=>{""09:00""=>""17:00""}}",2024-05-21 11:20:09.000000,267996,Workweek: New schedule 2,zendesk/business_hours/workweek
41 changes: 41 additions & 0 deletions macros/json_parse_nonscalar.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{#
    json_parse_nonscalar(string, string_path)

    Cross-warehouse helper that extracts the (possibly non-scalar) JSON value
    located at `string_path` from the JSON text in the column/expression
    `string`. `string_path` is a list of keys (or, on Snowflake/Spark, numeric
    indexes) traversed in order. Dispatched per adapter because every
    warehouse exposes a different JSON-path syntax.
#}
{% macro json_parse_nonscalar(string, string_path) -%}

{{ adapter.dispatch('json_parse_nonscalar', 'zendesk') (string, string_path) }}

{%- endmacro %}

{# Default: json_extract_path_text(col, 'k1', 'k2', ...) #}
{% macro default__json_parse_nonscalar(string, string_path) %}

json_extract_path_text({{string}}, {%- for s in string_path -%}'{{ s }}'{%- if not loop.last -%},{%- endif -%}{%- endfor -%} )

{% endmacro %}

{# Redshift: identical to the default implementation #}
{% macro redshift__json_parse_nonscalar(string, string_path) %}

json_extract_path_text({{string}}, {%- for s in string_path -%}'{{ s }}'{%- if not loop.last -%},{%- endif -%}{%- endfor -%} )

{% endmacro %}

{# BigQuery: JSONPath string, e.g. json_extract(col, '$.k1.k2') #}
{% macro bigquery__json_parse_nonscalar(string, string_path) %}

json_extract({{string}}, '$.{%- for s in string_path -%}{{ s }}{%- if not loop.last -%}.{%- endif -%}{%- endfor -%} ')

{% endmacro %}

{# Postgres: #>> path operator with a '{k1,k2}' text-array path, returning text #}
{% macro postgres__json_parse_nonscalar(string, string_path) %}

{{string}}::json #>> '{ {%- for s in string_path -%}{{ s }}{%- if not loop.last -%},{%- endif -%}{%- endfor -%} }'

{% endmacro %}

{# Snowflake: parse_json() then bracket navigation; numeric path parts become array indexes #}
{% macro snowflake__json_parse_nonscalar(string, string_path) %}

parse_json( {{string}} ) {%- for s in string_path -%}{% if s is number %}[{{ s }}]{% else %}['{{ s }}']{% endif %}{%- endfor -%}

{% endmacro %}

{# Spark: colon JSON-path operator; numeric path parts become array indexes #}
{% macro spark__json_parse_nonscalar(string, string_path) %}

{{string}} : {%- for s in string_path -%}{% if s is number %}[{{ s }}]{% else %}['{{ s }}']{% endif %}{%- endfor -%}

{% endmacro %}
41 changes: 41 additions & 0 deletions macros/regex_extract.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{#
    regex_extract(string, start_or_end)

    Cross-warehouse helper that pulls either the first quoted token
    (start_or_end = 'start') or the quoted token following a colon
    (start_or_end = 'end') out of a single-pair JSON-ish string such as
    '{"09:00":"17:00"}'. Used downstream to split a schedule window into its
    start and end times. Dispatched per adapter because regex functions and
    capture-group support differ across warehouses.

    NOTE(review): the Postgres pattern uses '": "' (with a space) where the
    other adapters use '":"'. This is presumably because Postgres's #>>
    operator preserves the '": "' spacing produced upstream while other
    engines normalize the JSON text — confirm against each adapter's output.
#}
{% macro regex_extract(string, start_or_end) -%}

{{ adapter.dispatch('regex_extract', 'zendesk') (string, start_or_end) }}

{%- endmacro %}

{# Default: REGEXP_EXTRACT with a raw-string pattern, returning capture group 1 #}
{% macro default__regex_extract(string, start_or_end) %}

REGEXP_EXTRACT({{ string }}, {%- if start_or_end == 'start' %} r'{"([^"]+)"' {% else %} r'":"([^"]+)"}' {% endif -%} )

{% endmacro %}

{# BigQuery: identical to the default implementation #}
{% macro bigquery__regex_extract(string, start_or_end) %}

REGEXP_EXTRACT({{ string }}, {%- if start_or_end == 'start' %} r'{"([^"]+)"' {% else %} r'":"([^"]+)"}' {% endif -%} )

{% endmacro %}

{# Snowflake: REGEXP_SUBSTR with 'e' (extract) flag and group_num = 1 #}
{% macro snowflake__regex_extract(string, start_or_end) %}

REGEXP_SUBSTR({{ string }}, {%- if start_or_end == 'start' %} '"([^"]+)"' {% else %} '":"([^"]+)"' {% endif -%}, 1, 1, 'e', 1 )

{% endmacro %}

{# Postgres: regexp_matches returns a text[]; [1] takes the first capture group #}
{% macro postgres__regex_extract(string, start_or_end) %}

(regexp_matches({{ string }}, {%- if start_or_end == 'start' %} '"([^"]+)":' {% else %} '": "([^"]+)' {% endif -%} ))[1]

{% endmacro %}

{# Redshift: REGEXP_SUBSTR with 'e' flag (returns the first capture group) #}
{% macro redshift__regex_extract(string, start_or_end) %}

REGEXP_SUBSTR({{ string }}, {%- if start_or_end == 'start' %} '"([^"]+)"' {% else %} '":"([^"]+)"' {% endif -%}, 1, 1, 'e')

{% endmacro %}

{# Spark: regexp_extract with explicit group index 1 #}
{% macro spark__regex_extract(string, start_or_end) %}

regexp_extract({{ string }}, {%- if start_or_end == 'start' %} '"([^"]+)":' {% else %} '":"([^"]+)"' {% endif -%}, 1)

{% endmacro %}
137 changes: 137 additions & 0 deletions models/intermediate/int_zendesk__schedule_history.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
-- Spike: reconstruct schedule history from Zendesk audit_log entries whose
-- change_description reads "Workweek changed from {...} to {...}".
-- Output grain: one row per schedule_id + day-of-week + schedule version,
-- with start/end expressed as minutes from the start of the week and a
-- valid_from/valid_to window for the version.

with reformat as (

    select
        _fivetran_synced,
        created_at,
        source_id as schedule_id,
        change_description as original,
        -- The raw description is a Ruby hash ({:mon=>{"09:00"=>"17:00"}, ...}).
        -- Rewrite it into JSON-ish text so the json/regex macros downstream can
        -- parse it: '=>' becomes ': ' and each :day symbol becomes a quoted key.
        replace(replace(replace(replace(replace(replace(replace(replace(lower(change_description), '=>', ': '),
            ':mon', '"mon"'), ':tue', '"tue"'), ':wed', '"wed"'), ':thu', '"thu"'), ':fri', '"fri"'), ':sat', '"sat"'), ':sun', '"sun"') as change_description

    from {{ ref('stg_zendesk__audit_log') }}
    where lower(change_description) like '%workweek%'
    -- fix: removed `order by created_at desc` here — an ORDER BY inside a CTE
    -- without a LIMIT does not affect the result and only adds sort cost
),

jsonify as (

    -- Strip the leading "workweek changed from " prefix, leaving
    -- "{from-hash} to {to-hash}".
    select
        _fivetran_synced,
        created_at,
        schedule_id,
        original,
        {{ dbt.split_part('change_description', "'workweek changed from '", 2) }} as change_description

    from reformat
),

split_up as (

    -- Separate the before/after snapshots around the literal ' to '.
    select
        _fivetran_synced,
        created_at,
        schedule_id,
        original,
        {{ dbt.split_part('change_description', "' to '", 1) }} as from_schedule,
        {{ dbt.split_part('change_description', "' to '", 2) }} as to_schedule

    from jsonify
),

split_days as (

    -- Pivot each snapshot into one start/end column pair per weekday.
    -- NOTE(review): the regex extraction captures a single time pair per day;
    -- confirm whether Zendesk schedules can hold multiple intervals per day,
    -- which would be truncated here.
    select
        _fivetran_synced,
        created_at,
        schedule_id,
        original
        {%- for day in ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] -%}
        , {{ regex_extract(json_parse_nonscalar('from_schedule', [day]), 'start') }} as from_{{ day }}_start
        , {{ regex_extract(json_parse_nonscalar('from_schedule', [day]), 'end') }} as from_{{ day }}_end
        , {{ regex_extract(json_parse_nonscalar('to_schedule', [day]), 'start') }} as to_{{ day }}_start
        , {{ regex_extract(json_parse_nonscalar('to_schedule', [day]), 'end') }} as to_{{ day }}_end
        {% endfor %}

    from split_up
),

verticalize as (

    -- Unpivot to one row per (schedule, day, version). "from" snapshots are
    -- shifted back one change via lag() so each row carries the time at which
    -- that version became active; "to" snapshots use the change time itself.
    -- union distinct (rather than union all) collapses the overlap between
    -- consecutive audit entries describing the same version.
    {%- for day in ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] -%}
    select
        _fivetran_synced,
        lag(created_at) over (partition by schedule_id order by created_at) as created_at,
        schedule_id,
        original,
        '{{ day }}' as dow,
        from_{{ day }}_start as start_time,
        from_{{ day }}_end as end_time

    from split_days
    where from_{{ day }}_start is not null and from_{{ day }}_end is not null

    union distinct

    select
        _fivetran_synced,
        created_at,
        schedule_id,
        original,
        '{{ day }}' as dow,
        to_{{ day }}_start as start_time,
        to_{{ day }}_end as end_time

    from split_days
    where to_{{ day }}_start is not null and to_{{ day }}_end is not null

    {% if not loop.last %}union distinct{% endif %}

    {% endfor %}
),

split_times as (

    -- Break the "HH:MM" strings apart and build each version's validity
    -- window: valid until the next change for the same schedule/day, or the
    -- current timestamp for the live version.
    select
        schedule_id,
        cast(nullif({{ dbt.split_part('start_time', "':'", 1) }}, ' ') as {{ dbt.type_int() }}) as start_time_hh,
        cast(nullif({{ dbt.split_part('start_time', "':'", 2) }}, ' ') as {{ dbt.type_int() }}) as start_time_mm,
        cast(nullif({{ dbt.split_part('end_time', "':'", 1) }}, ' ') as {{ dbt.type_int() }}) as end_time_hh,
        cast(nullif({{ dbt.split_part('end_time', "':'", 2) }}, ' ') as {{ dbt.type_int() }}) as end_time_mm,
        start_time,
        end_time,
        dow,
        _fivetran_synced,
        created_at as valid_from,
        coalesce(lead(created_at) over (partition by schedule_id, dow order by created_at), {{ dbt.current_timestamp_backcompat() }}) as valid_to

    from verticalize
),

final as (

    -- Express start/end as minutes from the start of the week (sun = day 0),
    -- matching the schedule representation used downstream. Rows whose
    -- valid_from was nulled by the lag() above (the oldest known version of a
    -- schedule) fall back to the epoch.
    select
        schedule_id,
        start_time_hh * 60 + start_time_mm + 24 * 60 * case
            when dow = 'mon' then 1
            when dow = 'tue' then 2
            when dow = 'wed' then 3
            when dow = 'thu' then 4
            when dow = 'fri' then 5
            when dow = 'sat' then 6
            else 0 end as start_time,
        end_time_hh * 60 + end_time_mm + 24 * 60 * case
            when dow = 'mon' then 1
            when dow = 'tue' then 2
            when dow = 'wed' then 3
            when dow = 'thu' then 4
            when dow = 'fri' then 5
            when dow = 'sat' then 6
            else 0 end as end_time,
        coalesce(valid_from, '1970-01-01') as valid_from,
        valid_to,
        _fivetran_synced,
        dow

    from split_times
)

select *
from final
1 change: 1 addition & 0 deletions models/intermediate/int_zendesk__schedule_spine.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ with timezone as (
select *
from {{ var('daylight_time') }}

-- TODO: BRING SCHEDULE HISTORY INTO THIS MODEL LIKELY
), schedule as (

select *
Expand Down
94 changes: 94 additions & 0 deletions models/intermediate/int_zendesk__timezones_w_dt.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
{{ config(enabled=var('using_schedules', True)) }}

/*
The purpose of this model is to create a spine of appropriate timezone offsets to use for schedules, as offsets may change due to Daylight Savings.
End result will include `valid_from` and `valid_until` columns which we will use downstream to determine which schedule-offset to associate with each ticket (ie standard time vs daylight time)

Output grain: one row per (time_zone, offset) validity window. A timezone with
daylight-savings records contributes alternating standard/DT windows; a
timezone without any contributes a single open-ended standard window.
*/

with timezone as (

select *
from {{ var('time_zone') }}

), daylight_time as (

select *
from {{ var('daylight_time') }}

), timezone_with_dt as (

-- Left join fans out: one row per daylight-savings record for the timezone,
-- or a single row with null DT columns when the timezone has none.
select
timezone.*,
daylight_time.daylight_start_utc,
daylight_time.daylight_end_utc,
daylight_time.daylight_offset_minutes

from timezone
left join daylight_time
on timezone.time_zone = daylight_time.time_zone

), order_timezone_dt as (

select
*,
-- will be null for timezones without any daylight savings records (and the first entry)
-- we will coalesce the first entry date with .... the X years ago
lag(daylight_end_utc, 1) over (partition by time_zone order by daylight_end_utc asc) as last_daylight_end_utc,
-- will be null for timezones without any daylight savings records (and the last entry)
-- we will coalesce the last entry date with the current date
lead(daylight_start_utc, 1) over (partition by time_zone order by daylight_start_utc asc) as next_daylight_start_utc

from timezone_with_dt

), split_timezones as (

-- standard (includes timezones without DT)
-- starts: when the last Daylight Savings ended
-- ends: when the next Daylight Savings starts
select
time_zone,
standard_offset_minutes as offset_minutes,

-- last_daylight_end_utc is null for the first record of the time_zone's daylight time, or if the TZ doesn't use DT
coalesce(last_daylight_end_utc, cast('1970-01-01' as date)) as valid_from,

-- daylight_start_utc is null for timezones that don't use DT
coalesce(daylight_start_utc, cast( {{ dbt.dateadd('year', 1, dbt.current_timestamp_backcompat()) }} as date)) as valid_until

from order_timezone_dt

union all

-- DT (excludes timezones without it)
-- starts: when this Daylight Savings started
-- ends: when this Daylight Savings ends
select
time_zone,
-- Pacific Time is -8h during standard time and -7h during DT
standard_offset_minutes + daylight_offset_minutes as offset_minutes,
daylight_start_utc as valid_from,
daylight_end_utc as valid_until

from order_timezone_dt
where daylight_offset_minutes is not null

union all

-- Trailing standard window: covers the period after a timezone's final
-- daylight-savings record, which neither branch above produces.
select
time_zone,
standard_offset_minutes as offset_minutes,

-- Get the latest daylight_end_utc time and set that as the valid_from
max(daylight_end_utc) as valid_from,

-- If the latest_daylight_end_time_utc is less than todays timestamp, that means DST has ended. Therefore, we will make the valid_until in the future.
cast( {{ dbt.dateadd('year', 1, dbt.current_timestamp_backcompat()) }} as date) as valid_until

from order_timezone_dt
group by 1, 2
-- We only want to apply this logic to time_zone's that had daylight saving time and it ended at a point. For example, Hong Kong ended DST in 1979.
having cast(max(daylight_end_utc) as date) < cast({{ dbt.current_timestamp_backcompat() }} as date)
)

select *
from split_timezones
7 changes: 5 additions & 2 deletions packages.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
packages:
- package: fivetran/zendesk_source
version: [">=0.12.0", "<0.13.0"]
# - package: fivetran/zendesk_source
# version: [">=0.12.0", "<0.13.0"]
- git: https://github.com/fivetran/dbt_zendesk_source.git
revision: explore/audit-log-spike
warn-unpinned: false
- package: calogica/dbt_date
version: [">=0.9.0", "<1.0.0"]