Skip to content

Commit

Permalink
dbt: add initial timeseries metric (#1872)
Browse files Browse the repository at this point in the history
* dbt: add initial timeseries metric

* fix: linting errors

* fix: nested cte

* fix: event type case
  • Loading branch information
ccerv1 authored Jul 29, 2024
1 parent 9f9049f commit 4e99a2e
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{{
config(
materialized='table'
)
}}

with metrics as (
select * from {{ ref('int_timeseries_code_metrics__developers') }}
),

project_metadata as (
select
project_id,
project_source,
project_namespace,
project_name,
display_name
from {{ ref('int_projects') }}
)

select
project_metadata.project_id,
project_metadata.project_source,
project_metadata.project_namespace,
project_metadata.project_name,
project_metadata.display_name,
metrics.event_source,
metrics.bucket_day,
metrics.metric,
metrics.amount
from metrics
left join project_metadata
on metrics.project_id = project_metadata.project_id
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
{% set fulltime_dev_days = 10 %}

with commits as (
select
from_artifact_id as developer_id,
project_id,
event_source,
DATE(bucket_day) as bucket_day,
CAST(SUM(amount) > 0 as int64) as commit_count
from {{ ref('int_events_daily_to_project') }}
where event_type = 'COMMIT_CODE'
group by
from_artifact_id,
project_id,
event_source,
bucket_day
),

project_start_dates as (
select
project_id,
event_source,
MIN(bucket_day) as first_commit_date
from commits
group by
project_id,
event_source
),

calendar as (
select
project_id,
event_source,
DATE_ADD(first_commit_date, interval day_offset day) as bucket_day
from
project_start_dates,
UNNEST(
GENERATE_ARRAY(
0,
DATE_DIFF(
(select MAX(bucket_day) as last_commit_date from commits),
first_commit_date, day
)
)
) as day_offset
),

devs as (
select distinct developer_id
from commits
),

developer_project_dates as (
select
devs.developer_id,
calendar.project_id,
calendar.bucket_day,
calendar.event_source
from calendar
cross join devs
),

filled_data as (
select
dpd.bucket_day,
dpd.developer_id,
dpd.project_id,
dpd.event_source,
COALESCE(c.commit_count, 0) as commit_count
from developer_project_dates as dpd
left join commits as c
on
dpd.bucket_day = c.bucket_day
and dpd.developer_id = c.developer_id
and dpd.project_id = c.project_id
and dpd.event_source = c.event_source
),

rolling_commit_days as (
select
bucket_day,
developer_id,
project_id,
event_source,
SUM(commit_count) over (
partition by developer_id, project_id, event_source
order by bucket_day
rows between 29 preceding and current row
) as num_commit_days
from filled_data
),

ft_devs as (
select
project_id,
event_source,
bucket_day,
'fulltime_developers' as metric,
COUNT(distinct developer_id) as amount
from rolling_commit_days
where num_commit_days >= {{ fulltime_dev_days }}
group by
project_id,
event_source,
bucket_day
),

pt_devs as (
select
project_id,
event_source,
bucket_day,
'parttime_developers' as metric,
COUNT(distinct developer_id) as amount
from rolling_commit_days
where num_commit_days >= 1 and num_commit_days < {{ fulltime_dev_days }}
group by
project_id,
event_source,
bucket_day
),

active_devs as (
select
project_id,
event_source,
bucket_day,
'active_developers' as metric,
COUNT(distinct developer_id) as amount
from rolling_commit_days
where num_commit_days >= 1
group by
project_id,
event_source,
bucket_day
)

select * from ft_devs
union all
select * from pt_devs
union all
select * from active_devs

0 comments on commit 4e99a2e

Please sign in to comment.