From 268435efa8e6c8f88cfd3154bc42359a87b23c46 Mon Sep 17 00:00:00 2001 From: Alex Co Date: Thu, 26 Jan 2023 11:50:23 +0800 Subject: [PATCH 1/3] Support repository name in the tranformed metrics Fix #41 Signed-off-by: Alex Co --- models/github.yml | 16 +++++-- models/github__daily_metrics.sql | 63 +++++++++++++++++++++------- models/github__monthly_metrics.sql | 3 +- models/github__quarterly_metrics.sql | 3 +- models/github__weekly_metrics.sql | 5 ++- 5 files changed, 68 insertions(+), 22 deletions(-) diff --git a/models/github.yml b/models/github.yml index 7e485fd..bdd8dfc 100644 --- a/models/github.yml +++ b/models/github.yml @@ -184,7 +184,6 @@ models: - name: day description: The reporting day tests: - - unique - not_null - name: number_issues_opened description: The total number of issues created during this time period @@ -207,6 +206,9 @@ models: - name: number_prs_closed_without_merge description: The total number of pull requests closed without a merge during this time period + - name: repository + description: The name of the repository + - name: sum_days_pr_open description: The total number of days a pull request opened during this time period was open @@ -221,7 +223,6 @@ models: - name: week description: The reporting week tests: - - unique - not_null - name: number_issues_opened description: The total number of issues created during this time period @@ -249,6 +250,9 @@ models: - name: longest_days_pr_open description: The longest number of days a pull request opened during this time period was open + + - name: repository + description: The name of the repository - name: github__monthly_metrics description: > @@ -257,7 +261,6 @@ models: - name: month description: The reporting month tests: - - unique - not_null - name: number_issues_opened description: The total number of issues created during this time period @@ -285,6 +288,9 @@ models: - name: longest_days_pr_open description: The longest number of days a pull request opened during this time period was open + + - name: repository + description: The name of the repository - name: github__quarterly_metrics description: > @@ -293,7 +299,6 @@ models: - name: quarter description: The reporting quarter tests: - - unique - not_null - name: number_issues_opened description: The total number of issues created during this time period @@ -321,4 +326,7 @@ models: - name: longest_days_pr_open description: The longest number of days a pull request opened during this time period was open + + - name: repository + description: The name of the repository \ No newline at end of file diff --git a/models/github__daily_metrics.sql b/models/github__daily_metrics.sql index 22327a2..dd49dd9 100644 --- a/models/github__daily_metrics.sql +++ b/models/github__daily_metrics.sql @@ -13,18 +13,24 @@ issues_opened_per_day as ( {{ dbt.date_trunc('day', 'created_at') }} as day, count(*) as number_issues_opened, sum(days_issue_open) as sum_days_issue_open, - max(days_issue_open) as longest_days_issue_open + max(days_issue_open) as longest_days_issue_open, + repository as repository from github_issues - group by 1 + group by + 1, + repository ), issues_closed_per_day as ( select {{ dbt.date_trunc('day', 'closed_at') }} as day, - count(*) as number_issues_closed + count(*) as number_issues_closed, + repository as repository from github_issues where closed_at is not null - group by 1 + group by + 1, + repository ), prs_opened_per_day as ( @@ -32,28 +38,37 @@ prs_opened_per_day as ( {{ dbt.date_trunc('day', 'created_at') }} as day, count(*) as number_prs_opened, sum(days_issue_open) as sum_days_pr_open, - max(days_issue_open) as longest_days_pr_open + max(days_issue_open) as longest_days_pr_open, + repository as repository from pull_requests - group by 1 + group by + 1, + repository ), prs_merged_per_day as ( select {{ dbt.date_trunc('day', 'merged_at') }} as day, - count(*) as number_prs_merged + count(*) as number_prs_merged, + repository as repository from pull_requests where merged_at is not null - group by 1 + group by + 1, + repository ), prs_closed_without_merge_per_day as ( select {{ dbt.date_trunc('day', 'closed_at') }} as day, - count(*) as number_prs_closed_without_merge + count(*) as number_prs_closed_without_merge, + repository as repository from pull_requests where closed_at is not null and merged_at is null - group by 1 + group by + 1, + repository ), issues_per_day as ( @@ -61,12 +76,18 @@ issues_per_day as ( coalesce(issues_opened_per_day.day, issues_closed_per_day.day ) as day, + coalesce(issues_opened_per_day.repository, + issues_closed_per_day.repository + ) as repository, number_issues_opened, number_issues_closed, sum_days_issue_open, longest_days_issue_open from issues_opened_per_day - full outer join issues_closed_per_day on issues_opened_per_day.day = issues_closed_per_day.day + full outer join issues_closed_per_day + on + issues_opened_per_day.day = issues_closed_per_day.day + and issues_opened_per_day.repository = issues_closed_per_day.repository ), prs_per_day as ( @@ -75,18 +96,29 @@ prs_per_day as ( prs_merged_per_day.day, prs_closed_without_merge_per_day.day ) as day, + coalesce(prs_opened_per_day.repository, + prs_merged_per_day.repository, + prs_closed_without_merge_per_day.repository + ) as repository, number_prs_opened, number_prs_merged, number_prs_closed_without_merge, sum_days_pr_open, longest_days_pr_open from prs_opened_per_day - full outer join prs_merged_per_day on prs_opened_per_day.day = prs_merged_per_day.day - full outer join prs_closed_without_merge_per_day on coalesce(prs_opened_per_day.day, prs_merged_per_day.day) = prs_closed_without_merge_per_day.day + full outer join prs_merged_per_day + on + prs_opened_per_day.day = prs_merged_per_day.day + AND prs_opened_per_day.repository = prs_merged_per_day.repository + full outer join prs_closed_without_merge_per_day + on + coalesce(prs_opened_per_day.day, prs_merged_per_day.day) = prs_closed_without_merge_per_day.day + AND coalesce(prs_opened_per_day.repository, prs_merged_per_day.repository) = prs_closed_without_merge_per_day.repository ) select coalesce(issues_per_day.day, prs_per_day.day) as day, + coalesce(issues_per_day.repository, prs_per_day.repository) as repository, coalesce(number_issues_opened, 0) as number_issues_opened, coalesce(number_issues_closed, 0) as number_issues_closed, sum_days_issue_open, @@ -97,5 +129,8 @@ select sum_days_pr_open, longest_days_pr_open from issues_per_day -full outer join prs_per_day on issues_per_day.day = prs_per_day.day +full outer join prs_per_day +on + issues_per_day.day = prs_per_day.day + AND issues_per_day.repository = prs_per_day.repository order by day desc diff --git a/models/github__monthly_metrics.sql b/models/github__monthly_metrics.sql index b26b927..c23c019 100644 --- a/models/github__monthly_metrics.sql +++ b/models/github__monthly_metrics.sql @@ -5,6 +5,7 @@ with daily_metrics as ( select {{ dbt.date_trunc('month', 'day') }} as month, + repository as repository, sum(number_issues_opened) as number_issues_opened, sum(number_issues_closed) as number_issues_closed, sum(sum_days_issue_open) / sum(number_issues_opened) as avg_days_issue_open, @@ -15,5 +16,5 @@ select sum(sum_days_pr_open) / sum(number_prs_opened) as avg_days_pr_open, max(longest_days_pr_open) as longest_days_pr_open from daily_metrics -group by 1 +group by 1, repository order by 1 desc \ No newline at end of file diff --git a/models/github__quarterly_metrics.sql b/models/github__quarterly_metrics.sql index ccb8733..983fd26 100644 --- a/models/github__quarterly_metrics.sql +++ b/models/github__quarterly_metrics.sql @@ -5,6 +5,7 @@ with daily_metrics as ( select {{ dbt.date_trunc('quarter', 'day') }} as quarter, + repository as repository, sum(number_issues_opened) as number_issues_opened, sum(number_issues_closed) as number_issues_closed, sum(sum_days_issue_open) / sum(number_issues_opened) as avg_days_issue_open, @@ -16,5 +17,5 @@ select max(longest_days_pr_open) as longest_days_pr_open from daily_metrics -group by 1 +group by 1, repository order by 1 desc \ No newline at end of file diff --git a/models/github__weekly_metrics.sql b/models/github__weekly_metrics.sql index 0d46a13..40f1b30 100644 --- a/models/github__weekly_metrics.sql +++ b/models/github__weekly_metrics.sql @@ -4,7 +4,8 @@ with daily_metrics as ( ) select - {{ dbt.date_trunc('week', 'day') }} as week, + {{ dbt.date_trunc('week', 'day') }} as week, + repository as repository, sum(number_issues_opened) as number_issues_opened, sum(number_issues_closed) as number_issues_closed, sum(sum_days_issue_open) / sum(number_issues_opened) as avg_days_issue_open, @@ -15,5 +16,5 @@ select sum(sum_days_pr_open) / sum(number_prs_opened) as avg_days_pr_open, max(longest_days_pr_open) as longest_days_pr_open from daily_metrics -group by 1 +group by 1, repository order by 1 desc \ No newline at end of file From e3b3c74f17c658cbe2eae63292bf68bd877292e3 Mon Sep 17 00:00:00 2001 From: fivetran-catfritz <111930712+fivetran-catfritz@users.noreply.github.com> Date: Tue, 31 Jan 2023 15:37:54 -0600 Subject: [PATCH 2/3] customer/add-repo-cols --- models/github.yml | 29 +++++++++++++- models/github__daily_metrics.sql | 59 +++++++++++++--------------- models/github__monthly_metrics.sql | 2 +- models/github__quarterly_metrics.sql | 2 +- models/github__weekly_metrics.sql | 2 +- 5 files changed, 58 insertions(+), 36 deletions(-) diff --git a/models/github.yml b/models/github.yml index bdd8dfc..f817c8a 100644 --- a/models/github.yml +++ b/models/github.yml @@ -180,6 +180,11 @@ models: - name: github__daily_metrics description: > Summary numbers for issues and pull requests by day + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - repository + - day columns: - name: day description: The reporting day @@ -208,6 +213,8 @@ models: - name: repository description: The name of the repository + tests: + - not_null - name: sum_days_pr_open description: The total number of days a pull request opened during this time period was open @@ -219,6 +226,11 @@ models: - name: github__weekly_metrics description: > Summary numbers for issues and pull requests by week + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - repository + - week columns: - name: week description: The reporting week @@ -253,10 +265,17 @@ models: - name: repository description: The name of the repository + tests: + - not_null - name: github__monthly_metrics description: > Summary numbers for issues and pull requests by month + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - repository + - month columns: - name: month description: The reporting month @@ -291,10 +310,17 @@ models: - name: repository description: The name of the repository + tests: + - not_null - name: github__quarterly_metrics description: > Summary numbers for issues and pull requests by quarter + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - repository + - quarter columns: - name: quarter description: The reporting quarter @@ -329,4 +355,5 @@ models: - name: repository description: The name of the repository - \ No newline at end of file + tests: + - not_null diff --git a/models/github__daily_metrics.sql b/models/github__daily_metrics.sql index dd49dd9..5aa05d6 100644 --- a/models/github__daily_metrics.sql +++ b/models/github__daily_metrics.sql @@ -9,66 +9,61 @@ pull_requests as ( ), issues_opened_per_day as ( - select - {{ dbt.date_trunc('day', 'created_at') }} as day, + select + {{ dbt.date_trunc('day', 'created_at') }} as day, + repository as repository, count(*) as number_issues_opened, sum(days_issue_open) as sum_days_issue_open, - max(days_issue_open) as longest_days_issue_open, - repository as repository + max(days_issue_open) as longest_days_issue_open from github_issues group by - 1, - repository + 1,2 ), issues_closed_per_day as ( - select - {{ dbt.date_trunc('day', 'closed_at') }} as day, - count(*) as number_issues_closed, - repository as repository + select + {{ dbt.date_trunc('day', 'closed_at') }} as day, + repository as repository, + count(*) as number_issues_closed from github_issues where closed_at is not null group by - 1, - repository + 1,2 ), prs_opened_per_day as ( - select - {{ dbt.date_trunc('day', 'created_at') }} as day, + select + {{ dbt.date_trunc('day', 'created_at') }} as day, + repository as repository, count(*) as number_prs_opened, sum(days_issue_open) as sum_days_pr_open, - max(days_issue_open) as longest_days_pr_open, - repository as repository + max(days_issue_open) as longest_days_pr_open from pull_requests group by - 1, - repository + 1,2 ), prs_merged_per_day as ( - select + select {{ dbt.date_trunc('day', 'merged_at') }} as day, - count(*) as number_prs_merged, - repository as repository + repository as repository, + count(*) as number_prs_merged from pull_requests where merged_at is not null group by - 1, - repository + 1,2 ), prs_closed_without_merge_per_day as ( - select - {{ dbt.date_trunc('day', 'closed_at') }} as day, - count(*) as number_prs_closed_without_merge, - repository as repository + select + {{ dbt.date_trunc('day', 'closed_at') }} as day, + repository as repository, + count(*) as number_prs_closed_without_merge from pull_requests where closed_at is not null and merged_at is null group by - 1, - repository + 1,2 ), issues_per_day as ( @@ -109,11 +104,11 @@ prs_per_day as ( full outer join prs_merged_per_day on prs_opened_per_day.day = prs_merged_per_day.day - AND prs_opened_per_day.repository = prs_merged_per_day.repository + and prs_opened_per_day.repository = prs_merged_per_day.repository full outer join prs_closed_without_merge_per_day on coalesce(prs_opened_per_day.day, prs_merged_per_day.day) = prs_closed_without_merge_per_day.day - AND coalesce(prs_opened_per_day.repository, prs_merged_per_day.repository) = prs_closed_without_merge_per_day.repository + and coalesce(prs_opened_per_day.repository, prs_merged_per_day.repository) = prs_closed_without_merge_per_day.repository ) select @@ -132,5 +127,5 @@ from issues_per_day full outer join prs_per_day on issues_per_day.day = prs_per_day.day - AND issues_per_day.repository = prs_per_day.repository + and issues_per_day.repository = prs_per_day.repository order by day desc diff --git a/models/github__monthly_metrics.sql b/models/github__monthly_metrics.sql index c23c019..018c1ee 100644 --- a/models/github__monthly_metrics.sql +++ b/models/github__monthly_metrics.sql @@ -16,5 +16,5 @@ select sum(sum_days_pr_open) / sum(number_prs_opened) as avg_days_pr_open, max(longest_days_pr_open) as longest_days_pr_open from daily_metrics -group by 1, repository +group by 1,2 order by 1 desc \ No newline at end of file diff --git a/models/github__quarterly_metrics.sql b/models/github__quarterly_metrics.sql index 983fd26..db4762f 100644 --- a/models/github__quarterly_metrics.sql +++ b/models/github__quarterly_metrics.sql @@ -17,5 +17,5 @@ select max(longest_days_pr_open) as longest_days_pr_open from daily_metrics -group by 1, repository +group by 1,2 order by 1 desc \ No newline at end of file diff --git a/models/github__weekly_metrics.sql b/models/github__weekly_metrics.sql index 40f1b30..081e1b4 100644 --- a/models/github__weekly_metrics.sql +++ b/models/github__weekly_metrics.sql @@ -16,5 +16,5 @@ select sum(sum_days_pr_open) / sum(number_prs_opened) as avg_days_pr_open, max(longest_days_pr_open) as longest_days_pr_open from daily_metrics -group by 1, repository +group by 1,2 order by 1 desc \ No newline at end of file From e774c602c143b1f56d479e93a30f2bc64b46c3a5 Mon Sep 17 00:00:00 2001 From: fivetran-catfritz <111930712+fivetran-catfritz@users.noreply.github.com> Date: Tue, 31 Jan 2023 17:26:49 -0600 Subject: [PATCH 3/3] update readme and changelog --- CHANGELOG.md | 9 +++++++++ README.md | 8 ++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e16c4b4..3bca650 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +# dbt_github v0.7.0 +## 🚨 Breaking Change 🚨 +- Updated the following models to aggregate at the repository grain in addition to their time period grain. ([#42](https://github.com/fivetran/dbt_github/pull/42), [#43](https://github.com/fivetran/dbt_github/pull/43)) + - `github__daily_metrics` + - `github__weekly_metrics` + - `github__monthly_metrics` + - `github__quarterly_metrics` +## Contributors 📝 +- @onimsha ([#42](https://github.com/fivetran/dbt_github/pull/42)) # dbt_github v0.6.0 [PR #35](https://github.com/fivetran/dbt_github/pull/35) includes the following breaking changes: ## 🚨 Breaking Changes 🚨: diff --git a/README.md b/README.md index fc068ef..b750eb5 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,10 @@ The following table provides a detailed list of all models materialized within t | -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | | [github__issues](https://fivetran.github.io/dbt_github/#!/model/model.github.github__issues) | Each record represents a GitHub issue, enriched with data about its assignees, milestones, and time comparisons. | | [github__pull_requests](https://fivetran.github.io/dbt_github/#!/model/model.github.github__pull_requests) | Each record represents a GitHub pull request, enriched with data about its repository, reviewers, and durations between review requests, merges and reviews. | -| [github__daily_metrics](https://fivetran.github.io/dbt_github/#!/model/model.github.github__daily_metrics) | Each record represents a single day, enriched with metrics about PRs and issues that were created and closed during that period. | -| [github__weekly_metrics](https://fivetran.github.io/dbt_github/#!/model/model.github.github__weekly_metrics) | Each record represents a single week, enriched with metrics about PRs and issues that were created and closed during that period. | -| [github__monthly_metrics](https://fivetran.github.io/dbt_github/#!/model/model.github.github__monthly_metrics) | Each record represents a single month, enriched with metrics about PRs and issues that were created and closed during that period. | -| [github__quarterly_metrics](https://fivetran.github.io/dbt_github/#!/model/model.github.github__quarterly_metrics) | Each record represents a single quarter, enriched with metrics about PRs and issues that were created and closed during that period. | +| [github__daily_metrics](https://fivetran.github.io/dbt_github/#!/model/model.github.github__daily_metrics) | Each record represents a single day and repository, enriched with metrics about PRs and issues that were created and closed during that period. | +| [github__weekly_metrics](https://fivetran.github.io/dbt_github/#!/model/model.github.github__weekly_metrics) | Each record represents a single week and repository, enriched with metrics about PRs and issues that were created and closed during that period. | +| [github__monthly_metrics](https://fivetran.github.io/dbt_github/#!/model/model.github.github__monthly_metrics) | Each record represents a single month and repository, enriched with metrics about PRs and issues that were created and closed during that period. | +| [github__quarterly_metrics](https://fivetran.github.io/dbt_github/#!/model/model.github.github__quarterly_metrics) | Each record represents a single quarter and repository, enriched with metrics about PRs and issues that were created and closed during that period. | # 🎯 How do I use the dbt package?