From dd4b6e4cf3ac48d56b81c91d2811b584dca8430c Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Fri, 22 Nov 2024 19:59:42 +0000 Subject: [PATCH 1/2] Split reporting.res_report_summary input CTEs out into dedicated tables --- dbt/models/reporting/docs.md | 55 ++++++-- .../reporting.res_report_summary.sql | 119 ++---------------- ...porting.res_report_summary_sales_input.sql | 39 ++++++ ...orting.res_report_summary_values_input.sql | 106 ++++++++++++++++ dbt/models/reporting/schema.yml | 12 ++ 5 files changed, 210 insertions(+), 121 deletions(-) create mode 100644 dbt/models/reporting/reporting.res_report_summary_sales_input.sql create mode 100644 dbt/models/reporting/reporting.res_report_summary_values_input.sql diff --git a/dbt/models/reporting/docs.md b/dbt/models/reporting/docs.md index a3a80111f..5ef4e1ecd 100644 --- a/dbt/models/reporting/docs.md +++ b/dbt/models/reporting/docs.md @@ -32,14 +32,54 @@ Feeds public reporting assets. # res_report_summary {% docs table_res_report_summary %} -Materialized version of `reporting.vw_res_report_summary`. +Aggregates statistics on characteristics, classes, AVs, and sales +by assessment stage, property groups, year, and various geographies. +Feeds public reporting assets. -Materialized to speed up queries for Tableau. +Materialized once per day to speed up queries for Tableau. + +## Nuance + +- Model and assessment values are gathered independently and + aggregated via a union rather than a join, so it's important to keep in mind + that years for model and assessment stages do NOT need to match, i.e. we can + have 2023 model values in the table before there are any 2023 assessment + values to report on. Sales are added via a lagged join, so `sale_year` should + always be `year - 1`. It is also worth nothing that "model year" is + incremented by 1 solely for the sake of reporting in this table, meaning that + models with a `meta_year` value of 2022 in `model.assessment_pin` will + populate the table with a value of 2023 for `year`. **Primary Key**: `year`, `geography_type`, `geography_id`, `assessment_stage`, `property_group` {% enddocs %} +# res_report_summary_sales_input + +{% docs table_res_report_summary_sales_input %} +Input table for `reporting.res_report_summary` that produces the raw +sales data that `res_report_summary` aggregates. + +We split these input data out into a separate table to speed up query time for +`res_report_summary`, since otherwise it needs to rerun the query logic +for every possible geography and reporting group combination. + +**Primary Key**: `pin`, `doc_no` +{% enddocs %} + +# res_report_summary_values_input + +{% docs table_res_report_summary_values_input %} +Input table for `reporting.res_report_summary` that produces the raw +characteristic and value data that `res_report_summary` aggregates. + +We split these input data out into a separate table to speed up query time for +`res_report_summary`, since otherwise it needs to rerun the query logic +for every possible geography and reporting group combination. + +**Primary Key**: `pin`, `year` +{% enddocs %} + # vw_assessment_roll {% docs view_vw_assessment_roll %} @@ -120,17 +160,6 @@ The assessment stages are: **Primary Key**: `year`, `pin`, `stage_name` {% enddocs %} -# vw_res_report_summary - -{% docs view_vw_res_report_summary %} -Aggregates statistics on characteristics, classes, AVs, and sales -by assessment stage, property groups, year, and various geographies. -Feeds public reporting assets. - -**Primary Key**: `year`, `geography_type`, `geography_id`, `assessment_stage`, -`property_group` -{% enddocs %} - # vw_top_5 {% docs view_vw_top_5 %} diff --git a/dbt/models/reporting/reporting.res_report_summary.sql b/dbt/models/reporting/reporting.res_report_summary.sql index 717f61428..b48ec94a0 100644 --- a/dbt/models/reporting/reporting.res_report_summary.sql +++ b/dbt/models/reporting/reporting.res_report_summary.sql @@ -27,112 +27,15 @@ Intended to be materialized daily through a GitHub action. ) }} --- AVs and model values -WITH all_fmvs AS ( - SELECT - ap.meta_pin AS pin, - ap.year, - 'model' AS assessment_stage, - ap.pred_pin_final_fmv_round AS total - FROM {{ source('model', 'assessment_pin') }} AS ap - INNER JOIN {{ ref('model.final_model') }} AS fm - ON ap.run_id = fm.run_id - AND ap.year = fm.year - AND ( - -- If reassessment year, use different models for different towns - ( - CONTAINS(fm.township_code_coverage, ap.township_code) - AND ap.meta_triad_code = fm.triad_code - ) - -- Otherwise, just use whichever model is "final" - OR (ap.meta_triad_code != fm.triad_code AND fm.is_final) - ) - - UNION ALL - - SELECT - pin, - year, - stage_name AS assessment_stage, - tot * 10 AS total - FROM {{ ref('reporting.vw_pin_value_long') }} - WHERE year >= '2021' -), - --- Combined SF/MF and condo characteristics -chars AS ( - SELECT - parid AS pin, - taxyr AS year, - MIN(yrblt) AS yrblt, - SUM(sfla) AS total_bldg_sf - FROM {{ source('iasworld', 'dweldat') }} - WHERE cur = 'Y' - AND deactivat IS NULL - GROUP BY parid, taxyr - UNION ALL - SELECT - pin, - year, - char_yrblt AS yrblt, - char_building_sf AS total_bldg_sf - FROM {{ ref('default.vw_pin_condo_char') }} - WHERE NOT is_parking_space - AND NOT is_common_area +-- Assign input tables to CTEs for ease of reference in macros +WITH all_values AS ( + SELECT * FROM {{ ref('reporting.res_report_summary_values_input') }} ), --- Join land, chars, and reporting groups to values -all_values AS ( - SELECT - fmvs.pin, - vptc.property_group, - vptc.class, - vptc.triad_name AS triad, - vptc.township_code, - CONCAT(vptc.township_code, vptc.nbhd) AS townnbhd, - fmvs.year, - fmvs.assessment_stage, - fmvs.total, - chars.yrblt, - chars.total_bldg_sf, - vpl.sf AS total_land_sf - FROM all_fmvs AS fmvs - LEFT JOIN {{ ref('reporting.vw_pin_township_class') }} AS vptc - ON fmvs.pin = vptc.pin - AND fmvs.year = vptc.year - INNER JOIN chars - ON fmvs.pin = chars.pin - AND fmvs.year = chars.year - LEFT JOIN {{ ref('default.vw_pin_land') }} AS vpl - ON fmvs.pin = vpl.pin - AND fmvs.year = vpl.year - WHERE vptc.property_group IS NOT NULL - AND vptc.triad_name IS NOT NULL -), - --- Sales, filtered to exclude outliers and mutlisales -sales AS ( - SELECT - vwps.sale_price, - vwps.year AS sale_year, - tc.property_group, - tc.township_code, - vwps.nbhd AS townnbhd - FROM {{ ref('default.vw_pin_sale') }} AS vwps - LEFT JOIN {{ ref('reporting.vw_pin_township_class') }} AS tc - ON vwps.pin = tc.pin - AND vwps.year = tc.year - WHERE NOT vwps.is_multisale - AND NOT vwps.sale_filter_is_outlier - AND NOT vwps.sale_filter_deed_type - AND NOT vwps.sale_filter_less_than_10k - AND NOT vwps.sale_filter_same_sale_within_365 - AND tc.property_group IS NOT NULL - AND tc.triad_name IS NOT NULL +all_sales AS ( + SELECT * FROM {{ ref('reporting.res_report_summary_sales_input') }} ), ---- AGGREGATE --- - -- Aggregate and stack stats on AV and characteristics for each reporting group aggregated_values AS ( -- By township, assessment_stage, and property group @@ -157,25 +60,25 @@ aggregated_values AS ( ), -- Aggregate and stack stats on sales for each reporting group -all_sales AS ( +aggregated_sales AS ( -- By township and property group {{ res_report_summarize_sales( - from = 'sales', geo_type = 'Town', prop_group = True + from = 'all_sales', geo_type = 'Town', prop_group = True ) }} UNION ALL -- By township {{ res_report_summarize_sales( - from = 'sales', geo_type = 'Town', prop_group = False + from = 'all_sales', geo_type = 'Town', prop_group = False ) }} UNION ALL -- By neighborhood and property group {{ res_report_summarize_sales( - from = 'sales', geo_type = 'TownNBHD', prop_group = True + from = 'all_sales', geo_type = 'TownNBHD', prop_group = True ) }} UNION ALL -- By neighborhood {{ res_report_summarize_sales( - from = 'sales', geo_type = 'TownNBHD', prop_group = False + from = 'all_sales', geo_type = 'TownNBHD', prop_group = False ) }} ) @@ -189,7 +92,7 @@ SELECT FROM aggregated_values AS av -- Join sales so that values for a given year can be compared -- to a complete set of sales from the previous year -LEFT JOIN all_sales AS asl +LEFT JOIN aggregated_sales AS asl ON av.geography_id = asl.geography_id AND CAST(av.year AS INT) = CAST(asl.sale_year AS INT) + 1 AND av.property_group = asl.property_group diff --git a/dbt/models/reporting/reporting.res_report_summary_sales_input.sql b/dbt/models/reporting/reporting.res_report_summary_sales_input.sql new file mode 100644 index 000000000..98058cd1b --- /dev/null +++ b/dbt/models/reporting/reporting.res_report_summary_sales_input.sql @@ -0,0 +1,39 @@ +/* +Input table for `reporting.res_report_summary` that produces the raw sales data +that `res_report_summary` aggregates. We split these input data out into a +separate table to speed up query time for `res_report_summary`, since otherwise +it needs to rerun the query logic below for every possible geography and +reporting group combination. + +See `reporting.res_report_summary` for a full description of these data. + +Intended to be materialized daily through a GitHub action. +*/ + +{{ + config( + materialized='table', + partitioned_by=['sale_year'] + ) +}} + +-- Sales, filtered to exclude outliers and mutlisales +SELECT + vwps.pin, + vwps.doc_no, + vwps.sale_price, + tc.property_group, + tc.township_code, + vwps.nbhd AS townnbhd, + vwps.year AS sale_year +FROM {{ ref('default.vw_pin_sale') }} AS vwps +LEFT JOIN {{ ref('reporting.vw_pin_township_class') }} AS tc + ON vwps.pin = tc.pin + AND vwps.year = tc.year +WHERE NOT vwps.is_multisale + AND NOT vwps.sale_filter_is_outlier + AND NOT vwps.sale_filter_deed_type + AND NOT vwps.sale_filter_less_than_10k + AND NOT vwps.sale_filter_same_sale_within_365 + AND tc.property_group IS NOT NULL + AND tc.triad_name IS NOT NULL diff --git a/dbt/models/reporting/reporting.res_report_summary_values_input.sql b/dbt/models/reporting/reporting.res_report_summary_values_input.sql new file mode 100644 index 000000000..c893cc6ff --- /dev/null +++ b/dbt/models/reporting/reporting.res_report_summary_values_input.sql @@ -0,0 +1,106 @@ +/* +Input table for `reporting.res_report_summary` that produces the raw +characteristic and value data that `res_report_summary` aggregates. We split +these input data out into a separate table to speed up query time for +`res_report_summary`, since otherwise it needs to rerun the query logic below +for every possible geography and reporting group combination. + +This table takes model and assessment values from two separate tables and +stacks them. Model and assessment values are gathered independently and +aggregated via a UNION rather than a JOIN, so it's important to keep in mind +that years for model and assessment stages do NOT need to match, i.e. we can +have 2023 model values in the table before there are any 2023 assessment values +to report on. + +See `reporting.res_report_summary` for a full description of these data. + +Intended to be materialized daily through a GitHub action. +*/ + +{{ + config( + materialized='table', + partitioned_by=['year'] + ) +}} + +-- AVs and model values +WITH all_fmvs AS ( + SELECT + ap.meta_pin AS pin, + ap.year, + 'model' AS assessment_stage, + ap.pred_pin_final_fmv_round AS total + FROM {{ source('model', 'assessment_pin') }} AS ap + INNER JOIN {{ ref('model.final_model') }} AS fm + ON ap.run_id = fm.run_id + AND ap.year = fm.year + AND ( + -- If reassessment year, use different models for different towns + ( + CONTAINS(fm.township_code_coverage, ap.township_code) + AND ap.meta_triad_code = fm.triad_code + ) + -- Otherwise, just use whichever model is "final" + OR (ap.meta_triad_code != fm.triad_code AND fm.is_final) + ) + + UNION ALL + + SELECT + pin, + year, + stage_name AS assessment_stage, + tot * 10 AS total + FROM {{ ref('reporting.vw_pin_value_long') }} + WHERE year >= '2021' +), + +-- Combined SF/MF and condo characteristics +chars AS ( + SELECT + parid AS pin, + taxyr AS year, + MIN(yrblt) AS yrblt, + SUM(sfla) AS total_bldg_sf + FROM {{ source('iasworld', 'dweldat') }} + WHERE cur = 'Y' + AND deactivat IS NULL + GROUP BY parid, taxyr + UNION ALL + SELECT + pin, + year, + char_yrblt AS yrblt, + char_building_sf AS total_bldg_sf + FROM {{ ref('default.vw_pin_condo_char') }} + WHERE NOT is_parking_space + AND NOT is_common_area +) + +-- Join land, chars, and reporting groups to values +SELECT + fmvs.pin, + vptc.property_group, + vptc.class, + vptc.triad_name AS triad, + vptc.township_code, + CONCAT(vptc.township_code, vptc.nbhd) AS townnbhd, + fmvs.assessment_stage, + fmvs.total, + chars.yrblt, + chars.total_bldg_sf, + vpl.sf AS total_land_sf, + fmvs.year +FROM all_fmvs AS fmvs +LEFT JOIN {{ ref('reporting.vw_pin_township_class') }} AS vptc + ON fmvs.pin = vptc.pin + AND fmvs.year = vptc.year +INNER JOIN chars + ON fmvs.pin = chars.pin + AND fmvs.year = chars.year +LEFT JOIN {{ ref('default.vw_pin_land') }} AS vpl + ON fmvs.pin = vpl.pin + AND fmvs.year = vpl.year +WHERE vptc.property_group IS NOT NULL + AND vptc.triad_name IS NOT NULL diff --git a/dbt/models/reporting/schema.yml b/dbt/models/reporting/schema.yml index f7a666bf0..42f50270e 100644 --- a/dbt/models/reporting/schema.yml +++ b/dbt/models/reporting/schema.yml @@ -123,6 +123,18 @@ models: - assessment_stage - year + - name: reporting.res_report_summary_sales_input + description: '{{ doc("table_res_report_summary_sales_input") }}' + config: + tags: + - daily + + - name: reporting.res_report_summary_values_input + description: '{{ doc("table_res_report_summary_values_input") }}' + config: + tags: + - daily + - name: reporting.vw_assessment_roll description: '{{ doc("view_vw_assessment_roll") }}' data_tests: From 5d940eee5d62460d411cf8c8af457bb5dad5611c Mon Sep 17 00:00:00 2001 From: Jean Cochrane Date: Mon, 25 Nov 2024 15:27:35 +0000 Subject: [PATCH 2/2] Fix small typo in reporting/docs.md --- dbt/models/reporting/docs.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/reporting/docs.md b/dbt/models/reporting/docs.md index 5ef4e1ecd..7862e9891 100644 --- a/dbt/models/reporting/docs.md +++ b/dbt/models/reporting/docs.md @@ -60,7 +60,7 @@ Materialized once per day to speed up queries for Tableau. Input table for `reporting.res_report_summary` that produces the raw sales data that `res_report_summary` aggregates. -We split these input data out into a separate table to speed up query time for +We split these input data out into a separate table to reduce resource use in `res_report_summary`, since otherwise it needs to rerun the query logic for every possible geography and reporting group combination. @@ -73,7 +73,7 @@ for every possible geography and reporting group combination. Input table for `reporting.res_report_summary` that produces the raw characteristic and value data that `res_report_summary` aggregates. -We split these input data out into a separate table to speed up query time for +We split these input data out into a separate table to reduce resource use in `res_report_summary`, since otherwise it needs to rerun the query logic for every possible geography and reporting group combination.