Skip to content

Commit

Permalink
Fix reporting.res_report_summary builds by turning input CTEs to ta…
Browse files Browse the repository at this point in the history
…bles (#654)

* Split reporting.res_report_summary input CTEs out into dedicated tables

* Fix small typo in reporting/docs.md
  • Loading branch information
jeancochrane authored Nov 26, 2024
1 parent 8c6633d commit a06fd39
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 121 deletions.
55 changes: 42 additions & 13 deletions dbt/models/reporting/docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,54 @@ Feeds public reporting assets.
# res_report_summary

{% docs table_res_report_summary %}
Materialized version of `reporting.vw_res_report_summary`.
Aggregates statistics on characteristics, classes, AVs, and sales
by assessment stage, property groups, year, and various geographies.
Feeds public reporting assets.

Materialized to speed up queries for Tableau.
Materialized once per day to speed up queries for Tableau.

## Nuance

- Model and assessment values are gathered independently and
aggregated via a union rather than a join, so it's important to keep in mind
that years for model and assessment stages do NOT need to match, i.e. we can
have 2023 model values in the table before there are any 2023 assessment
values to report on. Sales are added via a lagged join, so `sale_year` should
always be `year - 1`. It is also worth nothing that "model year" is
incremented by 1 solely for the sake of reporting in this table, meaning that
models with a `meta_year` value of 2022 in `model.assessment_pin` will
populate the table with a value of 2023 for `year`.

**Primary Key**: `year`, `geography_type`, `geography_id`, `assessment_stage`,
`property_group`
{% enddocs %}

# res_report_summary_sales_input

{% docs table_res_report_summary_sales_input %}
Input table for `reporting.res_report_summary` that produces the raw
sales data that `res_report_summary` aggregates.

We split these input data out into a separate table to reduce resource use in
`res_report_summary`, since otherwise it needs to rerun the query logic
for every possible geography and reporting group combination.

**Primary Key**: `pin`, `doc_no`
{% enddocs %}

# res_report_summary_values_input

{% docs table_res_report_summary_values_input %}
Input table for `reporting.res_report_summary` that produces the raw
characteristic and value data that `res_report_summary` aggregates.

We split these input data out into a separate table to reduce resource use in
`res_report_summary`, since otherwise it needs to rerun the query logic
for every possible geography and reporting group combination.

**Primary Key**: `pin`, `year`
{% enddocs %}

# vw_assessment_roll

{% docs view_vw_assessment_roll %}
Expand Down Expand Up @@ -120,17 +160,6 @@ The assessment stages are:
**Primary Key**: `year`, `pin`, `stage_name`
{% enddocs %}

# vw_res_report_summary

{% docs view_vw_res_report_summary %}
Aggregates statistics on characteristics, classes, AVs, and sales
by assessment stage, property groups, year, and various geographies.
Feeds public reporting assets.

**Primary Key**: `year`, `geography_type`, `geography_id`, `assessment_stage`,
`property_group`
{% enddocs %}

# vw_top_5

{% docs view_vw_top_5 %}
Expand Down
119 changes: 11 additions & 108 deletions dbt/models/reporting/reporting.res_report_summary.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,112 +27,15 @@ Intended to be materialized daily through a GitHub action.
)
}}

-- AVs and model values
WITH all_fmvs AS (
SELECT
ap.meta_pin AS pin,
ap.year,
'model' AS assessment_stage,
ap.pred_pin_final_fmv_round AS total
FROM {{ source('model', 'assessment_pin') }} AS ap
INNER JOIN {{ ref('model.final_model') }} AS fm
ON ap.run_id = fm.run_id
AND ap.year = fm.year
AND (
-- If reassessment year, use different models for different towns
(
CONTAINS(fm.township_code_coverage, ap.township_code)
AND ap.meta_triad_code = fm.triad_code
)
-- Otherwise, just use whichever model is "final"
OR (ap.meta_triad_code != fm.triad_code AND fm.is_final)
)

UNION ALL

SELECT
pin,
year,
stage_name AS assessment_stage,
tot * 10 AS total
FROM {{ ref('reporting.vw_pin_value_long') }}
WHERE year >= '2021'
),

-- Combined SF/MF and condo characteristics
chars AS (
SELECT
parid AS pin,
taxyr AS year,
MIN(yrblt) AS yrblt,
SUM(sfla) AS total_bldg_sf
FROM {{ source('iasworld', 'dweldat') }}
WHERE cur = 'Y'
AND deactivat IS NULL
GROUP BY parid, taxyr
UNION ALL
SELECT
pin,
year,
char_yrblt AS yrblt,
char_building_sf AS total_bldg_sf
FROM {{ ref('default.vw_pin_condo_char') }}
WHERE NOT is_parking_space
AND NOT is_common_area
-- Assign input tables to CTEs for ease of reference in macros
WITH all_values AS (
SELECT * FROM {{ ref('reporting.res_report_summary_values_input') }}
),

-- Join land, chars, and reporting groups to values
all_values AS (
SELECT
fmvs.pin,
vptc.property_group,
vptc.class,
vptc.triad_name AS triad,
vptc.township_code,
CONCAT(vptc.township_code, vptc.nbhd) AS townnbhd,
fmvs.year,
fmvs.assessment_stage,
fmvs.total,
chars.yrblt,
chars.total_bldg_sf,
vpl.sf AS total_land_sf
FROM all_fmvs AS fmvs
LEFT JOIN {{ ref('reporting.vw_pin_township_class') }} AS vptc
ON fmvs.pin = vptc.pin
AND fmvs.year = vptc.year
INNER JOIN chars
ON fmvs.pin = chars.pin
AND fmvs.year = chars.year
LEFT JOIN {{ ref('default.vw_pin_land') }} AS vpl
ON fmvs.pin = vpl.pin
AND fmvs.year = vpl.year
WHERE vptc.property_group IS NOT NULL
AND vptc.triad_name IS NOT NULL
),

-- Sales, filtered to exclude outliers and mutlisales
sales AS (
SELECT
vwps.sale_price,
vwps.year AS sale_year,
tc.property_group,
tc.township_code,
vwps.nbhd AS townnbhd
FROM {{ ref('default.vw_pin_sale') }} AS vwps
LEFT JOIN {{ ref('reporting.vw_pin_township_class') }} AS tc
ON vwps.pin = tc.pin
AND vwps.year = tc.year
WHERE NOT vwps.is_multisale
AND NOT vwps.sale_filter_is_outlier
AND NOT vwps.sale_filter_deed_type
AND NOT vwps.sale_filter_less_than_10k
AND NOT vwps.sale_filter_same_sale_within_365
AND tc.property_group IS NOT NULL
AND tc.triad_name IS NOT NULL
all_sales AS (
SELECT * FROM {{ ref('reporting.res_report_summary_sales_input') }}
),

--- AGGREGATE ---

-- Aggregate and stack stats on AV and characteristics for each reporting group
aggregated_values AS (
-- By township, assessment_stage, and property group
Expand All @@ -157,25 +60,25 @@ aggregated_values AS (
),

-- Aggregate and stack stats on sales for each reporting group
all_sales AS (
aggregated_sales AS (
-- By township and property group
{{ res_report_summarize_sales(
from = 'sales', geo_type = 'Town', prop_group = True
from = 'all_sales', geo_type = 'Town', prop_group = True
) }}
UNION ALL
-- By township
{{ res_report_summarize_sales(
from = 'sales', geo_type = 'Town', prop_group = False
from = 'all_sales', geo_type = 'Town', prop_group = False
) }}
UNION ALL
-- By neighborhood and property group
{{ res_report_summarize_sales(
from = 'sales', geo_type = 'TownNBHD', prop_group = True
from = 'all_sales', geo_type = 'TownNBHD', prop_group = True
) }}
UNION ALL
-- By neighborhood
{{ res_report_summarize_sales(
from = 'sales', geo_type = 'TownNBHD', prop_group = False
from = 'all_sales', geo_type = 'TownNBHD', prop_group = False
) }}
)

Expand All @@ -189,7 +92,7 @@ SELECT
FROM aggregated_values AS av
-- Join sales so that values for a given year can be compared
-- to a complete set of sales from the previous year
LEFT JOIN all_sales AS asl
LEFT JOIN aggregated_sales AS asl
ON av.geography_id = asl.geography_id
AND CAST(av.year AS INT) = CAST(asl.sale_year AS INT) + 1
AND av.property_group = asl.property_group
39 changes: 39 additions & 0 deletions dbt/models/reporting/reporting.res_report_summary_sales_input.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
Input table for `reporting.res_report_summary` that produces the raw sales data
that `res_report_summary` aggregates. We split these input data out into a
separate table to speed up query time for `res_report_summary`, since otherwise
it needs to rerun the query logic below for every possible geography and
reporting group combination.
See `reporting.res_report_summary` for a full description of these data.
Intended to be materialized daily through a GitHub action.
*/

{{
config(
materialized='table',
partitioned_by=['sale_year']
)
}}

-- Sales, filtered to exclude outliers and mutlisales
SELECT
vwps.pin,
vwps.doc_no,
vwps.sale_price,
tc.property_group,
tc.township_code,
vwps.nbhd AS townnbhd,
vwps.year AS sale_year
FROM {{ ref('default.vw_pin_sale') }} AS vwps
LEFT JOIN {{ ref('reporting.vw_pin_township_class') }} AS tc
ON vwps.pin = tc.pin
AND vwps.year = tc.year
WHERE NOT vwps.is_multisale
AND NOT vwps.sale_filter_is_outlier
AND NOT vwps.sale_filter_deed_type
AND NOT vwps.sale_filter_less_than_10k
AND NOT vwps.sale_filter_same_sale_within_365
AND tc.property_group IS NOT NULL
AND tc.triad_name IS NOT NULL
106 changes: 106 additions & 0 deletions dbt/models/reporting/reporting.res_report_summary_values_input.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
Input table for `reporting.res_report_summary` that produces the raw
characteristic and value data that `res_report_summary` aggregates. We split
these input data out into a separate table to speed up query time for
`res_report_summary`, since otherwise it needs to rerun the query logic below
for every possible geography and reporting group combination.
This table takes model and assessment values from two separate tables and
stacks them. Model and assessment values are gathered independently and
aggregated via a UNION rather than a JOIN, so it's important to keep in mind
that years for model and assessment stages do NOT need to match, i.e. we can
have 2023 model values in the table before there are any 2023 assessment values
to report on.
See `reporting.res_report_summary` for a full description of these data.
Intended to be materialized daily through a GitHub action.
*/

{{
config(
materialized='table',
partitioned_by=['year']
)
}}

-- AVs and model values
WITH all_fmvs AS (
SELECT
ap.meta_pin AS pin,
ap.year,
'model' AS assessment_stage,
ap.pred_pin_final_fmv_round AS total
FROM {{ source('model', 'assessment_pin') }} AS ap
INNER JOIN {{ ref('model.final_model') }} AS fm
ON ap.run_id = fm.run_id
AND ap.year = fm.year
AND (
-- If reassessment year, use different models for different towns
(
CONTAINS(fm.township_code_coverage, ap.township_code)
AND ap.meta_triad_code = fm.triad_code
)
-- Otherwise, just use whichever model is "final"
OR (ap.meta_triad_code != fm.triad_code AND fm.is_final)
)

UNION ALL

SELECT
pin,
year,
stage_name AS assessment_stage,
tot * 10 AS total
FROM {{ ref('reporting.vw_pin_value_long') }}
WHERE year >= '2021'
),

-- Combined SF/MF and condo characteristics
chars AS (
SELECT
parid AS pin,
taxyr AS year,
MIN(yrblt) AS yrblt,
SUM(sfla) AS total_bldg_sf
FROM {{ source('iasworld', 'dweldat') }}
WHERE cur = 'Y'
AND deactivat IS NULL
GROUP BY parid, taxyr
UNION ALL
SELECT
pin,
year,
char_yrblt AS yrblt,
char_building_sf AS total_bldg_sf
FROM {{ ref('default.vw_pin_condo_char') }}
WHERE NOT is_parking_space
AND NOT is_common_area
)

-- Join land, chars, and reporting groups to values
SELECT
fmvs.pin,
vptc.property_group,
vptc.class,
vptc.triad_name AS triad,
vptc.township_code,
CONCAT(vptc.township_code, vptc.nbhd) AS townnbhd,
fmvs.assessment_stage,
fmvs.total,
chars.yrblt,
chars.total_bldg_sf,
vpl.sf AS total_land_sf,
fmvs.year
FROM all_fmvs AS fmvs
LEFT JOIN {{ ref('reporting.vw_pin_township_class') }} AS vptc
ON fmvs.pin = vptc.pin
AND fmvs.year = vptc.year
INNER JOIN chars
ON fmvs.pin = chars.pin
AND fmvs.year = chars.year
LEFT JOIN {{ ref('default.vw_pin_land') }} AS vpl
ON fmvs.pin = vpl.pin
AND fmvs.year = vpl.year
WHERE vptc.property_group IS NOT NULL
AND vptc.triad_name IS NOT NULL
12 changes: 12 additions & 0 deletions dbt/models/reporting/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,18 @@ models:
- assessment_stage
- year

- name: reporting.res_report_summary_sales_input
description: '{{ doc("table_res_report_summary_sales_input") }}'
config:
tags:
- daily

- name: reporting.res_report_summary_values_input
description: '{{ doc("table_res_report_summary_values_input") }}'
config:
tags:
- daily

- name: reporting.vw_assessment_roll
description: '{{ doc("view_vw_assessment_roll") }}'
data_tests:
Expand Down

0 comments on commit a06fd39

Please sign in to comment.