Skip to content

Commit

Permalink
Try another strategy
Browse files Browse the repository at this point in the history
  • Loading branch information
wagnerlmichael committed Oct 1, 2024
1 parent 2225441 commit 77683f7
Showing 1 changed file with 100 additions and 113 deletions.
213 changes: 100 additions & 113 deletions dbt/models/default/default.vw_pin_sale.sql
Original file line number Diff line number Diff line change
Expand Up @@ -84,38 +84,20 @@ unique_sales AS (
sales.instrtyp NOT IN ('03', '04', '06')
ORDER BY sales.price DESC, sales.salekey ASC
) AS max_price,
-- We remove the letter 'D' that trails some document numbers in
-- iasworld.sales since it prevents us from joining to mydec sales.
-- This creates one instance where we have duplicate document
-- numbers, so we sort by sale date (specifically to avoid conflicts
-- with detecting the earliest duplicate sale when there are
-- multiple within one document number, within a year) within the
-- new document number to identify and remove the sale causing the
-- duplicate document number.
ROW_NUMBER() OVER (
PARTITION BY
NULLIF(REPLACE(sales.instruno, 'D', ''), ''),
sales.instrtyp NOT IN ('03', '04', '06'),
sales.price > 10000
ORDER BY sales.saledt ASC, sales.salekey ASC
) AS bad_doc_no,
-- Some pins sell for the exact same price a few months after
-- they're sold (we need to make sure to only include deed types we
-- want). These sales are unnecessary for modeling and may be
-- duplicates. We need to order by salekey as well in case of any
-- ties within price, date, and pin.
LAG(DATE_PARSE(SUBSTR(sales.saledt, 1, 10), '%Y-%m-%d')) OVER (
PARTITION BY
sales.parid,
sales.price,
sales.instrtyp NOT IN ('03', '04', '06')
ORDER BY sales.saledt ASC, sales.salekey ASC
) AS same_price_earlier_date,
-- Historically, this view filtered out sales of $10k or less, as well
-- as quit claims, executor deeds, beneficial interests, and NULL deed
-- types. Now we create "legacy" filter columns so that this filtering
-- can be reproduced while still allowing all sales into the view.
sales.price <= 10000 AS sale_filter_less_than_10k,
COALESCE(
sales.instrtyp IN ('03', '04', '06') OR sales.instrtyp IS NULL,
Expand Down Expand Up @@ -143,80 +125,91 @@ unique_sales AS (
),

mydec_sales AS (
SELECT * FROM (
SELECT
REPLACE(document_number, 'D', '') AS doc_no,
REPLACE(line_1_primary_pin, '-', '') AS pin,
DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date,
SUBSTR(line_4_instrument_date, 1, 4) AS year,
line_5_instrument_type AS mydec_deed_type,
NULLIF(TRIM(seller_name), '') AS seller_name,
NULLIF(TRIM(buyer_name), '') AS buyer_name,
CAST(line_11_full_consideration AS BIGINT) AS sale_price,
line_2_total_parcels AS num_parcels_sale,
COALESCE(line_2_total_parcels > 1, FALSE) AS is_multisale,
COALESCE(line_7_property_advertised = 1, FALSE)
AS mydec_property_advertised,
COALESCE(line_10a = 1, FALSE)
AS mydec_is_installment_contract_fulfilled,
COALESCE(line_10b = 1, FALSE)
AS mydec_is_sale_between_related_individuals_or_corporate_affiliates,
COALESCE(line_10c = 1, FALSE)
AS mydec_is_transfer_of_less_than_100_percent_interest,
COALESCE(line_10d = 1, FALSE)
AS mydec_is_court_ordered_sale,
COALESCE(line_10e = 1, FALSE)
AS mydec_is_sale_in_lieu_of_foreclosure,
COALESCE(line_10f = 1, FALSE)
AS mydec_is_condemnation,
COALESCE(line_10g = 1, FALSE)
AS mydec_is_short_sale,
COALESCE(line_10h = 1, FALSE)
AS mydec_is_bank_reo_real_estate_owned,
COALESCE(line_10i = 1, FALSE)
AS mydec_is_auction_sale,
COALESCE(line_10j = 1, FALSE)
AS mydec_is_seller_buyer_a_relocation_company,
COALESCE(line_10k = 1, FALSE)
AS mydec_is_seller_buyer_a_financial_institution_or_government_agency,
COALESCE(line_10l = 1, FALSE)
AS mydec_is_buyer_a_real_estate_investment_trust,
COALESCE(line_10m = 1, FALSE)
AS mydec_is_buyer_a_pension_fund,
COALESCE(line_10n = 1, FALSE)
AS mydec_is_buyer_an_adjacent_property_owner,
COALESCE(line_10o = 1, FALSE)
AS mydec_is_buyer_exercising_an_option_to_purchase,
COALESCE(line_10p = 1, FALSE)
AS mydec_is_simultaneous_trade_of_property,
COALESCE(line_10q = 1, FALSE)
AS mydec_is_sale_leaseback,
COALESCE(line_10s = 1, FALSE)
AS mydec_is_homestead_exemption,
line_10s_generalalternative
AS mydec_homestead_exemption_general_alternative,
line_10s_senior_citizens
AS mydec_homestead_exemption_senior_citizens,
line_10s_senior_citizens_assessment_freeze
AS mydec_homestead_exemption_senior_citizens_assessment_freeze,
(
COALESCE(line_10b, 0) + COALESCE(line_10c, 0)
+ COALESCE(line_10d, 0) + COALESCE(line_10e, 0)
+ COALESCE(line_10f, 0) + COALESCE(line_10g, 0)
+ COALESCE(line_10h, 0) + COALESCE(line_10i, 0)
+ COALESCE(line_10k, 0)
) > 0 AS sale_filter_ptax_flag,
COUNT() OVER (
PARTITION BY line_1_primary_pin, line_4_instrument_date
) AS num_single_day_sales
FROM {{ source('sale', 'mydec') }}
WHERE line_2_total_parcels = 1
-- Single-parcel MyDec sales, de-duplicated by pin/sale date for 2020 and
-- earlier, plus a legacy filter flag marking any sale recorded within 365
-- days of the previous sale of the same pin.
SELECT
    *,
    -- The first sale of a pin has no previous row, so LAG() and therefore the
    -- comparison are NULL; COALESCE turns that into FALSE.
    COALESCE(
        DATE_DIFF(
            'day',
            LAG(sale_date) OVER (
                PARTITION BY pin
                -- doc_no breaks ties between rows sharing a pin and sale
                -- date (the WHERE clause below lets such rows through after
                -- 2020), so the row that gets flagged is deterministic.
                ORDER BY sale_date ASC, doc_no ASC
            ),
            sale_date
        ) <= 365,
        FALSE
    ) AS sale_filter_same_sale_within_365
FROM (
    SELECT
        -- Strip 'D' from document numbers and dashes from pins
        REPLACE(document_number, 'D', '') AS doc_no,
        REPLACE(line_1_primary_pin, '-', '') AS pin,
        DATE_PARSE(line_4_instrument_date, '%Y-%m-%d') AS sale_date,
        SUBSTR(line_4_instrument_date, 1, 4) AS year,
        line_5_instrument_type AS mydec_deed_type,
        -- Blank or whitespace-only names become NULL
        NULLIF(TRIM(seller_name), '') AS seller_name,
        NULLIF(TRIM(buyer_name), '') AS buyer_name,
        CAST(line_11_full_consideration AS BIGINT) AS sale_price,
        line_2_total_parcels AS num_parcels_sale,
        -- Always FALSE here, since the WHERE clause below keeps only rows
        -- with line_2_total_parcels = 1
        COALESCE(line_2_total_parcels > 1, FALSE) AS is_multisale,
        -- Each indicator below is TRUE only when its source column equals 1;
        -- NULL is treated as FALSE
        COALESCE(line_7_property_advertised = 1, FALSE)
            AS mydec_property_advertised,
        COALESCE(line_10a = 1, FALSE)
            AS mydec_is_installment_contract_fulfilled,
        COALESCE(line_10b = 1, FALSE)
            AS mydec_is_sale_between_related_individuals_or_corporate_affiliates,
        COALESCE(line_10c = 1, FALSE)
            AS mydec_is_transfer_of_less_than_100_percent_interest,
        COALESCE(line_10d = 1, FALSE)
            AS mydec_is_court_ordered_sale,
        COALESCE(line_10e = 1, FALSE)
            AS mydec_is_sale_in_lieu_of_foreclosure,
        COALESCE(line_10f = 1, FALSE)
            AS mydec_is_condemnation,
        COALESCE(line_10g = 1, FALSE)
            AS mydec_is_short_sale,
        COALESCE(line_10h = 1, FALSE)
            AS mydec_is_bank_reo_real_estate_owned,
        COALESCE(line_10i = 1, FALSE)
            AS mydec_is_auction_sale,
        COALESCE(line_10j = 1, FALSE)
            AS mydec_is_seller_buyer_a_relocation_company,
        COALESCE(line_10k = 1, FALSE)
            AS mydec_is_seller_buyer_a_financial_institution_or_government_agency,
        COALESCE(line_10l = 1, FALSE)
            AS mydec_is_buyer_a_real_estate_investment_trust,
        COALESCE(line_10m = 1, FALSE)
            AS mydec_is_buyer_a_pension_fund,
        COALESCE(line_10n = 1, FALSE)
            AS mydec_is_buyer_an_adjacent_property_owner,
        COALESCE(line_10o = 1, FALSE)
            AS mydec_is_buyer_exercising_an_option_to_purchase,
        COALESCE(line_10p = 1, FALSE)
            AS mydec_is_simultaneous_trade_of_property,
        COALESCE(line_10q = 1, FALSE)
            AS mydec_is_sale_leaseback,
        COALESCE(line_10s = 1, FALSE)
            AS mydec_is_homestead_exemption,
        line_10s_generalalternative
            AS mydec_homestead_exemption_general_alternative,
        line_10s_senior_citizens
            AS mydec_homestead_exemption_senior_citizens,
        line_10s_senior_citizens_assessment_freeze
            AS mydec_homestead_exemption_senior_citizens_assessment_freeze,
        -- TRUE when any of line_10b through line_10i or line_10k is set.
        -- The other line_10 indicators do not contribute to this flag.
        (
            COALESCE(line_10b, 0) + COALESCE(line_10c, 0)
            + COALESCE(line_10d, 0) + COALESCE(line_10e, 0)
            + COALESCE(line_10f, 0) + COALESCE(line_10g, 0)
            + COALESCE(line_10h, 0) + COALESCE(line_10i, 0)
            + COALESCE(line_10k, 0)
        ) > 0 AS sale_filter_ptax_flag,
        -- Number of source rows sharing this pin and instrument date
        COUNT(*) OVER (
            PARTITION BY line_1_primary_pin, line_4_instrument_date
        ) AS num_single_day_sales
    FROM {{ source('sale', 'mydec') }}
    WHERE line_2_total_parcels = 1
)
-- For 2020 and earlier, keep a pin/date only when it has exactly one row;
-- later years keep every row. Window functions are evaluated after WHERE,
-- so the LAG() above only sees rows that survive this filter.
WHERE num_single_day_sales = 1
    OR (YEAR(sale_date) > 2020)
/* Some sales in mydec have multiple rows for one pin on a given sale date.
Prior to 2021 these rows sometimes have different dates than iasworld and,
when joined back onto unique_sales, create duplicates by pin/sale date. */
WHERE num_single_day_sales = 1
OR (YEAR(sale_date) > 2020)
),

max_version_flag AS (
Expand Down Expand Up @@ -311,10 +304,21 @@ cte_sales AS (
md_sales.mydec_homestead_exemption_general_alternative,
md_sales.mydec_homestead_exemption_senior_citizens,
md_sales.mydec_homestead_exemption_senior_citizens_assessment_freeze,
-- Include sale_filter_same_sale_within_365 from unique_sales
uq_sales.sale_filter_same_sale_within_365,
uq_sales.sale_filter_less_than_10k,
uq_sales.sale_filter_deed_type
-- Include sale_filter_same_sale_within_365 from both sources
COALESCE(uq_sales.sale_filter_same_sale_within_365, md_sales.sale_filter_same_sale_within_365, FALSE) AS sale_filter_same_sale_within_365,
-- Include sale_filter_less_than_10k and sale_filter_deed_type
-- Use appropriate values based on source
CASE
WHEN uq_sales.doc_no IS NOT NULL THEN uq_sales.sale_filter_less_than_10k
ELSE (md_sales.sale_price <= 10000)
END AS sale_filter_less_than_10k,
CASE
WHEN uq_sales.doc_no IS NOT NULL THEN uq_sales.sale_filter_deed_type
ELSE (
md_sales.mydec_deed_type IN ('03', '04', '06')
OR md_sales.mydec_deed_type IS NULL
)
END AS sale_filter_deed_type
FROM unique_sales AS uq_sales
FULL OUTER JOIN mydec_sales AS md_sales
ON uq_sales.doc_no = md_sales.doc_no
Expand All @@ -325,24 +329,7 @@ cte_sales AS (

combined_sales AS (
SELECT
cte_s.*,
-- Remove recalculation of sale_filter_same_sale_within_365
-- Keep the filters calculated in unique_sales
-- Also ensure that sale_filter_same_sale_within_365 is not NULL
COALESCE(cte_s.sale_filter_same_sale_within_365, FALSE)
AS sale_filter_same_sale_within_365,
-- Use the filters from unique_sales or calculate for MyDec sales
CASE
WHEN cte_s.source = 'iasworld' THEN cte_s.sale_filter_less_than_10k
ELSE (cte_s.sale_price_coalesced <= 10000)
END AS sale_filter_less_than_10k,
CASE
WHEN cte_s.source = 'iasworld' THEN cte_s.sale_filter_deed_type
ELSE (
cte_s.deed_type_coalesced IN ('03', '04', '06')
OR cte_s.deed_type_coalesced IS NULL
)
END AS sale_filter_deed_type
cte_s.*
FROM cte_sales AS cte_s
)

Expand Down

0 comments on commit 77683f7

Please sign in to comment.