Skip to content

Commit

Permalink
Add MIT IPEA project dataset (#307)
Browse files Browse the repository at this point in the history
* innitial commit

* Updated to include centriod column

* Performance improvement and minimum columns

Improved speed by using centriod join (as suggested by Rodrigo)
Drops un-needed columns. Only kept minimum required. Appears to be duplicates within the GPS data set.

* Add geometry column

Required downstream

* Replace geometry column with centroid

Large perf improvement

* Replace explicit trable ref with ref

* Clean up comments

* Fix over column

---------

Co-authored-by: Ashley Vicary <ashley.vicary@outlook.com>
Co-authored-by: Rodrigo Cunha <66736583+eng-rodrigocunha@users.noreply.github.com>
  • Loading branch information
3 people authored May 29, 2024
1 parent 88ebceb commit 130a62b
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 0 deletions.
3 changes: 3 additions & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -274,3 +274,6 @@ models:
staging:
+materialized: view
+schema: controle_financeiro_staging
mit_ipea_project:
+materialized: view
+schema: mit_ipea_project
80 changes: 80 additions & 0 deletions models/mit_ipea_project/h3_gps.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
-- Purpose: Create a table that assigns each GPS ping to a H3 tile

{{
config(
materialized='incremental',
incremental_strategy='insert_overwrite',
partition_by={
"field": "as_at",
"data_type": "date",
"granularity": "day"
}
)
}}

WITH
-- Union DISTINCT, both BRT and SPPO gps tables
-- The tables do contain some duplicates
brt_sppo_gps AS (

SELECT
modo, timestamp_gps, data, hora, id_veiculo, servico, latitude, longitude
FROM `rj-smtr.br_rj_riodejaneiro_veiculos.gps_sppo`
WHERE data = "{{ var('run_date') }}"

UNION DISTINCT

SELECT
modo, timestamp_gps, data, hora, id_veiculo, servico, latitude, longitude
FROM `rj-smtr.br_rj_riodejaneiro_veiculos.gps_brt`
WHERE data = "{{ var('run_date') }}"
),

-- Rename columns into english, join on H3 table via a circle fully encapsulating each tile
-- Each row is now a set of possible H3 tiles for each observation
gps AS (
SELECT
modo AS mode,
timestamp_gps,
data AS as_at,
hora AS time,
id_veiculo AS vehicle_id,
servico AS service,
latitude,
longitude,
ST_GEOGPOINT(longitude, latitude) AS geography,
tile_id
FROM brt_sppo_gps
LEFT JOIN {{ ref("vw_h3") }} AS h3
ON ST_DWITHIN(ST_GEOGPOINT(longitude, latitude), h3.centroid, 560)

),

h3_gps AS (
SELECT
*,
CASE
WHEN LAG(tile_id) OVER (PARTITION BY vehicle_id ORDER BY timestamp_gps) = tile_id THEN 0
ELSE 1
END AS tile_entry -- 1 when the vehicle first enters the tile. If 0, the vehicle is already in the tile.
FROM gps
LEFT JOIN {{ ref("vw_h3") }} AS h3
USING (tile_id)

WHERE ST_INTERSECTS(gps.geography, h3.geometry) IS TRUE

)
SELECT
as_at,
time AS tile_entry_time,
LEAD(time) OVER (PARTITION BY vehicle_id ORDER BY as_at, time) AS tile_exit_time,
mode,
service,
vehicle_id,
longitude,
latitude,
tile_id,
centroid

FROM h3_gps
WHERE tile_entry = 1
7 changes: 7 additions & 0 deletions models/mit_ipea_project/vw_h3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- H3 Resolution 8 view, convert geometry column to GEOGRAPHY type
SELECT tile_id,
resolution,
parent_id,
ST_GEOGFROMTEXT(geometry) AS geometry, -- Convert string in GEOGRAPHY value
ST_CENTROID(ST_GEOGFROMTEXT(geometry)) AS centroid
FROM `rj-smtr.br_rj_riodejaneiro_geo.h3_res8`
29 changes: 29 additions & 0 deletions models/mit_ipea_project/vw_vehicle_details.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
SELECT
data AS as_at,
CASE
WHEN data = MAX(data) OVER (PARTITION BY id_veiculo) THEN 'TRUE'
ELSE 'FALSE'
END AS latest_capture,
modo AS mode,
id_veiculo AS vehicle_id,
ano_fabricacao AS manufacture_year,
carroceria AS body_model,
data_ultima_vistoria AS last_inspection_date,
id_carroceria AS body_id,
id_chassi AS chassis_id,
id_fabricante_chassi AS chassis_manufacturer_id,
id_interno_carroceria AS internal_bodywork_id,
id_planta AS plant_id,
indicador_ar_condicionado AS aircon_indicator,
indicador_elevador AS elevator_indicator,
indicador_usb AS usb_indicator,
indicador_wifi AS wifi_indicator,
nome_chassi AS chassis_name,
permissao AS permit_number,
placa AS plate_id,
quantidade_lotacao_pe AS passengers_standing,
quantidade_lotacao_sentado AS passengers_sitting,
tipo_combustivel AS fuel_type,
tipo_veiculo AS vehicle_type,
status AS status
FROM rj-smtr.veiculo.sppo_licenciamento

0 comments on commit 130a62b

Please sign in to comment.