Skip to content

Commit

Permalink
Cria modelo br_rj_riodejaneiro_gtfs (#129)
Browse files Browse the repository at this point in the history
* criado o modelo para o gtfs

* update profile.yml

* criei arquivos sql para as tabelas do gtfs

* atualizei nomes dos arquivos sql

* [WIP] definição de vars/source

* adicionei dados ao schema

* parametrização dos modelos sql

* inseri descrições das tabelas no schema.yml

* modificado data_versao_gtfs nos modelos por data

* atualização de modelos

* atualização de query

* ajuste nos modelos shapes, shapes_geom e ordem_servico

* ajustei tipagem de dados de algumas tabelas

* wip

* atualização dos modelos shapes_geom e shapes

* alteração de modelos

* removi casts desnecessários

* ajustei as primary keys

* ajustes

* ajuste na sintaxe sql dos modelos

* ajuste de formatação

* correções de revisão

* update gitignore

* update review

* Update dbt_project.yml

* revisão dos modelos do gtfs

* alteração da descrição

* revert .gitignore

* reverte run.py

* Atualiza .gitignore

* Update run.py

* reverte profiles

* atualiza .gitignore

* reverte profiles.yml

* atualiza schema

* alteração nas descrições

* alterei database

* aprimora schema

* Atualiza shapes_geom

* atualiza utils

* reverte sources

* adiciona versao_modelo

* Update sources.yml

* adicionei descrição da coluna versao_modelo

---------

Co-authored-by: eng-rodrigocunha <engtransportes.rodrigocunha@gmail.com>
Co-authored-by: Rodrigo Cunha <66736583+eng-rodrigocunha@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 23, 2023
1 parent 0352880 commit 8b2aa31
Show file tree
Hide file tree
Showing 19 changed files with 812 additions and 4 deletions.
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ dbt-env/

# CONFIG
.DS_Store
.vscode/
*.vscode/
*.idea/
*__pycache__/

# DEV
scripts/
dev/
*dev/
7 changes: 7 additions & 0 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ vars:
stops_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.stops"
trips_staging: "rj-smtr-staging.br_rj_riodejaneiro_sigmob_staging.trips"

## GTFS
data_versao_gtfs: "YYYY-MM-DD"

### Subsídio SPPO (Ônibus) ###
buffer: 500 # distância em metros para buffer
perc_conformidade_distancia_min: 0
Expand Down Expand Up @@ -138,6 +141,10 @@ models:
projeto_subsidio_sppo:
+materialized: view
+schema: projeto_subsidio_sppo
br_rj_riodejaneiro_gtfs:
+materialized: incremental
+incremental_strategy: insert_overwrite
+schema: br_rj_riodejaneiro_gtfs
br_rj_riodejaneiro_sigmob:
+materialized: view
+schema: br_rj_riodejaneiro_sigmob
Expand Down
9 changes: 8 additions & 1 deletion dev/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,21 @@ def run_dbt_model(
upstream: bool = None,
downstream: bool = None,
exclude: str = None,
flags: str = "-x --profiles-dir ./dev",
flags: str = None,
_vars: Union[dict, List[Dict]] = None,
):
"""
Run a DBT model.
"""
run_command = "dbt run"

common_flags = "-x --profiles-dir ./dev"

if not flags:
flags = common_flags
else:
flags = common_flags + " " + flags

if not model:
model = f"{dataset_id}"
if table_id:
Expand Down
18 changes: 18 additions & 0 deletions models/br_rj_riodejaneiro_gtfs/agency_gtfs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{{
    config(
        alias = 'agency',
        unique_key = ['agency_id', 'data_versao'],
        partition_by = {
            'field': 'data_versao',
            'data_type': 'date',
            'granularity': 'day'
        }
    )
}}

-- GTFS agency records for a single feed version.
-- Reads the staging `agency` source, extracts the descriptive attributes from
-- the JSON `content` column, and keeps only the rows whose data_versao equals
-- the data_versao_gtfs var supplied at run time.
SELECT
    SAFE_CAST(data_versao AS DATE) AS data_versao,
    SAFE_CAST(agency_id AS STRING) AS agency_id,
    SAFE_CAST(JSON_VALUE(content, '$.agency_name') AS STRING) AS agency_name,
    SAFE_CAST(JSON_VALUE(content, '$.agency_url') AS STRING) AS agency_url,
    SAFE_CAST(JSON_VALUE(content, '$.agency_timezone') AS STRING) AS agency_timezone,
    SAFE_CAST(JSON_VALUE(content, '$.agency_lang') AS STRING) AS agency_lang,
    -- value of the project "version" var, stamped on every row
    '{{ var("version") }}' AS versao_modelo
FROM
    {{ source('br_rj_riodejaneiro_gtfs_staging', 'agency') }}
WHERE
    data_versao = '{{ var("data_versao_gtfs") }}'
19 changes: 19 additions & 0 deletions models/br_rj_riodejaneiro_gtfs/calendar_dates_gtfs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{{
    config(
        alias = 'calendar_dates',
        unique_key = ['service_id', 'date', 'data_versao'],
        partition_by = {
            'field': 'data_versao',
            'data_type': 'date',
            'granularity': 'day'
        }
    )
}}

-- GTFS calendar_dates (service exceptions) for a single feed version.
-- service_id and date come from staging columns; exception_type is read from
-- the JSON `content` column. Only rows whose data_versao equals the
-- data_versao_gtfs var are selected.
SELECT
    SAFE_CAST(data_versao AS DATE) AS data_versao,
    SAFE_CAST(service_id AS STRING) AS service_id,
    -- PARSE_DATE (without the SAFE. prefix) raises when the value does not
    -- match %Y%m%d, so a malformed date fails the run instead of becoming NULL.
    PARSE_DATE('%Y%m%d', SAFE_CAST(date AS STRING)) AS date,
    SAFE_CAST(JSON_VALUE(content, '$.exception_type') AS STRING) AS exception_type,
    -- value of the project "version" var, stamped on every row
    '{{ var("version") }}' AS versao_modelo
FROM
    {{ source('br_rj_riodejaneiro_gtfs_staging', 'calendar_dates') }}
WHERE
    data_versao = '{{ var("data_versao_gtfs") }}'
26 changes: 26 additions & 0 deletions models/br_rj_riodejaneiro_gtfs/calendar_gtfs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{{
    config(
        alias = 'calendar',
        unique_key = ['service_id', 'data_versao'],
        partition_by = {
            'field': 'data_versao',
            'data_type': 'date',
            'granularity': 'day'
        }
    )
}}

-- GTFS calendar (weekly service pattern) for a single feed version.
-- service_id comes from a staging column; the weekday flags and the validity
-- window are read from the JSON `content` column. Only rows whose data_versao
-- equals the data_versao_gtfs var are selected.
SELECT
    SAFE_CAST(data_versao AS DATE) AS data_versao,
    SAFE_CAST(service_id AS STRING) AS service_id,
    -- weekday flags are kept as the raw string found in the payload
    SAFE_CAST(JSON_VALUE(content, '$.monday') AS STRING) AS monday,
    SAFE_CAST(JSON_VALUE(content, '$.tuesday') AS STRING) AS tuesday,
    SAFE_CAST(JSON_VALUE(content, '$.wednesday') AS STRING) AS wednesday,
    SAFE_CAST(JSON_VALUE(content, '$.thursday') AS STRING) AS thursday,
    SAFE_CAST(JSON_VALUE(content, '$.friday') AS STRING) AS friday,
    SAFE_CAST(JSON_VALUE(content, '$.saturday') AS STRING) AS saturday,
    SAFE_CAST(JSON_VALUE(content, '$.sunday') AS STRING) AS sunday,
    -- PARSE_DATE (without the SAFE. prefix) raises when the value does not
    -- match %Y%m%d, so a malformed date fails the run instead of becoming NULL.
    PARSE_DATE(
        '%Y%m%d',
        SAFE_CAST(JSON_VALUE(content, '$.start_date') AS STRING)
    ) AS start_date,
    PARSE_DATE(
        '%Y%m%d',
        SAFE_CAST(JSON_VALUE(content, '$.end_date') AS STRING)
    ) AS end_date,
    -- value of the project "version" var, stamped on every row
    '{{ var("version") }}' AS versao_modelo
FROM
    {{ source('br_rj_riodejaneiro_gtfs_staging', 'calendar') }}
WHERE
    data_versao = '{{ var("data_versao_gtfs") }}'
23 changes: 23 additions & 0 deletions models/br_rj_riodejaneiro_gtfs/fare_attributes_gtfs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{{
    config(
        alias = 'fare_attributes',
        unique_key = ['fare_id', 'data_versao'],
        partition_by = {
            'field': 'data_versao',
            'data_type': 'date',
            'granularity': 'day'
        }
    )
}}

-- GTFS fare_attributes for a single feed version.
-- fare_id comes from a staging column; the remaining attributes are read from
-- the JSON `content` column. Only rows whose data_versao equals the
-- data_versao_gtfs var are selected.
SELECT
    SAFE_CAST(data_versao AS DATE) AS data_versao,
    SAFE_CAST(fare_id AS STRING) AS fare_id,
    -- NOTE(review): price is materialised as FLOAT64; confirm that binary
    -- floating point is acceptable for fare amounts downstream.
    SAFE_CAST(JSON_VALUE(content, '$.price') AS FLOAT64) AS price,
    SAFE_CAST(JSON_VALUE(content, '$.currency_type') AS STRING) AS currency_type,
    SAFE_CAST(JSON_VALUE(content, '$.payment_method') AS STRING) AS payment_method,
    SAFE_CAST(JSON_VALUE(content, '$.transfers') AS STRING) AS transfers,
    SAFE_CAST(JSON_VALUE(content, '$.agency_id') AS STRING) AS agency_id,
    SAFE_CAST(JSON_VALUE(content, '$.transfer_duration') AS INT64) AS transfer_duration,
    -- value of the project "version" var, stamped on every row
    '{{ var("version") }}' AS versao_modelo
FROM
    {{ source('br_rj_riodejaneiro_gtfs_staging', 'fare_attributes') }}
WHERE
    data_versao = '{{ var("data_versao_gtfs") }}'
22 changes: 22 additions & 0 deletions models/br_rj_riodejaneiro_gtfs/fare_rules_gtfs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{{
    config(
        alias = 'fare_rules',
        unique_key = ['fare_id', 'data_versao'],
        partition_by = {
            'field': 'data_versao',
            'data_type': 'date',
            'granularity': 'day'
        }
    )
}}

-- GTFS fare_rules for a single feed version.
-- Every attribute, including fare_id, is read from the JSON `content` column.
-- Only rows whose data_versao equals the data_versao_gtfs var are selected.
-- NOTE(review): the declared unique_key is (fare_id, data_versao), but one
-- fare_id may plausibly appear with several route/zone combinations - confirm
-- the key against the staging data.
SELECT
    SAFE_CAST(data_versao AS DATE) AS data_versao,
    SAFE_CAST(JSON_VALUE(content, '$.fare_id') AS STRING) AS fare_id,
    SAFE_CAST(JSON_VALUE(content, '$.route_id') AS STRING) AS route_id,
    SAFE_CAST(JSON_VALUE(content, '$.origin_id') AS STRING) AS origin_id,
    SAFE_CAST(JSON_VALUE(content, '$.destination_id') AS STRING) AS destination_id,
    SAFE_CAST(JSON_VALUE(content, '$.contains_id') AS STRING) AS contains_id,
    -- value of the project "version" var, stamped on every row
    '{{ var("version") }}' AS versao_modelo
FROM
    {{ source('br_rj_riodejaneiro_gtfs_staging', 'fare_rules') }}
WHERE
    data_versao = '{{ var("data_versao_gtfs") }}'
25 changes: 25 additions & 0 deletions models/br_rj_riodejaneiro_gtfs/feed_info_gtfs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{{
    config(
        alias = 'feed_info',
        unique_key = ['feed_publisher_name', 'data_versao'],
        partition_by = {
            'field': 'data_versao',
            'data_type': 'date',
            'granularity': 'day'
        }
    )
}}

-- GTFS feed_info for a single feed version.
-- feed_publisher_name comes from a staging column; the remaining attributes
-- are read from the JSON `content` column. Only rows whose data_versao equals
-- the data_versao_gtfs var are selected.
SELECT
    SAFE_CAST(data_versao AS DATE) AS data_versao,
    SAFE_CAST(feed_publisher_name AS STRING) AS feed_publisher_name,
    SAFE_CAST(JSON_VALUE(content, '$.feed_publisher_url') AS STRING) AS feed_publisher_url,
    SAFE_CAST(JSON_VALUE(content, '$.feed_lang') AS STRING) AS feed_lang,
    SAFE_CAST(JSON_VALUE(content, '$.default_lang') AS STRING) AS default_lang,
    -- feed_start_date / feed_end_date are optional in a GTFS feed. The SAFE.
    -- prefix makes an empty or malformed value yield NULL, matching the
    -- SAFE_CAST behaviour of every other column, instead of aborting the model.
    SAFE.PARSE_DATE(
        '%Y%m%d',
        SAFE_CAST(JSON_VALUE(content, '$.feed_start_date') AS STRING)
    ) AS feed_start_date,
    SAFE.PARSE_DATE(
        '%Y%m%d',
        SAFE_CAST(JSON_VALUE(content, '$.feed_end_date') AS STRING)
    ) AS feed_end_date,
    SAFE_CAST(JSON_VALUE(content, '$.feed_version') AS STRING) AS feed_version,
    SAFE_CAST(JSON_VALUE(content, '$.feed_contact_email') AS STRING) AS feed_contact_email,
    SAFE_CAST(JSON_VALUE(content, '$.feed_contact_url') AS STRING) AS feed_contact_url,
    -- value of the project "version" var, stamped on every row
    '{{ var("version") }}' AS versao_modelo
FROM
    {{ source('br_rj_riodejaneiro_gtfs_staging', 'feed_info') }}
WHERE
    data_versao = '{{ var("data_versao_gtfs") }}'
18 changes: 18 additions & 0 deletions models/br_rj_riodejaneiro_gtfs/frequencies_gtfs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{{
    config(
        alias = 'frequencies',
        unique_key = ['trip_id', 'start_time', 'data_versao'],
        partition_by = {
            'field': 'data_versao',
            'data_type': 'date',
            'granularity': 'day'
        }
    )
}}

-- GTFS frequencies for a single feed version.
-- trip_id and start_time come from staging columns; end_time, headway_secs and
-- exact_times are read from the JSON `content` column. Only rows whose
-- data_versao equals the data_versao_gtfs var are selected.
SELECT
    SAFE_CAST(data_versao AS DATE) AS data_versao,
    SAFE_CAST(trip_id AS STRING) AS trip_id,
    -- start_time / end_time are kept as strings, not converted to a time type
    SAFE_CAST(start_time AS STRING) AS start_time,
    SAFE_CAST(JSON_VALUE(content, '$.end_time') AS STRING) AS end_time,
    SAFE_CAST(JSON_VALUE(content, '$.headway_secs') AS INT64) AS headway_secs,
    SAFE_CAST(JSON_VALUE(content, '$.exact_times') AS STRING) AS exact_times,
    -- value of the project "version" var, stamped on every row
    '{{ var("version") }}' AS versao_modelo
FROM
    {{ source('br_rj_riodejaneiro_gtfs_staging', 'frequencies') }}
WHERE
    data_versao = '{{ var("data_versao_gtfs") }}'
74 changes: 74 additions & 0 deletions models/br_rj_riodejaneiro_gtfs/ordem_servico_gtfs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
{{
    config(
        alias = 'ordem_servico',
        unique_key = ['servico', 'data_versao'],
        partition_by = {
            'field': 'data_versao',
            'data_type': 'date',
            'granularity': 'day'
        }
    )
}}

-- Service order (ordem_servico) for a single feed version, reshaped from one
-- row per service with per-day-type columns into one row per service and
-- day type (tipo_dia).
WITH ordem_servico AS (
    -- Wide form: one row per service, with a (partidas_ida, partidas_volta,
    -- viagens, km) column set for each day type.
    SELECT
        SAFE_CAST(data_versao AS DATE) AS data_versao,
        timestamp_captura,
        servico,
        SAFE_CAST(JSON_VALUE(content, '$.vista') AS STRING) AS vista,
        SAFE_CAST(JSON_VALUE(content, '$.consorcio') AS STRING) AS consorcio,
        SAFE_CAST(JSON_VALUE(content, '$.horario_inicio') AS STRING) AS horario_inicio,
        SAFE_CAST(JSON_VALUE(content, '$.horario_fim') AS STRING) AS horario_fim,
        SAFE_CAST(JSON_VALUE(content, '$.extensao_ida') AS FLOAT64) AS extensao_ida,
        SAFE_CAST(JSON_VALUE(content, '$.extensao_volta') AS FLOAT64) AS extensao_volta,
        -- business day (du)
        SAFE_CAST(JSON_VALUE(content, '$.partidas_ida_du') AS INT64) AS partidas_ida_du,
        SAFE_CAST(JSON_VALUE(content, '$.partidas_volta_du') AS INT64) AS partidas_volta_du,
        SAFE_CAST(JSON_VALUE(content, '$.viagens_du') AS FLOAT64) AS viagens_du,
        -- the payload key is km_dia_util, unlike the km_<suffix> keys below
        SAFE_CAST(JSON_VALUE(content, '$.km_dia_util') AS FLOAT64) AS km_du,
        -- optional holiday (pf)
        SAFE_CAST(JSON_VALUE(content, '$.partidas_ida_pf') AS INT64) AS partidas_ida_pf,
        SAFE_CAST(JSON_VALUE(content, '$.partidas_volta_pf') AS INT64) AS partidas_volta_pf,
        SAFE_CAST(JSON_VALUE(content, '$.viagens_pf') AS FLOAT64) AS viagens_pf,
        SAFE_CAST(JSON_VALUE(content, '$.km_pf') AS FLOAT64) AS km_pf,
        -- Saturday / Sunday: only the km value is read from the payload. The
        -- placeholders are typed explicitly because UNPIVOT requires every
        -- column set to line up type-for-type with the (INT64, INT64,
        -- FLOAT64, FLOAT64) sets above.
        SAFE_CAST(NULL AS INT64) AS partidas_ida_sabado,
        SAFE_CAST(NULL AS INT64) AS partidas_volta_sabado,
        SAFE_CAST(NULL AS FLOAT64) AS viagens_sabado,
        SAFE_CAST(JSON_VALUE(content, '$.km_sabado') AS FLOAT64) AS km_sabado,
        SAFE_CAST(NULL AS INT64) AS partidas_ida_domingo,
        SAFE_CAST(NULL AS INT64) AS partidas_volta_domingo,
        SAFE_CAST(NULL AS FLOAT64) AS viagens_domingo,
        SAFE_CAST(JSON_VALUE(content, '$.km_domingo') AS FLOAT64) AS km_domingo,
        -- value of the project "version" var, stamped on every row
        '{{ var("version") }}' AS versao_modelo
    FROM
        {{ source('br_rj_riodejaneiro_gtfs_staging', 'ordem_servico') }}
    WHERE
        data_versao = '{{ var("data_versao_gtfs") }}'
)

-- Explicit column list in the same order `SELECT *` produced: pass-through
-- columns first, then the unpivoted value columns, then the tipo_dia label.
SELECT
    data_versao,
    timestamp_captura,
    servico,
    vista,
    consorcio,
    horario_inicio,
    horario_fim,
    extensao_ida,
    extensao_volta,
    versao_modelo,
    partidas_ida,
    partidas_volta,
    viagens_planejadas,
    distancia_total_planejada,
    tipo_dia
FROM
    ordem_servico UNPIVOT (
        (
            partidas_ida,
            partidas_volta,
            viagens_planejadas,
            distancia_total_planejada
        ) FOR tipo_dia IN (
            (
                partidas_ida_du,
                partidas_volta_du,
                viagens_du,
                km_du
            ) AS 'Dia Útil',
            (
                partidas_ida_pf,
                partidas_volta_pf,
                viagens_pf,
                km_pf
            ) AS 'Ponto Facultativo',
            (
                partidas_ida_sabado,
                partidas_volta_sabado,
                viagens_sabado,
                km_sabado
            ) AS 'Sabado',
            (
                partidas_ida_domingo,
                partidas_volta_domingo,
                viagens_domingo,
                km_domingo
            ) AS 'Domingo'
        )
    )
29 changes: 29 additions & 0 deletions models/br_rj_riodejaneiro_gtfs/routes_gtfs.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{{
    config(
        alias = 'routes',
        unique_key = ['route_id', 'data_versao'],
        partition_by = {
            'field': 'data_versao',
            'data_type': 'date',
            'granularity': 'day'
        }
    )
}}

-- GTFS routes for a single feed version.
-- route_id comes from a staging column; the remaining attributes are read from
-- the JSON `content` column. Only rows whose data_versao equals the
-- data_versao_gtfs var are selected.
SELECT
    SAFE_CAST(data_versao AS DATE) AS data_versao,
    SAFE_CAST(route_id AS STRING) AS route_id,
    SAFE_CAST(JSON_VALUE(content, '$.agency_id') AS STRING) AS agency_id,
    SAFE_CAST(JSON_VALUE(content, '$.route_short_name') AS STRING) AS route_short_name,
    SAFE_CAST(JSON_VALUE(content, '$.route_long_name') AS STRING) AS route_long_name,
    SAFE_CAST(JSON_VALUE(content, '$.route_desc') AS STRING) AS route_desc,
    SAFE_CAST(JSON_VALUE(content, '$.route_type') AS STRING) AS route_type,
    SAFE_CAST(JSON_VALUE(content, '$.route_url') AS STRING) AS route_url,
    SAFE_CAST(JSON_VALUE(content, '$.route_color') AS STRING) AS route_color,
    SAFE_CAST(JSON_VALUE(content, '$.route_text_color') AS STRING) AS route_text_color,
    -- route_sort_order is the only attribute here cast to a numeric type
    SAFE_CAST(JSON_VALUE(content, '$.route_sort_order') AS INT64) AS route_sort_order,
    SAFE_CAST(JSON_VALUE(content, '$.continuous_pickup') AS STRING) AS continuous_pickup,
    SAFE_CAST(JSON_VALUE(content, '$.continuous_drop_off') AS STRING) AS continuous_drop_off,
    SAFE_CAST(JSON_VALUE(content, '$.network_id') AS STRING) AS network_id,
    -- value of the project "version" var, stamped on every row
    '{{ var("version") }}' AS versao_modelo
FROM
    {{ source('br_rj_riodejaneiro_gtfs_staging', 'routes') }}
WHERE
    data_versao = '{{ var("data_versao_gtfs") }}'
Loading

0 comments on commit 8b2aa31

Please sign in to comment.