Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Trino #376

Merged
merged 2 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,17 @@ jobs:
- store_artifacts:
path: ./integration_tests/logs

integration-trino:
docker:
- image: cimg/python:3.9.9
steps:
- checkout
- run:
name: "Run Tests - Trino"
command: ./run_test.sh trino
- store_artifacts:
path: ./integration_tests/logs

workflows:
version: 2
test-all:
Expand All @@ -104,3 +115,7 @@ workflows:
context: profile-databricks
requires:
- integration-postgres
- integration-trino:
context: profile-trino
requires:
- integration-postgres
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/bug_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ If applicable, add screenshots or log output to help explain your problem.
- [ ] redshift
- [ ] bigquery
- [ ] snowflake
- [ ] trino/starburst
- [ ] other (specify: ____________)


Expand Down
1 change: 1 addition & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@ Screenshot of passing integration tests locally
- [ ] Snowflake
- [ ] Databricks
- [ ] DuckDB
- [ ] Trino/Starburst
- [ ] I have updated the README.md (if applicable)
- [ ] I have added tests & descriptions to my models (and macros if applicable)
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Currently, the following adapters are supported:
- Redshift
- Snowflake
- DuckDB
- Trino (tested with Iceberg connector)

## Using This Package

Expand Down
4 changes: 2 additions & 2 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ vars:
other_prefixes: ['rpt_']

# -- Performance variables --
chained_views_threshold: 5
chained_views_threshold: "{{ 5 if target.type != 'trino' else 4 }}"

# -- Execution variables --
insert_batch_size: "{{ 500 if target.type == 'bigquery' else 10000 }}"
max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else -1 }}"
max_depth_dag: "{{ 9 if target.type in ['bigquery', 'spark', 'databricks'] else 4 if target.type == 'trino' else -1 }}"
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Currently, the following adapters are supported:
- Redshift
- Snowflake
- DuckDB
- Trino (tested with Iceberg connector)

## Using This Package

Expand Down
15 changes: 14 additions & 1 deletion integration_tests/ci/sample.profiles.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,17 @@ integration_tests:

duckdb:
type: duckdb
path: ./duck.db
path: ./duck.db

trino:
type: trino
host: "{{ env_var('TRINO_TEST_HOST') }}"
port: "{{ env_var('TRINO_TEST_PORT') | as_number }}"
method: ldap
user: "{{ env_var('TRINO_TEST_USER') }}"
password: "{{ env_var('TRINO_TEST_PASS') }}"
catalog: "{{ env_var('TRINO_TEST_CATALOG_NAME') }}"
schema: dbt_project_evaluator_integration_tests_trino
threads: 5
session_properties:
query_max_stage_count: 200
8 changes: 4 additions & 4 deletions integration_tests/seeds/docs/docs_seeds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ seeds:
- name: test_fct_documentation_coverage
config:
column_types:
staging_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
intermediate_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
marts_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
other_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb'] else 'decimal(10,2)' }}"
staging_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
intermediate_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
marts_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
other_documentation_coverage_pct: "{{ 'float' if target.type not in ['spark','databricks','duckdb','trino'] else 'decimal(10,2)' }}"
tags:
- docs
tests:
Expand Down
10 changes: 5 additions & 5 deletions integration_tests/seeds/tests/tests_seeds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ seeds:
- name: test_fct_test_coverage
config:
column_types:
test_coverage_pct: float
staging_test_coverage_pct: float
intermediate_test_coverage_pct: float
marts_test_coverage_pct: float
other_test_coverage_pct: float
test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
staging_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
intermediate_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
marts_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
other_test_coverage_pct: "{{ 'float' if target.type != 'trino' else 'double' }}"
tests:
- dbt_utils.equality:
name: equality_fct_test_coverage
Expand Down
13 changes: 10 additions & 3 deletions macros/recursive_dag.sql
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ all_relationships (
on all_relationships.child_id = direct_relationships.direct_parent_id

{% if var('max_depth_dag') | int > 0 %}
{% if var('max_depth_dag') | int < 2 or var('max_depth_dag') | int < var('chained_views_threshold')%}
{% if var('max_depth_dag') | int < 2 or var('max_depth_dag') | int < var('chained_views_threshold') | int %}
{% do exceptions.raise_compiler_error(
'Variable max_depth_dag must be at least 2 and must be greater than or equal to chained_views_threshold.'
) %}
Expand All @@ -138,7 +138,7 @@ all_relationships (

-- as of Feb 2022 BigQuery doesn't support with recursive in the same way as other DWs
{% set max_depth = var('max_depth_dag') | int %}
{% if max_depth < 2 or max_depth < var('chained_views_threshold') %}
{% if max_depth < 2 or max_depth < var('chained_views_threshold') | int %}
{% do exceptions.raise_compiler_error(
'Variable max_depth_dag must be at least 2 and must be greater than or equal to chained_views_threshold.'
) %}
Expand Down Expand Up @@ -260,4 +260,11 @@ with direct_relationships as (
{% macro spark__recursive_dag() %}
-- as of June 2022 databricks SQL doesn't support "with recursive" in the same way as other DWs
{{ return(bigquery__recursive_dag()) }}
{% endmacro %}
{% endmacro %}


{% macro trino__recursive_dag() %}
{#-- Although Trino supports recursive WITH queries,
-- they are less performant than creating CTEs with loops and unioning them --#}
{{ return(bigquery__recursive_dag()) }}
{% endmacro %}
2 changes: 1 addition & 1 deletion models/marts/dag/fct_duplicate_sources.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ source_duplicates as (
{{ dbt.listagg(
measure = 'resource_name',
delimiter_text = "', '",
order_by_clause = 'order by resource_name' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause = 'order by resource_name' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as source_names
from sources
group by source_db_location
Expand Down
2 changes: 1 addition & 1 deletion models/marts/dag/fct_model_fanout.sql
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ model_fanout_agg as (
{{ dbt.listagg(
measure = 'child',
delimiter_text = "', '",
order_by_clause = 'order by child' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause = 'order by child' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as leaf_children
from model_fanout
group by 1, 2
Expand Down
2 changes: 1 addition & 1 deletion models/marts/dag/fct_multiple_sources_joined.sql
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ multiple_sources_joined as (
{{ dbt.listagg(
measure='parent',
delimiter_text="', '",
order_by_clause='order by parent' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by parent' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as source_parents
from direct_source_relationships
group by 1
Expand Down
2 changes: 1 addition & 1 deletion models/marts/dag/fct_source_fanout.sql
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ source_fanout as (
{{ dbt.listagg(
measure='child',
delimiter_text="', '",
order_by_clause='order by child' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by child' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as model_children
from direct_source_relationships
group by 1
Expand Down
4 changes: 2 additions & 2 deletions models/marts/documentation/fct_documentation_coverage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ conversion as (

final as (
select
current_timestamp as measured_at,
{{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
count(*) as total_models,
sum(is_described_model) as documented_models,
round(sum(is_described_model) * 100.0 / count(*), 2) as documentation_coverage_pct,
round(sum(is_described_model) * 100.00 / count(*), 2) as documentation_coverage_pct,
{% for model_type in var('model_types') %}
round(
{{ dbt_utils.safe_divide(
Expand Down
2 changes: 1 addition & 1 deletion models/marts/structure/fct_model_naming_conventions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ appropriate_prefixes as (
{{ dbt.listagg(
measure='prefix_value',
delimiter_text="', '",
order_by_clause='order by prefix_value' if target.type in ['snowflake','redshift','duckdb'])
order_by_clause='order by prefix_value' if target.type in ['snowflake','redshift','duckdb','trino'])
}} as appropriate_prefixes
from naming_convention_prefixes
group by model_type
Expand Down
4 changes: 2 additions & 2 deletions models/marts/tests/fct_test_coverage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ conversion as (

final as (
select
current_timestamp as measured_at,
{{ 'current_timestamp' if target.type != 'trino' else 'current_timestamp(6)' }} as measured_at,
count(*) as total_models,
sum(number_of_tests_on_model) as total_tests,
sum(is_tested_model) as tested_models,
Expand All @@ -32,7 +32,7 @@ final as (
) }}
, 2) as {{ model_type }}_test_coverage_pct,
{% endfor %}
round(sum(number_of_tests_on_model) * 1.0 / count(*), 4) as test_to_model_ratio
round(sum(number_of_tests_on_model) * 1.0000 / count(*), 4) as test_to_model_ratio

from test_counts
left join conversion
Expand Down
8 changes: 4 additions & 4 deletions seeds/seeds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ seeds:

config:
column_types:
fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
id_to_exclude: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake'] else 'string' }}"
fct_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
column_name: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
id_to_exclude: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"
comment: "{{ 'varchar' if target.type in ['redshift', 'postgres', 'snowflake', 'trino'] else 'string' }}"

columns:
- name: fct_name
Expand Down