Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎉 New Destination: ClickHouse #7620

Merged
merged 20 commits into from
Dec 13, 2021
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"destinationDefinitionId": "ce0d828e-1dc4-496c-b122-2da42e637e48",
"name": "Clickhouse",
"dockerRepository": "airbyte/destination-clickhouse",
"dockerImageTag": "0.1.0",
"documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse"
}
4 changes: 4 additions & 0 deletions airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ public static Database createOracleDatabase(final String username, final String
return createDatabase(username, password, jdbcConnectionString, "oracle.jdbc.OracleDriver", SQLDialect.DEFAULT);
}

/**
 * Creates a {@link Database} for ClickHouse by delegating to {@code createDatabase}
 * with the ClickHouse JDBC driver class name and {@code SQLDialect.DEFAULT}.
 *
 * @param username user name forwarded to {@code createDatabase}
 * @param password password forwarded to {@code createDatabase}
 * @param jdbcConnectionString JDBC connection string forwarded to {@code createDatabase}
 * @return the {@link Database} returned by {@code createDatabase}
 */
public static Database createClickhouseDatabase(final String username, final String password, final String jdbcConnectionString) {
  final String driverClassName = "ru.yandex.clickhouse.ClickHouseDriver";
  return createDatabase(username, password, jdbcConnectionString, driverClassName, SQLDialect.DEFAULT);
}

/**
 * Creates a {@link Database} for MariaDB by delegating to {@code createDatabase}
 * with the MariaDB JDBC driver class name and {@code SQLDialect.MARIADB}.
 *
 * @param username user name forwarded to {@code createDatabase}
 * @param password password forwarded to {@code createDatabase}
 * @param jdbcConnectionString JDBC connection string forwarded to {@code createDatabase}
 * @return the {@link Database} returned by {@code createDatabase}
 */
public static Database createMariaDbDatabase(final String username, final String password, final String jdbcConnectionString) {
  final String driverClassName = "org.mariadb.jdbc.Driver";
  return createDatabase(username, password, jdbcConnectionString, driverClassName, SQLDialect.MARIADB);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Normalization image for the ClickHouse destination, built on the dbt 0.21.0 base image.
FROM fishtownanalytics/dbt:0.21.0
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

# Install SSH tunneling dependencies. The apt package lists are removed in the
# same layer so they are not baked into the image.
RUN apt-get update \
    && apt-get install -y jq sshpass \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /airbyte
COPY entrypoint.sh .
COPY build/sshtunneling.sh .

WORKDIR /airbyte/normalization_code
COPY normalization ./normalization
COPY setup.py .
COPY dbt-project-template/ ./dbt-template/

# Install python dependencies
WORKDIR /airbyte/base_python_structs
RUN pip install .

WORKDIR /airbyte/normalization_code
RUN pip install .

WORKDIR /airbyte/normalization_code/dbt-template/
# The upstream adapter (`pip install dbt-clickhouse`) has some bugs, so a fork is
# installed for now; switch back once the upstream issue is resolved:
# https://github.com/silentsokolov/dbt-clickhouse/issues/20
# NOTE(review): this installs the fork unpinned (no commit or tag), so builds are
# not reproducible; consider pinning a revision.
RUN pip install git+https://github.com/burmecia/dbt-clickhouse.git
# Download external dbt dependencies
RUN dbt deps

WORKDIR /airbyte
# Use the key=value form of ENV; the space-separated form is a legacy syntax.
ENV AIRBYTE_ENTRYPOINT="/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.name=airbyte/normalization-clickhouse
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# Name of the dbt "profile" used by this project. Profiles hold the database
# connection information and are configured in ~/.dbt/profiles.yml.
profile: "normalize"

# Where dbt looks for each kind of file. For example, `source-paths` says that
# source models live in the "models/" directory. These rarely need to change.
source-paths:
  - "models"
docs-paths:
  - "docs"
analysis-paths:
  - "analysis"
test-paths:
  - "tests"
data-paths:
  - "data"
macro-paths:
  - "macros"

# Directory which will store compiled SQL files
target-path: "../build"
# Directory which will store DBT logs
log-path: "../logs"
# Directory which will store external DBT dependencies
modules-path: "/tmp/dbt_modules"

# Directories to be removed by `dbt clean`
clean-targets:
  - "build"
  - "dbt_modules"

# Quoting settings for database, schema and identifier names.
quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `source-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    # Default materialization for every model in this project.
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        # schema change test isn't supported in ClickHouse yet
        +on_schema_change: "ignore"
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

# Macro lookup order for the dbt_utils namespace: airbyte_utils is searched
# first, then dbt_utils.
dispatch:
  - macro_namespace: dbt_utils
    search_order: ['airbyte_utils', 'dbt_utils']
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    # Quoted so the version is always read as a string.
    revision: "0.7.3"
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
VARCHAR(max)
{%- endmacro -%}

{# ClickHouse implementation of type_json: renders `String` as the column type. #}
{% macro clickhouse__type_json() %}
String
{% endmacro %}


{# string ------------------------------------------------- #}

Expand All @@ -47,6 +51,10 @@
VARCHAR(max)
{%- endmacro -%}

{# ClickHouse implementation of type_string: renders `String` (surrounding whitespace trimmed). #}
{%- macro clickhouse__type_string() -%}
String
{%- endmacro -%}


{# float ------------------------------------------------- #}
{% macro mysql__type_float() %}
Expand All @@ -57,6 +65,10 @@
float
{% endmacro %}

{# ClickHouse implementation of type_float: renders `Float64` as the column type. #}
{% macro clickhouse__type_float() %}
Float64
{% endmacro %}


{# int ------------------------------------------------- #}
{% macro default__type_int() %}
Expand All @@ -67,6 +79,11 @@
int
{% endmacro %}

{# ClickHouse implementation of type_int: renders `INT` as the column type. #}
{% macro clickhouse__type_int() %}
INT
{% endmacro %}


{# bigint ------------------------------------------------- #}
{% macro mysql__type_bigint() %}
signed
Expand All @@ -76,12 +93,20 @@
numeric
{% endmacro %}

{# ClickHouse implementation of type_bigint: renders `BIGINT` as the column type. #}
{% macro clickhouse__type_bigint() %}
BIGINT
{% endmacro %}


{# numeric ------------------------------------------------- --#}
{# MySQL implementation of type_numeric: renders `float` as the column type. #}
{% macro mysql__type_numeric() %}
float
{% endmacro %}

{# ClickHouse implementation of type_numeric: renders `Float64`, the same type as clickhouse__type_float. #}
{% macro clickhouse__type_numeric() %}
Float64
{% endmacro %}


{# timestamp ------------------------------------------------- --#}
{% macro mysql__type_timestamp() %}
Expand All @@ -94,6 +119,10 @@
datetime
{%- endmacro -%}

{# ClickHouse implementation of type_timestamp: renders `DateTime64` with no explicit precision argument. #}
{% macro clickhouse__type_timestamp() %}
DateTime64
{% endmacro %}


{# timestamp with time zone ------------------------------------------------- #}

Expand Down Expand Up @@ -124,6 +153,10 @@
datetime
{%- endmacro -%}

{# ClickHouse implementation of type_timestamp_with_timezone: renders `DateTime64`. #}
{# NOTE(review): no time zone argument is rendered — confirm this is intended. #}
{% macro clickhouse__type_timestamp_with_timezone() %}
DateTime64
{% endmacro %}


{# date ------------------------------------------------- #}

Expand All @@ -142,3 +175,7 @@
{# SQL Server implementation of type_date: renders `date` (surrounding whitespace trimmed). #}
{%- macro sqlserver__type_date() -%}
date
{%- endmacro -%}

{# ClickHouse implementation of type_date: renders `Date` as the column type. #}
{% macro clickhouse__type_date() %}
Date
{% endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- Redshift: json_extract_path_text('json_string', 'path_elem' [,'path_elem'[, ...] ] [, null_if_invalid ] ) -> https://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html
- Postgres: json_extract_path_text(<from_json>, 'path' [, 'path' [, ...]]) -> https://www.postgresql.org/docs/12/functions-json.html
- MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html
- ClickHouse: JSONExtractRaw(json_doc, 'path' [, 'path'] ...) / JSONExtractArrayRaw(json_doc, 'path' [, 'path'] ...) -> https://clickhouse.com/docs/en/sql-reference/functions/json-functions/
#}

{# format_json_path -------------------------------------------------- #}
Expand Down Expand Up @@ -66,6 +67,14 @@
{{ "'$.\"" ~ str_list|join(".") ~ "\"'" }}
{%- endmacro %}

{#
    ClickHouse implementation of format_json_path.
    Escapes each path element (`'` becomes `''`, `"` gets a leading backslash) and
    renders the elements as single-quoted string literals separated by commas.
#}
{% macro clickhouse__format_json_path(json_path_list) -%}
    {%- set escaped_parts = [] -%}
    {%- for path_elem in json_path_list -%}
        {%- set escaped = path_elem.replace("'", "''").replace('"', '\\"') -%}
        {#- list.append returns None, so the `if` only triggers the append and renders nothing -#}
        {%- if escaped_parts.append(escaped) -%} {%- endif -%}
    {%- endfor -%}
    {%- set joined = escaped_parts|join("','") -%}
    {{ "'" ~ joined ~ "'" }}
{%- endmacro %}

{# json_extract ------------------------------------------------- #}

{% macro json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
Expand Down Expand Up @@ -124,6 +133,14 @@
json_query({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{#
    ClickHouse implementation of json_extract.
    Renders a JSONExtractRaw call on the JSON column, qualifying the column with
    `from_table` unless `from_table` renders as an empty string.
    `normalized_json_path` is accepted but not used here.
#}
{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
    {%- if from_table|string() != '' %}
        JSONExtractRaw({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }})
    {% else %}
        JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
    {% endif -%}
{%- endmacro %}

{# json_extract_scalar ------------------------------------------------- #}

{% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
Expand Down Expand Up @@ -162,6 +179,10 @@
json_value({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{#
    ClickHouse implementation of json_extract_scalar: renders a JSONExtractRaw call
    on `json_column` with the formatted path. `normalized_json_path` is not used here.
#}
{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
    {%- set path_args = format_json_path(json_path_list) -%}
    JSONExtractRaw({{ json_column }}, {{ path_args }})
{%- endmacro %}

{# json_extract_array ------------------------------------------------- #}

{% macro json_extract_array(json_column, json_path_list, normalized_json_path) -%}
Expand Down Expand Up @@ -199,3 +220,7 @@
{#
    SQL Server implementation of json_extract_array: renders a json_query call
    on `json_column` with the formatted path. `normalized_json_path` is not used here.
#}
{% macro sqlserver__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    {%- set path_args = format_json_path(json_path_list) -%}
    json_query({{ json_column }}, {{ path_args }})
{%- endmacro %}

{#
    ClickHouse implementation of json_extract_array: renders a JSONExtractArrayRaw call
    on `json_column` with the formatted path. `normalized_json_path` is not used here.
#}
{% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
    {%- set path_args = format_json_path(json_path_list) -%}
    JSONExtractArrayRaw({{ json_column }}, {{ path_args }})
{%- endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@
{# Oracle implementation of quote: wraps `column_name` in double quotes. #}
{% macro oracle__quote(column_name) -%}
    {%- set dq = '\"' -%}
    {{ dq ~ column_name ~ dq }}
{%- endmacro %}

{# ClickHouse implementation of quote: wraps `column_name` in double quotes. #}
{% macro clickhouse__quote(column_name) -%}
    {%- set dq = '\"' -%}
    {{ dq ~ column_name ~ dq }}
{%- endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,15 @@
{%- endfor %}
)
{%- endmacro %}

{#
    ClickHouse implementation of surrogate_key.
    Casts every field with toString, concatenates the results with '~' between
    them, and renders assumeNotNull(hex(MD5(...))) over that expression.
#}
{% macro clickhouse__surrogate_key(field_list) -%}
    {%- set stringified = [] -%}
    {%- for column in field_list -%}
        {#- list.append returns None, so the `if` only triggers the append and renders nothing -#}
        {%- if stringified.append("toString(" ~ column ~ ")") -%} {%- endif -%}
    {%- endfor -%}
    assumeNotNull(hex(MD5(
        {{ stringified|join(" || '~' || ") }}
    )))
{%- endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,10 @@ services:
context: .
labels:
io.airbyte.git-revision: ${GIT_REVISION}
# ClickHouse normalization image, built from clickhouse.Dockerfile with `.` as the build context.
normalization-clickhouse:
image: airbyte/normalization-clickhouse:${VERSION}
build:
dockerfile: clickhouse.Dockerfile
context: .
labels:
# Label value is taken from the GIT_REVISION variable.
io.airbyte.git-revision: ${GIT_REVISION}
Loading