Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IGNORE] Branch for integration-tests PR 7620 #7786

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
59be9e6
add ClickHouse destination
burmecia Nov 4, 2021
410e9c9
update docs
burmecia Nov 4, 2021
ff94b62
format code
burmecia Nov 4, 2021
aff0c61
code improvement as per code review
burmecia Nov 6, 2021
27fddcf
add ssh tunneling and ssl/tls support and code enhancement
burmecia Nov 7, 2021
b1eaa31
merge from master
burmecia Nov 9, 2021
09f3fa6
merge from master
burmecia Nov 9, 2021
d954ca8
Merge branch 'master' into new-dest-clickhouse
burmecia Nov 9, 2021
f05de4e
Merge remote-tracking branch 'pr-source/new-dest-clickhouse' into ots…
alexandertsukanov Nov 9, 2021
6b76313
Merge branch 'master' into new-dest-clickhouse
burmecia Nov 10, 2021
4fc08be
disable testCustomDbtTransformationsFailure test
burmecia Nov 10, 2021
b7d12cc
fix string format bug
burmecia Nov 11, 2021
856233b
Merge branch 'master' into new-dest-clickhouse
burmecia Nov 11, 2021
282febe
fix reserved keywords bug and disable dbt
burmecia Nov 13, 2021
33cedf3
Merge remote-tracking branch 'pr-source/new-dest-clickhouse' into ots…
alexandertsukanov Nov 15, 2021
43e39f6
disable dbt in expect result
burmecia Nov 17, 2021
0acef86
Merge remote-tracking branch 'pr-source/new-dest-clickhouse' into ots…
alexandertsukanov Nov 17, 2021
d85eb1f
add type hints
burmecia Nov 25, 2021
2aadea4
merge from master
burmecia Nov 26, 2021
9589702
Merge branch 'master' into new-dest-clickhouse
alexandertsukanov Nov 29, 2021
d4a6b60
Merge remote-tracking branch 'pr-source/new-dest-clickhouse' into ots…
alexandertsukanov Nov 29, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"destinationDefinitionId": "ce0d828e-1dc4-496c-b122-2da42e637e48",
"name": "Clickhouse",
"dockerRepository": "airbyte/destination-clickhouse",
"dockerImageTag": "0.1.0",
"documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse"
}
4 changes: 4 additions & 0 deletions airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ public static Database createOracleDatabase(final String username, final String
return createDatabase(username, password, jdbcConnectionString, "oracle.jdbc.OracleDriver", SQLDialect.DEFAULT);
}

/**
 * Builds a {@link Database} handle for a ClickHouse server.
 *
 * Delegates to {@code createDatabase} with the Yandex ClickHouse JDBC driver class and
 * {@code SQLDialect.DEFAULT}.
 *
 * @param username user name passed through to {@code createDatabase}
 * @param password password passed through to {@code createDatabase}
 * @param jdbcConnectionString JDBC connection string passed through to {@code createDatabase}
 * @return the {@link Database} returned by {@code createDatabase}
 */
public static Database createClickhouseDatabase(final String username, final String password, final String jdbcConnectionString) {
  final String clickhouseDriverClass = "ru.yandex.clickhouse.ClickHouseDriver";
  return createDatabase(username, password, jdbcConnectionString, clickhouseDriverClass, SQLDialect.DEFAULT);
}

/**
 * Builds a {@link Database} handle for a MariaDB server.
 *
 * Delegates to {@code createDatabase} with the MariaDB JDBC driver class and
 * {@code SQLDialect.MARIADB}.
 *
 * @param username user name passed through to {@code createDatabase}
 * @param password password passed through to {@code createDatabase}
 * @param jdbcConnectionString JDBC connection string passed through to {@code createDatabase}
 * @return the {@link Database} returned by {@code createDatabase}
 */
public static Database createMariaDbDatabase(final String username, final String password, final String jdbcConnectionString) {
  final String mariaDbDriverClass = "org.mariadb.jdbc.Driver";
  return createDatabase(username, password, jdbcConnectionString, mariaDbDriverClass, SQLDialect.MARIADB);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Normalization image for ClickHouse: dbt 0.21.0 base image plus the Airbyte
# normalization package and a ClickHouse dbt adapter.
FROM fishtownanalytics/dbt:0.21.0
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

# Install SSH Tunneling dependencies
# The apt package index is removed in the same layer so it does not end up in the image.
RUN apt-get update \
    && apt-get install -y jq sshpass \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /airbyte
COPY entrypoint.sh .
COPY build/sshtunneling.sh .

WORKDIR /airbyte/normalization_code
COPY normalization ./normalization
COPY setup.py .
COPY dbt-project-template/ ./dbt-template/

# Install python dependencies
# NOTE(review): /airbyte/base_python_structs is presumably provided by the
# COPY --from above - confirm against the base-airbyte-protocol-python image.
WORKDIR /airbyte/base_python_structs
RUN pip install .

WORKDIR /airbyte/normalization_code
RUN pip install .

WORKDIR /airbyte/normalization_code/dbt-template/
#RUN pip install dbt-clickhouse
# dbt-clickhouse adapter has some bugs, use our own just for now
# https://github.com/silentsokolov/dbt-clickhouse/issues/20
# NOTE(review): this installs the fork's default branch head, so builds are not
# reproducible; pin to a tag or commit (git+https://...@<rev>) once one is chosen.
RUN pip install git+https://github.com/burmecia/dbt-clickhouse.git
# Download external dbt dependencies
RUN dbt deps

WORKDIR /airbyte
# key=value form; the space-separated `ENV key value` syntax is the legacy form.
ENV AIRBYTE_ENTRYPOINT="/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.name=airbyte/normalization-clickhouse
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: 'airbyte_utils'
version: '1.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: 'normalize'

# These configurations specify where dbt should look for different types of files.
# The `source-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
source-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
data-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build"  # directory which will store compiled SQL files
log-path: "../logs"  # directory which will store DBT logs
modules-path: "/tmp/dbt_modules"  # directory which will store external DBT dependencies

# NOTE(review): these entries do not match target-path / modules-path above
# ("../build", "/tmp/dbt_modules") - confirm whether that is intentional.
clean-targets:  # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `source-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    # Default for every model in the project; the sub-directories below override it.
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        # schema change test isn't supported in ClickHouse yet
        +on_schema_change: "ignore"
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

# Resolve dbt_utils macros against this project first, then dbt_utils itself.
dispatch:
  - macro_namespace: dbt_utils
    search_order: ['airbyte_utils', 'dbt_utils']
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  # `revision` belongs to the same package entry as `git`; it is quoted so the
  # version-like value is always read as a string.
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: "0.7.3"
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
VARCHAR(max)
{%- endmacro -%}

{# ClickHouse variant of type_json: renders the type name `String`. #}
{% macro clickhouse__type_json() %}
String
{% endmacro %}


{# string ------------------------------------------------- #}

Expand All @@ -47,6 +51,10 @@
VARCHAR(max)
{%- endmacro -%}

{# ClickHouse variant of type_string: renders `String` with surrounding whitespace stripped. #}
{%- macro clickhouse__type_string() -%}
String
{%- endmacro -%}


{# float ------------------------------------------------- #}
{% macro mysql__type_float() %}
Expand All @@ -57,6 +65,10 @@
float
{% endmacro %}

{# ClickHouse variant of type_float: renders the type name `Float64`. #}
{% macro clickhouse__type_float() %}
Float64
{% endmacro %}


{# int ------------------------------------------------- #}
{% macro default__type_int() %}
Expand All @@ -67,6 +79,11 @@
int
{% endmacro %}

{# ClickHouse variant of type_int: renders the type name `INT`. #}
{% macro clickhouse__type_int() %}
INT
{% endmacro %}


{# bigint ------------------------------------------------- #}
{% macro mysql__type_bigint() %}
signed
Expand All @@ -76,12 +93,20 @@
numeric
{% endmacro %}

{# ClickHouse variant of type_bigint: renders the type name `BIGINT`. #}
{% macro clickhouse__type_bigint() %}
BIGINT
{% endmacro %}


{# numeric ------------------------------------------------- --#}
{# MySQL variant of type_numeric: renders the type name `float`. #}
{% macro mysql__type_numeric() %}
float
{% endmacro %}

{# ClickHouse variant of type_numeric: renders `Float64`, the same type as clickhouse__type_float. #}
{% macro clickhouse__type_numeric() %}
Float64
{% endmacro %}


{# timestamp ------------------------------------------------- --#}
{% macro mysql__type_timestamp() %}
Expand All @@ -94,6 +119,10 @@
datetime
{%- endmacro -%}

{# ClickHouse variant of type_timestamp: renders `DateTime64` with no explicit precision or time zone argument. #}
{% macro clickhouse__type_timestamp() %}
DateTime64
{% endmacro %}


{# timestamp with time zone ------------------------------------------------- #}

Expand Down Expand Up @@ -124,6 +153,10 @@
datetime
{%- endmacro -%}

{#
ClickHouse variant of type_timestamp_with_timezone: renders `DateTime64`,
the same output as clickhouse__type_timestamp (no time zone argument is emitted).
#}
{% macro clickhouse__type_timestamp_with_timezone() %}
DateTime64
{% endmacro %}


{# date ------------------------------------------------- #}

Expand All @@ -142,3 +175,7 @@
{# SQL Server variant of type_date: renders `date` with surrounding whitespace stripped. #}
{%- macro sqlserver__type_date() -%}
date
{%- endmacro -%}

{# ClickHouse variant of type_date: renders the type name `Date`. #}
{% macro clickhouse__type_date() %}
Date
{% endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- Redshift: json_extract_path_text('json_string', 'path_elem' [,'path_elem'[, ...] ] [, null_if_invalid ] ) -> https://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html
- Postgres: json_extract_path_text(<from_json>, 'path' [, 'path' [, ...]]) -> https://www.postgresql.org/docs/12/functions-json.html
- MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html
- ClickHouse: JSONExtractRaw(json_doc, 'path' [, 'path'] ...) / JSONExtractArrayRaw(json_doc, 'path' [, 'path'] ...) -> https://clickhouse.com/docs/en/sql-reference/functions/json-functions/
#}

{# format_json_path -------------------------------------------------- #}
Expand Down Expand Up @@ -66,6 +67,14 @@
{{ "'$.\"" ~ str_list|join(".") ~ "\"'" }}
{%- endmacro %}

{#
ClickHouse variant of format_json_path.
Renders json_path_list as a comma-separated list of single-quoted literals,
one per path element, for example ['a', 'b'] renders as 'a','b'.
Each element is escaped first: a single quote is doubled and a double quote
is prefixed with a backslash.
The `if str_list.append(...)` wrapper only exists to call append() from a
template tag; the branch body is empty, so nothing is rendered by it.
#}
{% macro clickhouse__format_json_path(json_path_list) -%}
{%- set str_list = [] -%}
{%- for json_path in json_path_list -%}
{%- if str_list.append(json_path.replace("'", "''").replace('"', '\\"')) -%} {%- endif -%}
{%- endfor -%}
{{ "'" ~ str_list|join("','") ~ "'" }}
{%- endmacro %}

{# json_extract ------------------------------------------------- #}

{% macro json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
Expand Down Expand Up @@ -124,6 +133,14 @@
json_query({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{#
ClickHouse variant of json_extract: renders a JSONExtractRaw(...) call on
json_column with the path produced by format_json_path(json_path_list).
When from_table renders as an empty string the column is referenced
unqualified; otherwise it is prefixed with "from_table.".
normalized_json_path is accepted but not used here.
#}
{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %}
JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
{% else %}
JSONExtractRaw({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }})
{% endif -%}
{%- endmacro %}

{# json_extract_scalar ------------------------------------------------- #}

{% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
Expand Down Expand Up @@ -162,6 +179,10 @@
json_value({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{#
ClickHouse variant of json_extract_scalar: renders a JSONExtractRaw(...) call
on json_column with the path produced by format_json_path(json_path_list).
normalized_json_path is accepted but not used here.
NOTE(review): JSONExtractRaw returns the unparsed JSON fragment, so string
scalars presumably keep their surrounding double quotes - confirm that the
caller strips them.
#}
{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{# json_extract_array ------------------------------------------------- #}

{% macro json_extract_array(json_column, json_path_list, normalized_json_path) -%}
Expand Down Expand Up @@ -199,3 +220,7 @@
{#
SQL Server variant of json_extract_array: renders a json_query(...) call on
json_column with the path produced by format_json_path(json_path_list).
normalized_json_path is accepted but not used here.
#}
{% macro sqlserver__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
json_query({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{#
ClickHouse variant of json_extract_array: renders a JSONExtractArrayRaw(...)
call on json_column with the path produced by format_json_path(json_path_list).
normalized_json_path is accepted but not used here.
#}
{% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
JSONExtractArrayRaw({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@
{# Oracle variant of quote: wraps column_name in double quotes; the name itself is not escaped. #}
{% macro oracle__quote(column_name) -%}
{{ '\"' ~ column_name ~ '\"'}}
{%- endmacro %}

{#
ClickHouse variant of quote: wraps column_name in double quotes, the same
output as oracle__quote. The name itself is not escaped.
#}
{% macro clickhouse__quote(column_name) -%}
{{ '\"' ~ column_name ~ '\"'}}
{%- endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,15 @@
{%- endfor %}
)
{%- endmacro %}

{#
ClickHouse variant of surrogate_key.
Renders assumeNotNull(hex(MD5(...))) over the fields in field_list, each
wrapped in toString() and joined with the '~' separator via the || operator.
The last field is rendered without a trailing separator.
NOTE(review): an empty field_list renders MD5() with no argument, and a NULL
field presumably makes the whole concatenation NULL before assumeNotNull is
applied - confirm both cases against the callers.
#}
{% macro clickhouse__surrogate_key(field_list) -%}
assumeNotNull(hex(MD5(
{%- for field in field_list %}
{% if not loop.last %}
toString({{ field }}) || '~' ||
{% else %}
toString({{ field }})
{% endif %}
{%- endfor %}
)))
{%- endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,10 @@ services:
context: .
labels:
io.airbyte.git-revision: ${GIT_REVISION}
# Build definition for the ClickHouse normalization image.
# The image is built from clickhouse.Dockerfile with this directory as context;
# VERSION and GIT_REVISION are interpolated from the environment.
normalization-clickhouse:
  image: airbyte/normalization-clickhouse:${VERSION}
  build:
    dockerfile: clickhouse.Dockerfile
    context: .
    labels:
      io.airbyte.git-revision: ${GIT_REVISION}
Loading