Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎉 New Destination: ClickHouse #7620

Merged
merged 20 commits into from
Dec 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
  "name": "Clickhouse",
  "destinationDefinitionId": "ce0d828e-1dc4-496c-b122-2da42e637e48",
  "dockerRepository": "airbyte/destination-clickhouse",
  "dockerImageTag": "0.1.0",
  "documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse"
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@
dockerImageTag: 0.2.0
documentationUrl: https://docs.airbyte.io/integrations/destinations/keen
icon: chargify.svg
# ClickHouse destination: connector image airbyte/destination-clickhouse, tag 0.1.0.
- name: "Clickhouse"
  destinationDefinitionId: "ce0d828e-1dc4-496c-b122-2da42e637e48"
  dockerRepository: "airbyte/destination-clickhouse"
  dockerImageTag: "0.1.0"
  documentationUrl: "https://docs.airbyte.io/integrations/destinations/clickhouse"
- name: DynamoDB
destinationDefinitionId: 8ccd8909-4e99-4141-b48d-4984b70b2d89
dockerRepository: airbyte/destination-dynamodb
Expand Down
159 changes: 159 additions & 0 deletions airbyte-config/init/src/main/resources/seed/destination_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,165 @@
supported_destination_sync_modes:
- "overwrite"
- "append"
# Connector specification for the ClickHouse destination (image tag 0.1.0).
# `connectionSpecification` is a JSON Schema (draft-07) describing the
# connector configuration; `order` controls the relative position of fields.
# NOTE(review): if this seed file is generated from the connector's own spec
# file, mirror the port-range and `examples` corrections there as well — confirm.
- dockerImage: "airbyte/destination-clickhouse:0.1.0"
  spec:
    documentationUrl: "https://docs.airbyte.io/integrations/destinations/clickhouse"
    connectionSpecification:
      $schema: "http://json-schema.org/draft-07/schema#"
      title: "ClickHouse Destination Spec"
      type: "object"
      # `password`, `ssl` and `tunnel_method` are optional.
      required:
      - "host"
      - "port"
      - "database"
      - "username"
      additionalProperties: true
      properties:
        host:
          title: "Host"
          description: "Hostname of the database."
          type: "string"
          order: 0
        port:
          title: "Port"
          description: "JDBC port (not the native port) of the database."
          type: "integer"
          minimum: 0
          # 65535 is the highest valid TCP port number (ports are 16-bit).
          maximum: 65535
          default: 8123
          # Examples are integers so that they validate against `type: integer`.
          examples:
          - 8123
          order: 1
        database:
          title: "DB Name"
          description: "Name of the database."
          type: "string"
          order: 2
        username:
          title: "User"
          description: "Username to use to access the database."
          type: "string"
          order: 3
        password:
          title: "Password"
          description: "Password associated with the username."
          type: "string"
          airbyte_secret: true
          order: 4
        ssl:
          title: "SSL Connection"
          description: "Encrypt data using SSL."
          type: "boolean"
          default: false
          order: 5
        tunnel_method:
          type: "object"
          title: "SSH Tunnel Method"
          description: "Whether to initiate an SSH tunnel before connecting to the database, and if so, which kind of authentication to use."
          # Exactly one variant applies; the `const` value of the inner
          # `tunnel_method` property discriminates between them.
          oneOf:
          - title: "No Tunnel"
            required:
            - "tunnel_method"
            properties:
              tunnel_method:
                description: "No ssh tunnel needed to connect to database"
                type: "string"
                const: "NO_TUNNEL"
                order: 0
          - title: "SSH Key Authentication"
            required:
            - "tunnel_method"
            - "tunnel_host"
            - "tunnel_port"
            - "tunnel_user"
            - "ssh_key"
            properties:
              tunnel_method:
                description: "Connect through a jump server tunnel host using username and ssh key"
                type: "string"
                const: "SSH_KEY_AUTH"
                order: 0
              tunnel_host:
                title: "SSH Tunnel Jump Server Host"
                description: "Hostname of the jump server host that allows inbound ssh tunnel."
                type: "string"
                order: 1
              tunnel_port:
                title: "SSH Connection Port"
                description: "Port on the proxy/jump server that accepts inbound ssh connections."
                type: "integer"
                minimum: 0
                # Highest valid TCP port number.
                maximum: 65535
                default: 22
                examples:
                - 22
                order: 2
              tunnel_user:
                title: "SSH Login Username"
                description: "OS-level username for logging into the jump server host."
                type: "string"
                order: 3
              ssh_key:
                title: "SSH Private Key"
                description: "OS-level user account ssh key credentials in RSA PEM format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )"
                type: "string"
                airbyte_secret: true
                multiline: true
                order: 4
          - title: "Password Authentication"
            required:
            - "tunnel_method"
            - "tunnel_host"
            - "tunnel_port"
            - "tunnel_user"
            - "tunnel_user_password"
            properties:
              tunnel_method:
                description: "Connect through a jump server tunnel host using username and password authentication"
                type: "string"
                const: "SSH_PASSWORD_AUTH"
                order: 0
              tunnel_host:
                title: "SSH Tunnel Jump Server Host"
                description: "Hostname of the jump server host that allows inbound ssh tunnel."
                type: "string"
                order: 1
              tunnel_port:
                title: "SSH Connection Port"
                description: "Port on the proxy/jump server that accepts inbound ssh connections."
                type: "integer"
                minimum: 0
                # Highest valid TCP port number.
                maximum: 65535
                default: 22
                examples:
                - 22
                order: 2
              tunnel_user:
                title: "SSH Login Username"
                description: "OS-level username for logging into the jump server host"
                type: "string"
                order: 3
              tunnel_user_password:
                title: "Password"
                description: "OS-level password for logging into the jump server host"
                type: "string"
                airbyte_secret: true
                order: 4
    supportsIncremental: true
    supportsNormalization: true
    supportsDBT: false
    supported_destination_sync_modes:
    - "overwrite"
    - "append"
    - "append_dedup"
- dockerImage: "airbyte/destination-dynamodb:0.1.0"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/destinations/dynamodb"
Expand Down
4 changes: 4 additions & 0 deletions airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ public static Database createOracleDatabase(final String username, final String
return createDatabase(username, password, jdbcConnectionString, "oracle.jdbc.OracleDriver", SQLDialect.DEFAULT);
}

/**
 * Builds a {@link Database} for ClickHouse by delegating to {@code createDatabase} with the
 * {@code ru.yandex.clickhouse.ClickHouseDriver} JDBC driver class and {@link SQLDialect#DEFAULT}.
 *
 * @param username user name passed through to {@code createDatabase}
 * @param password password passed through to {@code createDatabase}
 * @param jdbcConnectionString JDBC connection string passed through to {@code createDatabase}
 * @return whatever {@code createDatabase} returns for these arguments
 */
public static Database createClickhouseDatabase(final String username, final String password, final String jdbcConnectionString) {
  final String driverClassName = "ru.yandex.clickhouse.ClickHouseDriver";
  final SQLDialect dialect = SQLDialect.DEFAULT;
  return createDatabase(username, password, jdbcConnectionString, driverClassName, dialect);
}

/**
 * Builds a {@link Database} for MariaDB by delegating to {@code createDatabase} with the
 * {@code org.mariadb.jdbc.Driver} JDBC driver class and {@link SQLDialect#MARIADB}.
 *
 * @param username user name passed through to {@code createDatabase}
 * @param password password passed through to {@code createDatabase}
 * @param jdbcConnectionString JDBC connection string passed through to {@code createDatabase}
 * @return whatever {@code createDatabase} returns for these arguments
 */
public static Database createMariaDbDatabase(final String username, final String password, final String jdbcConnectionString) {
  final String driverClassName = "org.mariadb.jdbc.Driver";
  final SQLDialect dialect = SQLDialect.MARIADB;
  return createDatabase(username, password, jdbcConnectionString, driverClassName, dialect);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Normalization image for ClickHouse.
# Starts from the dbt 0.21.0 image, copies /airbyte from the Airbyte python
# protocol base image, installs the normalization code and a ClickHouse dbt
# adapter, and pre-fetches the dbt package dependencies.
FROM fishtownanalytics/dbt:0.21.0
COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte

# Install SSH Tunneling dependencies.
# The apt package lists are removed in the same layer so they do not persist in the image.
RUN apt-get update && apt-get install -y jq sshpass && rm -rf /var/lib/apt/lists/*
WORKDIR /airbyte
COPY entrypoint.sh .
COPY build/sshtunneling.sh .

WORKDIR /airbyte/normalization_code
COPY normalization ./normalization
COPY setup.py .
COPY dbt-project-template/ ./dbt-template/

# Install python dependencies
WORKDIR /airbyte/base_python_structs
RUN pip install .

WORKDIR /airbyte/normalization_code
RUN pip install .

WORKDIR /airbyte/normalization_code/dbt-template/
#RUN pip install dbt-clickhouse
# dbt-clickhouse adapter has some bugs, use our own just for now
# https://github.com/silentsokolov/dbt-clickhouse/issues/20
# NOTE(review): this installs the fork's default branch without pinning a commit,
# so image builds are not reproducible — consider pinning once a known-good revision is chosen.
RUN pip install git+https://github.com/burmecia/dbt-clickhouse.git
# Download external dbt dependencies
RUN dbt deps

WORKDIR /airbyte
# Use the key=value ENV form; the whitespace-separated form is legacy syntax.
ENV AIRBYTE_ENTRYPOINT="/airbyte/entrypoint.sh"
ENTRYPOINT ["/airbyte/entrypoint.sh"]

LABEL io.airbyte.name=airbyte/normalization-clickhouse
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Placeholder project file: it must exist so that `dbt deps` can install dbt-utils.
# Its content is overwritten later by the transform function.

# Package name: lowercase characters and underscores only. Pick something that
# reflects the organization or the purpose of the models.
name: "airbyte_utils"
version: "1.0"
config-version: 2

# Name of the dbt "profile" used by this project. Profiles hold the database
# connection settings and live in ~/.dbt/profiles.yml.
profile: "normalize"

# Where dbt looks for each kind of file. For instance, `source-paths` says that
# source models are under "models/". These rarely need changing.
source-paths:
  - "models"
docs-paths:
  - "docs"
analysis-paths:
  - "analysis"
test-paths:
  - "tests"
data-paths:
  - "data"
macro-paths:
  - "macros"

# Directory receiving the compiled SQL files
target-path: "../build"
# Directory receiving the DBT logs
log-path: "../logs"
# Directory receiving external DBT dependencies
modules-path: "/tmp/dbt_modules"

# Directories deleted by `dbt clean`
clean-targets:
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # ExtendedNameTransformer behavior on table/schema names is temporarily
  # disabled, see (issue #1785): every schema is left unquoted.
  schema: false
  identifier: true

# Per-model configuration for everything under `source-paths`: enable or
# disable models, choose their materialization, and so on.
models:
  airbyte_utils:
    +materialized: table
    generated:
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
        # ClickHouse does not support the schema change test yet
        +on_schema_change: "ignore"
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

dispatch:
  - macro_namespace: dbt_utils
    search_order:
      - "airbyte_utils"
      - "dbt_utils"
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# External dbt packages; they are fetched when `dbt deps` runs.

packages:
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: "0.7.3"
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
VARCHAR(max)
{%- endmacro -%}

{# ClickHouse variant of type_json: renders the type name String (no dedicated JSON type is used here). #}
{% macro clickhouse__type_json() %}
String
{% endmacro %}


{# string ------------------------------------------------- #}

Expand All @@ -47,6 +51,10 @@
VARCHAR(max)
{%- endmacro -%}

{# ClickHouse variant of type_string: renders the type name String. The `-` markers strip surrounding whitespace. #}
{%- macro clickhouse__type_string() -%}
String
{%- endmacro -%}


{# float ------------------------------------------------- #}
{% macro mysql__type_float() %}
Expand All @@ -57,6 +65,10 @@
float
{% endmacro %}

{# ClickHouse variant of type_float: renders the type name Float64. #}
{% macro clickhouse__type_float() %}
Float64
{% endmacro %}


{# int ------------------------------------------------- #}
{% macro default__type_int() %}
Expand All @@ -67,6 +79,11 @@
int
{% endmacro %}

{# ClickHouse variant of type_int: renders the type name INT. #}
{# NOTE(review): presumably ClickHouse treats INT as an alias of a native integer type — confirm the resulting width. #}
{% macro clickhouse__type_int() %}
INT
{% endmacro %}


{# bigint ------------------------------------------------- #}
{% macro mysql__type_bigint() %}
signed
Expand All @@ -76,12 +93,20 @@
numeric
{% endmacro %}

{# ClickHouse variant of type_bigint: renders the type name BIGINT. #}
{# NOTE(review): presumably ClickHouse treats BIGINT as an alias of a native 64-bit integer type — confirm. #}
{% macro clickhouse__type_bigint() %}
BIGINT
{% endmacro %}


{# numeric ------------------------------------------------- --#}
{# MySQL variant of type_numeric: renders the type name float. #}
{% macro mysql__type_numeric() %}
float
{% endmacro %}

{# ClickHouse variant of type_numeric: renders the type name Float64 (same as clickhouse__type_float). #}
{# NOTE(review): a floating-point type may lose precision for exact decimal values — confirm this is acceptable. #}
{% macro clickhouse__type_numeric() %}
Float64
{% endmacro %}


{# timestamp ------------------------------------------------- --#}
{% macro mysql__type_timestamp() %}
Expand All @@ -94,6 +119,10 @@
datetime
{%- endmacro -%}

{# ClickHouse variant of type_timestamp: renders the type name DateTime64 with no explicit precision argument. #}
{# NOTE(review): the effective sub-second precision then depends on ClickHouse's default — confirm. #}
{% macro clickhouse__type_timestamp() %}
DateTime64
{% endmacro %}


{# timestamp with time zone ------------------------------------------------- #}

Expand Down Expand Up @@ -124,6 +153,10 @@
datetime
{%- endmacro -%}

{# ClickHouse variant of type_timestamp_with_timezone: renders DateTime64, identical to clickhouse__type_timestamp. #}
{# NOTE(review): no timezone argument is given, so zone handling presumably falls back to server defaults — confirm. #}
{% macro clickhouse__type_timestamp_with_timezone() %}
DateTime64
{% endmacro %}


{# date ------------------------------------------------- #}

Expand All @@ -142,3 +175,7 @@
{# SQL Server variant of type_date: renders the type name date. The `-` markers strip surrounding whitespace. #}
{%- macro sqlserver__type_date() -%}
date
{%- endmacro -%}

{# ClickHouse variant of type_date: renders the type name Date. #}
{% macro clickhouse__type_date() %}
Date
{% endmacro %}
Loading