From 59be9e60d23a47496d5fad8c8e60626a9c2e0cff Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Thu, 4 Nov 2021 15:42:49 +1100 Subject: [PATCH 01/12] add ClickHouse destination --- .../ce0d828e-1dc4-496c-b122-2da42e637e48.json | 7 + .../base-normalization/clickhouse.Dockerfile | 34 +++ .../dbt_project.yml | 64 +++++ .../packages.yml | 5 + .../macros/cross_db_utils/datatypes.sql | 37 +++ .../macros/cross_db_utils/json_operations.sql | 25 ++ .../macros/cross_db_utils/quote.sql | 4 + .../macros/cross_db_utils/surrogate_key.sql | 12 + .../integration_tests/dbt_integration_test.py | 81 +++++- .../dedup_cdc_excluded_ab1.sql | 21 ++ .../dedup_cdc_excluded_ab2.sql | 29 ++ .../dedup_exchange_rate_ab1.sql | 24 ++ .../dedup_exchange_rate_ab2.sql | 32 +++ .../test_normalization/exchange_rate_ab1.sql | 24 ++ .../test_normalization/exchange_rate_ab2.sql | 32 +++ .../test_normalization/exchange_rate_ab3.sql | 40 +++ .../test_normalization/pos_dedup_cdcx_ab1.sql | 21 ++ .../test_normalization/pos_dedup_cdcx_ab2.sql | 31 +++ .../renamed_dedup_cdc_excluded_ab1.sql | 17 ++ .../renamed_dedup_cdc_excluded_ab2.sql | 19 ++ .../dedup_cdc_excluded_scd.sql | 99 +++++++ .../dedup_exchange_rate_scd.sql | 105 ++++++++ .../renamed_dedup_cdc_excluded_scd.sql | 85 ++++++ .../test_normalization/dedup_cdc_excluded.sql | 33 +++ .../dedup_exchange_rate.sql | 36 +++ .../test_normalization/exchange_rate.sql | 34 +++ .../renamed_dedup_cdc_excluded.sql | 29 ++ .../test_normalization/pos_dedup_cdcx_scd.sql | 102 +++++++ .../test_normalization/pos_dedup_cdcx.sql | 31 +++ .../dedup_cdc_excluded_ab3.sql | 31 +++ .../dedup_exchange_rate_ab3.sql | 40 +++ .../test_normalization/pos_dedup_cdcx_ab3.sql | 33 +++ .../renamed_dedup_cdc_excluded_ab3.sql | 19 ++ .../dedup_cdc_excluded_ab1.sql | 20 ++ .../dedup_cdc_excluded_ab2.sql | 20 ++ .../dedup_exchange_rate_ab1.sql | 23 ++ .../dedup_exchange_rate_ab2.sql | 23 ++ .../test_normalization/exchange_rate_ab1.sql | 23 ++ .../test_normalization/exchange_rate_ab2.sql | 23 ++ .../test_normalization/exchange_rate_ab3.sql | 23 ++ .../test_normalization/pos_dedup_cdcx_ab1.sql | 20 ++ .../test_normalization/pos_dedup_cdcx_ab2.sql | 20 ++ .../renamed_dedup_cdc_excluded_ab1.sql | 16 ++ .../renamed_dedup_cdc_excluded_ab2.sql | 16 ++ .../dedup_cdc_excluded_scd.sql | 109 ++++++++ .../dedup_exchange_rate_scd.sql | 119 +++++++++ .../renamed_dedup_cdc_excluded_scd.sql | 101 +++++++ .../test_normalization/dedup_cdc_excluded.sql | 23 ++ .../dedup_exchange_rate.sql | 26 ++ .../test_normalization/exchange_rate.sql | 24 ++ .../renamed_dedup_cdc_excluded.sql | 19 ++ .../test_normalization/pos_dedup_cdcx_scd.sql | 111 ++++++++ .../test_normalization/pos_dedup_cdcx.sql | 23 ++ .../dedup_cdc_excluded_ab3.sql | 20 ++ .../dedup_exchange_rate_ab3.sql | 23 ++ .../test_normalization/pos_dedup_cdcx_ab3.sql | 20 ++ .../renamed_dedup_cdc_excluded_ab3.sql | 16 ++ .../models/generated/sources.yml | 13 + .../dedup_cdc_excluded_ab1.sql | 21 ++ .../dedup_cdc_excluded_ab2.sql | 29 ++ .../dedup_exchange_rate_ab1.sql | 24 ++ .../dedup_exchange_rate_ab2.sql | 32 +++ .../test_normalization/exchange_rate_ab1.sql | 24 ++ .../test_normalization/exchange_rate_ab2.sql | 32 +++ .../test_normalization/exchange_rate_ab3.sql | 40 +++ .../test_normalization/pos_dedup_cdcx_ab1.sql | 21 ++ .../test_normalization/pos_dedup_cdcx_ab2.sql | 31 +++ .../renamed_dedup_cdc_excluded_ab1.sql | 17 ++ .../renamed_dedup_cdc_excluded_ab2.sql | 19 ++ .../dedup_cdc_excluded_scd.sql | 5 + .../dedup_exchange_rate_scd.sql | 5 + .../renamed_dedup_cdc_excluded_scd.sql | 5 + 
.../test_normalization/dedup_cdc_excluded.sql | 5 + .../dedup_exchange_rate.sql | 5 + .../test_normalization/exchange_rate.sql | 5 + .../renamed_dedup_cdc_excluded.sql | 5 + .../test_normalization/pos_dedup_cdcx_scd.sql | 102 +++++++ .../test_normalization/pos_dedup_cdcx.sql | 31 +++ .../dedup_cdc_excluded_ab3.sql | 31 +++ .../dedup_exchange_rate_ab3.sql | 40 +++ .../test_normalization/pos_dedup_cdcx_ab3.sql | 33 +++ .../renamed_dedup_cdc_excluded_ab3.sql | 19 ++ .../data_input/replace_identifiers.json | 3 +- .../integration_tests/test_normalization.py | 7 +- .../normalization/destination_type.py | 1 + .../destination_name_transformer.py | 6 + .../transform_catalog/reserved_keywords.py | 4 + .../transform_catalog/stream_processor.py | 126 ++++++--- .../transform_config/transform.py | 16 ++ ...ons_catalog_expected_clickhouse_names.json | 32 +++ ...ted_catalog_expected_clickhouse_names.json | 252 ++++++++++++++++++ ...ons_catalog_expected_clickhouse_names.json | 52 ++++ .../destination-clickhouse/.dockerignore | 3 + .../destination-clickhouse/Dockerfile | 11 + .../destination-clickhouse/README.md | 68 +++++ .../destination-clickhouse/build.gradle | 31 +++ .../clickhouse/ClickhouseDestination.java | 82 ++++++ .../ClickhouseSQLNameTransformer.java | 16 ++ .../clickhouse/ClickhouseSqlOperations.java | 97 +++++++ .../src/main/resources/spec.json | 51 ++++ .../ClickhouseDestinationAcceptanceTest.java | 210 +++++++++++++++ .../clickhouse/ClickhouseDestinationTest.java | 136 ++++++++++ .../DefaultNormalizationRunner.java | 3 +- .../NormalizationRunnerFactory.java | 1 + docs/integrations/destinations/clickhouse.md | 83 ++++++ 105 files changed, 3939 insertions(+), 43 deletions(-) create mode 100644 airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ce0d828e-1dc4-496c-b122-2da42e637e48.json create mode 100644 airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile create mode 100755 airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml create mode 100755 airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/pos_dedup_cdcx.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/exchange_rate.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql create mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql create mode 100644 airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json create mode 100644 airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json create mode 100644 airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json create mode 100644 airbyte-integrations/connectors/destination-clickhouse/.dockerignore create mode 100644 airbyte-integrations/connectors/destination-clickhouse/Dockerfile create mode 100644 airbyte-integrations/connectors/destination-clickhouse/README.md create mode 100644 airbyte-integrations/connectors/destination-clickhouse/build.gradle create mode 100644 airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java create mode 100644 airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSQLNameTransformer.java create mode 100644 airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java create mode 100644 airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json create mode 100644 airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java create mode 100644 docs/integrations/destinations/clickhouse.md diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ce0d828e-1dc4-496c-b122-2da42e637e48.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ce0d828e-1dc4-496c-b122-2da42e637e48.json new file mode 100644 index 000000000000..88e582d9c388 --- /dev/null +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/ce0d828e-1dc4-496c-b122-2da42e637e48.json @@ -0,0 +1,7 @@ +{ + "destinationDefinitionId": "ce0d828e-1dc4-496c-b122-2da42e637e48", + "name": "Clickhouse", + "dockerRepository": "airbyte/destination-clickhouse", + "dockerImageTag": "0.1.0", + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse" +} diff --git a/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile b/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile new file mode 100644 index 000000000000..ba365769c958 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile @@ -0,0 +1,34 @@ +FROM fishtownanalytics/dbt:0.21.0 +COPY 
--from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte + +# Install SSH Tunneling dependencies +RUN apt-get update && apt-get install -y jq sshpass +WORKDIR /airbyte +COPY entrypoint.sh . +COPY build/sshtunneling.sh . + +WORKDIR /airbyte/normalization_code +COPY normalization ./normalization +COPY setup.py . +COPY dbt-project-template/ ./dbt-template/ + +# Install python dependencies +WORKDIR /airbyte/base_python_structs +RUN pip install . + +WORKDIR /airbyte/normalization_code +RUN pip install . + +WORKDIR /airbyte/normalization_code/dbt-template/ +#RUN pip install dbt-clickhouse +# dbt-clickhouse adapter has some bugs, use our own just for now +# https://github.com/silentsokolov/dbt-clickhouse/issues/20 +RUN pip install git+https://github.com/burmecia/dbt-clickhouse.git +# Download external dbt dependencies +RUN dbt deps + +WORKDIR /airbyte +ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" +ENTRYPOINT ["/airbyte/entrypoint.sh"] + +LABEL io.airbyte.name=airbyte/normalization-clickhouse diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml new file mode 100755 index 000000000000..37eca7fcd73f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml @@ -0,0 +1,64 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'airbyte_utils' +version: '1.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: 'normalize' + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +source-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +modules-path: "/tmp/dbt_modules" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true +# Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) +# all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `source-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! 
+models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + # schema change test isn't supported in ClickHouse yet + +on_schema_change: "ignore" + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +dispatch: + - macro_namespace: dbt_utils + search_order: ['airbyte_utils', 'dbt_utils'] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml new file mode 100755 index 000000000000..8c9004051053 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml @@ -0,0 +1,5 @@ +# add dependencies. these will get pulled during the `dbt deps` process. + +packages: + - git: "https://github.com/fishtown-analytics/dbt-utils.git" + revision: 0.7.3 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql index 28b734ee6fe2..080aea5eb104 100644 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql @@ -32,6 +32,10 @@ VARCHAR(max) {%- endmacro -%} +{% macro clickhouse__type_json() %} + String +{% endmacro %} + {# string ------------------------------------------------- #} @@ -47,6 +51,10 @@ VARCHAR(max) {%- endmacro -%} +{%- macro clickhouse__type_string() -%} + String +{%- endmacro -%} + {# float ------------------------------------------------- #} {% macro mysql__type_float() %} @@ -57,6 +65,10 @@ float {% endmacro %} +{% macro clickhouse__type_float() %} + Float64 +{% endmacro %} + {# int ------------------------------------------------- #} {% macro default__type_int() %} @@ -67,6 +79,11 @@ int {% endmacro %} +{% macro clickhouse__type_int() %} + INT +{% endmacro %} + + {# bigint ------------------------------------------------- #} {% macro mysql__type_bigint() %} signed @@ -76,12 +93,20 @@ numeric {% endmacro %} +{% macro clickhouse__type_bigint() %} + BIGINT +{% endmacro %} + {# numeric ------------------------------------------------- --#} {% macro mysql__type_numeric() %} float {% endmacro %} +{% macro clickhouse__type_numeric() %} + Float64 +{% endmacro %} + {# timestamp ------------------------------------------------- --#} {% macro mysql__type_timestamp() %} @@ -94,6 +119,10 @@ datetime {%- endmacro -%} +{% macro clickhouse__type_timestamp() %} + DateTime64 +{% endmacro %} + {# timestamp with time zone ------------------------------------------------- #} @@ -124,6 +153,10 @@ datetime {%- endmacro -%} +{% macro clickhouse__type_timestamp_with_timezone() %} + DateTime64 +{% endmacro %} + {# date ------------------------------------------------- #} @@ -142,3 +175,7 @@ {%- macro sqlserver__type_date() -%} date {%- endmacro -%} + +{% macro clickhouse__type_date() %} + Date +{% endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql index e0d27c4ae3b3..619eaf46cbfd 
100644 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql @@ -5,6 +5,7 @@ - Redshift: json_extract_path_text('json_string', 'path_elem' [,'path_elem'[, ...] ] [, null_if_invalid ] ) -> https://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html - Postgres: json_extract_path_text(, 'path' [, 'path' [, ...}}) -> https://www.postgresql.org/docs/12/functions-json.html - MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html + - ClickHouse: JSONExtractString(json_doc, 'path' [, 'path'] ...) -> https://clickhouse.com/docs/en/sql-reference/functions/json-functions/ #} {# format_json_path -------------------------------------------------- #} @@ -66,6 +67,14 @@ {{ "'$.\"" ~ str_list|join(".") ~ "\"'" }} {%- endmacro %} +{% macro clickhouse__format_json_path(json_path_list) -%} + {%- set str_list = [] -%} + {%- for json_path in json_path_list -%} + {%- if str_list.append(json_path.replace("'", "''").replace('"', '\\"')) -%} {%- endif -%} + {%- endfor -%} + {{ "'" ~ str_list|join("','") ~ "'" }} +{%- endmacro %} + {# json_extract ------------------------------------------------- #} {% macro json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} @@ -124,6 +133,14 @@ json_query({{ json_column }}, {{ format_json_path(json_path_list) }}) {%- endmacro %} +{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + {%- if from_table|string() == '' %} + JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }}) + {% else %} + JSONExtractRaw({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}) + {% endif -%} +{%- endmacro %} + {# json_extract_scalar ------------------------------------------------- #} {% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} @@ -162,6 +179,10 @@ json_value({{ json_column }}, {{ format_json_path(json_path_list) }}) {%- endmacro %} +{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + JSONExtractRaw({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} + {# json_extract_array ------------------------------------------------- #} {% macro json_extract_array(json_column, json_path_list, normalized_json_path) -%} @@ -199,3 +220,7 @@ {% macro sqlserver__json_extract_array(json_column, json_path_list, normalized_json_path) -%} json_query({{ json_column }}, {{ format_json_path(json_path_list) }}) {%- endmacro %} + +{% macro clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + JSONExtractArrayRaw({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql index c751abfaeb21..87862498cfc5 100644 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql @@ -10,3 +10,7 @@ {% macro oracle__quote(column_name) -%} {{ '\"' ~ column_name ~ '\"'}} {%- endmacro %} + +{% macro clickhouse__quote(column_name) -%} + {{ '\"' ~ column_name 
~ '\"'}} +{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql index a32b59b01774..9de2965409aa 100644 --- a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql @@ -11,3 +11,15 @@ {%- endfor %} ) {%- endmacro %} + +{% macro clickhouse__surrogate_key(field_list) -%} + assumeNotNull(hex(MD5( + {%- for field in field_list %} + {% if not loop.last %} + toString({{ field }}) || '~' || + {% else %} + toString({{ field }}) + {% endif %} + {%- endfor %} + ))) +{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py index 3f23919a5cc1..42a481516855 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py @@ -14,6 +14,7 @@ import threading import time from typing import Any, Dict, List +from copy import copy from normalization.destination_type import DestinationType from normalization.transform_config.transform import TransformConfig @@ -22,6 +23,7 @@ NORMALIZATION_TEST_MSSQL_DB_PORT = "NORMALIZATION_TEST_MSSQL_DB_PORT" NORMALIZATION_TEST_MYSQL_DB_PORT = "NORMALIZATION_TEST_MYSQL_DB_PORT" NORMALIZATION_TEST_POSTGRES_DB_PORT = "NORMALIZATION_TEST_POSTGRES_DB_PORT" +NORMALIZATION_TEST_CLICKHOUSE_DB_PORT = "NORMALIZATION_TEST_CLICKHOUSE_DB_PORT" class DbtIntegrationTest(object): @@ -48,6 +50,8 @@ def setup_db(self, destinations_to_test: List[str]): self.setup_mysql_db() if DestinationType.MSSQL.value in destinations_to_test: self.setup_mssql_db() + if DestinationType.CLICKHOUSE.value in destinations_to_test: + self.setup_clickhouse_db() def setup_postgres_db(self): start_db = True @@ -209,6 +213,72 @@ def setup_mssql_db(self): with open("../secrets/mssql.json", "w") as fh: fh.write(json.dumps(config)) + def setup_clickhouse_db(self): + """ + ClickHouse official JDBC driver use HTTP port 8123, while Python ClickHouse + driver uses native port 9000, so we need to open both ports for destination + connector and dbt container respectively. 
+ start_db = True + if os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT): + port = int(os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT)) + start_db = False + else: + port = self.find_free_port() + config = { + "host": "localhost", + "port": port, + "database": self.target_schema, + "username": "default", + "password": "", + } + if start_db: + self.db_names.append("clickhouse") + print("Starting localhost clickhouse container for tests") + commands = [ + "docker", + "run", + "--rm", + "--name", + f"{self.container_prefix}_clickhouse", + "--ulimit", + "nofile=262144:262144", + "-p", + "9000:9000", # the Python ClickHouse driver uses the native port + "-p", + f"{config['port']}:8123", # the ClickHouse JDBC driver uses the HTTP port + "-d", + # so far, only the latest ClickHouse server image has + # window functions enabled + "clickhouse/clickhouse-server:latest", + ] + print("Executing: ", " ".join(commands)) + subprocess.call(commands) + print("....Waiting for ClickHouse DB to start...15 sec") + time.sleep(15) + # Run an additional command to prepare the test database + command_create_db = [ + "docker", + "run", + "--rm", + "--link", + f"{self.container_prefix}_clickhouse:clickhouse-server", + "clickhouse/clickhouse-client:21.8.10.19", + "--host", + "clickhouse-server", + "--query", + f"CREATE DATABASE IF NOT EXISTS {config['database']}", + ] + # create test db + print("Executing: ", " ".join(command_create_db)) + subprocess.call(command_create_db) + if not os.path.exists("../secrets"): + os.makedirs("../secrets") + with open("../secrets/clickhouse.json", "w") as fh: + fh.write(json.dumps(config)) + @staticmethod def find_free_port(): """ @@ -257,7 +327,14 @@ def generate_profile_yaml_file(self, destination_type: DestinationType, test_roo profiles_config["database"] = self.target_schema else: profiles_config["schema"] = self.target_schema - profiles_yaml = config_generator.transform(destination_type, profiles_config) + if destination_type.value == DestinationType.CLICKHOUSE.value: + # the Python ClickHouse driver uses the native port 9000, which is different + # from the official ClickHouse JDBC driver's HTTP port + clickhouse_config = copy(profiles_config) + clickhouse_config["port"] = 9000 + profiles_yaml = config_generator.transform(destination_type, clickhouse_config) + else: + profiles_yaml = config_generator.transform(destination_type, profiles_config) config_generator.write_yaml_config(test_root_dir, profiles_yaml, "profiles.yml") return profiles_config @@ -294,6 +371,8 @@ def get_normalization_image(destination_type: DestinationType) -> str: return "airbyte/normalization-mysql:dev" elif DestinationType.ORACLE.value == destination_type.value: return "airbyte/normalization-oracle:dev" + elif DestinationType.CLICKHOUSE.value == destination_type.value: + return "airbyte/normalization-clickhouse:dev" else: return "airbyte/normalization:dev" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql new file mode 100644 index 000000000000..30483298999d --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql @@ -0,0 +1,21 @@ + + + create view _airbyte_test_normalization.dedup_cdc_excluded_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias +-- dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql new file mode 100644 index 000000000000..64ffec89ae08 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql @@ -0,0 +1,29 @@ + + + create view _airbyte_test_normalization.dedup_cdc_excluded_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.dedup_cdc_excluded_ab1 +-- dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 000000000000..7caf3495cf97 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,24 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'currency') as currency, + JSONExtractRaw(_airbyte_data, 'date') as date, + JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, + 
JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(_airbyte_data, 'NZD') as NZD, + JSONExtractRaw(_airbyte_data, 'USD') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 000000000000..812c7b0fadb3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,32 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql new file mode 100644 index 000000000000..19bde82ae927 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql @@ -0,0 +1,24 @@ + + + create view _airbyte_test_normalization.exchange_rate_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'currency') as currency, + JSONExtractRaw(_airbyte_data, 'date') as date, + JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, + JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(_airbyte_data, 'NZD') as NZD, + 
JSONExtractRaw(_airbyte_data, 'USD') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql new file mode 100644 index 000000000000..3d80a32b6a2e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql @@ -0,0 +1,32 @@ + + + create view _airbyte_test_normalization.exchange_rate_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.exchange_rate_ab1 +-- exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql new file mode 100644 index 000000000000..322475a8028e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql @@ -0,0 +1,40 @@ + + + create view _airbyte_test_normalization.exchange_rate_ab3__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(date) || '~' || + + + toString(timestamp_col) || '~' || + + + toString("HKD@spéçiäl & characters") || '~' || + + + toString(HKD_special___characters) || '~' || + + + toString(NZD) || '~' || + + + toString(USD) + + ))) as _airbyte_exchange_rate_hashid, + tmp.* +from _airbyte_test_normalization.exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql new file mode 100644 index 000000000000..c5a003ac2bb5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql @@ -0,0 +1,21 @@ + + + create view _airbyte_test_normalization.pos_dedup_cdcx_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias +-- pos_dedup_cdcx +where 1 = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql new file mode 100644 index 000000000000..0e1dc7fdb2ae --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql @@ -0,0 +1,31 @@ + + + create view _airbyte_test_normalization.pos_dedup_cdcx_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + accurateCastOrNull(_ab_cdc_log_pos, ' + Float64 +') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.pos_dedup_cdcx_ab1 +-- pos_dedup_cdcx +where 1 = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql new file mode 100644 index 000000000000..df3cf26d6a8d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -0,0 +1,17 @@ + + + create view 
_airbyte_test_normalization.renamed_dedup_cdc_excluded_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql new file mode 100644 index 000000000000..2fb72248eff2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -0,0 +1,19 @@ + + + create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab1 +-- renamed_dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 000000000000..e6ff4b6b12aa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,99 @@ + + + + + create table test_normalization.dedup_cdc_excluded_scd + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +with + +input_data as ( + select * + from _airbyte_test_normalization.dedup_cdc_excluded_ab3 + -- dedup_cdc_excluded from test_normalization._airbyte_raw_dedup_cdc_excluded +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select *, + multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row + from ( + select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_emitted_at as _airbyte_start_at, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data + ) table_alias +), +dedup_data as ( + select + -- we 
need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String') + order by _airbyte_ab_id + ) as _airbyte_row_num, + assumeNotNull(hex(MD5( + + toString(_airbyte_unique_key) || '~' || + + + toString(_airbyte_start_at) || '~' || + + + toString(_airbyte_emitted_at) || '~' || + + + toString(_ab_cdc_deleted_at) || '~' || + + + toString(_ab_cdc_updated_at) + + ))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 000000000000..8063fcd314ed --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,105 @@ + + + + + create table test_normalization.dedup_exchange_rate_scd + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +with + +input_data as ( + select * + from _airbyte_test_normalization.dedup_exchange_rate_ab3 + -- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select *, + multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row + from ( + select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(NZD) + + ))) as _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + date as _airbyte_start_at, + anyOrNull(date) over ( + partition by id, currency, cast(NZD as String) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data + ) table_alias +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + order by _airbyte_ab_id + ) as _airbyte_row_num, + assumeNotNull(hex(MD5( + + toString(_airbyte_unique_key) || '~' || + + + toString(_airbyte_start_at) || '~' || + + + toString(_airbyte_emitted_at) + + ))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + date, + 
timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 000000000000..079e781b679e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,85 @@ + + + + + create table test_normalization.renamed_dedup_cdc_excluded_scd + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +with + +input_data as ( + select * + from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab3 + -- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select *, + multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row + from ( + select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_unique_key, + id, + _airbyte_emitted_at as _airbyte_start_at, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_renamed_dedup_cdc_excluded_hashid + from input_data + ) table_alias +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + order by _airbyte_ab_id + ) as _airbyte_row_num, + assumeNotNull(hex(MD5( + + toString(_airbyte_unique_key) || '~' || + + + toString(_airbyte_start_at) || '~' || + + + toString(_airbyte_emitted_at) + + ))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 000000000000..4b2055de8600 --- /dev/null +++ 
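The two SCD models above close out superseded record versions without any self-join: within each primary-key partition, anyOrNull(...) over a frame of ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING picks up the start timestamp of the next-newer row as _airbyte_end_at, and multiIf then flags the row whose end date is still NULL as the active one. Below is a minimal, illustrative ClickHouse query showing only that window pattern; it is not part of the generated models, the three sample rows are invented, and older ClickHouse releases may need allow_experimental_window_functions = 1.

select
    *,
    multiIf(_airbyte_end_at is null, 1, 0) as _airbyte_active_row
from
(
    select
        id,
        emitted_at as _airbyte_start_at,
        anyOrNull(emitted_at) over (
            partition by id
            order by emitted_at desc
            rows between 1 preceding and 1 preceding
        ) as _airbyte_end_at
    from
    (
        -- three emitted versions of the same record (id = 1), one day apart
        select 1 as id, arrayJoin([
            toDateTime('2021-11-01 00:00:00'),
            toDateTime('2021-11-02 00:00:00'),
            toDateTime('2021-11-03 00:00:00')
        ]) as emitted_at
    )
)
order by _airbyte_start_at desc

The newest version keeps a NULL _airbyte_end_at and is marked active; every older version is closed with the start timestamp of the row that replaced it.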
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,33 @@ + + + + + create table test_normalization.dedup_cdc_excluded + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +-- Final base SQL model +select + _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from test_normalization.dedup_cdc_excluded_scd +-- dedup_cdc_excluded from test_normalization._airbyte_raw_dedup_cdc_excluded +where 1 = 1 +and _airbyte_active_row = 1 + + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 000000000000..d1dc1abdc714 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,36 @@ + + + + + create table test_normalization.dedup_exchange_rate + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +-- Final base SQL model +select + _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from test_normalization.dedup_exchange_rate_scd +-- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate +where 1 = 1 +and _airbyte_active_row = 1 + + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/exchange_rate.sql new file mode 100644 index 000000000000..52af32405c92 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/exchange_rate.sql @@ -0,0 +1,34 @@ + + + + + create table test_normalization.exchange_rate + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +-- Final base SQL model +select + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from _airbyte_test_normalization.exchange_rate_ab3 +-- exchange_rate from test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql 
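Every rendered table in this first_output tree is materialized with the same minimal DDL shape: CREATE TABLE ... AS SELECT with engine = MergeTree() and order by (tuple()), i.e. a MergeTree table with no sorting key. A stripped-down, illustrative equivalent follows; the table name is a placeholder and it assumes the test_normalization database already exists.

create table test_normalization.ddl_shape_example
engine = MergeTree()
order by (tuple())
as
select
    1     as id,
    'USD' as currency,
    now() as _airbyte_normalized_at

order by tuple() leaves the rows unsorted, which is the conservative choice when normalization has no sorting key to offer; a hand-tuned sorting key would have to be applied outside these generated models.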
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 000000000000..5295b9baa8da --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,29 @@ + + + + + create table test_normalization.renamed_dedup_cdc_excluded + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +-- Final base SQL model +select + _airbyte_unique_key, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from test_normalization.renamed_dedup_cdc_excluded_scd +-- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded +where 1 = 1 +and _airbyte_active_row = 1 + + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql new file mode 100644 index 000000000000..e14e9839149c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -0,0 +1,102 @@ + + + + create table test_normalization.pos_dedup_cdcx_scd__dbt_tmp + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +with + +input_data as ( + select * + from _airbyte_test_normalization.pos_dedup_cdcx_ab3 + -- pos_dedup_cdcx from test_normalization._airbyte_raw_pos_dedup_cdcx +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select *, + multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row + from ( + select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_emitted_at as _airbyte_start_at, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_pos_dedup_cdcx_hashid + from input_data + ) table_alias +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String'), accurateCastOrNull(_ab_cdc_log_pos, 'String') + order by _airbyte_ab_id + ) as _airbyte_row_num, + assumeNotNull(hex(MD5( + + toString(_airbyte_unique_key) || '~' || + + + toString(_airbyte_start_at) || '~' || + + + toString(_airbyte_emitted_at) || '~' || + + + toString(_ab_cdc_deleted_at) || '~' || + + + 
toString(_ab_cdc_updated_at) || '~' || + + + toString(_ab_cdc_log_pos) + + ))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_pos_dedup_cdcx_hashid +from dedup_data where _airbyte_row_num = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql new file mode 100644 index 000000000000..6397037a6490 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql @@ -0,0 +1,31 @@ + + + + create table test_normalization.pos_dedup_cdcx__dbt_tmp + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +-- Final base SQL model +select + _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_pos_dedup_cdcx_hashid +from test_normalization.pos_dedup_cdcx_scd +-- pos_dedup_cdcx from test_normalization._airbyte_raw_pos_dedup_cdcx +where 1 = 1 +and _airbyte_active_row = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql new file mode 100644 index 000000000000..2788aa5cd7d4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql @@ -0,0 +1,31 @@ + + + create view _airbyte_test_normalization.dedup_cdc_excluded_ab3__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(name) || '~' || + + + toString(_ab_cdc_lsn) || '~' || + + + toString(_ab_cdc_updated_at) || '~' || + + + toString(_ab_cdc_deleted_at) + + ))) as _airbyte_dedup_cdc_excluded_hashid, + tmp.* +from _airbyte_test_normalization.dedup_cdc_excluded_ab2 tmp +-- dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql new file mode 100644 index 000000000000..715f70863de1 --- /dev/null +++ 
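The *_ab3 views above (and the inline hashes in the SCD tables) all derive a stream hash id the same way: stringify every column, join the pieces with a '~' separator, MD5 the result, hex-encode it, and strip the Nullable wrapper with assumeNotNull so the key column itself is non-nullable. A one-off, illustrative reproduction with made-up values:

select assumeNotNull(hex(MD5(
    toString(1) || '~' || toString('EUR') || '~' || toString(1.25)
))) as _airbyte_example_hashid

This mirrors what the dbt_utils.surrogate_key(...) calls in the generated model sources render to on ClickHouse.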
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql @@ -0,0 +1,40 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab3__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(date) || '~' || + + + toString(timestamp_col) || '~' || + + + toString("HKD@spéçiäl & characters") || '~' || + + + toString(HKD_special___characters) || '~' || + + + toString(NZD) || '~' || + + + toString(USD) + + ))) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from _airbyte_test_normalization.dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql new file mode 100644 index 000000000000..90f493b6c800 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql @@ -0,0 +1,33 @@ + + + create view _airbyte_test_normalization.pos_dedup_cdcx_ab3__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(name) || '~' || + + + toString(_ab_cdc_lsn) || '~' || + + + toString(_ab_cdc_updated_at) || '~' || + + + toString(_ab_cdc_deleted_at) || '~' || + + + toString(_ab_cdc_log_pos) + + ))) as _airbyte_pos_dedup_cdcx_hashid, + tmp.* +from _airbyte_test_normalization.pos_dedup_cdcx_ab2 tmp +-- pos_dedup_cdcx +where 1 = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql new file mode 100644 index 000000000000..11811d557f2a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql @@ -0,0 +1,19 @@ + + + create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab3__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_renamed_dedup_cdc_excluded_hashid, + tmp.* +from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab2 tmp +-- renamed_dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql 
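The double-quoted "HKD@spéçiäl & characters" identifier in the compiled views above is plain ClickHouse identifier quoting (double quotes and backticks both work), which is what keeps a column name containing spaces, '@' and accented characters addressable; the model sources further down produce it via the quote() macro. A tiny illustration with an invented literal standing in for the extracted column:

select "HKD@spéçiäl & characters" * 2 as doubled
from
(
    select 1.25 as "HKD@spéçiäl & characters"
)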
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql new file mode 100644 index 000000000000..bc0150c8df4e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as name, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} as table_alias +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql new file mode 100644 index 000000000000..5cc104224b3e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql @@ -0,0 +1,20 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), '{{ dbt_utils.type_string() }}'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, '{{ dbt_utils.type_float() }}') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_cdc_excluded_ab1') }} +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 000000000000..69dafdd842bd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 000000000000..a32c380c7a4f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), '{{ dbt_utils.type_string() }}'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('date') }})) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('timestamp_col') }})) as timestamp_col, + accurateCastOrNull({{ quote('HKD@spéçiäl & characters') }}, '{{ dbt_utils.type_float() }}') as {{ quote('HKD@spéçiäl & characters') }}, + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), '{{ dbt_utils.type_string() }}'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, '{{ dbt_utils.type_float() }}') as NZD, + 
accurateCastOrNull(USD, '{{ dbt_utils.type_float() }}') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql new file mode 100644 index 000000000000..f71cd25a7b9d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} as table_alias +-- exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql new file mode 100644 index 000000000000..584e689ce476 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), '{{ dbt_utils.type_string() }}'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('date') }})) as date, + 
parseDateTime64BestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('timestamp_col') }})) as timestamp_col, + accurateCastOrNull({{ quote('HKD@spéçiäl & characters') }}, '{{ dbt_utils.type_float() }}') as {{ quote('HKD@spéçiäl & characters') }}, + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), '{{ dbt_utils.type_string() }}'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, '{{ dbt_utils.type_float() }}') as NZD, + accurateCastOrNull(USD, '{{ dbt_utils.type_float() }}') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('exchange_rate_ab1') }} +-- exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql new file mode 100644 index 000000000000..9e9f3a174545 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'date', + 'timestamp_col', + quote('HKD@spéçiäl & characters'), + 'HKD_special___characters', + 'NZD', + 'USD', + ]) }} as _airbyte_exchange_rate_hashid, + tmp.* +from {{ ref('exchange_rate_ab2') }} tmp +-- exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql new file mode 100644 index 000000000000..67bc11204a5c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as name, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_log_pos'], 
['_ab_cdc_log_pos']) }} as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} as table_alias +-- pos_dedup_cdcx +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql new file mode 100644 index 000000000000..15f7fbf27359 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql @@ -0,0 +1,20 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), '{{ dbt_utils.type_string() }}'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, '{{ dbt_utils.type_float() }}') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_deleted_at, + accurateCastOrNull(_ab_cdc_log_pos, '{{ dbt_utils.type_float() }}') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('pos_dedup_cdcx_ab1') }} +-- pos_dedup_cdcx +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql new file mode 100644 index 000000000000..7fd3046082a7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -0,0 +1,16 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql 
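Reading the *_ab1 and *_ab2 models above together shows the full ClickHouse parsing-and-typing chain: json_extract_scalar renders to JSONExtractRaw, which returns string values with their JSON quotes still attached (hence the trim(BOTH '"' from ...) before casting), accurateCastOrNull turns unparseable numerics into NULL instead of aborting the query, and dates go through the parseDateTime*BestEffortOrNull family. An illustrative stand-alone query over an invented JSON blob:

with '{"id": 1, "currency": "EUR", "date": "2021-11-04", "NZD": 0.71}' as _airbyte_data
select
    JSONExtractRaw(_airbyte_data, 'currency') as raw_currency, -- returns '"EUR"', quotes included
    nullif(accurateCastOrNull(trim(BOTH '"' from JSONExtractRaw(_airbyte_data, 'currency')), 'String'), 'null') as currency,
    accurateCastOrNull(JSONExtractRaw(_airbyte_data, 'id'), 'Int64') as id, -- BIGINT in the models is an alias for Int64
    accurateCastOrNull(JSONExtractRaw(_airbyte_data, 'NZD'), 'Float64') as NZD,
    parseDateTimeBestEffortOrNull(trim(BOTH '"' from JSONExtractRaw(_airbyte_data, 'date'))) as date

A malformed numeric simply comes out as NULL under accurateCastOrNull, which is presumably why the generated models prefer it over a plain cast.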
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql new file mode 100644 index 000000000000..73d0ae1e998b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -0,0 +1,16 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('renamed_dedup_cdc_excluded_ab1') }} +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 000000000000..98a250fcf004 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,109 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_cdc_excluded_ab3') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at') }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from new_data +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_cdc_excluded_ab3'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) + --left join {{ ref('dedup_cdc_excluded_ab3') }} as inc_data on 1 = 0 + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_cdc_excluded_ab3')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_cdc_excluded_ab3')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_cdc_excluded_ab3') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +), +{% endif %} +scd_data as ( + -- SQL model to 
build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select *, + multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row + from ( + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_emitted_at as _airbyte_start_at, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data + ) table_alias +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_string() }}') + order by _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 000000000000..692d4a791d9b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,119 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_ab3') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at') }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key + from new_data +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_ab3'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active 
data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) + --left join {{ ref('dedup_exchange_rate_ab3') }} as inc_data on 1 = 0 + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_ab3')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_ab3')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_ab3') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select *, + multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row + from ( + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + date as _airbyte_start_at, + anyOrNull(date) over ( + partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data + ) table_alias +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + order by _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 000000000000..b93705c52fbd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,101 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('renamed_dedup_cdc_excluded_ab3') }} + -- renamed_dedup_cdc_excluded from {{ 
source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at') }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from new_data +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('renamed_dedup_cdc_excluded_ab3'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) + --left join {{ ref('renamed_dedup_cdc_excluded_ab3') }} as inc_data on 1 = 0 + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_ab3')) }} from new_data + union all + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_ab3')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('renamed_dedup_cdc_excluded_ab3') }} + -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select *, + multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row + from ( + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + _airbyte_emitted_at as _airbyte_start_at, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_renamed_dedup_cdc_excluded_hashid + from input_data + ) table_alias +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + order by _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 000000000000..086676f17371 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +select + _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from {{ ref('dedup_cdc_excluded_scd') }} +-- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 000000000000..3fb8238f2479 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,26 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +select + _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/exchange_rate.sql new file mode 100644 index 000000000000..dcab54dd6a6a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/exchange_rate.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +select + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 000000000000..8a8ff85f5902 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +select + _airbyte_unique_key, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from {{ ref('renamed_dedup_cdc_excluded_scd') }} +-- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql new file mode 100644 index 000000000000..9cec19babf3a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -0,0 +1,111 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('pos_dedup_cdcx_ab3') }} + -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at') }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from new_data +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('pos_dedup_cdcx_ab3'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) + --left join {{ ref('pos_dedup_cdcx_ab3') }} as inc_data on 1 = 0 + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('pos_dedup_cdcx_ab3')) }} from new_data + union all + select {{ dbt_utils.star(ref('pos_dedup_cdcx_ab3')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('pos_dedup_cdcx_ab3') }} 
+ -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select *, + multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row + from ( + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_emitted_at as _airbyte_start_at, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_pos_dedup_cdcx_hashid + from input_data + ) table_alias +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_log_pos, '{{ dbt_utils.type_string() }}') + order by _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at', '_ab_cdc_log_pos' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_pos_dedup_cdcx_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/pos_dedup_cdcx.sql new file mode 100644 index 000000000000..fc341acc8a63 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/pos_dedup_cdcx.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +select + _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_pos_dedup_cdcx_hashid +from {{ ref('pos_dedup_cdcx_scd') }} +-- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} +where 1 = 1 +and _airbyte_active_row = 1 + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql new file mode 100644 index 000000000000..dc6238af14f4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql @@ -0,0 +1,20 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +select + {{ dbt_utils.surrogate_key([ + 'id', + 'name', + '_ab_cdc_lsn', + '_ab_cdc_updated_at', + '_ab_cdc_deleted_at', + ]) }} as _airbyte_dedup_cdc_excluded_hashid, + tmp.* +from {{ ref('dedup_cdc_excluded_ab2') }} tmp +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql new file mode 100644 index 000000000000..22f448b39869 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql @@ -0,0 +1,23 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'date', + 'timestamp_col', + quote('HKD@spéçiäl & characters'), + 'HKD_special___characters', + 'NZD', + 'USD', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql new file mode 100644 index 000000000000..7dd3908bac88 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql @@ -0,0 +1,20 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +select + {{ dbt_utils.surrogate_key([ + 'id', + 'name', + 
'_ab_cdc_lsn', + '_ab_cdc_updated_at', + '_ab_cdc_deleted_at', + '_ab_cdc_log_pos', + ]) }} as _airbyte_pos_dedup_cdcx_hashid, + tmp.* +from {{ ref('pos_dedup_cdcx_ab2') }} tmp +-- pos_dedup_cdcx +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql new file mode 100644 index 000000000000..f5ce3e6ba182 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql @@ -0,0 +1,16 @@ +{{ config( + unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_renamed_dedup_cdc_excluded_hashid, + tmp.* +from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml new file mode 100644 index 000000000000..45c338b893ca --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml @@ -0,0 +1,13 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_pos_dedup_cdcx + - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql new file mode 100644 index 000000000000..30483298999d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql @@ -0,0 +1,21 @@ + + + create view _airbyte_test_normalization.dedup_cdc_excluded_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, 
+ _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias +-- dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql new file mode 100644 index 000000000000..64ffec89ae08 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql @@ -0,0 +1,29 @@ + + + create view _airbyte_test_normalization.dedup_cdc_excluded_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.dedup_cdc_excluded_ab1 +-- dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 000000000000..7caf3495cf97 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,24 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'currency') as currency, + JSONExtractRaw(_airbyte_data, 'date') as date, + JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, + JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(_airbyte_data, 'NZD') as NZD, + JSONExtractRaw(_airbyte_data, 'USD') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 000000000000..812c7b0fadb3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,32 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql new file mode 100644 index 000000000000..19bde82ae927 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql @@ -0,0 +1,24 @@ + + + create view _airbyte_test_normalization.exchange_rate_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'currency') as currency, + JSONExtractRaw(_airbyte_data, 'date') as date, + JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, + JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(_airbyte_data, 'NZD') as NZD, + JSONExtractRaw(_airbyte_data, 'USD') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql new file mode 100644 index 
000000000000..3d80a32b6a2e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql @@ -0,0 +1,32 @@ + + + create view _airbyte_test_normalization.exchange_rate_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.exchange_rate_ab1 +-- exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql new file mode 100644 index 000000000000..322475a8028e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql @@ -0,0 +1,40 @@ + + + create view _airbyte_test_normalization.exchange_rate_ab3__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(date) || '~' || + + + toString(timestamp_col) || '~' || + + + toString("HKD@spéçiäl & characters") || '~' || + + + toString(HKD_special___characters) || '~' || + + + toString(NZD) || '~' || + + + toString(USD) + + ))) as _airbyte_exchange_rate_hashid, + tmp.* +from _airbyte_test_normalization.exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql new file mode 100644 index 000000000000..c5a003ac2bb5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql @@ -0,0 +1,21 @@ + + + create view _airbyte_test_normalization.pos_dedup_cdcx_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + 
JSONExtractRaw(_airbyte_data, 'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias +-- pos_dedup_cdcx +where 1 = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql new file mode 100644 index 000000000000..0e1dc7fdb2ae --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql @@ -0,0 +1,31 @@ + + + create view _airbyte_test_normalization.pos_dedup_cdcx_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + accurateCastOrNull(_ab_cdc_log_pos, ' + Float64 +') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.pos_dedup_cdcx_ab1 +-- pos_dedup_cdcx +where 1 = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql new file mode 100644 index 000000000000..df3cf26d6a8d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -0,0 +1,17 @@ + + + create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql new file mode 100644 index 000000000000..2fb72248eff2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -0,0 +1,19 @@ + + + create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab1 +-- renamed_dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 000000000000..029806e67c97 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" + from dedup_cdc_excluded_scd__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 000000000000..25b31a3767bc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.dedup_exchange_rate_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + select "_airbyte_unique_key", 
"_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from dedup_exchange_rate_scd__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 000000000000..0fc967c7e00a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + from renamed_dedup_cdc_excluded_scd__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 000000000000..bd7ed508ea03 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.dedup_cdc_excluded ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" + from dedup_cdc_excluded__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 000000000000..f4ee25a87e97 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.dedup_exchange_rate ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from dedup_exchange_rate__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/exchange_rate.sql new file mode 100644 index 000000000000..a0f060cad62a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/exchange_rate.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.exchange_rate ("id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid") + select "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid" + from exchange_rate__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 000000000000..3ee365f07d58 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,5 @@ + + insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + from renamed_dedup_cdc_excluded__dbt_tmp + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql new file mode 100644 index 000000000000..e14e9839149c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -0,0 +1,102 @@ + + + + create table test_normalization.pos_dedup_cdcx_scd__dbt_tmp + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +with + +input_data as ( + select * + from _airbyte_test_normalization.pos_dedup_cdcx_ab3 + -- pos_dedup_cdcx from test_normalization._airbyte_raw_pos_dedup_cdcx +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select *, + multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row + from ( + select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_emitted_at as _airbyte_start_at, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_pos_dedup_cdcx_hashid + from input_data + ) table_alias +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String'), accurateCastOrNull(_ab_cdc_log_pos, 'String') + order by _airbyte_ab_id + ) as _airbyte_row_num, + assumeNotNull(hex(MD5( + + toString(_airbyte_unique_key) || '~' || + + + toString(_airbyte_start_at) || '~' || + + + toString(_airbyte_emitted_at) || '~' || + + + toString(_ab_cdc_deleted_at) || '~' || + + + toString(_ab_cdc_updated_at) || '~' || + + + toString(_ab_cdc_log_pos) + + ))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_pos_dedup_cdcx_hashid +from dedup_data where _airbyte_row_num = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql new file mode 100644 index 000000000000..6397037a6490 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql @@ -0,0 +1,31 @@ + + + + create table 
test_normalization.pos_dedup_cdcx__dbt_tmp + + + + engine = MergeTree() + + order by (tuple()) + + as ( + +-- Final base SQL model +select + _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_pos_dedup_cdcx_hashid +from test_normalization.pos_dedup_cdcx_scd +-- pos_dedup_cdcx from test_normalization._airbyte_raw_pos_dedup_cdcx +where 1 = 1 +and _airbyte_active_row = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql new file mode 100644 index 000000000000..2788aa5cd7d4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql @@ -0,0 +1,31 @@ + + + create view _airbyte_test_normalization.dedup_cdc_excluded_ab3__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(name) || '~' || + + + toString(_ab_cdc_lsn) || '~' || + + + toString(_ab_cdc_updated_at) || '~' || + + + toString(_ab_cdc_deleted_at) + + ))) as _airbyte_dedup_cdc_excluded_hashid, + tmp.* +from _airbyte_test_normalization.dedup_cdc_excluded_ab2 tmp +-- dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql new file mode 100644 index 000000000000..715f70863de1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql @@ -0,0 +1,40 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab3__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(date) || '~' || + + + toString(timestamp_col) || '~' || + + + toString("HKD@spéçiäl & characters") || '~' || + + + toString(HKD_special___characters) || '~' || + + + toString(NZD) || '~' || + + + toString(USD) + + ))) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from _airbyte_test_normalization.dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql new file mode 100644 index 000000000000..90f493b6c800 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql @@ -0,0 +1,33 @@ + + + create view _airbyte_test_normalization.pos_dedup_cdcx_ab3__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(name) || '~' || + + + toString(_ab_cdc_lsn) || '~' || + + + toString(_ab_cdc_updated_at) || '~' || + + + toString(_ab_cdc_deleted_at) || '~' || + + + toString(_ab_cdc_log_pos) + + ))) as _airbyte_pos_dedup_cdcx_hashid, + tmp.* +from _airbyte_test_normalization.pos_dedup_cdcx_ab2 tmp +-- pos_dedup_cdcx +where 1 = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql new file mode 100644 index 000000000000..11811d557f2a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql @@ -0,0 +1,19 @@ + + + create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab3__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_renamed_dedup_cdc_excluded_hashid, + tmp.* +from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab2 tmp +-- renamed_dedup_cdc_excluded +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json index 827dd4fd1642..ddb47f1fbbcb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json @@ -38,5 +38,6 @@ "mssql": [ { "- HKD_special___characters": "- '\"HKD_special___characters\"'" }, { "!= HKD_special___characters": "!= \"HKD_special___characters\"" } - ] + ], + "clickhouse": [] } diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py index 04b59dae2ff1..55adb8d83d38 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py @@ -68,7 +68,7 @@ def setup_test_path(request): def test_normalization(destination_type: DestinationType, test_resource_name: str, setup_test_path): if 
destination_type.value not in dbt_test_utils.get_test_targets(): pytest.skip(f"Destinations {destination_type} is not in NORMALIZATION_TEST_TARGET env variable") - if destination_type.value == DestinationType.ORACLE.value and test_resource_name == "test_nested_streams": + if destination_type.value in (DestinationType.ORACLE.value, DestinationType.CLICKHOUSE.value) and test_resource_name == "test_nested_streams": pytest.skip(f"Destinations {destination_type} does not support nested streams") target_schema = dbt_test_utils.target_schema @@ -126,7 +126,7 @@ def run_incremental_normalization(destination_type: DestinationType, test_resour def run_schema_change_normalization(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): if destination_type.value in [DestinationType.MSSQL.value, DestinationType.MYSQL.value, DestinationType.ORACLE.value]: pytest.skip(f"{destination_type} does not support schema change in incremental yet (requires dbt 0.21.0+)") - if destination_type.value in [DestinationType.SNOWFLAKE.value]: + if destination_type.value in [DestinationType.SNOWFLAKE.value, DestinationType.CLICKHOUSE.value]: pytest.skip(f"{destination_type} is disabled as it doesnt support schema change in incremental yet (column type changes)") setup_schema_change_data(destination_type, test_resource_name, test_root_dir) @@ -182,6 +182,9 @@ def setup_test_dir(destination_type: DestinationType, test_resource_name: str) - elif destination_type.value == DestinationType.ORACLE.value: copy_tree("../dbt-project-template-oracle", test_root_dir) dbt_project_yaml = "../dbt-project-template-oracle/dbt_project.yml" + elif destination_type.value == DestinationType.CLICKHOUSE.value: + copy_tree("../dbt-project-template-clickhouse", test_root_dir) + dbt_project_yaml = "../dbt-project-template-clickhouse/dbt_project.yml" if destination_type.value not in (DestinationType.REDSHIFT.value, DestinationType.ORACLE.value): # Prefer 'view' to 'ephemeral' for tests so it's easier to debug with dbt dbt_test_utils.copy_replace( diff --git a/airbyte-integrations/bases/base-normalization/normalization/destination_type.py b/airbyte-integrations/bases/base-normalization/normalization/destination_type.py index c25c8982f704..ef66c789495a 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/destination_type.py +++ b/airbyte-integrations/bases/base-normalization/normalization/destination_type.py @@ -14,6 +14,7 @@ class DestinationType(Enum): POSTGRES = "postgres" REDSHIFT = "redshift" SNOWFLAKE = "snowflake" + CLICKHOUSE = "clickhouse" @classmethod def from_string(cls, string_value: str) -> "DestinationType": diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py index ac4609626159..352fa8b9f93d 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py @@ -25,6 +25,8 @@ DestinationType.ORACLE.value: 128, # https://docs.microsoft.com/en-us/sql/odbc/microsoft/column-name-limitations?view=sql-server-ver15 DestinationType.MSSQL.value: 64, + # https://stackoverflow.com/questions/68358686/what-is-the-maximum-length-of-a-column-in-clickhouse-can-it-be-modified + DestinationType.CLICKHOUSE.value: 63, } # DBT also needs to generate suffix to table 
names, so we need to make sure it has enough characters to do so... @@ -166,6 +168,8 @@ def __normalize_identifier_name( if self.destination_type == DestinationType.ORACLE: # Oracle dbt lib doesn't implemented adapter quote yet. result = f"quote('{result}')" + elif self.destination_type == DestinationType.CLICKHOUSE: + result = f"quote('{result}')" else: result = f"adapter.quote('{result}')" if not in_jinja: @@ -213,6 +217,8 @@ def __normalize_identifier_case(self, input_name: str, is_quoted: bool = False) result = input_name.lower() else: result = input_name.upper() + elif self.destination_type.value == DestinationType.CLICKHOUSE.value: + pass else: raise KeyError(f"Unknown destination type {self.destination_type}") return result diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py index 7f69a3610883..32d1d416f494 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py @@ -2533,6 +2533,9 @@ "REGR_SYY", } +CLICKHOUSE = { +} + RESERVED_KEYWORDS = { DestinationType.BIGQUERY.value: BIGQUERY, DestinationType.POSTGRES.value: POSTGRES, @@ -2541,6 +2544,7 @@ DestinationType.MYSQL.value: MYSQL, DestinationType.ORACLE.value: ORACLE, DestinationType.MSSQL.value: MSSQL, + DestinationType.CLICKHOUSE.value: CLICKHOUSE, } diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 9af79c4a4e83..14c861dff545 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -487,6 +487,9 @@ def cast_property_type(self, property_name: str, column_name: str, jinja_column: # in case of datetime, we don't need to use [cast] function, use try_parse instead. sql_type = jinja_call("type_timestamp_with_timezone()") return f"try_parse({replace_operation} as {sql_type}) as {column_name}" + if self.destination_type == DestinationType.CLICKHOUSE: + sql_type = jinja_call("type_timestamp_with_timezone()") + return f"parseDateTime64BestEffortOrNull(trim(BOTH '\"' from {replace_operation})) as {column_name}" # in all other cases sql_type = jinja_call("type_timestamp_with_timezone()") return f"cast({replace_operation} as {sql_type}) as {column_name}" @@ -499,16 +502,26 @@ def cast_property_type(self, property_name: str, column_name: str, jinja_column: # in case of date, we don't need to use [cast] function, use try_parse instead. 
sql_type = jinja_call("type_date()") return f"try_parse({replace_operation} as {sql_type}) as {column_name}" + if self.destination_type == DestinationType.CLICKHOUSE: + sql_type = jinja_call("type_date()") + return f"parseDateTimeBestEffortOrNull(trim(BOTH '\"' from {replace_operation})) as {column_name}" # in all other cases sql_type = jinja_call("type_date()") return f"cast({replace_operation} as {sql_type}) as {column_name}" elif is_string(definition["type"]): sql_type = jinja_call("dbt_utils.type_string()") + if self.destination_type == DestinationType.CLICKHOUSE: + trimmed_column_name = f"trim(BOTH '\"' from {column_name})" + sql_type = f"'{sql_type}'" + return f"nullif(accurateCastOrNull({trimmed_column_name}, {sql_type}), 'null') as {column_name}" else: print(f"WARN: Unknown type {definition['type']} for column {property_name} at {self.current_json_path()}") return column_name - return f"cast({column_name} as {sql_type}) as {column_name}" + if self.destination_type == DestinationType.CLICKHOUSE: + return f"accurateCastOrNull({column_name}, '{sql_type}') as {column_name}" + else: + return f"cast({column_name} as {sql_type}) as {column_name}" @staticmethod def generate_mysql_date_format_statement(column_name: str) -> str: @@ -643,7 +656,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup -- make a join with new_data using primary key to filter active data that need to be updated only join new_data_ids on this_data.{{ unique_key }} = new_data_ids.{{ unique_key }} -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) - left join {{'{{'}} {{ from_table }} {{'}}'}} as inc_data on 1 = 0 + {{ enable_left_join_null }}left join {{'{{'}} {{ from_table }} {{'}}'}} as inc_data on 1 = 0 where {{ active_row }} = 1 ), input_data as ( @@ -660,37 +673,35 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup {{ '{% endif %}' }} scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {%- if parent_hash_id %} - {{ parent_hash_id }}, - {%- endif %} - {{ '{{' }} dbt_utils.surrogate_key([ - {%- for primary_key in primary_keys %} - {{ primary_key }}, - {%- endfor %} - ]) {{ '}}' }} as {{ unique_key }}, - {%- for field in fields %} - {{ field }}, - {%- endfor %} - {{ cursor_field }} as {{ airbyte_start_at }}, - lag({{ cursor_field }}) over ( - partition by {{ primary_key_partition | join(", ") }} - order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} - ) as {{ airbyte_end_at }}, - case when lag({{ cursor_field }}) over ( - partition by {{ primary_key_partition | join(", ") }} - order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} - ) is null {{ cdc_active_row }} then 1 else 0 end as {{ active_row }}, - {{ col_ab_id }}, - {{ col_emitted_at }}, - {{ hash_id }} - from input_data + select *, + {{ case_begin }} {{ airbyte_end_at }} is null {{ cdc_active_row }} {{ case_then }} 1 {{ case_else }} 0 {{ case_end }} as {{ active_row }} + from ( + select + {%- if parent_hash_id %} + {{ parent_hash_id }}, + {%- endif %} + {{ '{{' }} dbt_utils.surrogate_key([ + {%- for primary_key in primary_keys %} + {{ primary_key }}, + {%- endfor %} + ]) {{ '}}' }} as {{ unique_key }}, + {%- for field in fields %} + {{ field }}, + {%- endfor %} + {{ cursor_field }} as {{ 
airbyte_start_at }}, + {{ lag_begin }}({{ cursor_field }}) over ( + partition by {{ primary_key_partition | join(", ") }} + order by + {{ cursor_field }} {{ order_null }}, + {{ cursor_field }} desc, + {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + {{ lag_end }} + ) as {{ airbyte_end_at }}, + {{ col_ab_id }}, + {{ col_emitted_at }}, + {{ hash_id }} + from input_data + ) table_alias ), dedup_data as ( select @@ -735,6 +746,36 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup # SQL Server treats NULL values as the lowest values, then sorted in ascending order, NULLs come first. order_null = "desc" + lag_begin = "lag" + lag_end = "" + if self.destination_type == DestinationType.CLICKHOUSE: + # ClickHouse doesn't support lag() yet, this is a workaround solution + # Ref: https://clickhouse.com/docs/en/sql-reference/window-functions/ + lag_begin = "anyOrNull" + lag_end = "ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING" + + case_begin = "case when" + case_then = "then" + case_else = "else" + case_end = "end" + if self.destination_type == DestinationType.CLICKHOUSE: + # ClickHouse doesn't have CASE WHEN, use multiIf instead + # Ref: https://clickhouse.com/docs/en/sql-reference/functions/conditional-functions/#multiif + case_begin = "multiIf(" + case_then = "," + case_else = "," + case_end = ")" + + enable_left_join_null = "" + cast_begin = "cast(" + cast_as = " as " + cast_end = ")" + if self.destination_type == DestinationType.CLICKHOUSE: + enable_left_join_null = "--" + cast_begin = "accurateCastOrNull(" + cast_as = ", '" + cast_end = "')" + # TODO move all cdc columns out of scd models cdc_active_row_pattern = "" cdc_updated_order_pattern = "" @@ -748,10 +789,12 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup cdc_active_row_pattern = f"and {col_cdc_deleted_at} is null " cdc_updated_order_pattern = f", {col_cdc_updated_at} desc" cdc_cols = ( - f", cast({col_cdc_deleted_at} as " - + "{{ dbt_utils.type_string() }})" - + f", cast({col_cdc_updated_at} as " - + "{{ dbt_utils.type_string() }})" + f", {cast_begin}{col_cdc_deleted_at}{cast_as}" + + "{{ dbt_utils.type_string() }}" + + f"{cast_end}" + + f", {cast_begin}{col_cdc_updated_at}{cast_as}" + + "{{ dbt_utils.type_string() }}" + + f"{cast_end}" ) quoted_cdc_cols = f", {quoted_col_cdc_deleted_at}, {quoted_col_cdc_updated_at}" @@ -759,7 +802,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos") quoted_col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos", in_jinja=True) cdc_updated_order_pattern += f", {col_cdc_log_pos} desc" - cdc_cols += f", cast({col_cdc_log_pos} as " + "{{ dbt_utils.type_string() }})" + cdc_cols += f", {cast_begin}{col_cdc_log_pos}{cast_as}" + "{{ dbt_utils.type_string() }}" + f"{cast_end}" quoted_cdc_cols += f", {quoted_col_cdc_log_pos}" sql = template.render( @@ -789,6 +832,13 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup cdc_updated_at_order=cdc_updated_order_pattern, cdc_cols=cdc_cols, quoted_cdc_cols=quoted_cdc_cols, + lag_begin=lag_begin, + lag_end=lag_end, + case_begin=case_begin, + case_then=case_then, + case_else=case_else, + case_end=case_end, + enable_left_join_null=enable_left_join_null, ) return sql diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py 
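For reference, the ClickHouse branch of cast_property_type above swaps the generic cast(column as type) for ClickHouse-specific functions, mirroring the generated dedup_exchange_rate_ab2 model earlier in this patch. A minimal sketch of the resulting casts, assuming an illustrative staging relation named dedup_exchange_rate_ab1 and the test-stream column names: accurateCastOrNull returns NULL instead of erroring when a value does not cast cleanly, trim(BOTH '"' ...) strips the quotes that JSONExtractRaw leaves around string values, and nullif(..., 'null') drops literal JSON nulls.

-- Sketch only: the relation name and column list are illustrative.
select
    accurateCastOrNull(id, 'BIGINT') as id,
    nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency,
    parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date,
    parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col,
    accurateCastOrNull("HKD@spéçiäl & characters", 'Float64') as "HKD@spéçiäl & characters"
from dedup_exchange_rate_ab1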
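The SCD template changes above also work around lag(), which the inline comments note ClickHouse does not yet support, and render the active-row flag through multiIf() instead of CASE WHEN. A minimal sketch of the pattern, mirroring the generated pos_dedup_cdcx_scd model earlier in this patch; the relation, key and cursor names are illustrative, and window functions may need allow_experimental_window_functions enabled depending on the ClickHouse version.

-- Sketch only: relation and column names are illustrative.
select
    *,
    multiIf(_airbyte_end_at is null, 1, 0) as _airbyte_active_row  -- CASE WHEN replacement
from (
    select
        id,
        _airbyte_emitted_at as _airbyte_start_at,
        -- lag(_airbyte_emitted_at) emulated with an aggregate window
        -- function restricted to exactly the preceding row
        anyOrNull(_airbyte_emitted_at) over (
            partition by id
            order by _airbyte_emitted_at desc
            ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING
        ) as _airbyte_end_at
    from pos_dedup_cdcx_ab3
) table_alias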
b/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py index e6b9315cd21c..d45cbc0623fb 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py @@ -22,6 +22,7 @@ class DestinationType(Enum): mysql = "mysql" oracle = "oracle" mssql = "mssql" + clickhouse = "clickhouse" class TransformConfig: @@ -66,6 +67,7 @@ def transform(self, integration_type: DestinationType, config: Dict[str, Any]): DestinationType.mysql.value: self.transform_mysql, DestinationType.oracle.value: self.transform_oracle, DestinationType.mssql.value: self.transform_mssql, + DestinationType.clickhouse.value: self.transform_clickhouse, }[integration_type.value](config) # merge pre-populated base_profile with destination-specific configuration. @@ -263,6 +265,20 @@ def transform_mssql(config: Dict[str, Any]): } return dbt_config + @staticmethod + def transform_clickhouse(config: Dict[str, Any]): + print("transform_clickhouse") + # https://docs.getdbt.com/reference/warehouse-profiles/clickhouse-profile + dbt_config = { + "type": "clickhouse", + "host": config["host"], + "port": config["port"], + "schema": config["database"], + "user": config["username"], + "password": config["password"], + } + return dbt_config + @staticmethod def read_json_config(input_path: str): with open(input_path, "r") as file: diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json new file mode 100644 index 000000000000..84f4fa7a50eb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json @@ -0,0 +1,32 @@ +{ + "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__destinations_are_fine", + "schema": "_airbyte_another", + "table": "postgres_has_a_64_ch__destinations_are_fine" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_d2b", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_d2b" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_e5a", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_e5a" + }, + "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__destinations_are_fine", + "schema": "another", + "table": "postgres_has_a_64_ch__destinations_are_fine" + }, + 
"schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_d2b", + "schema": "schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_d2b" + }, + "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_e5a", + "schema": "schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_e5a" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json new file mode 100644 index 000000000000..450b8a7f4bfc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json @@ -0,0 +1,252 @@ +{ + "_airbyte_schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "_airbyte_schema_test", + "table": "adcreatives" + }, + "_airbyte_schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "_airbyte_schema_test", + "table": "adcreatives_adlabels" + }, + "_airbyte_schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": 
"adcreatives_object_s__nk_data_app_link_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__nk_data_app_link_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_s__app_link_spec_android", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__app_link_spec_android" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_s__ata_app_link_spec_ios", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ata_app_link_spec_ios" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_s__ta_app_link_spec_ipad", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_app_link_spec_ipad" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_s___app_link_spec_iphone", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s___app_link_spec_iphone" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_s__link_data_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__link_data_image_crops" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_s__a_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_s__ta_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_s__a_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_s__a_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_s__a_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_s__a_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_s__ta_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_image_crops_90x160" + }, + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + "schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + "schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "schema_test", + "table": 
"adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_s__nk_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_s__nk_data_app_link_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_s__app_link_spec_android", + "schema": "schema_test", + "table": "adcreatives_object_s__app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_s__ata_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_s__ata_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_s__ta_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_s___app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_s___app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_s__link_data_image_crops", + "schema": "schema_test", + "table": "adcreatives_object_s__link_data_image_crops" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_s__a_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_s__ta_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_image_crops_100x72" + }, + 
"schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_s__a_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_s__a_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_400x150" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_s__a_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_s__a_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_600x360" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_s__ta_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_image_crops_90x160" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json new file mode 100644 index 000000000000..047c8cb29a29 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json @@ -0,0 +1,52 @@ +{ + "_airbyte_namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "_airbyte_namespace", + "table": "simple_stream_name_f35" + }, + "_airbyte_namespace.simple.simple": { + "file": "simple", + "schema": "_airbyte_namespace", + "table": "simple" + }, + "_airbyte_namespace.simple_stream_name.stream_name": { + "file": "_airbyte_namespace_simple_b94_stream_name", + "schema": "_airbyte_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_other_names__e_b94_stream_name_f9d", + "schema": "_airbyte_other_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_yet_another__e_b94_stream_name_bae", + "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + }, + "namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "namespace", + "table": "simple_stream_name_f35" + }, + "namespace.simple.simple": { + "file": "simple", + "schema": "namespace", + "table": "simple" + }, + "namespace.simple_stream_name.stream_name": { + "file": "namespace_simple_b94_stream_name", + "schema": "namespace", + "table": "simple_b94_stream_name" + }, + "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "other_namespace_simple_b94_stream_name", + "schema": "other_namespace", + "table": "simple_b94_stream_name" + }, + "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "yet_another_namespac__e_b94_stream_name_5d1", + "schema": "yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + } +} diff --git 
a/airbyte-integrations/connectors/destination-clickhouse/.dockerignore b/airbyte-integrations/connectors/destination-clickhouse/.dockerignore new file mode 100644 index 000000000000..65c7d0ad3e73 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-integrations/connectors/destination-clickhouse/Dockerfile b/airbyte-integrations/connectors/destination-clickhouse/Dockerfile new file mode 100644 index 000000000000..1a062e9892b0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/Dockerfile @@ -0,0 +1,11 @@ +FROM airbyte/integration-base-java:dev + +WORKDIR /airbyte +ENV APPLICATION destination-clickhouse + +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar + +RUN tar xf ${APPLICATION}.tar --strip-components=1 + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/destination-clickhouse diff --git a/airbyte-integrations/connectors/destination-clickhouse/README.md b/airbyte-integrations/connectors/destination-clickhouse/README.md new file mode 100644 index 000000000000..38646535af5c --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/README.md @@ -0,0 +1,68 @@ +# Destination Clickhouse + +This is the repository for the Clickhouse destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/clickhouse). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-clickhouse:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:destination-clickhouse:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-clickhouse:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-clickhouse:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-clickhouse:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-clickhouse:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/clickhouse`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/clickhouseDestinationAcceptanceTest.java`. 
+ +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-clickhouse:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-clickhouse:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/destination-clickhouse/build.gradle b/airbyte-integrations/connectors/destination-clickhouse/build.gradle new file mode 100644 index 000000000000..43d63d7c606e --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/build.gradle @@ -0,0 +1,31 @@ +plugins { + id 'application' + id 'airbyte-docker' + id 'airbyte-integration-test-java' +} + +application { + mainClass = 'io.airbyte.integrations.destination.clickhouse.ClickhouseDestination' + applicationDefaultJvmArgs = ['-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + implementation project(':airbyte-db:lib') + implementation project(':airbyte-config:models') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:bases:base-java') + implementation project(':airbyte-integrations:connectors:destination-jdbc') + implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) + + // https://mvnrepository.com/artifact/ru.yandex.clickhouse/clickhouse-jdbc + implementation 'ru.yandex.clickhouse:clickhouse-jdbc:0.3.1-patch' + + // https://mvnrepository.com/artifact/org.testcontainers/clickhouse + testImplementation 'org.testcontainers:clickhouse:1.16.2' + + integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') + integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-clickhouse') + integrationTestJavaImplementation project(':airbyte-workers') + // https://mvnrepository.com/artifact/org.testcontainers/clickhouse + integrationTestJavaImplementation "org.testcontainers:clickhouse:1.16.2" +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java new file mode 100644 index 000000000000..b8b1988c5ae3 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.db.jdbc.JdbcUtils; +import io.airbyte.integrations.BaseConnector; +import io.airbyte.integrations.base.AirbyteMessageConsumer; +import io.airbyte.integrations.base.Destination; +import io.airbyte.integrations.base.IntegrationRunner; +import io.airbyte.integrations.destination.NamingConventionTransformer; +import io.airbyte.integrations.destination.jdbc.AbstractJdbcDestination; +import io.airbyte.integrations.destination.jdbc.SqlOperations; +import io.airbyte.protocol.models.AirbyteConnectionStatus; +import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import com.google.common.collect.ImmutableMap; +import java.util.UUID; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ClickhouseDestination extends AbstractJdbcDestination implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseDestination.class); + + public static final String DRIVER_CLASS = "ru.yandex.clickhouse.ClickHouseDriver"; + + public ClickhouseDestination() { + super(DRIVER_CLASS, new ClickhouseSQLNameTransformer(), new ClickhouseSqlOperations()); + } + + @Override + public JsonNode toJdbcConfig(final JsonNode config) { + final List additionalParameters = new ArrayList<>(); + + final StringBuilder jdbcUrl = new StringBuilder(String.format("jdbc:clickhouse://%s:%s/%s", + config.get("host").asText(), + config.get("port").asText(), + config.get("database").asText())); + + final ImmutableMap.Builder configBuilder = ImmutableMap.builder() + .put("username", config.get("username").asText()) + .put("jdbc_url", jdbcUrl.toString()); + + if (config.has("password")) { + configBuilder.put("password", config.get("password").asText()); + } + + return Jsons.jsonNode(configBuilder.build()); + } + + @Override + public AirbyteConnectionStatus check(final JsonNode config) { + try (final JdbcDatabase database = getDatabase(config)) { + final NamingConventionTransformer namingResolver = getNamingResolver(); + final String outputSchema = namingResolver.getIdentifier(config.get("database").asText()); + attemptSQLCreateAndDropTableOperations(outputSchema, database, namingResolver, getSqlOperations()); + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + } catch (final Exception e) { + LOGGER.error("Exception while checking connection: ", e); + return new AirbyteConnectionStatus() + .withStatus(Status.FAILED) + .withMessage("Could not connect with provided configuration. 
\n" + e.getMessage()); + } + } + + public static void main(String[] args) throws Exception { + final Destination destination = new ClickhouseDestination(); + LOGGER.info("starting destination: {}", ClickhouseDestination.class); + new IntegrationRunner(destination).run(args); + LOGGER.info("completed destination: {}", ClickhouseDestination.class); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSQLNameTransformer.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSQLNameTransformer.java new file mode 100644 index 000000000000..cbfb57eb6592 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSQLNameTransformer.java @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.clickhouse; + +import io.airbyte.integrations.destination.ExtendedNameTransformer; + +public class ClickhouseSQLNameTransformer extends ExtendedNameTransformer { + + @Override + protected String applyDefaultCase(final String input) { + return input.toLowerCase(); + } + +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java new file mode 100644 index 000000000000..ffe62c556564 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.integrations.destination.jdbc.DataAdapter; +import io.airbyte.integrations.destination.jdbc.JdbcSqlOperations; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Files; +import java.sql.SQLException; +import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import ru.yandex.clickhouse.ClickHouseStatement; +import ru.yandex.clickhouse.ClickHouseConnection; +import ru.yandex.clickhouse.domain.ClickHouseFormat; + +public class ClickhouseSqlOperations extends JdbcSqlOperations { + + private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseSqlOperations.class); + + @Override + public void createSchemaIfNotExists(final JdbcDatabase database, final String schemaName) throws Exception { + database.execute(String.format("CREATE DATABASE IF NOT EXISTS %s;\n", schemaName)); + } + + @Override + public String createTableQuery(final JdbcDatabase database, final String schemaName, final String tableName) { + return String.format( + "CREATE TABLE IF NOT EXISTS %s.%s ( \n" + + "%s String,\n" + + "%s String,\n" + + "%s DateTime64(3, 'GMT') DEFAULT now(),\n" + + "PRIMARY KEY(%s)\n" + + ")\n" + + "ENGINE = MergeTree;\n", + schemaName, tableName, + JavaBaseConstants.COLUMN_NAME_AB_ID, + JavaBaseConstants.COLUMN_NAME_DATA, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT, + JavaBaseConstants.COLUMN_NAME_AB_ID, + JavaBaseConstants.COLUMN_NAME_AB_ID); + } + + @Override + public void executeTransaction(final JdbcDatabase database, final List queries) throws Exception { + final StringBuilder appendedQueries = new StringBuilder(); + for (final String query : queries) { + appendedQueries.append(query); + } + database.execute(appendedQueries.toString()); + } + + @Override + public void insertRecordsInternal(final JdbcDatabase database, + final List records, + final String schemaName, + final String tmpTableName) + throws SQLException { + if (records.isEmpty()) { + return; + } + + database.execute(connection -> { + File tmpFile = null; + try { + tmpFile = Files.createTempFile(tmpTableName + "-", ".tmp").toFile(); + writeBatchToFile(tmpFile, records); + + ClickHouseConnection conn = connection.unwrap(ClickHouseConnection.class); + ClickHouseStatement sth = conn.createStatement(); + sth.write() // Write API entrypoint + .table(String.format("%s.%s", schemaName, tmpTableName)) // where to write data + .data(tmpFile, ClickHouseFormat.CSV) // specify input + .send(); + + } catch (final Exception e) { + throw new RuntimeException(e); + } finally { + try { + if (tmpFile != null) { + Files.delete(tmpFile.toPath()); + } + } catch (final IOException e) { + throw new RuntimeException(e); + } + } + }); + } +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json new file mode 100644 index 000000000000..133717024171 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json @@ -0,0 +1,51 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse", + "supportsIncremental": true, + "supportsNormalization": true, + "supportsDBT": true, + 
"supported_destination_sync_modes": ["overwrite", "append", "append_dedup"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ClickHouse Destination Spec", + "type": "object", + "required": ["host", "port", "database", "username"], + "additionalProperties": true, + "properties": { + "host": { + "title": "Host", + "description": "Hostname of the database.", + "type": "string", + "order": 0 + }, + "port": { + "title": "Port", + "description": "JDBC port (not the native port) of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 8123, + "examples": ["8123"], + "order": 1 + }, + "database": { + "title": "DB Name", + "description": "Name of the database.", + "type": "string", + "order": 2 + }, + "username": { + "title": "User", + "description": "Username to use to access the database.", + "type": "string", + "order": 3 + }, + "password": { + "title": "Password", + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 4 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java new file mode 100644 index 000000000000..a93eaff7690b --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.clickhouse; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.integrations.destination.ExtendedNameTransformer; +import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; +import java.io.IOException; +import java.util.List; +import java.util.Comparator; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.slf4j.event.Level; +import org.junit.jupiter.api.Test; +import org.jooq.JSONFormat; +import org.jooq.JSONFormat.RecordFormat; +import org.testcontainers.containers.ClickHouseContainer; +import org.testcontainers.containers.Network; + +import java.sql.SQLException; +import java.lang.Thread; +import java.nio.file.Files; +import java.nio.file.Path; + +import io.airbyte.workers.process.DockerProcessFactory; +import io.airbyte.workers.process.ProcessFactory; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import java.util.ArrayList; + +public class ClickhouseDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final String DB_NAME = "default"; + + private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseDestinationAcceptanceTest.class); + private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); + private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); + + private ClickHouseContainer db; + private Network network; + 
+ private ProcessFactory processFactory;
+ private TestDestinationEnv testEnv;
+
+ private boolean useNativePort = false;
+
+ @Override
+ protected String getImageName() {
+ return "airbyte/destination-clickhouse:dev";
+ }
+
+ @Override
+ protected boolean supportsNormalization() {
+ return true;
+ }
+
+ @Override
+ protected boolean supportsDBT() {
+ return true;
+ }
+
+ @Override
+ protected boolean implementsNamespaces() {
+ return true;
+ }
+
+ @Override
+ protected String getDefaultSchema(final JsonNode config) {
+ if (config.get("database") == null) {
+ return null;
+ }
+ return config.get("database").asText();
+ }
+
+ @Override
+ protected JsonNode getConfig() {
+ // The official ClickHouse JDBC driver uses the HTTP protocol; its default port is 8123.
+ // The dbt ClickHouse adapter uses the native protocol; its default port is 9000.
+ return Jsons.jsonNode(ImmutableMap.builder()
+ .put("host", db.getHost())
+ .put("port", useNativePort ? db.getMappedPort(ClickHouseContainer.NATIVE_PORT) : db.getFirstMappedPort())
+ .put("database", DB_NAME)
+ .put("username", db.getUsername())
+ .put("password", db.getPassword())
+ .put("schema", DB_NAME)
+ .build());
+ }
+
+ @Override
+ protected JsonNode getFailCheckConfig() {
+ String ipAddress = db.getContainerInfo().getNetworkSettings().getIpAddress();
+
+ return Jsons.jsonNode(ImmutableMap.builder()
+ .put("host", db.getHost())
+ .put("port", db.getFirstMappedPort())
+ .put("database", DB_NAME)
+ .put("username", db.getUsername())
+ .put("password", "wrong password")
+ .put("schema", DB_NAME)
+ .build());
+ }
+
+ @Override
+ protected List<JsonNode> retrieveNormalizedRecords(final TestDestinationEnv testEnv, final String streamName,
+ final String namespace)
+ throws Exception {
+ return retrieveRecordsFromTable(namingResolver.getIdentifier(streamName), namespace);
+ }
+
+ @Override
+ protected List<JsonNode> retrieveRecords(TestDestinationEnv testEnv,
+ String streamName,
+ String namespace,
+ JsonNode streamSchema)
+ throws Exception {
+ return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namespace)
+ .stream()
+ .map(r -> Jsons.deserialize(r.get(JavaBaseConstants.COLUMN_NAME_DATA).asText()))
+ .collect(Collectors.toList());
+ }
+
+ private List<JsonNode> retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException {
+ boolean oldUseNativePort = useNativePort;
+ useNativePort = false;
+ final JdbcDatabase jdbcDB = getDatabase(getConfig());
+ useNativePort = oldUseNativePort;
+ return jdbcDB.query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName,
+ JavaBaseConstants.COLUMN_NAME_EMITTED_AT))
+ .collect(Collectors.toList());
+ }
+
+ @Override
+ protected List<String> resolveIdentifier(final String identifier) {
+ final List<String> result = new ArrayList<>();
+ final String resolved = namingResolver.getIdentifier(identifier);
+ result.add(identifier);
+ result.add(resolved);
+ if (!resolved.startsWith("\"")) {
+ result.add(resolved.toLowerCase());
+ result.add(resolved.toUpperCase());
+ }
+ return result;
+ }
+
+ private static JdbcDatabase getDatabase(final JsonNode config) {
+ return Databases.createJdbcDatabase(
+ config.get("username").asText(),
+ config.has("password") ? config.get("password").asText() : null,
+ String.format("jdbc:clickhouse://%s:%s/%s",
+ config.get("host").asText(),
+ config.get("port").asText(),
+ config.get("database").asText()),
+ ClickhouseDestination.DRIVER_CLASS);
+ }
+
+ @Override
+ protected void setup(TestDestinationEnv testEnv) {
+ db = new ClickHouseContainer("yandex/clickhouse-server");
+ db.start();
+ }
+
+ @Override
+ protected void tearDown(TestDestinationEnv testEnv) {
+ db.stop();
+ db.close();
+ }
+
+ /**
+ * The SQL script generated by dbt in the 'test' step isn't compatible
+ * with ClickHouse, so we skip this test for now.
+ *
+ * @throws Exception
+ */
+ @Disabled
+ public void testCustomDbtTransformations() throws Exception {
+ useNativePort = true;
+ super.testCustomDbtTransformations();
+ }
+
+ /**
+ * The normalization container needs the native port, while the destination
+ * container needs the HTTP port; we can't inject the port switch statement
+ * into the DestinationAcceptanceTest.runSync() method for this test, so we skip it.
+ * @throws Exception
+ */
+ @Disabled
+ public void testIncrementalDedupeSync() throws Exception {
+ super.testIncrementalDedupeSync();
+ }
+
+ /**
+ * The normalization container needs the native port, while the destination
+ * container needs the HTTP port; we can't inject the port switch statement
+ * into the DestinationAcceptanceTest.runSync() method for this test, so we skip it.
+ * @throws Exception
+ */
+ @Disabled
+ public void testSyncWithNormalization(final String messagesFilename, final String catalogFilename) throws Exception {
+ super.testSyncWithNormalization(messagesFilename, catalogFilename);
+ }
+}
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java
new file mode 100644
index 000000000000..0de28f4fb5c1
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.db.jdbc.JdbcUtils; +import io.airbyte.integrations.base.Destination; +import io.airbyte.integrations.base.AirbyteMessageConsumer; +import io.airbyte.integrations.destination.ExtendedNameTransformer; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteMessage.Type; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.AirbyteStateMessage; +import io.airbyte.protocol.models.CatalogHelpers; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaPrimitive; +import java.time.Instant; +import java.util.List; +import java.util.Comparator; +import java.util.stream.IntStream; +import java.util.stream.Collectors; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.testcontainers.containers.ClickHouseContainer; + +public class ClickhouseDestinationTest { + + private static final String DB_NAME = "default"; + private static final String STREAM_NAME = "id_and_name"; + private static final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); + + private static ClickHouseContainer db; + private static ConfiguredAirbyteCatalog catalog; + private static JsonNode config; + + @BeforeAll + static void init() { + db = new ClickHouseContainer("yandex/clickhouse-server"); + db.start(); + } + + @BeforeEach + void setup() { + catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + CatalogHelpers.createConfiguredAirbyteStream( + STREAM_NAME, + DB_NAME, + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)))); + + config = Jsons.jsonNode(ImmutableMap.builder() + .put("host", db.getHost()) + .put("port", db.getFirstMappedPort()) + .put("database", DB_NAME) + .put("username", db.getUsername()) + .put("password", db.getPassword()) + .put("schema", DB_NAME) + .build()); + } + + @AfterAll + static void cleanUp() { + db.stop(); + db.close(); + } + + //@Test + void sanityTest() throws Exception { + final Destination dest = new ClickhouseDestination(); + final AirbyteMessageConsumer consumer = dest.getConsumer(config, catalog, + Destination::defaultOutputRecordCollector); + final List expectedRecords = generateRecords(10); + + consumer.start(); + expectedRecords.forEach(m -> { + try { + consumer.accept(m); + } catch (final Exception e) { + throw new RuntimeException(e); + } + }); + consumer.accept(new AirbyteMessage() + .withType(Type.STATE) + .withState(new AirbyteStateMessage() + .withData(Jsons.jsonNode(ImmutableMap.of(DB_NAME + "." 
+ STREAM_NAME, 10))))); + consumer.close(); + + final JdbcDatabase database = Databases.createJdbcDatabase( + config.get("username").asText(), + config.get("password").asText(), + String.format("jdbc:clickhouse://%s:%s/%s", + config.get("host").asText(), + config.get("port").asText(), + config.get("database").asText()), + "ru.yandex.clickhouse.ClickHouseDriver"); + + final List actualRecords = database.bufferedResultSetQuery( + connection -> connection.createStatement().executeQuery( + String.format("SELECT * FROM %s.%s;", DB_NAME, + namingResolver.getRawTableName(STREAM_NAME))), + JdbcUtils.getDefaultSourceOperations()::rowToJson); + + assertEquals( + expectedRecords.stream().map(AirbyteMessage::getRecord) + .map(AirbyteRecordMessage::getData).collect(Collectors.toList()), + actualRecords.stream() + .map(o -> o.get("_airbyte_data").asText()) + .map(Jsons::deserialize) + .sorted(Comparator.comparingInt(x -> x.get("id").asInt())) + .collect(Collectors.toList())); + } + + private List generateRecords(final int n) { + return IntStream.range(0, n) + .boxed() + .map(i -> new AirbyteMessage() + .withType(Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream(STREAM_NAME) + .withNamespace(DB_NAME) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "test name " + i))))) + .collect(Collectors.toList()); + } +} \ No newline at end of file diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java index 39cbc52562dc..a37a806a1010 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/DefaultNormalizationRunner.java @@ -48,7 +48,8 @@ public enum DestinationType { ORACLE, POSTGRES, REDSHIFT, - SNOWFLAKE + SNOWFLAKE, + CLICKHOUSE } public DefaultNormalizationRunner(final DestinationType destinationType, final ProcessFactory processFactory, final String normalizationImageName) { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java index fb238fb597e3..4ad71f155b10 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java @@ -27,6 +27,7 @@ public class NormalizationRunnerFactory { .put("airbyte/destination-postgres-strict-encrypt", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.POSTGRES)) .put("airbyte/destination-redshift", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.REDSHIFT)) .put("airbyte/destination-snowflake", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.SNOWFLAKE)) + .put("airbyte/destination-clickhouse", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.CLICKHOUSE)) .build(); public static NormalizationRunner create(final String imageName, final ProcessFactory processFactory) { diff --git a/docs/integrations/destinations/clickhouse.md b/docs/integrations/destinations/clickhouse.md new file mode 100644 index 000000000000..d24b1e6f8318 --- /dev/null +++ b/docs/integrations/destinations/clickhouse.md @@ -0,0 +1,83 @@ + +# ClickHouse + +## Features + +| Feature | Supported?\(Yes/No\) | Notes | +| :--- | 
:--- | :--- |
+| Full Refresh Sync | Yes | |
+| Incremental - Append Sync | Yes | |
+| Incremental - Deduped History | Yes | |
+| Namespaces | Yes | |
+
+#### Output Schema
+
+Each stream will be output into its own table in ClickHouse. Each table will contain 3 columns:
+
+* `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. The column type in ClickHouse is `String`.
+* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. The column type in ClickHouse is `DateTime64`.
+* `_airbyte_data`: a json blob representing the event data. The column type in ClickHouse is `String`.
+
+## Getting Started \(Airbyte Cloud\)
+
+Airbyte Cloud only supports connecting to your ClickHouse instance with SSL or TLS encryption, which is supported by the [ClickHouse JDBC driver](https://github.com/ClickHouse/clickhouse-jdbc).
+
+## Getting Started \(Airbyte Open-Source\)
+
+#### Requirements
+
+To use the ClickHouse destination, you'll need:
+
+* A ClickHouse server version 21.8.10.19 or above
+
+#### Configure Network Access
+
+Make sure your ClickHouse database can be accessed by Airbyte. If your database is within a VPC, you may need to allow access from the IP you're using to expose Airbyte.
+
+#### **Permissions**
+
+You need a ClickHouse user with the following permissions:
+
+* can create tables and write rows.
+* can create databases.
+
+For example, you can create such a user by running:
+
+```
+GRANT CREATE ON * TO airbyte_user;
+```
+
+You can also use a pre-existing user, but we highly recommend creating a dedicated user for Airbyte.
+
+#### Target Database
+
+You will need to choose an existing database or create a new database that will be used to store synced data from Airbyte.
+
+### Setup the ClickHouse Destination in Airbyte
+
+You should now have all the requirements needed to configure ClickHouse as a destination in the UI. You'll need the following information to configure the ClickHouse destination:
+
+* **Host**
+* **Port** (JDBC HTTP port, not the native port)
+* **Username**
+* **Password**
+* **Database**
+
+## Naming Conventions
+
+From [ClickHouse SQL Identifiers syntax](https://clickhouse.com/docs/en/sql-reference/syntax/):
+
+* SQL identifiers and key words must begin with a letter \(a-z, but also letters with diacritical marks and non-Latin letters\) or an underscore \(\_\).
+* Subsequent characters in an identifier or key word can be letters, underscores, digits \(0-9\).
+* Identifiers can be quoted or non-quoted. The latter is preferred.
+* If you want to use identifiers that are the same as keywords, or to use other symbols in identifiers, quote them using double quotes or backticks, for example, "id", `id`.
+* If you want to write portable applications you are advised to always quote a particular name or never quote it.
+
+Therefore, the Airbyte ClickHouse destination will create tables and schemas using unquoted identifiers when possible, and fall back to quoted identifiers when a name contains special characters.
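+
+For illustration, here is a minimal sketch of the two identifier styles. The database and table names below are hypothetical examples, not names the connector necessarily produces:
+
+```
+-- unquoted identifier: fine when the name only contains letters, digits, and underscores
+CREATE TABLE airbyte_test.users (_airbyte_ab_id String) ENGINE = MergeTree ORDER BY _airbyte_ab_id;
+
+-- quoted identifier: needed when the name contains special characters
+CREATE TABLE airbyte_test.`user-events` (_airbyte_ab_id String) ENGINE = MergeTree ORDER BY _airbyte_ab_id;
+```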
+
+## Changelog
+
+| Version | Date | Pull Request | Subject |
+| :--- | :--- | :--- | :--- |
+| 0.0.1 | 2021-11-04 | [\#9999](https://github.com/airbytehq/airbyte/pull/9999) | Add ClickHouse destination |
+

From 410e9c9bedee71370e3c6651612c15e9b4c466ee Mon Sep 17 00:00:00 2001
From: Bo Lu
Date: Thu, 4 Nov 2021 16:50:22 +1100
Subject: [PATCH 02/12] update docs

---
 airbyte-integrations/builds.md | 1 +
 .../destination-clickhouse/bootstrap.md | 22 +++++++++++++++++++
 docs/SUMMARY.md | 1 +
 docs/integrations/README.md | 1 +
 docs/integrations/destinations/clickhouse.md | 2 +-
 5 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 airbyte-integrations/connectors/destination-clickhouse/bootstrap.md

diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md
index d6c13ba9ec80..28b4d5cb94d8 100644
--- a/airbyte-integrations/builds.md
+++ b/airbyte-integrations/builds.md
@@ -97,6 +97,7 @@
| :--- | :--- |
| Azure Blob Storage | [![destination-azure-blob-storage](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-azure-blob-storage%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-azure-blob-storage) |
| BigQuery | [![destination-bigquery](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-bigquery%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-bigquery) |
+| ClickHouse | [![destination-clickhouse](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-clickhouse%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-clickhouse) |
| Databricks | (Temporarily Not Available) |
| Google Cloud Storage (GCS) | [![destination-gcs](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-gcs%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-gcs) |
| Google PubSub | [![destination-pubsub](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-pubsub%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-pubsub) |
diff --git a/airbyte-integrations/connectors/destination-clickhouse/bootstrap.md b/airbyte-integrations/connectors/destination-clickhouse/bootstrap.md
new file mode 100644
index 000000000000..13d96b595110
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-clickhouse/bootstrap.md
@@ -0,0 +1,22 @@
+# ClickHouse
+
+## Overview
+
+ClickHouse is a fast open-source column-oriented database management system that allows generating analytical data reports in real-time using SQL queries.
+
+## Endpoints
+
+This destination connector uses the official ClickHouse JDBC driver, which uses HTTP as its protocol. [https://github.com/ClickHouse/clickhouse-jdbc](https://github.com/ClickHouse/clickhouse-jdbc)
+
+## Quick Notes
+
+- The ClickHouse JDBC driver uses the HTTP protocol (default port 8123), but the [dbt clickhouse adapter](https://github.com/silentsokolov/dbt-clickhouse) uses the TCP protocol (default port 9000).
+
+- This connector doesn't support nested streams or schema changes yet.
+ +- The community [dbt clickhouse adapter](https://github.com/silentsokolov/dbt-clickhouse) has some bugs haven't been fixed yet, for example [https://github.com/silentsokolov/dbt-clickhouse/issues/20](https://github.com/silentsokolov/dbt-clickhouse/issues/20), so the dbt test is based on a fork [https://github.com/burmecia/dbt-clickhouse](https://github.com/burmecia/dbt-clickhouse). + +## API Reference + +The ClickHouse reference documents: [https://clickhouse.com/docs/en/](https://clickhouse.com/docs/en/) + diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index f66c0bb837f4..963a7f3f840e 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -141,6 +141,7 @@ * [Destinations](integrations/destinations/README.md) * [AzureBlobStorage](integrations/destinations/azureblobstorage.md) * [BigQuery](integrations/destinations/bigquery.md) + * [ClickHouse](integrations/destinations/clickhouse.md) * [Databricks](integrations/destinations/databricks.md) * [DynamoDB](integrations/destinations/dynamodb.md) * [Chargify](integrations/destinations/chargify.md) diff --git a/docs/integrations/README.md b/docs/integrations/README.md index 797b6fa5f144..feafbac188c2 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -128,6 +128,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex | [AzureBlobStorage](destinations/azureblobstorage.md) | Alpha | | [BigQuery](destinations/bigquery.md) | Certified | | [Chargify \(Keen\)](destinations/chargify.md) | Alpha | +| [ClickHouse](destinations/clickhouse.md) | Alpha | | [Databricks](destinations/databricks.md) | Beta | | [Google Cloud Storage \(GCS\)](destinations/gcs.md) | Alpha | | [Google Pubsub](destinations/pubsub.md) | Alpha | diff --git a/docs/integrations/destinations/clickhouse.md b/docs/integrations/destinations/clickhouse.md index d24b1e6f8318..0d05ff16c170 100644 --- a/docs/integrations/destinations/clickhouse.md +++ b/docs/integrations/destinations/clickhouse.md @@ -79,5 +79,5 @@ Therefore, Airbyte ClickHouse destination will create tables and schemas using t | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| 0.0.1 | 2021-11-04 | [\#9999](https://github.com/airbytehq/airbyte/pull/9999) | Add ClickHouse destination | +| 0.0.1 | 2021-11-04 | [\#7620](https://github.com/airbytehq/airbyte/pull/7620) | Add ClickHouse destination | From ff94b62c06bfd14cd92e4c6e9b6a14dc6fe8e31b Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Fri, 5 Nov 2021 10:00:59 +1100 Subject: [PATCH 03/12] format code --- .../integration_tests/dbt_integration_test.py | 2 +- .../integration_tests/test_normalization.py | 5 +- .../transform_catalog/reserved_keywords.py | 3 +- .../clickhouse/ClickhouseDestination.java | 24 +-- .../ClickhouseSQLNameTransformer.java | 10 +- .../clickhouse/ClickhouseSqlOperations.java | 130 ++++++----- .../ClickhouseDestinationAcceptanceTest.java | 104 +++++---- .../clickhouse/ClickhouseDestinationTest.java | 204 +++++++++--------- 8 files changed, 233 insertions(+), 249 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py index 42a481516855..34133deb59a2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py @@ -13,8 +13,8 @@ import sys import threading import time -from typing import Any, Dict, List 
from copy import copy +from typing import Any, Dict, List from normalization.destination_type import DestinationType from normalization.transform_config.transform import TransformConfig diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py index 55adb8d83d38..0f20e0fae016 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py @@ -68,7 +68,10 @@ def setup_test_path(request): def test_normalization(destination_type: DestinationType, test_resource_name: str, setup_test_path): if destination_type.value not in dbt_test_utils.get_test_targets(): pytest.skip(f"Destinations {destination_type} is not in NORMALIZATION_TEST_TARGET env variable") - if destination_type.value in (DestinationType.ORACLE.value, DestinationType.CLICKHOUSE.value) and test_resource_name == "test_nested_streams": + if ( + destination_type.value in (DestinationType.ORACLE.value, DestinationType.CLICKHOUSE.value) + and test_resource_name == "test_nested_streams" + ): pytest.skip(f"Destinations {destination_type} does not support nested streams") target_schema = dbt_test_utils.target_schema diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py index 32d1d416f494..d787dec66fc8 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py @@ -2533,8 +2533,7 @@ "REGR_SYY", } -CLICKHOUSE = { -} +CLICKHOUSE = {} RESERVED_KEYWORDS = { DestinationType.BIGQUERY.value: BIGQUERY, diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java index b8b1988c5ae3..4614ad502ffb 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java @@ -5,25 +5,17 @@ package io.airbyte.integrations.destination.clickhouse; import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.db.jdbc.JdbcDatabase; -import io.airbyte.db.jdbc.JdbcUtils; -import io.airbyte.integrations.BaseConnector; -import io.airbyte.integrations.base.AirbyteMessageConsumer; import io.airbyte.integrations.base.Destination; import io.airbyte.integrations.base.IntegrationRunner; import io.airbyte.integrations.destination.NamingConventionTransformer; import io.airbyte.integrations.destination.jdbc.AbstractJdbcDestination; -import io.airbyte.integrations.destination.jdbc.SqlOperations; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; -import io.airbyte.protocol.models.AirbyteMessage; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import 
com.google.common.collect.ImmutableMap; -import java.util.UUID; import java.util.ArrayList; import java.util.List; -import java.util.Optional; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,13 +34,13 @@ public JsonNode toJdbcConfig(final JsonNode config) { final List additionalParameters = new ArrayList<>(); final StringBuilder jdbcUrl = new StringBuilder(String.format("jdbc:clickhouse://%s:%s/%s", - config.get("host").asText(), - config.get("port").asText(), - config.get("database").asText())); + config.get("host").asText(), + config.get("port").asText(), + config.get("database").asText())); final ImmutableMap.Builder configBuilder = ImmutableMap.builder() - .put("username", config.get("username").asText()) - .put("jdbc_url", jdbcUrl.toString()); + .put("username", config.get("username").asText()) + .put("jdbc_url", jdbcUrl.toString()); if (config.has("password")) { configBuilder.put("password", config.get("password").asText()); @@ -67,8 +59,8 @@ public AirbyteConnectionStatus check(final JsonNode config) { } catch (final Exception e) { LOGGER.error("Exception while checking connection: ", e); return new AirbyteConnectionStatus() - .withStatus(Status.FAILED) - .withMessage("Could not connect with provided configuration. \n" + e.getMessage()); + .withStatus(Status.FAILED) + .withMessage("Could not connect with provided configuration. \n" + e.getMessage()); } } diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSQLNameTransformer.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSQLNameTransformer.java index cbfb57eb6592..fca0dc91e413 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSQLNameTransformer.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSQLNameTransformer.java @@ -8,9 +8,9 @@ public class ClickhouseSQLNameTransformer extends ExtendedNameTransformer { - @Override - protected String applyDefaultCase(final String input) { - return input.toLowerCase(); - } + @Override + protected String applyDefaultCase(final String input) { + return input.toLowerCase(); + } -} \ No newline at end of file +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java index ffe62c556564..d5ab09c6abd1 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java @@ -6,92 +6,90 @@ import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.base.JavaBaseConstants; -import io.airbyte.integrations.destination.jdbc.DataAdapter; import io.airbyte.integrations.destination.jdbc.JdbcSqlOperations; import io.airbyte.protocol.models.AirbyteRecordMessage; -import java.io.BufferedReader; import java.io.File; -import java.io.FileReader; import java.io.IOException; import java.nio.file.Files; import java.sql.SQLException; import java.util.List; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; -import ru.yandex.clickhouse.ClickHouseStatement; import ru.yandex.clickhouse.ClickHouseConnection; +import ru.yandex.clickhouse.ClickHouseStatement; import ru.yandex.clickhouse.domain.ClickHouseFormat; public class ClickhouseSqlOperations extends JdbcSqlOperations { - private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseSqlOperations.class); + private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseSqlOperations.class); - @Override - public void createSchemaIfNotExists(final JdbcDatabase database, final String schemaName) throws Exception { - database.execute(String.format("CREATE DATABASE IF NOT EXISTS %s;\n", schemaName)); - } + @Override + public void createSchemaIfNotExists(final JdbcDatabase database, final String schemaName) throws Exception { + database.execute(String.format("CREATE DATABASE IF NOT EXISTS %s;\n", schemaName)); + } + + @Override + public String createTableQuery(final JdbcDatabase database, final String schemaName, final String tableName) { + return String.format( + "CREATE TABLE IF NOT EXISTS %s.%s ( \n" + + "%s String,\n" + + "%s String,\n" + + "%s DateTime64(3, 'GMT') DEFAULT now(),\n" + + "PRIMARY KEY(%s)\n" + + ")\n" + + "ENGINE = MergeTree;\n", + schemaName, tableName, + JavaBaseConstants.COLUMN_NAME_AB_ID, + JavaBaseConstants.COLUMN_NAME_DATA, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT, + JavaBaseConstants.COLUMN_NAME_AB_ID, + JavaBaseConstants.COLUMN_NAME_AB_ID); + } - @Override - public String createTableQuery(final JdbcDatabase database, final String schemaName, final String tableName) { - return String.format( - "CREATE TABLE IF NOT EXISTS %s.%s ( \n" - + "%s String,\n" - + "%s String,\n" - + "%s DateTime64(3, 'GMT') DEFAULT now(),\n" - + "PRIMARY KEY(%s)\n" - + ")\n" - + "ENGINE = MergeTree;\n", - schemaName, tableName, - JavaBaseConstants.COLUMN_NAME_AB_ID, - JavaBaseConstants.COLUMN_NAME_DATA, - JavaBaseConstants.COLUMN_NAME_EMITTED_AT, - JavaBaseConstants.COLUMN_NAME_AB_ID, - JavaBaseConstants.COLUMN_NAME_AB_ID); + @Override + public void executeTransaction(final JdbcDatabase database, final List queries) throws Exception { + final StringBuilder appendedQueries = new StringBuilder(); + for (final String query : queries) { + appendedQueries.append(query); } + database.execute(appendedQueries.toString()); + } - @Override - public void executeTransaction(final JdbcDatabase database, final List queries) throws Exception { - final StringBuilder appendedQueries = new StringBuilder(); - for (final String query : queries) { - appendedQueries.append(query); - } - database.execute(appendedQueries.toString()); + @Override + public void insertRecordsInternal(final JdbcDatabase database, + final List records, + final String schemaName, + final String tmpTableName) + throws SQLException { + if (records.isEmpty()) { + return; } - @Override - public void insertRecordsInternal(final JdbcDatabase database, - final List records, - final String schemaName, - final String tmpTableName) - throws SQLException { - if (records.isEmpty()) { - return; - } + database.execute(connection -> { + File tmpFile = null; + try { + tmpFile = Files.createTempFile(tmpTableName + "-", ".tmp").toFile(); + writeBatchToFile(tmpFile, records); - database.execute(connection -> { - File tmpFile = null; - try { - tmpFile = Files.createTempFile(tmpTableName + "-", ".tmp").toFile(); - writeBatchToFile(tmpFile, records); + ClickHouseConnection conn = connection.unwrap(ClickHouseConnection.class); + ClickHouseStatement 
sth = conn.createStatement(); + sth.write() // Write API entrypoint + .table(String.format("%s.%s", schemaName, tmpTableName)) // where to write data + .data(tmpFile, ClickHouseFormat.CSV) // specify input + .send(); - ClickHouseConnection conn = connection.unwrap(ClickHouseConnection.class); - ClickHouseStatement sth = conn.createStatement(); - sth.write() // Write API entrypoint - .table(String.format("%s.%s", schemaName, tmpTableName)) // where to write data - .data(tmpFile, ClickHouseFormat.CSV) // specify input - .send(); + } catch (final Exception e) { + throw new RuntimeException(e); + } finally { + try { + if (tmpFile != null) { + Files.delete(tmpFile.toPath()); + } + } catch (final IOException e) { + throw new RuntimeException(e); + } + } + }); + } - } catch (final Exception e) { - throw new RuntimeException(e); - } finally { - try { - if (tmpFile != null) { - Files.delete(tmpFile.toPath()); - } - } catch (final IOException e) { - throw new RuntimeException(e); - } - } - }); - } -} \ No newline at end of file +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java index a93eaff7690b..cc86c3c1507a 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java @@ -12,31 +12,19 @@ import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; -import java.io.IOException; +import io.airbyte.workers.process.ProcessFactory; +import java.sql.SQLException; +import java.util.ArrayList; import java.util.List; -import java.util.Comparator; import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.slf4j.event.Level; -import org.junit.jupiter.api.Test; import org.jooq.JSONFormat; import org.jooq.JSONFormat.RecordFormat; +import org.junit.jupiter.api.Disabled; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.testcontainers.containers.ClickHouseContainer; import org.testcontainers.containers.Network; -import java.sql.SQLException; -import java.lang.Thread; -import java.nio.file.Files; -import java.nio.file.Path; - -import io.airbyte.workers.process.DockerProcessFactory; -import io.airbyte.workers.process.ProcessFactory; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import java.util.ArrayList; - public class ClickhouseDestinationAcceptanceTest extends DestinationAcceptanceTest { private static final String DB_NAME = "default"; @@ -86,13 +74,13 @@ protected JsonNode getConfig() { // ClickHouse official JDBC driver use HTTP protocal, its default port is 8123 // dbt clickhouse adapter use native protocal, its default port is 9000 return Jsons.jsonNode(ImmutableMap.builder() - .put("host", db.getHost()) - .put("port", useNativePort ? 
db.getMappedPort(ClickHouseContainer.NATIVE_PORT) : db.getFirstMappedPort()) - .put("database", DB_NAME) - .put("username", db.getUsername()) - .put("password", db.getPassword()) - .put("schema", DB_NAME) - .build()); + .put("host", db.getHost()) + .put("port", useNativePort ? db.getMappedPort(ClickHouseContainer.NATIVE_PORT) : db.getFirstMappedPort()) + .put("database", DB_NAME) + .put("username", db.getUsername()) + .put("password", db.getPassword()) + .put("schema", DB_NAME) + .build()); } @Override @@ -100,19 +88,20 @@ protected JsonNode getFailCheckConfig() { String ipAddress = db.getContainerInfo().getNetworkSettings().getIpAddress(); return Jsons.jsonNode(ImmutableMap.builder() - .put("host", db.getHost()) - .put("port", db.getFirstMappedPort()) - .put("database", DB_NAME) - .put("username", db.getUsername()) - .put("password", "wrong password") - .put("schema", DB_NAME) - .build()); + .put("host", db.getHost()) + .put("port", db.getFirstMappedPort()) + .put("database", DB_NAME) + .put("username", db.getUsername()) + .put("password", "wrong password") + .put("schema", DB_NAME) + .build()); } @Override - protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, final String streamName, + protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, + final String streamName, final String namespace) - throws Exception { + throws Exception { return retrieveRecordsFromTable(namingResolver.getIdentifier(streamName), namespace); } @@ -121,11 +110,11 @@ protected List retrieveRecords(TestDestinationEnv testEnv, String streamName, String namespace, JsonNode streamSchema) - throws Exception { + throws Exception { return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namespace) - .stream() - .map(r -> Jsons.deserialize(r.get(JavaBaseConstants.COLUMN_NAME_DATA).asText())) - .collect(Collectors.toList()); + .stream() + .map(r -> Jsons.deserialize(r.get(JavaBaseConstants.COLUMN_NAME_DATA).asText())) + .collect(Collectors.toList()); } private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException { @@ -134,8 +123,8 @@ private List retrieveRecordsFromTable(final String tableName, final St final JdbcDatabase jdbcDB = getDatabase(getConfig()); useNativePort = oldUseNativePort; return jdbcDB.query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, - JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) - .collect(Collectors.toList()); + JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) + .collect(Collectors.toList()); } @Override @@ -153,13 +142,13 @@ protected List resolveIdentifier(final String identifier) { private static JdbcDatabase getDatabase(final JsonNode config) { return Databases.createJdbcDatabase( - config.get("username").asText(), - config.has("password") ? config.get("password").asText() : null, - String.format("jdbc:clickhouse://%s:%s/%s", - config.get("host").asText(), - config.get("port").asText(), - config.get("database").asText()), - ClickhouseDestination.DRIVER_CLASS); + config.get("username").asText(), + config.has("password") ? 
config.get("password").asText() : null, + String.format("jdbc:clickhouse://%s:%s/%s", + config.get("host").asText(), + config.get("port").asText(), + config.get("database").asText()), + ClickhouseDestination.DRIVER_CLASS); } @Override @@ -175,8 +164,8 @@ protected void tearDown(TestDestinationEnv testEnv) { } /** - * The SQL script generated by dbt in 'test' step isn't compatible - * with ClickHouse, so we skip this test for now + * The SQL script generated by dbt in 'test' step isn't compatible with ClickHouse, so we skip this + * test for now * * @throws Exception */ @@ -187,9 +176,10 @@ public void testCustomDbtTransformations() throws Exception { } /** - * The normalization container needs native port, while destination - * container needs HTTP port, we can't inject the port switch statement - * into DestinationAcceptanceTest.runSync() method for this test, so we skip it. + * The normalization container needs native port, while destination container needs HTTP port, we + * can't inject the port switch statement into DestinationAcceptanceTest.runSync() method for this + * test, so we skip it. + * * @throws Exception */ @Disabled @@ -198,13 +188,15 @@ public void testIncrementalDedupeSync() throws Exception { } /** - * The normalization container needs native port, while destination - * container needs HTTP port, we can't inject the port switch statement - * into DestinationAcceptanceTest.runSync() method for this test, so we skip it. + * The normalization container needs native port, while destination container needs HTTP port, we + * can't inject the port switch statement into DestinationAcceptanceTest.runSync() method for this + * test, so we skip it. + * * @throws Exception */ @Disabled public void testSyncWithNormalization(final String messagesFilename, final String catalogFilename) throws Exception { super.testSyncWithNormalization(messagesFilename, catalogFilename); } -} \ No newline at end of file + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java index 0de28f4fb5c1..ab03e8068002 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java @@ -12,8 +12,8 @@ import io.airbyte.db.Databases; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.db.jdbc.JdbcUtils; -import io.airbyte.integrations.base.Destination; import io.airbyte.integrations.base.AirbyteMessageConsumer; +import io.airbyte.integrations.base.Destination; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; @@ -24,113 +24,113 @@ import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaPrimitive; import java.time.Instant; -import java.util.List; import java.util.Comparator; -import java.util.stream.IntStream; +import java.util.List; import java.util.stream.Collectors; +import java.util.stream.IntStream; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; import 
org.testcontainers.containers.ClickHouseContainer; public class ClickhouseDestinationTest { - private static final String DB_NAME = "default"; - private static final String STREAM_NAME = "id_and_name"; - private static final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); - - private static ClickHouseContainer db; - private static ConfiguredAirbyteCatalog catalog; - private static JsonNode config; - - @BeforeAll - static void init() { - db = new ClickHouseContainer("yandex/clickhouse-server"); - db.start(); - } - - @BeforeEach - void setup() { - catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( - CatalogHelpers.createConfiguredAirbyteStream( - STREAM_NAME, - DB_NAME, - Field.of("id", JsonSchemaPrimitive.NUMBER), - Field.of("name", JsonSchemaPrimitive.STRING)))); - - config = Jsons.jsonNode(ImmutableMap.builder() - .put("host", db.getHost()) - .put("port", db.getFirstMappedPort()) - .put("database", DB_NAME) - .put("username", db.getUsername()) - .put("password", db.getPassword()) - .put("schema", DB_NAME) - .build()); - } - - @AfterAll - static void cleanUp() { - db.stop(); - db.close(); - } - - //@Test - void sanityTest() throws Exception { - final Destination dest = new ClickhouseDestination(); - final AirbyteMessageConsumer consumer = dest.getConsumer(config, catalog, - Destination::defaultOutputRecordCollector); - final List expectedRecords = generateRecords(10); - - consumer.start(); - expectedRecords.forEach(m -> { - try { - consumer.accept(m); - } catch (final Exception e) { - throw new RuntimeException(e); - } - }); - consumer.accept(new AirbyteMessage() - .withType(Type.STATE) - .withState(new AirbyteStateMessage() - .withData(Jsons.jsonNode(ImmutableMap.of(DB_NAME + "." + STREAM_NAME, 10))))); - consumer.close(); - - final JdbcDatabase database = Databases.createJdbcDatabase( - config.get("username").asText(), - config.get("password").asText(), - String.format("jdbc:clickhouse://%s:%s/%s", - config.get("host").asText(), - config.get("port").asText(), - config.get("database").asText()), - "ru.yandex.clickhouse.ClickHouseDriver"); - - final List actualRecords = database.bufferedResultSetQuery( - connection -> connection.createStatement().executeQuery( - String.format("SELECT * FROM %s.%s;", DB_NAME, - namingResolver.getRawTableName(STREAM_NAME))), - JdbcUtils.getDefaultSourceOperations()::rowToJson); - - assertEquals( - expectedRecords.stream().map(AirbyteMessage::getRecord) - .map(AirbyteRecordMessage::getData).collect(Collectors.toList()), - actualRecords.stream() - .map(o -> o.get("_airbyte_data").asText()) - .map(Jsons::deserialize) - .sorted(Comparator.comparingInt(x -> x.get("id").asInt())) - .collect(Collectors.toList())); - } - - private List generateRecords(final int n) { - return IntStream.range(0, n) - .boxed() - .map(i -> new AirbyteMessage() - .withType(Type.RECORD) - .withRecord(new AirbyteRecordMessage() - .withStream(STREAM_NAME) - .withNamespace(DB_NAME) - .withEmittedAt(Instant.now().toEpochMilli()) - .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "test name " + i))))) - .collect(Collectors.toList()); - } -} \ No newline at end of file + private static final String DB_NAME = "default"; + private static final String STREAM_NAME = "id_and_name"; + private static final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); + + private static ClickHouseContainer db; + private static ConfiguredAirbyteCatalog catalog; + private static JsonNode config; + + @BeforeAll + static void init() { + db = new 
ClickHouseContainer("yandex/clickhouse-server"); + db.start(); + } + + @BeforeEach + void setup() { + catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + CatalogHelpers.createConfiguredAirbyteStream( + STREAM_NAME, + DB_NAME, + Field.of("id", JsonSchemaPrimitive.NUMBER), + Field.of("name", JsonSchemaPrimitive.STRING)))); + + config = Jsons.jsonNode(ImmutableMap.builder() + .put("host", db.getHost()) + .put("port", db.getFirstMappedPort()) + .put("database", DB_NAME) + .put("username", db.getUsername()) + .put("password", db.getPassword()) + .put("schema", DB_NAME) + .build()); + } + + @AfterAll + static void cleanUp() { + db.stop(); + db.close(); + } + + // @Test + void sanityTest() throws Exception { + final Destination dest = new ClickhouseDestination(); + final AirbyteMessageConsumer consumer = dest.getConsumer(config, catalog, + Destination::defaultOutputRecordCollector); + final List expectedRecords = generateRecords(10); + + consumer.start(); + expectedRecords.forEach(m -> { + try { + consumer.accept(m); + } catch (final Exception e) { + throw new RuntimeException(e); + } + }); + consumer.accept(new AirbyteMessage() + .withType(Type.STATE) + .withState(new AirbyteStateMessage() + .withData(Jsons.jsonNode(ImmutableMap.of(DB_NAME + "." + STREAM_NAME, 10))))); + consumer.close(); + + final JdbcDatabase database = Databases.createJdbcDatabase( + config.get("username").asText(), + config.get("password").asText(), + String.format("jdbc:clickhouse://%s:%s/%s", + config.get("host").asText(), + config.get("port").asText(), + config.get("database").asText()), + "ru.yandex.clickhouse.ClickHouseDriver"); + + final List actualRecords = database.bufferedResultSetQuery( + connection -> connection.createStatement().executeQuery( + String.format("SELECT * FROM %s.%s;", DB_NAME, + namingResolver.getRawTableName(STREAM_NAME))), + JdbcUtils.getDefaultSourceOperations()::rowToJson); + + assertEquals( + expectedRecords.stream().map(AirbyteMessage::getRecord) + .map(AirbyteRecordMessage::getData).collect(Collectors.toList()), + actualRecords.stream() + .map(o -> o.get("_airbyte_data").asText()) + .map(Jsons::deserialize) + .sorted(Comparator.comparingInt(x -> x.get("id").asInt())) + .collect(Collectors.toList())); + } + + private List generateRecords(final int n) { + return IntStream.range(0, n) + .boxed() + .map(i -> new AirbyteMessage() + .withType(Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream(STREAM_NAME) + .withNamespace(DB_NAME) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "test name " + i))))) + .collect(Collectors.toList()); + } + +} From aff0c619e433428e0df1d0dce2df3670e2c892ed Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 6 Nov 2021 15:26:54 +1100 Subject: [PATCH 04/12] code improvement as per code review --- .../docker-compose.build.yaml | 7 ++ .../dedup_cdc_excluded_scd.sql | 50 ++++++++------- .../dedup_exchange_rate_scd.sql | 56 ++++++++-------- .../renamed_dedup_cdc_excluded_scd.sql | 42 ++++++------ .../test_normalization/pos_dedup_cdcx_scd.sql | 52 +++++++-------- .../dedup_cdc_excluded_scd.sql | 54 ++++++++-------- .../dedup_exchange_rate_scd.sql | 64 ++++++++++--------- .../renamed_dedup_cdc_excluded_scd.sql | 46 ++++++------- .../test_normalization/pos_dedup_cdcx_scd.sql | 56 ++++++++-------- .../test_normalization/pos_dedup_cdcx_scd.sql | 52 +++++++-------- .../transform_catalog/stream_processor.py | 60 ++++++++--------- .../clickhouse/ClickhouseDestination.java | 10 ++- 
.../clickhouse/ClickhouseSqlOperations.java | 2 + .../ClickhouseDestinationAcceptanceTest.java | 14 ---- .../clickhouse/ClickhouseDestinationTest.java | 3 +- .../NormalizationRunnerFactory.java | 2 +- 16 files changed, 292 insertions(+), 278 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml b/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml index f6d1df902699..16948b4a2221 100644 --- a/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml +++ b/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml @@ -29,3 +29,10 @@ services: context: . labels: io.airbyte.git-revision: ${GIT_REVISION} + normalization-clickhouse: + image: airbyte/normalization-clickhouse:${VERSION} + build: + dockerfile: clickhouse.Dockerfile + context: . + labels: + io.airbyte.git-revision: ${GIT_REVISION} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index e6ff4b6b12aa..aac6ab18ac72 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -20,36 +20,38 @@ input_data as ( -- dedup_cdc_excluded from test_normalization._airbyte_raw_dedup_cdc_excluded ), +input_data_with_end_at as ( + select *, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at + from input_data +), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select *, - multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row - from ( - select - assumeNotNull(hex(MD5( + select + assumeNotNull(hex(MD5( toString(id) ))) as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, - anyOrNull(_airbyte_emitted_at) over ( - partition by id - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_cdc_excluded_hashid - from input_data - ) table_alias + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_emitted_at as _airbyte_start_at, + _airbyte_end_at, + multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data_with_end_at ), dedup_data as ( select diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 8063fcd314ed..7cc3ba1ffef8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -20,13 +20,22 @@ input_data as ( -- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate ), +input_data_with_end_at as ( + select *, + anyOrNull(date) over ( + partition by id, currency, cast(NZD as String) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at + from input_data +), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select *, - multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row - from ( - select - assumeNotNull(hex(MD5( + select + assumeNotNull(hex(MD5( toString(id) || '~' || @@ -37,28 +46,21 @@ scd_data as ( toString(NZD) ))) as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - "HKD@spéçiäl & characters", - HKD_special___characters, - NZD, - USD, - date as _airbyte_start_at, - anyOrNull(date) over ( - partition by id, currency, cast(NZD as String) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data - ) table_alias + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + date as _airbyte_start_at, + _airbyte_end_at, + multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data_with_end_at ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 079e781b679e..0d752f33e21d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -20,32 +20,34 @@ input_data as ( -- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded ), 
+input_data_with_end_at as ( + select *, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at + from input_data +), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select *, - multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row - from ( - select - assumeNotNull(hex(MD5( + select + assumeNotNull(hex(MD5( toString(id) ))) as _airbyte_unique_key, - id, - _airbyte_emitted_at as _airbyte_start_at, - anyOrNull(_airbyte_emitted_at) over ( - partition by id - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_renamed_dedup_cdc_excluded_hashid - from input_data - ) table_alias + id, + _airbyte_emitted_at as _airbyte_start_at, + _airbyte_end_at, + multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_renamed_dedup_cdc_excluded_hashid + from input_data_with_end_at ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql index e14e9839149c..db4375f5e0dd 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -19,37 +19,39 @@ input_data as ( -- pos_dedup_cdcx from test_normalization._airbyte_raw_pos_dedup_cdcx ), +input_data_with_end_at as ( + select *, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at + from input_data +), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select *, - multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row - from ( - select - assumeNotNull(hex(MD5( + select + assumeNotNull(hex(MD5( toString(id) ))) as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_emitted_at as _airbyte_start_at, - anyOrNull(_airbyte_emitted_at) over ( - partition by id - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_pos_dedup_cdcx_hashid - from input_data - ) table_alias + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + 
_airbyte_emitted_at as _airbyte_start_at, + _airbyte_end_at, + multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_pos_dedup_cdcx_hashid + from input_data_with_end_at ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 98a250fcf004..a5a3a1160125 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -45,34 +45,36 @@ input_data as ( -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} ), {% endif %} +input_data_with_end_at as ( + select *, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at + from input_data +), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select *, - multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row - from ( - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, - anyOrNull(_airbyte_emitted_at) over ( - partition by id - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_cdc_excluded_hashid - from input_data - ) table_alias + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_emitted_at as _airbyte_start_at, + _airbyte_end_at, + multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data_with_end_at ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 692d4a791d9b..d9f1648c1425 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -47,39 +47,41 @@ input_data as ( -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} ), {% endif %} +input_data_with_end_at as ( + select *, + anyOrNull(date) over ( + partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at + from input_data +), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select *, - multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row - from ( - select - {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', - ]) }} as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - {{ quote('HKD@spéçiäl & characters') }}, - HKD_special___characters, - NZD, - USD, - date as _airbyte_start_at, - anyOrNull(date) over ( - partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) - order by - date is null asc, - date desc, - _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_dedup_exchange_rate_hashid - from input_data - ) table_alias + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + date as _airbyte_start_at, + _airbyte_end_at, + multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data_with_end_at ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index b93705c52fbd..ea8efd5e523f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -45,30 +45,32 @@ input_data as ( -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} ), {% endif %} +input_data_with_end_at as ( + select *, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at + from 
input_data +), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select *, - multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row - from ( - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key, - id, - _airbyte_emitted_at as _airbyte_start_at, - anyOrNull(_airbyte_emitted_at) over ( - partition by id - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_renamed_dedup_cdc_excluded_hashid - from input_data - ) table_alias + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + _airbyte_emitted_at as _airbyte_start_at, + _airbyte_end_at, + multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_renamed_dedup_cdc_excluded_hashid + from input_data_with_end_at ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql index 9cec19babf3a..34f778f069f0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -45,35 +45,37 @@ input_data as ( -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} ), {% endif %} +input_data_with_end_at as ( + select *, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at + from input_data +), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select *, - multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row - from ( - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_emitted_at as _airbyte_start_at, - anyOrNull(_airbyte_emitted_at) over ( - partition by id - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_pos_dedup_cdcx_hashid - from input_data - ) table_alias + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_emitted_at as _airbyte_start_at, + _airbyte_end_at, + multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is 
null , 1 , 0 ) as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_pos_dedup_cdcx_hashid + from input_data_with_end_at ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql index e14e9839149c..db4375f5e0dd 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -19,37 +19,39 @@ input_data as ( -- pos_dedup_cdcx from test_normalization._airbyte_raw_pos_dedup_cdcx ), +input_data_with_end_at as ( + select *, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at + from input_data +), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select *, - multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row - from ( - select - assumeNotNull(hex(MD5( + select + assumeNotNull(hex(MD5( toString(id) ))) as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_emitted_at as _airbyte_start_at, - anyOrNull(_airbyte_emitted_at) over ( - partition by id - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_pos_dedup_cdcx_hashid - from input_data - ) table_alias + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_emitted_at as _airbyte_start_at, + _airbyte_end_at, + multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_pos_dedup_cdcx_hashid + from input_data_with_end_at ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 14c861dff545..b6f8d1b1588e 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -671,37 +671,39 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup {{ sql_table_comment }} ), {{ '{% endif %}' }} +input_data_with_end_at as ( + select *, + {{ lag_begin }}({{ cursor_field }}) over ( + partition by {{ primary_key_partition | join(", ") }} + order by + {{ cursor_field }} {{ order_null }}, + {{ cursor_field }} desc, 
+ {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + {{ lag_end }} + ) as {{ airbyte_end_at }} + from input_data +), scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select *, - {{ case_begin }} {{ airbyte_end_at }} is null {{ cdc_active_row }} {{ case_then }} 1 {{ case_else }} 0 {{ case_end }} as {{ active_row }} - from ( - select - {%- if parent_hash_id %} - {{ parent_hash_id }}, - {%- endif %} - {{ '{{' }} dbt_utils.surrogate_key([ - {%- for primary_key in primary_keys %} - {{ primary_key }}, - {%- endfor %} - ]) {{ '}}' }} as {{ unique_key }}, - {%- for field in fields %} - {{ field }}, - {%- endfor %} - {{ cursor_field }} as {{ airbyte_start_at }}, - {{ lag_begin }}({{ cursor_field }}) over ( - partition by {{ primary_key_partition | join(", ") }} - order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} - {{ lag_end }} - ) as {{ airbyte_end_at }}, - {{ col_ab_id }}, - {{ col_emitted_at }}, - {{ hash_id }} - from input_data - ) table_alias + select + {%- if parent_hash_id %} + {{ parent_hash_id }}, + {%- endif %} + {{ '{{' }} dbt_utils.surrogate_key([ + {%- for primary_key in primary_keys %} + {{ primary_key }}, + {%- endfor %} + ]) {{ '}}' }} as {{ unique_key }}, + {%- for field in fields %} + {{ field }}, + {%- endfor %} + {{ cursor_field }} as {{ airbyte_start_at }}, + {{ airbyte_end_at }}, + {{ case_begin }} {{ airbyte_end_at }} is null {{ cdc_active_row }} {{ case_then }} 1 {{ case_else }} 0 {{ case_end }} as {{ active_row }}, + {{ col_ab_id }}, + {{ col_emitted_at }}, + {{ hash_id }} + from input_data_with_end_at ), dedup_data as ( select diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java index 4614ad502ffb..e0773fb6683e 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java @@ -14,8 +14,6 @@ import io.airbyte.integrations.destination.jdbc.AbstractJdbcDestination; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; -import java.util.ArrayList; -import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,14 +23,14 @@ public class ClickhouseDestination extends AbstractJdbcDestination implements De public static final String DRIVER_CLASS = "ru.yandex.clickhouse.ClickHouseDriver"; + private static final String PASSWORD = "password"; + public ClickhouseDestination() { super(DRIVER_CLASS, new ClickhouseSQLNameTransformer(), new ClickhouseSqlOperations()); } @Override public JsonNode toJdbcConfig(final JsonNode config) { - final List additionalParameters = new ArrayList<>(); - final StringBuilder jdbcUrl = new StringBuilder(String.format("jdbc:clickhouse://%s:%s/%s", config.get("host").asText(), config.get("port").asText(), @@ -42,8 +40,8 @@ public JsonNode toJdbcConfig(final JsonNode config) { .put("username", config.get("username").asText()) .put("jdbc_url", jdbcUrl.toString()); - if (config.has("password")) { - 
configBuilder.put("password", config.get("password").asText()); + if (config.has(PASSWORD)) { + configBuilder.put(PASSWORD, config.get(PASSWORD).asText()); } return Jsons.jsonNode(configBuilder.build()); diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java index d5ab09c6abd1..b0b4ed53839e 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java @@ -61,6 +61,8 @@ public void insertRecordsInternal(final JdbcDatabase database, final String schemaName, final String tmpTableName) throws SQLException { + LOGGER.info("actual size of batch: {}", records.size()); + if (records.isEmpty()) { return; } diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java index cc86c3c1507a..b1a49cf60c44 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java @@ -12,32 +12,20 @@ import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; -import io.airbyte.workers.process.ProcessFactory; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; -import org.jooq.JSONFormat; -import org.jooq.JSONFormat.RecordFormat; import org.junit.jupiter.api.Disabled; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.testcontainers.containers.ClickHouseContainer; -import org.testcontainers.containers.Network; public class ClickhouseDestinationAcceptanceTest extends DestinationAcceptanceTest { private static final String DB_NAME = "default"; - private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseDestinationAcceptanceTest.class); - private static final JSONFormat JSON_FORMAT = new JSONFormat().recordFormat(RecordFormat.OBJECT); private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); private ClickHouseContainer db; - private Network network; - - private ProcessFactory processFactory; - private TestDestinationEnv testEnv; private boolean useNativePort = false; @@ -85,8 +73,6 @@ protected JsonNode getConfig() { @Override protected JsonNode getFailCheckConfig() { - String ipAddress = db.getContainerInfo().getNetworkSettings().getIpAddress(); - return Jsons.jsonNode(ImmutableMap.builder() .put("host", db.getHost()) .put("port", db.getFirstMappedPort()) diff --git 
a/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java index ab03e8068002..93e00a805085 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java @@ -31,6 +31,7 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; import org.testcontainers.containers.ClickHouseContainer; public class ClickhouseDestinationTest { @@ -74,7 +75,7 @@ static void cleanUp() { db.close(); } - // @Test + @Test void sanityTest() throws Exception { final Destination dest = new ClickhouseDestination(); final AirbyteMessageConsumer consumer = dest.getConsumer(config, catalog, diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java index 4ad71f155b10..8f0c62f7d5ef 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java @@ -27,7 +27,7 @@ public class NormalizationRunnerFactory { .put("airbyte/destination-postgres-strict-encrypt", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.POSTGRES)) .put("airbyte/destination-redshift", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.REDSHIFT)) .put("airbyte/destination-snowflake", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.SNOWFLAKE)) - .put("airbyte/destination-clickhouse", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.CLICKHOUSE)) + .put("airbyte/destination-clickhouse", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE)) .build(); public static NormalizationRunner create(final String imageName, final ProcessFactory processFactory) { From 27fddcf1a6744d11d2fd8932f7ffa11751e84f59 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Mon, 8 Nov 2021 10:56:07 +1100 Subject: [PATCH 05/12] add ssh tunneling and ssl/tls support and code enhancement --- .../main/java/io/airbyte/db/Databases.java | 4 + .../integration_tests/dbt_integration_test.py | 1 + .../.dockerignore | 3 + .../Dockerfile | 11 + .../README.md | 5 + .../build.gradle | 30 + .../ClickhouseDestinationStrictEncrypt.java | 38 + ...estinationStrictEncryptAcceptanceTest.java | 202 +++ .../src/test-integration/resources/config.xml | 1161 +++++++++++++++++ .../test-integration/resources/dhparam.pem | 13 + .../src/test-integration/resources/server.crt | 19 + .../src/test-integration/resources/server.key | 28 + ...lickhouseDestinationStrictEncryptTest.java | 23 + .../src/test/resources/expected_spec.json | 165 +++ .../clickhouse/ClickhouseDestination.java | 25 +- .../clickhouse/ClickhouseSqlOperations.java | 4 + .../src/main/resources/spec.json | 7 + .../ClickhouseDestinationAcceptanceTest.java | 30 +- ...shClickhouseDestinationAcceptanceTest.java | 182 +++ ...eyClickhouseDestinationAcceptanceTest.java | 16 + ...rdClickhouseDestinationAcceptanceTest.java 
| 16 + .../clickhouse/ClickhouseDestinationTest.java | 3 +- .../NormalizationRunnerFactory.java | 1 + docs/integrations/destinations/clickhouse.md | 2 +- 24 files changed, 1966 insertions(+), 23 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/.dockerignore create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/Dockerfile create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/README.md create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/build.gradle create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncrypt.java create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/config.xml create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/dhparam.pem create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.crt create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.key create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptTest.java create mode 100644 airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/resources/expected_spec.json create mode 100644 airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshKeyClickhouseDestinationAcceptanceTest.java create mode 100644 airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshPasswordClickhouseDestinationAcceptanceTest.java diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java b/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java index 776356a26499..629ee673adf1 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/Databases.java @@ -72,6 +72,10 @@ public static Database createOracleDatabase(final String username, final String return createDatabase(username, password, jdbcConnectionString, "oracle.jdbc.OracleDriver", SQLDialect.DEFAULT); } + public static Database createClickhouseDatabase(final String username, final String password, final String jdbcConnectionString) { + return createDatabase(username, password, jdbcConnectionString, "ru.yandex.clickhouse.ClickHouseDriver", SQLDialect.DEFAULT); + } + public static Database createDatabase(final String username, final String password, final String jdbcConnectionString, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py index 
34133deb59a2..07c8c504e772 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py +++ b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py @@ -233,6 +233,7 @@ def setup_clickhouse_db(self): "database": self.target_schema, "username": "default", "password": "", + "ssl": False, } if start_db: self.db_names.append("clickhouse") diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/.dockerignore b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/.dockerignore new file mode 100644 index 000000000000..65c7d0ad3e73 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/.dockerignore @@ -0,0 +1,3 @@ +* +!Dockerfile +!build diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/Dockerfile b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/Dockerfile new file mode 100644 index 000000000000..3e11bb8fc910 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/Dockerfile @@ -0,0 +1,11 @@ +FROM airbyte/integration-base-java:dev + +WORKDIR /airbyte +ENV APPLICATION destination-clickhouse-strict-encrypt + +COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar + +RUN tar xf ${APPLICATION}.tar --strip-components=1 + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/destination-clickhouse-strict-encrypt diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/README.md b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/README.md new file mode 100644 index 000000000000..aa674ff7cc03 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/README.md @@ -0,0 +1,5 @@ +# ClickHouse Strict Encrypt Test Configuration + +In order to test the ClickHouse destination, you need an up-and-running ClickHouse database with SSL enabled. + +This connector inherits the ClickHouse destination, but supports SSL connections only. 
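For reference, a minimal, hypothetical Java sketch of the kind of connection config this connector expects, mirroring the Jsons/ImmutableMap pattern used by the acceptance tests in this patch; the class name, host, port, and credential values are placeholders and are not part of the patch itself.

// Illustrative sketch only: builds the kind of connection config this connector expects,
// using the same Jsons/ImmutableMap pattern as the acceptance tests. The class name and
// the host, port, database, and credential values below are placeholders.
import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.ImmutableMap;
import io.airbyte.commons.json.Jsons;

public class StrictEncryptConfigSketch {

  public static JsonNode sampleConfig() {
    return Jsons.jsonNode(ImmutableMap.builder()
        .put("host", "localhost")   // placeholder hostname
        .put("port", 8443)          // assumed HTTPS/JDBC port of an SSL-enabled ClickHouse server
        .put("database", "default")
        .put("username", "default")
        .put("password", "")        // placeholder credential
        .build());
    // Note: no "ssl" flag is set here; the strict-encrypt variant removes that field from
    // the spec and always connects over SSL.
  }
}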
diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/build.gradle b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/build.gradle new file mode 100644 index 000000000000..113db2568f59 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/build.gradle @@ -0,0 +1,30 @@ +plugins { + id 'application' + id 'airbyte-docker' + id 'airbyte-integration-test-java' +} + +application { + mainClass = 'io.airbyte.integrations.destination.clickhouse.ClickhouseDestination' + applicationDefaultJvmArgs = ['-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + implementation project(':airbyte-db:lib') + implementation project(':airbyte-integrations:bases:base-java') + implementation project(':airbyte-protocol:models') + implementation project(':airbyte-integrations:connectors:destination-jdbc') + implementation project(':airbyte-integrations:connectors:destination-clickhouse') + implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs) + + // https://mvnrepository.com/artifact/ru.yandex.clickhouse/clickhouse-jdbc + implementation 'ru.yandex.clickhouse:clickhouse-jdbc:0.3.1-patch' + + // https://mvnrepository.com/artifact/org.testcontainers/clickhouse + testImplementation 'org.testcontainers:clickhouse:1.16.2' + + integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test') + integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-clickhouse') + // https://mvnrepository.com/artifact/org.testcontainers/clickhouse + integrationTestJavaImplementation "org.testcontainers:clickhouse:1.16.2" +} diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncrypt.java b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncrypt.java new file mode 100644 index 000000000000..ddbf1b7aa24f --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncrypt.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.Destination; +import io.airbyte.integrations.base.IntegrationRunner; +import io.airbyte.integrations.base.spec_modification.SpecModifyingDestination; +import io.airbyte.protocol.models.ConnectorSpecification; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ClickhouseDestinationStrictEncrypt extends SpecModifyingDestination implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseDestinationStrictEncrypt.class); + + public ClickhouseDestinationStrictEncrypt() { + super(ClickhouseDestination.sshWrappedDestination()); + } + + @Override + public ConnectorSpecification modifySpec(final ConnectorSpecification originalSpec) { + final ConnectorSpecification spec = Jsons.clone(originalSpec); + ((ObjectNode) spec.getConnectionSpecification().get("properties")).remove("ssl"); + return spec; + } + + public static void main(final String[] args) throws Exception { + final Destination destination = new ClickhouseDestinationStrictEncrypt(); + LOGGER.info("starting destination: {}", ClickhouseDestinationStrictEncrypt.class); + new IntegrationRunner(destination).run(args); + LOGGER.info("completed destination: {}", ClickhouseDestinationStrictEncrypt.class); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java new file mode 100644 index 000000000000..0470a3b79ca3 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.integrations.destination.ExtendedNameTransformer; +import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Disabled; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.BindMode; +import org.testcontainers.containers.ClickHouseContainer; + +public class ClickhouseDestinationStrictEncryptAcceptanceTest extends DestinationAcceptanceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(ClickhouseDestinationStrictEncryptAcceptanceTest.class); + + private static final String DB_NAME = "default"; + + private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); + + private ClickHouseContainer db; + + public static final Integer HTTP_PORT = 8123; + public static final Integer NATIVE_PORT = 9000; + public static final Integer HTTPS_PORT = 8443; + public static final Integer NATIVE_SECURE_PORT = 9440; + + @Override + protected String getImageName() { + return "airbyte/destination-clickhouse-strict-encrypt:dev"; + } + + @Override + protected boolean supportsNormalization() { + return true; + } + + @Override + protected boolean supportsDBT() { + return true; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected String getDefaultSchema(final JsonNode config) { + if (config.get("database") == null) { + return null; + } + return config.get("database").asText(); + } + + @Override + protected JsonNode getConfig() { + // Note: ClickHouse official JDBC driver uses HTTP protocol, its default port is 8123 + // dbt clickhouse adapter uses native protocol, its default port is 9000 + // Since we disabled normalization and dbt test, we only use the JDBC port here. 
+ return Jsons.jsonNode(ImmutableMap.builder() + .put("host", db.getHost()) + .put("port", db.getMappedPort(HTTPS_PORT)) + .put("database", DB_NAME) + .put("username", db.getUsername()) + .put("password", db.getPassword()) + .put("schema", DB_NAME) + .build()); + } + + @Override + protected JsonNode getFailCheckConfig() { + final JsonNode clone = Jsons.clone(getConfig()); + ((ObjectNode) clone).put("password", "wrong password").put("ssl", false); + return clone; + } + + @Override + protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace) + throws Exception { + return retrieveRecordsFromTable(namingResolver.getIdentifier(streamName), namespace); + } + + @Override + protected List retrieveRecords(TestDestinationEnv testEnv, + String streamName, + String namespace, + JsonNode streamSchema) + throws Exception { + return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namespace) + .stream() + .map(r -> Jsons.deserialize(r.get(JavaBaseConstants.COLUMN_NAME_DATA).asText())) + .collect(Collectors.toList()); + } + + private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException { + final JdbcDatabase jdbcDB = getDatabase(getConfig()); + return jdbcDB.query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) + .collect(Collectors.toList()); + } + + @Override + protected List resolveIdentifier(final String identifier) { + final List result = new ArrayList<>(); + final String resolved = namingResolver.getIdentifier(identifier); + result.add(identifier); + result.add(resolved); + if (!resolved.startsWith("\"")) { + result.add(resolved.toLowerCase()); + result.add(resolved.toUpperCase()); + } + return result; + } + + private static JdbcDatabase getDatabase(final JsonNode config) { + final String jdbcStr = String.format("jdbc:clickhouse://%s:%s/%s?ssl=true&sslmode=none", + config.get("host").asText(), + config.get("port").asText(), + config.get("database").asText()); + return Databases.createJdbcDatabase( + config.get("username").asText(), + config.has("password") ? 
config.get("password").asText() : null, + jdbcStr, + ClickhouseDestination.DRIVER_CLASS); + } + + @Override + protected void setup(TestDestinationEnv testEnv) { + db = (ClickHouseContainer) new ClickHouseContainer("yandex/clickhouse-server") + .withExposedPorts(HTTP_PORT, NATIVE_PORT, HTTPS_PORT, NATIVE_SECURE_PORT) + .withClasspathResourceMapping("config.xml", "/etc/clickhouse-server/config.xml", BindMode.READ_ONLY) + .withClasspathResourceMapping("server.crt", "/etc/clickhouse-server/server.crt", BindMode.READ_ONLY) + .withClasspathResourceMapping("server.key", "/etc/clickhouse-server/server.key", BindMode.READ_ONLY) + .withClasspathResourceMapping("dhparam.pem", "/etc/clickhouse-server/dhparam.pem", BindMode.READ_ONLY); + db.start(); + + LOGGER.info(String.format("Clickhouse server container port mapping: %d -> %d, %d -> %d", + HTTP_PORT, db.getMappedPort(HTTP_PORT), + HTTPS_PORT, db.getMappedPort(HTTPS_PORT))); + } + + @Override + protected void tearDown(TestDestinationEnv testEnv) { + db.stop(); + db.close(); + } + + /** + * The SQL script generated by dbt in 'test' step isn't compatible with ClickHouse, so we skip this + * test for now + * + * @throws Exception + */ + @Disabled + public void testCustomDbtTransformations() throws Exception { + super.testCustomDbtTransformations(); + } + + @Disabled + public void testCustomDbtTransformationsFailure() throws Exception {} + + /** + * The normalization container needs native port, while destination container needs HTTP port, we + * can't inject the port switch statement into DestinationAcceptanceTest.runSync() method for this + * test, so we skip it. + * + * @throws Exception + */ + @Disabled + public void testIncrementalDedupeSync() throws Exception { + super.testIncrementalDedupeSync(); + } + + /** + * The normalization container needs native port, while destination container needs HTTP port, we + * can't inject the port switch statement into DestinationAcceptanceTest.runSync() method for this + * test, so we skip it. 
+ * + * @throws Exception + */ + @Disabled + public void testSyncWithNormalization(final String messagesFilename, final String catalogFilename) throws Exception { + super.testSyncWithNormalization(messagesFilename, catalogFilename); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/config.xml b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/config.xml new file mode 100644 index 000000000000..9b7432e4f240 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/config.xml @@ -0,0 +1,1161 @@ + + + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + + 1000M + 10 + + + + + + + + + + + + + + 8123 + + + 9000 + + + 9004 + + + 9005 + + + 8443 + + + 9440 + + + 9011 + + + 9009 + + + + + + + + + + + + :: + + + 0.0.0.0 + + + ::1 + 127.0.0.1 + + + + + + + + + + 4096 + + + 3 + + + + + false + + + /path/to/ssl_cert_file + /path/to/ssl_key_file + + + false + + + /path/to/ssl_ca_cert_file + + + deflate + + + medium + + + -1 + -1 + + + false + + + + + + + /etc/clickhouse-server/server.crt + /etc/clickhouse-server/server.key + + /etc/clickhouse-server/dhparam.pem + none + true + true + + true + + + AcceptCertificateHandler + + + + + true + true + sslv2,sslv3 + true + + + + AcceptCertificateHandler + + + + + + + + + 100 + + + 0 + + + + 10000 + + + 0.9 + + + 4194304 + + + 0 + + + + + + 8589934592 + + + 5368709120 + + + + 1000 + + + 134217728 + + + /var/lib/clickhouse/ + + + /var/lib/clickhouse/tmp/ + + + + + + /var/lib/clickhouse/user_files/ + + + + + + + + + + + + + users.xml + + + + /var/lib/clickhouse/access/ + + + + + + + default + + + + + + + + + + + + default + + + + + + + + + true + + + false + + ' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + apt install --no-install-recommends -f ./clickhouse-jdbc-bridge_$PKG_VER-1_all.deb + clickhouse-jdbc-bridge & + + * [CentOS/RHEL] + export MVN_URL=https://repo1.maven.org/maven2/ru/yandex/clickhouse/clickhouse-jdbc-bridge + export PKG_VER=$(curl -sL $MVN_URL/maven-metadata.xml | grep '' | sed -e 's|.*>\(.*\)<.*|\1|') + wget https://github.com/ClickHouse/clickhouse-jdbc-bridge/releases/download/v$PKG_VER/clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + yum localinstall -y clickhouse-jdbc-bridge-$PKG_VER-1.noarch.rpm + clickhouse-jdbc-bridge & + + Please refer to https://github.com/ClickHouse/clickhouse-jdbc-bridge#usage for more information. + ]]> + + + + + + + + + + + + + + + + localhost + 9000 + + + + + + + + + localhost + 9000 + + + + + localhost + 9000 + + + + + + + 127.0.0.1 + 9000 + + + + + 127.0.0.2 + 9000 + + + + + + true + + 127.0.0.1 + 9000 + + + + true + + 127.0.0.2 + 9000 + + + + + + + localhost + 9440 + 1 + + + + + + + localhost + 9000 + + + + + localhost + 1 + + + + + + + + + + + + + + + + + + + + + + + + 3600 + + + + 3600 + + + 60 + + + + + + + + + + + + + system + query_log
+ + toYYYYMM(event_date) + + + + + + 7500 +
+ + + + system + trace_log
+ + toYYYYMM(event_date) + 7500 +
+ + + + system + query_thread_log
+ toYYYYMM(event_date) + 7500 +
+ + + + + + + + system + metric_log
+ 7500 + 1000 +
+ + + + system + asynchronous_metric_log
+ + 7000 +
+ + + + + + engine MergeTree + partition by toYYYYMM(finish_date) + order by (finish_date, finish_time_us, trace_id) + + system + opentelemetry_span_log
+ 7500 +
+ + + + + system + crash_log
+ + + 1000 +
+ + + + + + + + + + + + + + + + + + *_dictionary.xml + + + + + + + + /clickhouse/task_queue/ddl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + click_cost + any + + 0 + 3600 + + + 86400 + 60 + + + + max + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + + + /var/lib/clickhouse/format_schemas/ + + + + + hide encrypt/decrypt arguments + ((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\) + + \1(???) + + + + + + + + + + false + + false + + + https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277 + + + + +
diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/dhparam.pem b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/dhparam.pem new file mode 100644 index 000000000000..2a862dd18e82 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/dhparam.pem @@ -0,0 +1,13 @@ +-----BEGIN DH PARAMETERS----- +MIICCAKCAgEArsn0Y1ZPxaUNw4QREGoCFSWUhg05CVgDIlhQ42ixrIRPW+WduZwO +KqnCohYkMQvFM0J0s4laNXhWXjrEYtnzwp81M7t/3gmXxkglyE4gk9BhpmLpv/JU +TLVhjWLggOZTsEWyiR4REUvJ1IvaQ/K9RGoOSw4tgyW8gHGLjPHxhTPtp5ZfE5TJ +OQZsByLcQbqji3jCoZeNRGcOjMPri4A0u8cXlHQr3/t49G/nE2oC296MVAZNkdjt +mDQmNX8Ej5dm6F0ZWYFptgxkJknaBDjsJh+ga8SKG6dZvqEMvTqHLTOV8h2uFei9 +Gm4DtKWf8x232s9t+aGOF+qsPss1lM8spYTnY9B6jdEzEwEiXFBogG763lJNJpLv +nsk8YofO6hJrgiKJWiSbR50qo6us1cq191mFDBWO8yWSjfXgf5HpzutO1hVJPKyS ++3VTt3ZfRFBJZozWlQzddurwd5Wr+D3JR8E9mz7YukEA7iLwM3nZLcF1b35cYCRs +9Q59ezxyveVtWAcTBBzRzNGr3mf9LYoumd5o3jFJsLLWAHfDZXey7n6tBJUh6a6N +ChOCeoavhzlv3lx2+C19ZKkTKU+Z/cCjnW9530MVFtlXFqihIaVAB/ecAF0ZsPNB +pg5I+U4TR0J684eoM4LgMo0ydq7G9g+WbgQ+aD0C7CcEAV7t8iuj9IMCAQI= +-----END DH PARAMETERS----- diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.crt b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.crt new file mode 100644 index 000000000000..0aeb3cdc4027 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.crt @@ -0,0 +1,19 @@ +-----BEGIN CERTIFICATE----- +MIIDCTCCAfGgAwIBAgIUcuYIRxO5jbEzvookG0MTrb2jqDUwDQYJKoZIhvcNAQEL +BQAwFDESMBAGA1UEAwwJbG9jYWxob3N0MB4XDTIxMTEwNzAzNDI0MloXDTIyMTEw +NzAzNDI0MlowFDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEF +AAOCAQ8AMIIBCgKCAQEAmKDFGjs1/TpNv7ZOB9LTeSYOIjo50An4ahYO+xTGVQJS +SajsNs2NkZWXRUe+WOGANFjTWxTGs/vYWlJ/gC/0GckcuSyqWAShVfZuYHQsIFAG +yFYTAcXQ9bJPPiNTOwq00WqnhgQZ121EGiIYnxZ8XqR1CPS4fXMDFr5lSSrlu/yM +vzRQOEbiiKcrRZvikiFijFDt759J4r3Pr4+tmZ2GJe28ZI9sMmeLigup1/awna9i +7j1yS5iHwFd0xVXSVkPdym3crNDPG9Iy2QicDqijJpqDzKErwR71839x0E8B53HF +PL6U6KtACViDklPYn+IWsAuRV9o56/X3X6tmt8YjBQIDAQABo1MwUTAdBgNVHQ4E +FgQUNoWDQBHyh0EUzpvEtbktJwvPol8wHwYDVR0jBBgwFoAUNoWDQBHyh0EUzpvE +tbktJwvPol8wDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAJZLh +BdmGt85Ru8ACtrJ8P/59GjtLBXCDAyrQeR+SfPShoL3xx69ldh9flRWOgODxHQiX +tGMDPbXZ5dRUAPmwOEjDecYk5H6RXKWa1BqX5yAa1grfkZsj1+9Pv7DneQae/owq +RAVd0SWjomFL0oElUvHhk7AMpHu7XK9f3Vj2JPFcieD7SAotn6i3IFKFrBTz14T4 +6O0tgEUdcujr0DsIxZsJRt2ITXgdJCOT+ohDWTdqB5+vh+HxfaSxAPssLBwD1bFF +m3mEBNCkrFCpX/4+uYw559ah9KRAN8nVpzl1fKZLIms/6UCdA65FEGr+7m39UDa2 +leTb3J/1adaimGBHkw== +-----END CERTIFICATE----- diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.key b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.key new file mode 100644 index 000000000000..6d29bcb797fa --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/resources/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCYoMUaOzX9Ok2/ +tk4H0tN5Jg4iOjnQCfhqFg77FMZVAlJJqOw2zY2RlZdFR75Y4YA0WNNbFMaz+9ha +Un+AL/QZyRy5LKpYBKFV9m5gdCwgUAbIVhMBxdD1sk8+I1M7CrTRaqeGBBnXbUQa +IhifFnxepHUI9Lh9cwMWvmVJKuW7/Iy/NFA4RuKIpytFm+KSIWKMUO3vn0nivc+v 
+j62ZnYYl7bxkj2wyZ4uKC6nX9rCdr2LuPXJLmIfAV3TFVdJWQ93Kbdys0M8b0jLZ +CJwOqKMmmoPMoSvBHvXzf3HQTwHnccU8vpToq0AJWIOSU9if4hawC5FX2jnr9fdf +q2a3xiMFAgMBAAECggEABRh4/CM17jpgFiJ0AHbdcJIArsva2kM68+/AmEldRcFK +FgN5v2qla1LfNgTrKfR9X0IH7GaJK8EgvAM9Fn6DW8kHGHeRSFb/O+t6S+YVAw8o +6s9U2x5Ll6hYVO7A/Jui5cr1xNemNTcEqwqwX79Ub6R189KXxjhsxfmdUBAKk3gR +1yOsVhsN0B2FnVCF8V1Ad3G8AUtZ3Akrclex8kRSMIKYCDhhn8p1kNWQT7MSYXnD +YGJdO/a3ZdXUJvUG7W3J/lX6rS9A4dDA8S9AgQ06XroODdm+NrP2qFfseOeyZpYs +m0mRfSNasWXStU+IyjK/wuYLGS7YgeLASHReUMUWgQKBgQDKItQqpF4SxwfJCyET +W2HYcU6ho5B37EPcF7gBPFwg+/8Mr0vW1cr3HgCycZpud1T4v0Hcd0ZyPkEFZ9J1 +HAzmWyzMXyvBHrb5uUeOO3SHqOBKhhXCj+EVJUmgTjKmMMUSj2NGB5HycTYghCo8 +faGRe7knOXNBHbKayb5p8b5kSQKBgQDBTKMiF/V1D2O+j9/8Y8Voy6iBS6pMLzgF +5wk/jplTwBIxBE8RXRHkEOIdoO74I/8NvB4HSGRpN4ftoqPneGA4AUaPsvn9e76z ++7HhVCvAPCgdNMi2XIL76f0YFJPIBisnggun87hyAseUw/R6s9szlA1iQM6r94jX +S+eD5P0Q3QKBgFf9MT4erpSd7tWk8pDqhn0hVYknGgwZ3LBB4ucmzzPMfgmXhPvP +tLo9ZTBII2FSsLpXBuRhR1kToFoqB4LS4wqjRazxAKrgI+YwsOVJHECxUnEeTPqU +hvYddR9C6ulM8XxSznP9d5qBjX46CeirB1m88awRZgpCUyzuBOU5RheBAoGAWrbA +iCt+QeVrBe83Spb3+eo5thPwY7h1Li/yoyUkx60H3IENKjTnRIS32PfBrioWdDeo +T/qlRMuOuvLswKA5Z48RsjZoI5GDOawRGpIJxjl1Cd/PoeVggyCYakid4e0jK3NY +TQWPtdGgICyl+z+Uy2vbrBSF6SZNzdwNVlSMfvECgYAoPb1ihbS0+mSKvtBaw9dV +fWP0ktVEEIw5BLEeD+75goYKTTNtJ7hV47aAnEcpSOlO77I3G8E5v9YL/jlXMfvF +3wi2dfGgDbKuXCtCQS/BqnqDhw9Py3sg9XzgFdLM7Y0Z7JJHTDyV9b/PfAMk6bgn +SKOWqhX1fJgG5kgymk1YEg== +-----END PRIVATE KEY----- diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptTest.java b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptTest.java new file mode 100644 index 000000000000..ff1932d79056 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptTest.java @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.ConnectorSpecification; +import org.junit.jupiter.api.Test; + +class ClickhouseDestinationStrictEncryptTest { + + @Test + void testGetSpec() throws Exception { + System.out.println(new ClickhouseDestinationStrictEncrypt().spec().getConnectionSpecification()); + assertEquals(Jsons.deserialize(MoreResources.readResource("expected_spec.json"), ConnectorSpecification.class), + new ClickhouseDestinationStrictEncrypt().spec()); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/resources/expected_spec.json new file mode 100644 index 000000000000..2b385b8b4f87 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/resources/expected_spec.json @@ -0,0 +1,165 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse", + "supportsIncremental": true, + "supportsNormalization": true, + "supportsDBT": true, + "supported_destination_sync_modes": ["overwrite", "append", "append_dedup"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "ClickHouse Destination Spec", + "type": "object", + "required": ["host", "port", "database", "username"], + "additionalProperties": true, + "properties": { + "host": { + "title": "Host", + "description": "Hostname of the database.", + "type": "string", + "order": 0 + }, + "port": { + "title": "Port", + "description": "JDBC port (not the native port) of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 8123, + "examples": ["8123"], + "order": 1 + }, + "database": { + "title": "DB Name", + "description": "Name of the database.", + "type": "string", + "order": 2 + }, + "username": { + "title": "User", + "description": "Username to use to access the database.", + "type": "string", + "order": 3 + }, + "password": { + "title": "Password", + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 4 + }, + "tunnel_method": { + "type": "object", + "title": "SSH Tunnel Method", + "description": "Whether to initiate an SSH tunnel before connecting to the database, and if so, which kind of authentication to use.", + "oneOf": [ + { + "title": "No Tunnel", + "required": ["tunnel_method"], + "properties": { + "tunnel_method": { + "description": "No ssh tunnel needed to connect to database", + "type": "string", + "const": "NO_TUNNEL", + "order": 0 + } + } + }, + { + "title": "SSH Key Authentication", + "required": [ + "tunnel_method", + "tunnel_host", + "tunnel_port", + "tunnel_user", + "ssh_key" + ], + "properties": { + "tunnel_method": { + "description": "Connect through a jump server tunnel host using username and ssh key", + "type": "string", + "const": "SSH_KEY_AUTH", + "order": 0 + }, + "tunnel_host": { + "title": "SSH Tunnel Jump Server Host", + "description": "Hostname of the jump server host that allows inbound ssh tunnel.", + "type": "string", + "order": 1 + }, + "tunnel_port": { + "title": "SSH Connection Port", + "description": "Port on the proxy/jump server that accepts inbound ssh connections.", + "type": "integer", + "minimum": 0, + "maximum": 65536, 
+ "default": 22, + "examples": ["22"], + "order": 2 + }, + "tunnel_user": { + "title": "SSH Login Username", + "description": "OS-level username for logging into the jump server host.", + "type": "string", + "order": 3 + }, + "ssh_key": { + "title": "SSH Private Key", + "description": "OS-level user account ssh key credentials in RSA PEM format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )", + "type": "string", + "airbyte_secret": true, + "multiline": true, + "order": 4 + } + } + }, + { + "title": "Password Authentication", + "required": [ + "tunnel_method", + "tunnel_host", + "tunnel_port", + "tunnel_user", + "tunnel_user_password" + ], + "properties": { + "tunnel_method": { + "description": "Connect through a jump server tunnel host using username and password authentication", + "type": "string", + "const": "SSH_PASSWORD_AUTH", + "order": 0 + }, + "tunnel_host": { + "title": "SSH Tunnel Jump Server Host", + "description": "Hostname of the jump server host that allows inbound ssh tunnel.", + "type": "string", + "order": 1 + }, + "tunnel_port": { + "title": "SSH Connection Port", + "description": "Port on the proxy/jump server that accepts inbound ssh connections.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 22, + "examples": ["22"], + "order": 2 + }, + "tunnel_user": { + "title": "SSH Login Username", + "description": "OS-level username for logging into the jump server host", + "type": "string", + "order": 3 + }, + "tunnel_user_password": { + "title": "Password", + "description": "OS-level password for logging into the jump server host", + "type": "string", + "airbyte_secret": true, + "order": 4 + } + } + } + ] + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java index e0773fb6683e..95c0b767b143 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestination.java @@ -10,10 +10,13 @@ import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.base.Destination; import io.airbyte.integrations.base.IntegrationRunner; +import io.airbyte.integrations.base.ssh.SshWrappedDestination; import io.airbyte.integrations.destination.NamingConventionTransformer; import io.airbyte.integrations.destination.jdbc.AbstractJdbcDestination; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; +import java.util.ArrayList; +import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -23,19 +26,37 @@ public class ClickhouseDestination extends AbstractJdbcDestination implements De public static final String DRIVER_CLASS = "ru.yandex.clickhouse.ClickHouseDriver"; + public static final List HOST_KEY = List.of("host"); + public static final List PORT_KEY = List.of("port"); + private static final String PASSWORD = "password"; + public static Destination sshWrappedDestination() { + return new SshWrappedDestination(new ClickhouseDestination(), HOST_KEY, PORT_KEY); + } + public ClickhouseDestination() { super(DRIVER_CLASS, new ClickhouseSQLNameTransformer(), new ClickhouseSqlOperations()); } 
@Override public JsonNode toJdbcConfig(final JsonNode config) { - final StringBuilder jdbcUrl = new StringBuilder(String.format("jdbc:clickhouse://%s:%s/%s", + final List additionalParameters = new ArrayList<>(); + + final StringBuilder jdbcUrl = new StringBuilder(String.format("jdbc:clickhouse://%s:%s/%s?", config.get("host").asText(), config.get("port").asText(), config.get("database").asText())); + if (!config.has("ssl") || config.get("ssl").asBoolean()) { + additionalParameters.add("ssl=true"); + additionalParameters.add("sslmode=none"); + } + + if (!additionalParameters.isEmpty()) { + additionalParameters.forEach(x -> jdbcUrl.append(x).append("&")); + } + final ImmutableMap.Builder configBuilder = ImmutableMap.builder() .put("username", config.get("username").asText()) .put("jdbc_url", jdbcUrl.toString()); @@ -63,7 +84,7 @@ public AirbyteConnectionStatus check(final JsonNode config) { } public static void main(String[] args) throws Exception { - final Destination destination = new ClickhouseDestination(); + final Destination destination = ClickhouseDestination.sshWrappedDestination(); LOGGER.info("starting destination: {}", ClickhouseDestination.class); new IntegrationRunner(destination).run(args); LOGGER.info("completed destination: {}", ClickhouseDestination.class); diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java index b0b4ed53839e..5346ddb1cc76 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java @@ -69,6 +69,7 @@ public void insertRecordsInternal(final JdbcDatabase database, database.execute(connection -> { File tmpFile = null; + Exception primaryException = null; try { tmpFile = Files.createTempFile(tmpTableName + "-", ".tmp").toFile(); writeBatchToFile(tmpFile, records); @@ -81,6 +82,7 @@ public void insertRecordsInternal(final JdbcDatabase database, .send(); } catch (final Exception e) { + primaryException = e; throw new RuntimeException(e); } finally { try { @@ -88,6 +90,8 @@ public void insertRecordsInternal(final JdbcDatabase database, Files.delete(tmpFile.toPath()); } } catch (final IOException e) { + if (primaryException != null) + e.addSuppressed(primaryException); throw new RuntimeException(e); } } diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json index 133717024171..52b3ead5255b 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json @@ -45,6 +45,13 @@ "type": "string", "airbyte_secret": true, "order": 4 + }, + "ssl": { + "title": "SSL Connection", + "description": "Encrypt data using SSL.", + "type": "boolean", + "default": false, + "order": 5 } } } diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java 
b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java index b1a49cf60c44..8442ed22823f 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java @@ -5,6 +5,7 @@ package io.airbyte.integrations.destination.clickhouse; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableMap; import io.airbyte.commons.json.Jsons; import io.airbyte.db.Databases; @@ -27,8 +28,6 @@ public class ClickhouseDestinationAcceptanceTest extends DestinationAcceptanceTe private ClickHouseContainer db; - private boolean useNativePort = false; - @Override protected String getImageName() { return "airbyte/destination-clickhouse:dev"; @@ -59,28 +58,25 @@ protected String getDefaultSchema(final JsonNode config) { @Override protected JsonNode getConfig() { - // ClickHouse official JDBC driver use HTTP protocal, its default port is 8123 - // dbt clickhouse adapter use native protocal, its default port is 9000 + // Note: ClickHouse official JDBC driver uses HTTP protocol, its default port is 8123 + // dbt clickhouse adapter uses native protocol, its default port is 9000 + // Since we disabled normalization and dbt test, we only use the JDBC port here. return Jsons.jsonNode(ImmutableMap.builder() .put("host", db.getHost()) - .put("port", useNativePort ? db.getMappedPort(ClickHouseContainer.NATIVE_PORT) : db.getFirstMappedPort()) + .put("port", db.getFirstMappedPort()) .put("database", DB_NAME) .put("username", db.getUsername()) .put("password", db.getPassword()) .put("schema", DB_NAME) + .put("ssl", false) .build()); } @Override protected JsonNode getFailCheckConfig() { - return Jsons.jsonNode(ImmutableMap.builder() - .put("host", db.getHost()) - .put("port", db.getFirstMappedPort()) - .put("database", DB_NAME) - .put("username", db.getUsername()) - .put("password", "wrong password") - .put("schema", DB_NAME) - .build()); + final JsonNode clone = Jsons.clone(getConfig()); + ((ObjectNode) clone).put("password", "wrong password"); + return clone; } @Override @@ -104,10 +100,7 @@ protected List retrieveRecords(TestDestinationEnv testEnv, } private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException { - boolean oldUseNativePort = useNativePort; - useNativePort = false; final JdbcDatabase jdbcDB = getDatabase(getConfig()); - useNativePort = oldUseNativePort; return jdbcDB.query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .collect(Collectors.toList()); @@ -150,14 +143,13 @@ protected void tearDown(TestDestinationEnv testEnv) { } /** - * The SQL script generated by dbt in 'test' step isn't compatible with ClickHouse, so we skip this - * test for now + * The SQL script generated by old version of dbt in 'test' step isn't compatible with ClickHouse, + * so we skip this test for now. 
* * @throws Exception */ @Disabled public void testCustomDbtTransformations() throws Exception { - useNativePort = true; super.testCustomDbtTransformations(); } diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java new file mode 100644 index 000000000000..b6fb201cb271 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.clickhouse; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.commons.functional.CheckedFunction; +import io.airbyte.commons.json.Jsons; +import io.airbyte.db.Databases; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.integrations.base.ssh.SshBastionContainer; +import io.airbyte.integrations.base.ssh.SshTunnel; +import io.airbyte.integrations.destination.ExtendedNameTransformer; +import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Disabled; +import org.testcontainers.containers.ClickHouseContainer; + +/** + * Abstract class that allows us to avoid duplicating testing logic for testing SSH with a key file + * or with a password. 
+ */ +public abstract class SshClickhouseDestinationAcceptanceTest extends DestinationAcceptanceTest { + + public abstract SshTunnel.TunnelMethod getTunnelMethod(); + + private static final String DB_NAME = "default"; + + private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); + + private ClickHouseContainer db; + private final SshBastionContainer bastion = new SshBastionContainer(); + + @Override + protected String getImageName() { + return "airbyte/destination-clickhouse:dev"; + } + + @Override + protected boolean supportsNormalization() { + return true; + } + + @Override + protected boolean supportsDBT() { + return true; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected String getDefaultSchema(final JsonNode config) { + if (config.get("database") == null) { + return null; + } + return config.get("database").asText(); + } + + @Override + protected JsonNode getConfig() throws Exception { + return bastion.getTunnelConfig(getTunnelMethod(), bastion.getBasicDbConfigBuider(db, DB_NAME) + .put("schema", DB_NAME)); + } + + @Override + protected JsonNode getFailCheckConfig() throws Exception { + final JsonNode clone = Jsons.clone(getConfig()); + ((ObjectNode) clone).put("password", "wrong password"); + return clone; + } + + @Override + protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace) + throws Exception { + return retrieveRecordsFromTable(namingResolver.getIdentifier(streamName), namespace); + } + + @Override + protected List retrieveRecords(TestDestinationEnv testEnv, + String streamName, + String namespace, + JsonNode streamSchema) + throws Exception { + return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namespace) + .stream() + .map(r -> Jsons.deserialize(r.get(JavaBaseConstants.COLUMN_NAME_DATA).asText())) + .collect(Collectors.toList()); + } + + private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws Exception { + return SshTunnel.sshWrap( + getConfig(), + ClickhouseDestination.HOST_KEY, + ClickhouseDestination.PORT_KEY, + (CheckedFunction, Exception>) mangledConfig -> getDatabase(mangledConfig) + .query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) + .collect(Collectors.toList())); + } + + @Override + protected List resolveIdentifier(final String identifier) { + final List result = new ArrayList<>(); + final String resolved = namingResolver.getIdentifier(identifier); + result.add(identifier); + result.add(resolved); + if (!resolved.startsWith("\"")) { + result.add(resolved.toLowerCase()); + result.add(resolved.toUpperCase()); + } + return result; + } + + private static JdbcDatabase getDatabase(final JsonNode config) { + return Databases.createJdbcDatabase( + config.get("username").asText(), + config.has("password") ? 
config.get("password").asText() : null, + String.format("jdbc:clickhouse://%s:%s/%s", + config.get("host").asText(), + config.get("port").asText(), + config.get("database").asText()), + ClickhouseDestination.DRIVER_CLASS); + } + + @Override + protected void setup(TestDestinationEnv testEnv) { + bastion.initAndStartBastion(); + db = (ClickHouseContainer) new ClickHouseContainer("yandex/clickhouse-server").withNetwork(bastion.getNetWork()); + db.start(); + } + + @Override + protected void tearDown(TestDestinationEnv testEnv) { + bastion.stopAndCloseContainers(db); + } + + /** + * The SQL script generated by old version of dbt in 'test' step isn't compatible with ClickHouse, + * so we skip this test for now. + * + * @throws Exception + */ + @Disabled + public void testCustomDbtTransformations() throws Exception { + super.testCustomDbtTransformations(); + } + + /** + * The normalization container needs native port, while destination container needs HTTP port, we + * can't inject the port switch statement into DestinationAcceptanceTest.runSync() method for this + * test, so we skip it. + * + * @throws Exception + */ + @Disabled + public void testIncrementalDedupeSync() throws Exception { + super.testIncrementalDedupeSync(); + } + + /** + * The normalization container needs native port, while destination container needs HTTP port, we + * can't inject the port switch statement into DestinationAcceptanceTest.runSync() method for this + * test, so we skip it. + * + * @throws Exception + */ + @Disabled + public void testSyncWithNormalization(final String messagesFilename, final String catalogFilename) throws Exception { + super.testSyncWithNormalization(messagesFilename, catalogFilename); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshKeyClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshKeyClickhouseDestinationAcceptanceTest.java new file mode 100644 index 000000000000..7fd4f3269985 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshKeyClickhouseDestinationAcceptanceTest.java @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.clickhouse; + +import io.airbyte.integrations.base.ssh.SshTunnel; + +public class SshKeyClickhouseDestinationAcceptanceTest extends SshClickhouseDestinationAcceptanceTest { + + @Override + public SshTunnel.TunnelMethod getTunnelMethod() { + return SshTunnel.TunnelMethod.SSH_KEY_AUTH; + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshPasswordClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshPasswordClickhouseDestinationAcceptanceTest.java new file mode 100644 index 000000000000..1bb01fb490d4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshPasswordClickhouseDestinationAcceptanceTest.java @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.clickhouse; + +import io.airbyte.integrations.base.ssh.SshTunnel; + +public class SshPasswordClickhouseDestinationAcceptanceTest extends SshClickhouseDestinationAcceptanceTest { + + @Override + public SshTunnel.TunnelMethod getTunnelMethod() { + return SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH; + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java index 93e00a805085..edad4528f29b 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationTest.java @@ -66,6 +66,7 @@ void setup() { .put("username", db.getUsername()) .put("password", db.getPassword()) .put("schema", DB_NAME) + .put("ssl", false) .build()); } @@ -103,7 +104,7 @@ void sanityTest() throws Exception { config.get("host").asText(), config.get("port").asText(), config.get("database").asText()), - "ru.yandex.clickhouse.ClickHouseDriver"); + ClickhouseDestination.DRIVER_CLASS); final List actualRecords = database.bufferedResultSetQuery( connection -> connection.createStatement().executeQuery( diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java index 8f0c62f7d5ef..2103bce8bb7e 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/normalization/NormalizationRunnerFactory.java @@ -28,6 +28,7 @@ public class NormalizationRunnerFactory { .put("airbyte/destination-redshift", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.REDSHIFT)) .put("airbyte/destination-snowflake", ImmutablePair.of(BASE_NORMALIZATION_IMAGE_NAME, DestinationType.SNOWFLAKE)) .put("airbyte/destination-clickhouse", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE)) + .put("airbyte/destination-clickhouse-strict-encrypt", ImmutablePair.of("airbyte/normalization-clickhouse", DestinationType.CLICKHOUSE)) .build(); public static NormalizationRunner create(final String imageName, final ProcessFactory processFactory) { diff --git a/docs/integrations/destinations/clickhouse.md b/docs/integrations/destinations/clickhouse.md index 0d05ff16c170..9312e173964b 100644 --- a/docs/integrations/destinations/clickhouse.md +++ b/docs/integrations/destinations/clickhouse.md @@ -79,5 +79,5 @@ Therefore, Airbyte ClickHouse destination will create tables and schemas using t | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | -| 0.0.1 | 2021-11-04 | [\#7620](https://github.com/airbytehq/airbyte/pull/7620) | Add ClickHouse destination | +| 0.1.0 | 2021-11-04 | [\#7620](https://github.com/airbytehq/airbyte/pull/7620) | Add ClickHouse destination | From 09f3fa61d01866b60b4458809d29c7b81f5eb2c8 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Tue, 9 Nov 2021 23:05:14 +1100 Subject: [PATCH 06/12] merge from master --- .../dedup_cdc_excluded_ab1.sql | 21 ---- .../dedup_cdc_excluded_ab2.sql | 29 ----- .../dedup_exchange_rate_ab1.sql | 
24 ---- .../dedup_exchange_rate_ab2.sql | 32 ----- .../test_normalization/exchange_rate_ab1.sql | 24 ---- .../test_normalization/exchange_rate_ab2.sql | 32 ----- .../test_normalization/exchange_rate_ab3.sql | 40 ------- .../test_normalization/pos_dedup_cdcx_ab1.sql | 21 ---- .../test_normalization/pos_dedup_cdcx_ab2.sql | 31 ----- .../renamed_dedup_cdc_excluded_ab1.sql | 17 --- .../renamed_dedup_cdc_excluded_ab2.sql | 19 --- .../dedup_cdc_excluded_scd.sql | 20 ++-- .../dedup_exchange_rate_scd.sql | 20 ++-- .../renamed_dedup_cdc_excluded_scd.sql | 20 ++-- .../test_normalization/exchange_rate.sql | 34 ------ .../test_normalization/pos_dedup_cdcx_scd.sql | 104 ---------------- .../test_normalization/pos_dedup_cdcx.sql | 31 ----- .../dedup_cdc_excluded_ab3.sql | 44 ++++++- .../dedup_exchange_rate_ab3.sql | 50 +++++++- .../test_normalization/pos_dedup_cdcx_ab3.sql | 49 +++++++- .../renamed_dedup_cdc_excluded_ab3.sql | 30 ++++- .../dedup_cdc_excluded_ab1.sql | 2 +- .../dedup_cdc_excluded_ab2.sql | 2 +- .../dedup_exchange_rate_ab1.sql | 2 +- .../dedup_exchange_rate_ab2.sql | 2 +- .../test_normalization/exchange_rate_ab1.sql | 4 +- .../test_normalization/exchange_rate_ab2.sql | 4 +- .../test_normalization/exchange_rate_ab3.sql | 4 +- .../test_normalization/pos_dedup_cdcx_ab1.sql | 3 +- .../test_normalization/pos_dedup_cdcx_ab2.sql | 3 +- .../renamed_dedup_cdc_excluded_ab1.sql | 2 +- .../renamed_dedup_cdc_excluded_ab2.sql | 2 +- .../dedup_cdc_excluded_scd.sql | 20 ++-- .../dedup_exchange_rate_scd.sql | 20 ++-- .../renamed_dedup_cdc_excluded_scd.sql | 20 ++-- .../test_normalization/exchange_rate.sql | 24 ---- .../test_normalization/pos_dedup_cdcx_scd.sql | 113 ------------------ .../test_normalization/pos_dedup_cdcx.sql | 23 ---- .../dedup_cdc_excluded_ab3.sql | 2 +- .../dedup_exchange_rate_ab3.sql | 2 +- .../test_normalization/pos_dedup_cdcx_ab3.sql | 3 +- .../renamed_dedup_cdc_excluded_ab3.sql | 2 +- .../dedup_cdc_excluded_ab1.sql | 21 ---- .../dedup_cdc_excluded_ab2.sql | 29 ----- .../dedup_exchange_rate_ab1.sql | 24 ---- .../dedup_exchange_rate_ab2.sql | 32 ----- .../test_normalization/exchange_rate_ab1.sql | 24 ---- .../test_normalization/exchange_rate_ab2.sql | 32 ----- .../test_normalization/exchange_rate_ab3.sql | 40 ------- .../test_normalization/pos_dedup_cdcx_ab1.sql | 21 ---- .../test_normalization/pos_dedup_cdcx_ab2.sql | 31 ----- .../renamed_dedup_cdc_excluded_ab1.sql | 17 --- .../renamed_dedup_cdc_excluded_ab2.sql | 19 --- .../test_normalization/exchange_rate.sql | 5 - .../test_normalization/pos_dedup_cdcx_scd.sql | 104 ---------------- .../test_normalization/pos_dedup_cdcx.sql | 31 ----- .../dedup_cdc_excluded_ab3.sql | 44 ++++++- .../dedup_exchange_rate_ab3.sql | 50 +++++++- .../test_normalization/pos_dedup_cdcx_ab3.sql | 49 +++++++- .../renamed_dedup_cdc_excluded_ab3.sql | 30 ++++- 60 files changed, 429 insertions(+), 1125 deletions(-) delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql delete mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/exchange_rate.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/exchange_rate.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/pos_dedup_cdcx.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql delete mode 100644 
airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/exchange_rate.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql delete mode 100644 airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql deleted file mode 100644 index 30483298999d..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql +++ /dev/null @@ -1,21 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_cdc_excluded_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - JSONExtractRaw(_airbyte_data, 'name') as name, - 
JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, - JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias --- dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql deleted file mode 100644 index 64ffec89ae08..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql +++ /dev/null @@ -1,29 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_cdc_excluded_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, - accurateCastOrNull(_ab_cdc_lsn, ' - Float64 -') as _ab_cdc_lsn, - accurateCastOrNull(_ab_cdc_updated_at, ' - Float64 -') as _ab_cdc_updated_at, - accurateCastOrNull(_ab_cdc_deleted_at, ' - Float64 -') as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.dedup_cdc_excluded_ab1 --- dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index 7caf3495cf97..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,24 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - JSONExtractRaw(_airbyte_data, 'currency') as currency, - JSONExtractRaw(_airbyte_data, 'date') as date, - JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, - JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", - JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, - JSONExtractRaw(_airbyte_data, 'NZD') as NZD, - JSONExtractRaw(_airbyte_data, 'USD') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git 
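For context on the _ab1 models deleted above (they are re-generated as inline CTEs later in this commit): they lean on ClickHouse's JSONExtractRaw to pull each field out of the raw _airbyte_data blob as an unparsed JSON token. A minimal, self-contained illustration using an inline sample row rather than any table from this patch:

select
    JSONExtractRaw(_airbyte_data, 'id')       as id,        -- raw token: 1
    JSONExtractRaw(_airbyte_data, 'currency') as currency   -- raw token, surrounding quotes kept: "EUR"
from (select '{"id": 1, "currency": "EUR"}' as _airbyte_data);

String values keep their double quotes at this stage, which is why the _ab2 models that follow trim them before casting.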
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index 812c7b0fadb3..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,32 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, - parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, - parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, - accurateCastOrNull("HKD@spéçiäl & characters", ' - Float64 -') as "HKD@spéçiäl & characters", - nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, - accurateCastOrNull(NZD, ' - Float64 -') as NZD, - accurateCastOrNull(USD, ' - Float64 -') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql deleted file mode 100644 index 19bde82ae927..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql +++ /dev/null @@ -1,24 +0,0 @@ - - - create view _airbyte_test_normalization.exchange_rate_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - JSONExtractRaw(_airbyte_data, 'currency') as currency, - JSONExtractRaw(_airbyte_data, 'date') as date, - JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, - JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", - JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, - JSONExtractRaw(_airbyte_data, 'NZD') as NZD, - JSONExtractRaw(_airbyte_data, 'USD') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql deleted file mode 100644 index 3d80a32b6a2e..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql +++ /dev/null @@ -1,32 +0,0 @@ - - - create view _airbyte_test_normalization.exchange_rate_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, - parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, - parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, - accurateCastOrNull("HKD@spéçiäl & characters", ' - Float64 -') as "HKD@spéçiäl & characters", - nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, - accurateCastOrNull(NZD, ' - Float64 -') as NZD, - accurateCastOrNull(USD, ' - Float64 -') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.exchange_rate_ab1 --- exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql deleted file mode 100644 index 322475a8028e..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql +++ /dev/null @@ -1,40 +0,0 @@ - - - create view _airbyte_test_normalization.exchange_rate_ab3__dbt_tmp - - as ( - --- SQL model to build a hash column based on the values of this record -select - assumeNotNull(hex(MD5( - - toString(id) || '~' || - - - toString(currency) || '~' || - - - toString(date) || '~' || - - - toString(timestamp_col) || '~' || - - - toString("HKD@spéçiäl & characters") || '~' || - - - toString(HKD_special___characters) || '~' || - - - toString(NZD) || '~' || - - - toString(USD) - - ))) as _airbyte_exchange_rate_hashid, - tmp.* -from _airbyte_test_normalization.exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql deleted file mode 100644 index c5a003ac2bb5..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql +++ /dev/null @@ -1,21 +0,0 @@ - - - create view 
_airbyte_test_normalization.pos_dedup_cdcx_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - JSONExtractRaw(_airbyte_data, 'name') as name, - JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, - JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias --- pos_dedup_cdcx -where 1 = 1 - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql deleted file mode 100644 index 0e1dc7fdb2ae..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql +++ /dev/null @@ -1,31 +0,0 @@ - - - create view _airbyte_test_normalization.pos_dedup_cdcx_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, - accurateCastOrNull(_ab_cdc_lsn, ' - Float64 -') as _ab_cdc_lsn, - accurateCastOrNull(_ab_cdc_updated_at, ' - Float64 -') as _ab_cdc_updated_at, - accurateCastOrNull(_ab_cdc_deleted_at, ' - Float64 -') as _ab_cdc_deleted_at, - accurateCastOrNull(_ab_cdc_log_pos, ' - Float64 -') as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.pos_dedup_cdcx_ab1 --- pos_dedup_cdcx -where 1 = 1 - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql deleted file mode 100644 index df3cf26d6a8d..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ /dev/null @@ -1,17 +0,0 @@ - - - create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias --- renamed_dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git 
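The _ab2 models shown above then cast those raw tokens to typed columns: accurateCastOrNull returns NULL instead of raising an error when a value does not fit the target type, and the nullif(... trim(BOTH '"' ...) ...) wrapper strips the quotes left by JSONExtractRaw and maps the literal string 'null' back to NULL. A small standalone sketch with literal inputs (Int64 is the ClickHouse type behind the BIGINT alias the generated models use; none of these columns come from the patch itself):

select
    accurateCastOrNull('42', 'Int64')            as id_ok,   -- 42
    accurateCastOrNull('not a number', 'Int64')  as id_bad,  -- NULL rather than a cast error
    nullif(accurateCastOrNull(trim(BOTH '"' from '"EUR"'), 'String'), 'null') as currency,  -- EUR
    nullif(accurateCastOrNull(trim(BOTH '"' from 'null'),  'String'), 'null') as missing;   -- NULL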
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql deleted file mode 100644 index 2fb72248eff2..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ /dev/null @@ -1,19 +0,0 @@ - - - create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab1 --- renamed_dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index aac6ab18ac72..749e5b38562b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -20,16 +20,15 @@ input_data as ( -- dedup_cdc_excluded from test_normalization._airbyte_raw_dedup_cdc_excluded ), -input_data_with_end_at as ( +input_data_with_active_row_num as ( select *, - anyOrNull(_airbyte_emitted_at) over ( + row_number() over ( partition by id order by _airbyte_emitted_at is null asc, _airbyte_emitted_at desc, _airbyte_emitted_at desc, _ab_cdc_updated_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at + ) as _airbyte_active_row_num from input_data ), scd_data as ( @@ -46,12 +45,19 @@ scd_data as ( _ab_cdc_updated_at, _ab_cdc_deleted_at, _airbyte_emitted_at as _airbyte_start_at, - _airbyte_end_at, - multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_cdc_excluded_hashid - from input_data_with_end_at + from input_data_with_active_row_num ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql 
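The dedup_cdc_excluded_scd hunk above (and the matching hunks for the other streams that follow) swaps the pre-computed _airbyte_end_at CTE for a row_number() ranking: _airbyte_active_row_num marks the most recent record per key, while _airbyte_end_at is now derived inside scd_data from the same anyOrNull(...) window, so the active-row flag is driven by the rank rather than by whether an end date exists. Roughly how the two window expressions behave on toy inline data (column names here are illustrative, and older ClickHouse releases may also need the experimental window-functions setting enabled):

select
    id,
    emitted_at,
    row_number() over (partition by id order by emitted_at desc) as active_row_num,  -- 1 = latest row per id
    anyOrNull(emitted_at) over (
        partition by id
        order by emitted_at desc
        rows between 1 preceding and 1 preceding
    ) as end_at                                           -- emitted_at of the next-newer row, NULL for the latest
from values('id Int64, emitted_at DateTime',
            (1, toDateTime('2021-11-01 00:00:00')),
            (1, toDateTime('2021-11-02 00:00:00')),
            (2, toDateTime('2021-11-03 00:00:00')))
order by id, emitted_at desc;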
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 7cc3ba1ffef8..49c1843204fe 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -20,16 +20,15 @@ input_data as ( -- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate ), -input_data_with_end_at as ( +input_data_with_active_row_num as ( select *, - anyOrNull(date) over ( + row_number() over ( partition by id, currency, cast(NZD as String) order by date is null asc, date desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at + ) as _airbyte_active_row_num from input_data ), scd_data as ( @@ -55,12 +54,19 @@ scd_data as ( NZD, USD, date as _airbyte_start_at, - _airbyte_end_at, - multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row, + anyOrNull(date) over ( + partition by id, currency, cast(NZD as String) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_exchange_rate_hashid - from input_data_with_end_at + from input_data_with_active_row_num ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 0d752f33e21d..61f46aa4665c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -20,16 +20,15 @@ input_data as ( -- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded ), -input_data_with_end_at as ( +input_data_with_active_row_num as ( select *, - anyOrNull(_airbyte_emitted_at) over ( + row_number() over ( partition by id order by _airbyte_emitted_at is null asc, _airbyte_emitted_at desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at + ) as _airbyte_active_row_num from input_data ), scd_data as ( @@ -42,12 +41,19 @@ scd_data as ( ))) as _airbyte_unique_key, id, _airbyte_emitted_at as _airbyte_start_at, - _airbyte_end_at, - multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + 
ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_renamed_dedup_cdc_excluded_hashid - from input_data_with_end_at + from input_data_with_active_row_num ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/exchange_rate.sql deleted file mode 100644 index 52af32405c92..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,34 +0,0 @@ - - - - - create table test_normalization.exchange_rate - - - - engine = MergeTree() - - order by (tuple()) - - as ( - --- Final base SQL model -select - id, - currency, - date, - timestamp_col, - "HKD@spéçiäl & characters", - HKD_special___characters, - NZD, - USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from _airbyte_test_normalization.exchange_rate_ab3 --- exchange_rate from test_normalization._airbyte_raw_exchange_rate -where 1 = 1 - - ) - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql deleted file mode 100644 index db4375f5e0dd..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ /dev/null @@ -1,104 +0,0 @@ - - - - create table test_normalization.pos_dedup_cdcx_scd__dbt_tmp - - - - engine = MergeTree() - - order by (tuple()) - - as ( - -with - -input_data as ( - select * - from _airbyte_test_normalization.pos_dedup_cdcx_ab3 - -- pos_dedup_cdcx from test_normalization._airbyte_raw_pos_dedup_cdcx -), - -input_data_with_end_at as ( - select *, - anyOrNull(_airbyte_emitted_at) over ( - partition by id - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at - from input_data -), -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - assumeNotNull(hex(MD5( - - toString(id) - - ))) as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_emitted_at as _airbyte_start_at, - _airbyte_end_at, - multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_pos_dedup_cdcx_hashid - from input_data_with_end_at -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - 
-- additionally, we generate a unique key for the scd table - row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String'), accurateCastOrNull(_ab_cdc_log_pos, 'String') - order by _airbyte_ab_id - ) as _airbyte_row_num, - assumeNotNull(hex(MD5( - - toString(_airbyte_unique_key) || '~' || - - - toString(_airbyte_start_at) || '~' || - - - toString(_airbyte_emitted_at) || '~' || - - - toString(_ab_cdc_deleted_at) || '~' || - - - toString(_ab_cdc_updated_at) || '~' || - - - toString(_ab_cdc_log_pos) - - ))) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_pos_dedup_cdcx_hashid -from dedup_data where _airbyte_row_num = 1 - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql deleted file mode 100644 index 6397037a6490..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql +++ /dev/null @@ -1,31 +0,0 @@ - - - - create table test_normalization.pos_dedup_cdcx__dbt_tmp - - - - engine = MergeTree() - - order by (tuple()) - - as ( - --- Final base SQL model -select - _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_pos_dedup_cdcx_hashid -from test_normalization.pos_dedup_cdcx_scd --- pos_dedup_cdcx from test_normalization._airbyte_raw_pos_dedup_cdcx -where 1 = 1 -and _airbyte_active_row = 1 - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql index 2788aa5cd7d4..fe2bf632dbf2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql @@ -4,7 +4,47 @@ as ( --- SQL model to build a hash column based on the values of this record +with __dbt__cte__dedup_cdc_excluded_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 
'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias +-- dedup_cdc_excluded +where 1 = 1 + +), __dbt__cte__dedup_cdc_excluded_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__dedup_cdc_excluded_ab1 +-- dedup_cdc_excluded +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record select assumeNotNull(hex(MD5( @@ -24,7 +64,7 @@ select ))) as _airbyte_dedup_cdc_excluded_hashid, tmp.* -from _airbyte_test_normalization.dedup_cdc_excluded_ab2 tmp +from __dbt__cte__dedup_cdc_excluded_ab2 tmp -- dedup_cdc_excluded where 1 = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql index 715f70863de1..28abd1a79a7f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql @@ -4,7 +4,53 @@ as ( --- SQL model to build a hash column based on the values of this record +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'currency') as currency, + JSONExtractRaw(_airbyte_data, 'date') as date, + JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, + JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(_airbyte_data, 'NZD') as NZD, + JSONExtractRaw(_airbyte_data, 'USD') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, 
''))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record select assumeNotNull(hex(MD5( @@ -33,7 +79,7 @@ select ))) as _airbyte_dedup_exchange_rate_hashid, tmp.* -from _airbyte_test_normalization.dedup_exchange_rate_ab2 tmp +from __dbt__cte__dedup_exchange_rate_ab2 tmp -- dedup_exchange_rate where 1 = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql index 90f493b6c800..9f515f09a4a4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql @@ -4,7 +4,51 @@ as ( --- SQL model to build a hash column based on the values of this record +with __dbt__cte__pos_dedup_cdcx_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias +-- pos_dedup_cdcx +where 1 = 1 + +), __dbt__cte__pos_dedup_cdcx_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + accurateCastOrNull(_ab_cdc_log_pos, ' + Float64 +') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__pos_dedup_cdcx_ab1 +-- pos_dedup_cdcx +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record select assumeNotNull(hex(MD5( @@ -27,7 +71,8 @@ select ))) as _airbyte_pos_dedup_cdcx_hashid, tmp.* -from _airbyte_test_normalization.pos_dedup_cdcx_ab2 tmp +from 
__dbt__cte__pos_dedup_cdcx_ab2 tmp -- pos_dedup_cdcx where 1 = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql index 11811d557f2a..43c5b8ad9e18 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql @@ -4,7 +4,33 @@ as ( --- SQL model to build a hash column based on the values of this record +with __dbt__cte__renamed_dedup_cdc_excluded_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 + +), __dbt__cte__renamed_dedup_cdc_excluded_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__renamed_dedup_cdc_excluded_ab1 +-- renamed_dedup_cdc_excluded +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record select assumeNotNull(hex(MD5( @@ -12,7 +38,7 @@ select ))) as _airbyte_renamed_dedup_cdc_excluded_hashid, tmp.* -from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab2 tmp +from __dbt__cte__renamed_dedup_cdc_excluded_ab2 tmp -- renamed_dedup_cdc_excluded where 1 = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql index bc0150c8df4e..ad250a2de196 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql 
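The first_output _ab3 views above now inline the parse and cast steps as __dbt__cte__..._ab1 / _ab2 common table expressions instead of reading from separately materialized _ab1/_ab2 views, which is why those standalone view files are deleted earlier in this commit; the final select still builds the row hash by MD5-ing the '~'-joined column values. A compressed, runnable sketch of that shape (inline sample data, abbreviated column list, hash column name chosen only for this sketch):

with ab1 as (
    select
        JSONExtractRaw(_airbyte_data, 'id')   as id,
        JSONExtractRaw(_airbyte_data, 'name') as name
    from (select '{"id": 7, "name": "foo"}' as _airbyte_data)
),
ab2 as (
    select
        accurateCastOrNull(id, 'Int64') as id,
        nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name
    from ab1
)
select
    assumeNotNull(hex(MD5(toString(id) || '~' || toString(name)))) as _row_hashid,  -- deterministic per-row hash
    *
from ab2;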
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql index 5cc104224b3e..b4921f53776b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql index 69dafdd842bd..bdfc716769ae 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql index a32c380c7a4f..351ccad8f300 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql index f71cd25a7b9d..59f1c4bcfba0 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} @@ -13,11 +13,11 @@ select {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters, {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, + {{ json_extract_scalar('_airbyte_data', ['column`_\'with"_quotes'], ['column___with__quotes']) }} as {{ quote('column`_\'with""_quotes') }}, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} as table_alias -- exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql index 584e689ce476..a48a14a7aecc 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} @@ -13,11 +13,11 @@ select nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), '{{ dbt_utils.type_string() }}'), 'null') as HKD_special___characters, accurateCastOrNull(NZD, '{{ dbt_utils.type_float() }}') as NZD, accurateCastOrNull(USD, '{{ dbt_utils.type_float() }}') as USD, + nullif(accurateCastOrNull(trim(BOTH '"' from {{ quote('column`_\'with""_quotes') }}), '{{ dbt_utils.type_string() }}'), 'null') as {{ quote('column`_\'with""_quotes') }}, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at from {{ ref('exchange_rate_ab1') }} -- exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql index 9e9f3a174545..d6593d4eb8f1 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} @@ -14,10 +14,10 @@ select 'HKD_special___characters', 'NZD', 'USD', + quote('column`_\'with""_quotes'), ]) }} as _airbyte_exchange_rate_hashid, tmp.* from {{ ref('exchange_rate_ab2') }} tmp -- exchange_rate where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql index 67bc11204a5c..182bcd7dbfd4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} @@ -17,4 +17,5 @@ select from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} as table_alias -- pos_dedup_cdcx where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql index 15f7fbf27359..3769adf4d02e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} @@ -17,4 +17,5 @@ select from {{ ref('pos_dedup_cdcx_ab1') }} -- pos_dedup_cdcx where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql index 7fd3046082a7..4504a7bbffa3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql index 73d0ae1e998b..8b248db9590f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index a5a3a1160125..eca9d38763a0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -45,16 +45,15 @@ input_data as ( -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} ), {% endif %} -input_data_with_end_at as ( +input_data_with_active_row_num as ( select *, - anyOrNull(_airbyte_emitted_at) over ( + row_number() over ( partition by id order by _airbyte_emitted_at is null asc, _airbyte_emitted_at desc, _airbyte_emitted_at desc, _ab_cdc_updated_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at + ) as _airbyte_active_row_num from input_data ), scd_data as ( @@ -69,12 +68,19 @@ scd_data as ( _ab_cdc_updated_at, _ab_cdc_deleted_at, _airbyte_emitted_at as _airbyte_start_at, - 
_airbyte_end_at, - multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc, _ab_cdc_updated_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_cdc_excluded_hashid - from input_data_with_end_at + from input_data_with_active_row_num ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index d9f1648c1425..97757d03ce77 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -47,16 +47,15 @@ input_data as ( -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} ), {% endif %} -input_data_with_end_at as ( +input_data_with_active_row_num as ( select *, - anyOrNull(date) over ( + row_number() over ( partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) order by date is null asc, date desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at + ) as _airbyte_active_row_num from input_data ), scd_data as ( @@ -76,12 +75,19 @@ scd_data as ( NZD, USD, date as _airbyte_start_at, - _airbyte_end_at, - multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row, + anyOrNull(date) over ( + partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_exchange_rate_hashid - from input_data_with_end_at + from input_data_with_active_row_num ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index ea8efd5e523f..c0dcee2b2ccb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -45,16 +45,15 @@ input_data as ( -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} ), {% endif %} -input_data_with_end_at as ( +input_data_with_active_row_num as ( select *, - anyOrNull(_airbyte_emitted_at) over ( + row_number() over ( partition by id order by _airbyte_emitted_at is null asc, _airbyte_emitted_at desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at + ) as _airbyte_active_row_num from input_data ), scd_data as ( @@ -65,12 +64,19 @@ scd_data as ( ]) }} as _airbyte_unique_key, id, _airbyte_emitted_at as _airbyte_start_at, - _airbyte_end_at, - multiIf( _airbyte_end_at is null , 1 , 0 ) as _airbyte_active_row, + anyOrNull(_airbyte_emitted_at) over ( + partition by id + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING + ) as _airbyte_end_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_renamed_dedup_cdc_excluded_hashid - from input_data_with_end_at + from input_data_with_active_row_num ), dedup_data as ( select diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/exchange_rate.sql deleted file mode 100644 index dcab54dd6a6a..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,24 +0,0 @@ -{{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model -select - id, - currency, - date, - timestamp_col, - {{ quote('HKD@spéçiäl & characters') }}, - HKD_special___characters, - NZD, - USD, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_exchange_rate_hashid -from {{ ref('exchange_rate_ab3') }} --- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} -where 1 = 1 -{{ incremental_clause('_airbyte_emitted_at') }} - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql deleted file mode 100644 index 34f778f069f0..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ /dev/null @@ -1,113 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key_scd", - schema = "test_normalization", 
- tags = [ "top-level" ] -) }} -with -{% if is_incremental() %} -new_data as ( - -- retrieve incremental "new" data - select - * - from {{ ref('pos_dedup_cdcx_ab3') }} - -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} - where 1 = 1 - {{ incremental_clause('_airbyte_emitted_at') }} -), -new_data_ids as ( - -- build a subset of _airbyte_unique_key from rows that are new - select distinct - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key - from new_data -), -previous_active_scd_data as ( - -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes - select - {{ star_intersect(ref('pos_dedup_cdcx_ab3'), this, from_alias='inc_data', intersect_alias='this_data') }} - from {{ this }} as this_data - -- make a join with new_data using primary key to filter active data that need to be updated only - join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key - -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro) - --left join {{ ref('pos_dedup_cdcx_ab3') }} as inc_data on 1 = 0 - where _airbyte_active_row = 1 -), -input_data as ( - select {{ dbt_utils.star(ref('pos_dedup_cdcx_ab3')) }} from new_data - union all - select {{ dbt_utils.star(ref('pos_dedup_cdcx_ab3')) }} from previous_active_scd_data -), -{% else %} -input_data as ( - select * - from {{ ref('pos_dedup_cdcx_ab3') }} - -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} -), -{% endif %} -input_data_with_end_at as ( - select *, - anyOrNull(_airbyte_emitted_at) over ( - partition by id - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at - from input_data -), -scd_data as ( - -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - {{ dbt_utils.surrogate_key([ - 'id', - ]) }} as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_emitted_at as _airbyte_start_at, - _airbyte_end_at, - multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_pos_dedup_cdcx_hashid - from input_data_with_end_at -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_log_pos, '{{ dbt_utils.type_string() }}') - order by _airbyte_ab_id - ) as _airbyte_row_num, - {{ dbt_utils.surrogate_key([ - '_airbyte_unique_key', - '_airbyte_start_at', - '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at', '_ab_cdc_log_pos' - ]) }} as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - 
_airbyte_pos_dedup_cdcx_hashid -from dedup_data where _airbyte_row_num = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/pos_dedup_cdcx.sql deleted file mode 100644 index fc341acc8a63..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/pos_dedup_cdcx.sql +++ /dev/null @@ -1,23 +0,0 @@ -{{ config( - unique_key = "_airbyte_unique_key", - schema = "test_normalization", - tags = [ "top-level" ] -) }} --- Final base SQL model -select - _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - {{ current_timestamp() }} as _airbyte_normalized_at, - _airbyte_pos_dedup_cdcx_hashid -from {{ ref('pos_dedup_cdcx_scd') }} --- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} -where 1 = 1 -and _airbyte_active_row = 1 - diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql index dc6238af14f4..446204f691eb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql index 22f448b39869..b14a1fb639b1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql index 7dd3908bac88..dbe0c313b238 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} @@ -17,4 +17,5 @@ select from {{ ref('pos_dedup_cdcx_ab2') }} tmp -- pos_dedup_cdcx where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql index f5ce3e6ba182..2356b929f1f3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql @@ -1,5 +1,5 @@ {{ config( - unique_key = env_var('AIRBYTE_DEFAULT_UNIQUE_KEY', '_airbyte_ab_id'), + unique_key = '_airbyte_ab_id', schema = "_airbyte_test_normalization", tags = [ "top-level-intermediate" ] ) }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql deleted file mode 100644 index 30483298999d..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql +++ /dev/null @@ -1,21 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_cdc_excluded_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - JSONExtractRaw(_airbyte_data, 'name') as name, - JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, - JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, - _airbyte_ab_id, - 
_airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias --- dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql deleted file mode 100644 index 64ffec89ae08..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql +++ /dev/null @@ -1,29 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_cdc_excluded_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, - accurateCastOrNull(_ab_cdc_lsn, ' - Float64 -') as _ab_cdc_lsn, - accurateCastOrNull(_ab_cdc_updated_at, ' - Float64 -') as _ab_cdc_updated_at, - accurateCastOrNull(_ab_cdc_deleted_at, ' - Float64 -') as _ab_cdc_deleted_at, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.dedup_cdc_excluded_ab1 --- dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql deleted file mode 100644 index 7caf3495cf97..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql +++ /dev/null @@ -1,24 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - JSONExtractRaw(_airbyte_data, 'currency') as currency, - JSONExtractRaw(_airbyte_data, 'date') as date, - JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, - JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", - JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, - JSONExtractRaw(_airbyte_data, 'NZD') as NZD, - JSONExtractRaw(_airbyte_data, 'USD') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias --- dedup_exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql deleted file mode 100644 index 812c7b0fadb3..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql +++ /dev/null @@ -1,32 +0,0 @@ - - - create view _airbyte_test_normalization.dedup_exchange_rate_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, - parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, - parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, - accurateCastOrNull("HKD@spéçiäl & characters", ' - Float64 -') as "HKD@spéçiäl & characters", - nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, - accurateCastOrNull(NZD, ' - Float64 -') as NZD, - accurateCastOrNull(USD, ' - Float64 -') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.dedup_exchange_rate_ab1 --- dedup_exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql deleted file mode 100644 index 19bde82ae927..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab1.sql +++ /dev/null @@ -1,24 +0,0 @@ - - - create view _airbyte_test_normalization.exchange_rate_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - JSONExtractRaw(_airbyte_data, 'currency') as currency, - JSONExtractRaw(_airbyte_data, 'date') as date, - JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, - JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", - JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, - JSONExtractRaw(_airbyte_data, 'NZD') as NZD, - JSONExtractRaw(_airbyte_data, 'USD') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_exchange_rate as table_alias --- exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql deleted file mode 100644 index 
3d80a32b6a2e..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab2.sql +++ /dev/null @@ -1,32 +0,0 @@ - - - create view _airbyte_test_normalization.exchange_rate_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, - parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, - parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, - accurateCastOrNull("HKD@spéçiäl & characters", ' - Float64 -') as "HKD@spéçiäl & characters", - nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, - accurateCastOrNull(NZD, ' - Float64 -') as NZD, - accurateCastOrNull(USD, ' - Float64 -') as USD, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.exchange_rate_ab1 --- exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql deleted file mode 100644 index 322475a8028e..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/exchange_rate_ab3.sql +++ /dev/null @@ -1,40 +0,0 @@ - - - create view _airbyte_test_normalization.exchange_rate_ab3__dbt_tmp - - as ( - --- SQL model to build a hash column based on the values of this record -select - assumeNotNull(hex(MD5( - - toString(id) || '~' || - - - toString(currency) || '~' || - - - toString(date) || '~' || - - - toString(timestamp_col) || '~' || - - - toString("HKD@spéçiäl & characters") || '~' || - - - toString(HKD_special___characters) || '~' || - - - toString(NZD) || '~' || - - - toString(USD) - - ))) as _airbyte_exchange_rate_hashid, - tmp.* -from _airbyte_test_normalization.exchange_rate_ab2 tmp --- exchange_rate -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql deleted file mode 100644 index c5a003ac2bb5..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql +++ /dev/null @@ -1,21 +0,0 @@ - - - create view _airbyte_test_normalization.pos_dedup_cdcx_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - 
JSONExtractRaw(_airbyte_data, 'name') as name, - JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, - JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, - JSONExtractRaw(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias --- pos_dedup_cdcx -where 1 = 1 - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql deleted file mode 100644 index 0e1dc7fdb2ae..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql +++ /dev/null @@ -1,31 +0,0 @@ - - - create view _airbyte_test_normalization.pos_dedup_cdcx_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, - accurateCastOrNull(_ab_cdc_lsn, ' - Float64 -') as _ab_cdc_lsn, - accurateCastOrNull(_ab_cdc_updated_at, ' - Float64 -') as _ab_cdc_updated_at, - accurateCastOrNull(_ab_cdc_deleted_at, ' - Float64 -') as _ab_cdc_deleted_at, - accurateCastOrNull(_ab_cdc_log_pos, ' - Float64 -') as _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.pos_dedup_cdcx_ab1 --- pos_dedup_cdcx -where 1 = 1 - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql deleted file mode 100644 index df3cf26d6a8d..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ /dev/null @@ -1,17 +0,0 @@ - - - create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab1__dbt_tmp - - as ( - --- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema -select - JSONExtractRaw(_airbyte_data, 'id') as id, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias --- renamed_dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql deleted file mode 100644 index 2fb72248eff2..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ /dev/null @@ -1,19 +0,0 @@ - - - create view _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab2__dbt_tmp - - as ( - --- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -select - accurateCastOrNull(id, ' - BIGINT -') as id, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at -from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab1 --- renamed_dedup_cdc_excluded -where 1 = 1 - - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/exchange_rate.sql deleted file mode 100644 index a0f060cad62a..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/exchange_rate.sql +++ /dev/null @@ -1,5 +0,0 @@ - - insert into test_normalization.exchange_rate ("id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid") - select "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid" - from exchange_rate__dbt_tmp - \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql deleted file mode 100644 index db4375f5e0dd..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ /dev/null @@ -1,104 +0,0 @@ - - - - create table test_normalization.pos_dedup_cdcx_scd__dbt_tmp - - - - engine = MergeTree() - - order by (tuple()) - - as ( - -with - -input_data as ( - select * - from _airbyte_test_normalization.pos_dedup_cdcx_ab3 - -- pos_dedup_cdcx from test_normalization._airbyte_raw_pos_dedup_cdcx -), - -input_data_with_end_at as ( - select *, - anyOrNull(_airbyte_emitted_at) over ( - partition by id - order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at - from input_data -), -scd_data as ( - -- SQL 
model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key - select - assumeNotNull(hex(MD5( - - toString(id) - - ))) as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_emitted_at as _airbyte_start_at, - _airbyte_end_at, - multiIf( _airbyte_end_at is null and _ab_cdc_deleted_at is null , 1 , 0 ) as _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - _airbyte_pos_dedup_cdcx_hashid - from input_data_with_end_at -), -dedup_data as ( - select - -- we need to ensure de-duplicated rows for merge/update queries - -- additionally, we generate a unique key for the scd table - row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String'), accurateCastOrNull(_ab_cdc_log_pos, 'String') - order by _airbyte_ab_id - ) as _airbyte_row_num, - assumeNotNull(hex(MD5( - - toString(_airbyte_unique_key) || '~' || - - - toString(_airbyte_start_at) || '~' || - - - toString(_airbyte_emitted_at) || '~' || - - - toString(_ab_cdc_deleted_at) || '~' || - - - toString(_ab_cdc_updated_at) || '~' || - - - toString(_ab_cdc_log_pos) - - ))) as _airbyte_unique_key_scd, - scd_data.* - from scd_data -) -select - _airbyte_unique_key, - _airbyte_unique_key_scd, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_start_at, - _airbyte_end_at, - _airbyte_active_row, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_pos_dedup_cdcx_hashid -from dedup_data where _airbyte_row_num = 1 - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql deleted file mode 100644 index 6397037a6490..000000000000 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/pos_dedup_cdcx.sql +++ /dev/null @@ -1,31 +0,0 @@ - - - - create table test_normalization.pos_dedup_cdcx__dbt_tmp - - - - engine = MergeTree() - - order by (tuple()) - - as ( - --- Final base SQL model -select - _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_ab_id, - _airbyte_emitted_at, - now() as _airbyte_normalized_at, - _airbyte_pos_dedup_cdcx_hashid -from test_normalization.pos_dedup_cdcx_scd --- pos_dedup_cdcx from test_normalization._airbyte_raw_pos_dedup_cdcx -where 1 = 1 -and _airbyte_active_row = 1 - ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql index 2788aa5cd7d4..fe2bf632dbf2 100644 --- 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_cdc_excluded_ab3.sql @@ -4,7 +4,47 @@ as ( --- SQL model to build a hash column based on the values of this record +with __dbt__cte__dedup_cdc_excluded_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias +-- dedup_cdc_excluded +where 1 = 1 + +), __dbt__cte__dedup_cdc_excluded_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__dedup_cdc_excluded_ab1 +-- dedup_cdc_excluded +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record select assumeNotNull(hex(MD5( @@ -24,7 +64,7 @@ select ))) as _airbyte_dedup_cdc_excluded_hashid, tmp.* -from _airbyte_test_normalization.dedup_cdc_excluded_ab2 tmp +from __dbt__cte__dedup_cdc_excluded_ab2 tmp -- dedup_cdc_excluded where 1 = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql index 715f70863de1..28abd1a79a7f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_ab3.sql @@ -4,7 +4,53 @@ as ( --- SQL model to build a hash column based on the values of this record +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'currency') as currency, + JSONExtractRaw(_airbyte_data, 'date') as date, + JSONExtractRaw(_airbyte_data, 'timestamp_col') as timestamp_col, + JSONExtractRaw(_airbyte_data, 'HKD@spéçiäl & 
characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(_airbyte_data, 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(_airbyte_data, 'NZD') as NZD, + JSONExtractRaw(_airbyte_data, 'USD') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, ''))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record select assumeNotNull(hex(MD5( @@ -33,7 +79,7 @@ select ))) as _airbyte_dedup_exchange_rate_hashid, tmp.* -from _airbyte_test_normalization.dedup_exchange_rate_ab2 tmp +from __dbt__cte__dedup_exchange_rate_ab2 tmp -- dedup_exchange_rate where 1 = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql index 90f493b6c800..9f515f09a4a4 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/pos_dedup_cdcx_ab3.sql @@ -4,7 +4,51 @@ as ( --- SQL model to build a hash column based on the values of this record +with __dbt__cte__pos_dedup_cdcx_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + JSONExtractRaw(_airbyte_data, 'name') as name, + JSONExtractRaw(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + JSONExtractRaw(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + JSONExtractRaw(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias +-- pos_dedup_cdcx +where 1 = 1 + +), __dbt__cte__pos_dedup_cdcx_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + 
nullif(accurateCastOrNull(trim(BOTH '"' from name), 'String'), 'null') as name, + accurateCastOrNull(_ab_cdc_lsn, ' + Float64 +') as _ab_cdc_lsn, + accurateCastOrNull(_ab_cdc_updated_at, ' + Float64 +') as _ab_cdc_updated_at, + accurateCastOrNull(_ab_cdc_deleted_at, ' + Float64 +') as _ab_cdc_deleted_at, + accurateCastOrNull(_ab_cdc_log_pos, ' + Float64 +') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__pos_dedup_cdcx_ab1 +-- pos_dedup_cdcx +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record select assumeNotNull(hex(MD5( @@ -27,7 +71,8 @@ select ))) as _airbyte_pos_dedup_cdcx_hashid, tmp.* -from _airbyte_test_normalization.pos_dedup_cdcx_ab2 tmp +from __dbt__cte__pos_dedup_cdcx_ab2 tmp -- pos_dedup_cdcx where 1 = 1 + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql index 11811d557f2a..43c5b8ad9e18 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/renamed_dedup_cdc_excluded_ab3.sql @@ -4,7 +4,33 @@ as ( --- SQL model to build a hash column based on the values of this record +with __dbt__cte__renamed_dedup_cdc_excluded_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +select + JSONExtractRaw(_airbyte_data, 'id') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 + +), __dbt__cte__renamed_dedup_cdc_excluded_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +select + accurateCastOrNull(id, ' + BIGINT +') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__renamed_dedup_cdc_excluded_ab1 +-- renamed_dedup_cdc_excluded +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record select assumeNotNull(hex(MD5( @@ -12,7 +38,7 @@ select ))) as _airbyte_renamed_dedup_cdc_excluded_hashid, tmp.* -from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab2 tmp +from __dbt__cte__renamed_dedup_cdc_excluded_ab2 tmp -- renamed_dedup_cdc_excluded where 1 = 1 From 4fc08be14f9132743a64b2ecf53210d233643dd4 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Wed, 10 Nov 2021 11:28:34 +1100 Subject: [PATCH 07/12] disable testCustomDbtTransformationsFailure test --- .../ClickhouseDestinationStrictEncryptAcceptanceTest.java | 6 ++++-- .../clickhouse/ClickhouseDestinationAcceptanceTest.java | 5 +++++ .../clickhouse/SshClickhouseDestinationAcceptanceTest.java | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git 
a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java index 0470a3b79ca3..a1d3ce0ecce0 100644 --- a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java @@ -162,8 +162,10 @@ protected void tearDown(TestDestinationEnv testEnv) { } /** - * The SQL script generated by dbt in 'test' step isn't compatible with ClickHouse, so we skip this - * test for now + * The SQL script generated by old version of dbt in 'test' step isn't compatible with ClickHouse, + * so we skip this test for now. + * + * Ref: https://github.com/dbt-labs/dbt-core/issues/3905 * * @throws Exception */ diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java index 8442ed22823f..66e1e8f8d1c8 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java @@ -146,6 +146,8 @@ protected void tearDown(TestDestinationEnv testEnv) { * The SQL script generated by old version of dbt in 'test' step isn't compatible with ClickHouse, * so we skip this test for now. 
* + * Ref: https://github.com/dbt-labs/dbt-core/issues/3905 + * * @throws Exception */ @Disabled @@ -153,6 +155,9 @@ public void testCustomDbtTransformations() throws Exception { super.testCustomDbtTransformations(); } + @Disabled + public void testCustomDbtTransformationsFailure() throws Exception {} + /** * The normalization container needs native port, while destination container needs HTTP port, we * can't inject the port switch statement into DestinationAcceptanceTest.runSync() method for this diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java index b6fb201cb271..417930fa2f67 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java @@ -148,6 +148,8 @@ protected void tearDown(TestDestinationEnv testEnv) { * The SQL script generated by old version of dbt in 'test' step isn't compatible with ClickHouse, * so we skip this test for now. * + * Ref: https://github.com/dbt-labs/dbt-core/issues/3905 + * * @throws Exception */ @Disabled @@ -155,6 +157,9 @@ public void testCustomDbtTransformations() throws Exception { super.testCustomDbtTransformations(); } + @Disabled + public void testCustomDbtTransformationsFailure() throws Exception {} + /** * The normalization container needs native port, while destination container needs HTTP port, we * can't inject the port switch statement into DestinationAcceptanceTest.runSync() method for this From b7d12cc5699d693cffab26d4a96d11fc287a1fe9 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Thu, 11 Nov 2021 22:31:15 +1100 Subject: [PATCH 08/12] fix string format bug --- .../destination/clickhouse/ClickhouseSqlOperations.java | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java index 5346ddb1cc76..b712a8cf0a7a 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/java/io/airbyte/integrations/destination/clickhouse/ClickhouseSqlOperations.java @@ -42,7 +42,6 @@ public String createTableQuery(final JdbcDatabase database, final String schemaN JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_DATA, JavaBaseConstants.COLUMN_NAME_EMITTED_AT, - JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_AB_ID); } From 282febe52c70938795d8e503cb9154cad45bace3 Mon Sep 17 00:00:00 2001 From: Bo Lu Date: Sat, 13 Nov 2021 12:35:41 +1100 Subject: [PATCH 09/12] fix reserved keywords bug and disable dbt --- .../normalization/transform_catalog/reserved_keywords.py | 4 +++- .../ClickhouseDestinationStrictEncryptAcceptanceTest.java | 2 +- .../destination-clickhouse/src/main/resources/spec.json | 2 +- 
.../clickhouse/ClickhouseDestinationAcceptanceTest.java | 2 +- .../clickhouse/SshClickhouseDestinationAcceptanceTest.java | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py index d787dec66fc8..ff1f87d064ac 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py @@ -2533,7 +2533,9 @@ "REGR_SYY", } -CLICKHOUSE = {} +# In ClickHouse, keywords are not reserved. +# Ref: https://clickhouse.com/docs/en/sql-reference/syntax/#syntax-keywords +CLICKHOUSE = set() RESERVED_KEYWORDS = { DestinationType.BIGQUERY.value: BIGQUERY, diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java index a1d3ce0ecce0..f59bfdcaaf5b 100644 --- a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java @@ -50,7 +50,7 @@ protected boolean supportsNormalization() { @Override protected boolean supportsDBT() { - return true; + return false; } @Override diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json index 52b3ead5255b..6037b573394d 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-clickhouse/src/main/resources/spec.json @@ -2,7 +2,7 @@ "documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse", "supportsIncremental": true, "supportsNormalization": true, - "supportsDBT": true, + "supportsDBT": false, "supported_destination_sync_modes": ["overwrite", "append", "append_dedup"], "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java index 66e1e8f8d1c8..cf25f5211d54 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java @@ -40,7 +40,7 @@ protected boolean supportsNormalization() { @Override protected boolean supportsDBT() { - return true; + return false; 
   }
 
   @Override
diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java
index 417930fa2f67..ed50b11027f1 100644
--- a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java
+++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java
@@ -48,7 +48,7 @@ protected boolean supportsNormalization() {
 
   @Override
   protected boolean supportsDBT() {
-    return true;
+    return false;
   }
 
   @Override

From 43e39f6558633b42761ce110c6f6fd49c9b40946 Mon Sep 17 00:00:00 2001
From: Bo Lu
Date: Wed, 17 Nov 2021 22:45:53 +1100
Subject: [PATCH 10/12] disable dbt in expect result

---
 .../src/test/resources/expected_spec.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/resources/expected_spec.json b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/resources/expected_spec.json
index 2b385b8b4f87..3d15378c4080 100644
--- a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/resources/expected_spec.json
+++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test/resources/expected_spec.json
@@ -2,7 +2,7 @@
   "documentationUrl": "https://docs.airbyte.io/integrations/destinations/clickhouse",
   "supportsIncremental": true,
   "supportsNormalization": true,
-  "supportsDBT": true,
+  "supportsDBT": false,
   "supported_destination_sync_modes": ["overwrite", "append", "append_dedup"],
   "connectionSpecification": {
     "$schema": "http://json-schema.org/draft-07/schema#",

From d85eb1fccbe8533f92e63d57b323b67559067626 Mon Sep 17 00:00:00 2001
From: Bo Lu
Date: Fri, 26 Nov 2021 10:43:22 +1100
Subject: [PATCH 11/12] add type hints

---
 .../normalization/transform_catalog/reserved_keywords.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py
index ff1f87d064ac..5f1a526fce1d 100644
--- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py
+++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py
@@ -3,6 +3,7 @@
 #
 
+from typing import Set
 from normalization import DestinationType
 
 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
@@ -2535,7 +2536,7 @@
 
 # In ClickHouse, keywords are not reserved.
 # Ref: https://clickhouse.com/docs/en/sql-reference/syntax/#syntax-keywords
-CLICKHOUSE = set()
+CLICKHOUSE: Set[str] = set()
 
 RESERVED_KEYWORDS = {
     DestinationType.BIGQUERY.value: BIGQUERY,
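Because CLICKHOUSE stays an empty (now typed) set, normalization never treats any ClickHouse identifier as a reserved keyword. Below is a minimal, self-contained sketch of how such a per-destination keyword lookup behaves. The is_reserved_keyword helper and the plain-string map keys are illustrative assumptions, not the actual transform_catalog API; in the real module the mapping is keyed by DestinationType.value and the keyword sets are much larger.

from typing import Dict, Set

# Truncated stand-ins for the real keyword sets.
BIGQUERY: Set[str] = {"SELECT", "FROM", "WHERE"}
# ClickHouse does not reserve keywords, so its set stays empty.
CLICKHOUSE: Set[str] = set()

# Keyed by plain strings here to keep the sketch standalone.
RESERVED_KEYWORDS: Dict[str, Set[str]] = {
    "bigquery": BIGQUERY,
    "clickhouse": CLICKHOUSE,
}


def is_reserved_keyword(token: str, destination: str) -> bool:
    """Case-insensitive membership test against the destination's keyword set."""
    return token.upper() in RESERVED_KEYWORDS[destination]


if __name__ == "__main__":
    assert is_reserved_keyword("select", "bigquery")
    # With an empty CLICKHOUSE set, no identifier is ever flagged as reserved.
    assert not is_reserved_keyword("select", "clickhouse")
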
From 56bbb84377d12e456dd1676acd3e7d69cb6f925e Mon Sep 17 00:00:00 2001
From: Marcos Marx
Date: Mon, 13 Dec 2021 19:21:56 -0300
Subject: [PATCH 12/12] bump connector version

---
 .../seed/destination_definitions.yaml      |   5 +
 .../resources/seed/destination_specs.yaml  | 159 ++++++++++++++++++
 2 files changed, 164 insertions(+)

diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml
index 6cbfba046e5f..a5397fa5e2c2 100644
--- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml
+++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml
@@ -34,6 +34,11 @@
   dockerImageTag: 0.2.0
   documentationUrl: https://docs.airbyte.io/integrations/destinations/keen
   icon: chargify.svg
+- name: Clickhouse
+  destinationDefinitionId: ce0d828e-1dc4-496c-b122-2da42e637e48
+  dockerRepository: airbyte/destination-clickhouse
+  dockerImageTag: 0.1.0
+  documentationUrl: https://docs.airbyte.io/integrations/destinations/clickhouse
 - name: DynamoDB
   destinationDefinitionId: 8ccd8909-4e99-4141-b48d-4984b70b2d89
   dockerRepository: airbyte/destination-dynamodb
diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml
index 86739d566e2f..c160408c1329 100644
--- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml
+++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml
@@ -525,6 +525,165 @@
   supported_destination_sync_modes:
   - "overwrite"
   - "append"
+- dockerImage: "airbyte/destination-clickhouse:0.1.0"
+  spec:
+    documentationUrl: "https://docs.airbyte.io/integrations/destinations/clickhouse"
+    connectionSpecification:
+      $schema: "http://json-schema.org/draft-07/schema#"
+      title: "ClickHouse Destination Spec"
+      type: "object"
+      required:
+      - "host"
+      - "port"
+      - "database"
+      - "username"
+      additionalProperties: true
+      properties:
+        host:
+          title: "Host"
+          description: "Hostname of the database."
+          type: "string"
+          order: 0
+        port:
+          title: "Port"
+          description: "JDBC port (not the native port) of the database."
+          type: "integer"
+          minimum: 0
+          maximum: 65536
+          default: 8123
+          examples:
+          - "8123"
+          order: 1
+        database:
+          title: "DB Name"
+          description: "Name of the database."
+          type: "string"
+          order: 2
+        username:
+          title: "User"
+          description: "Username to use to access the database."
+          type: "string"
+          order: 3
+        password:
+          title: "Password"
+          description: "Password associated with the username."
+          type: "string"
+          airbyte_secret: true
+          order: 4
+        ssl:
+          title: "SSL Connection"
+          description: "Encrypt data using SSL."
+          type: "boolean"
+          default: false
+          order: 5
+        tunnel_method:
+          type: "object"
+          title: "SSH Tunnel Method"
+          description: "Whether to initiate an SSH tunnel before connecting to the\
+            \ database, and if so, which kind of authentication to use."
+          oneOf:
+          - title: "No Tunnel"
+            required:
+            - "tunnel_method"
+            properties:
+              tunnel_method:
+                description: "No ssh tunnel needed to connect to database"
+                type: "string"
+                const: "NO_TUNNEL"
+                order: 0
+          - title: "SSH Key Authentication"
+            required:
+            - "tunnel_method"
+            - "tunnel_host"
+            - "tunnel_port"
+            - "tunnel_user"
+            - "ssh_key"
+            properties:
+              tunnel_method:
+                description: "Connect through a jump server tunnel host using username\
+                  \ and ssh key"
+                type: "string"
+                const: "SSH_KEY_AUTH"
+                order: 0
+              tunnel_host:
+                title: "SSH Tunnel Jump Server Host"
+                description: "Hostname of the jump server host that allows inbound\
+                  \ ssh tunnel."
+                type: "string"
+                order: 1
+              tunnel_port:
+                title: "SSH Connection Port"
+                description: "Port on the proxy/jump server that accepts inbound ssh\
+                  \ connections."
+                type: "integer"
+                minimum: 0
+                maximum: 65536
+                default: 22
+                examples:
+                - "22"
+                order: 2
+              tunnel_user:
+                title: "SSH Login Username"
+                description: "OS-level username for logging into the jump server host."
+                type: "string"
+                order: 3
+              ssh_key:
+                title: "SSH Private Key"
+                description: "OS-level user account ssh key credentials in RSA PEM\
+                  \ format ( created with ssh-keygen -t rsa -m PEM -f myuser_rsa )"
+                type: "string"
+                airbyte_secret: true
+                multiline: true
+                order: 4
+          - title: "Password Authentication"
+            required:
+            - "tunnel_method"
+            - "tunnel_host"
+            - "tunnel_port"
+            - "tunnel_user"
+            - "tunnel_user_password"
+            properties:
+              tunnel_method:
+                description: "Connect through a jump server tunnel host using username\
+                  \ and password authentication"
+                type: "string"
+                const: "SSH_PASSWORD_AUTH"
+                order: 0
+              tunnel_host:
+                title: "SSH Tunnel Jump Server Host"
+                description: "Hostname of the jump server host that allows inbound\
+                  \ ssh tunnel."
+                type: "string"
+                order: 1
+              tunnel_port:
+                title: "SSH Connection Port"
+                description: "Port on the proxy/jump server that accepts inbound ssh\
+                  \ connections."
+                type: "integer"
+                minimum: 0
+                maximum: 65536
+                default: 22
+                examples:
+                - "22"
+                order: 2
+              tunnel_user:
+                title: "SSH Login Username"
+                description: "OS-level username for logging into the jump server host"
+                type: "string"
+                order: 3
+              tunnel_user_password:
+                title: "Password"
+                description: "OS-level password for logging into the jump server host"
+                type: "string"
+                airbyte_secret: true
+                order: 4
+    supportsIncremental: true
+    supportsNormalization: true
+    supportsDBT: false
+    supported_destination_sync_modes:
+    - "overwrite"
+    - "append"
+    - "append_dedup"
 - dockerImage: "airbyte/destination-dynamodb:0.1.0"
   spec:
     documentationUrl: "https://docs.airbyte.io/integrations/destinations/dynamodb"
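To make the ClickHouse seed entry concrete, here is a small illustrative sketch of a connection configuration shaped after the connectionSpecification added above, using the "No Tunnel" variant of tunnel_method. All values are placeholders invented for the example; they are not taken from the connector, its tests, or this patch.

from typing import Any, Dict

# Placeholder values only, shaped after the ClickHouse connectionSpecification above.
clickhouse_destination_config: Dict[str, Any] = {
    "host": "clickhouse.example.com",  # required
    "port": 8123,                      # required; the JDBC port, not the native port
    "database": "airbyte",             # required
    "username": "airbyte_user",        # required
    "password": "change-me",           # optional, marked airbyte_secret in the spec
    "ssl": False,                      # optional, defaults to false
    "tunnel_method": {"tunnel_method": "NO_TUNNEL"},  # "No Tunnel" option of the oneOf
}

if __name__ == "__main__":
    # Sanity check that the spec's required fields are present.
    assert {"host", "port", "database", "username"} <= clickhouse_destination_config.keys()
    print("example config has all required fields")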