🎉 MySQL destination: normalization #4163

Merged
merged 46 commits into master from liren/mysql-destination-normalization on Jul 4, 2021
Changes from 17 commits

Commits
55752a9
Add mysql dbt package
tuliren Jun 16, 2021
049cfa2
Add mysql normalization support in java
tuliren Jun 16, 2021
9165e80
Add mysql normalization support in python
tuliren Jun 16, 2021
94c10be
Fix unit tests
tuliren Jun 16, 2021
173ad67
Update readme
tuliren Jun 16, 2021
3d9cbf7
Setup mysql container in integration test
tuliren Jun 18, 2021
9e8e939
Add macros
tuliren Jun 18, 2021
47945bf
Depend on dbt-mysql from git repo
tuliren Jun 18, 2021
7f04076
Remove mysql limitation test
tuliren Jun 18, 2021
b9e3066
Test normalization
tuliren Jun 18, 2021
f13a95b
Revert protocol format change
tuliren Jun 18, 2021
dee3479
Fix mysel json macros
tuliren Jun 18, 2021
abe8eb7
Fix two more macros
tuliren Jun 22, 2021
a6acc91
Fix table name length
tuliren Jun 22, 2021
bbc3905
Fix array macro
tuliren Jun 23, 2021
51fcb2a
Fix equality test macro
tuliren Jun 23, 2021
5cbe550
Update replace-identifiers
tuliren Jun 23, 2021
2396590
Merge branch 'master' into liren/mysql-destination-normalization
tuliren Jun 25, 2021
b7fe9cc
Add more identifiers to replace
tuliren Jun 27, 2021
4a50b25
Fix unnest macro
tuliren Jun 27, 2021
5c6642a
Fix equality macro
tuliren Jun 28, 2021
b71bd6d
Check in mysql test output
tuliren Jun 28, 2021
5dc1cbb
Merge branch 'master' into liren/mysql-destination-normalization
tuliren Jun 28, 2021
b681fd0
Update column limit test for mysql
tuliren Jun 28, 2021
02a8670
Escape parentheses
tuliren Jun 28, 2021
ffa5789
Remove unnecessary mysql test
tuliren Jun 28, 2021
0cf0a17
Remove mysql output for easier code review
tuliren Jun 28, 2021
411425f
Remove unnecessary mysql test
tuliren Jun 28, 2021
c43d06e
Remove parentheses
tuliren Jun 28, 2021
091f137
Update dependencies
tuliren Jun 28, 2021
78e030c
Skip mysql instead of manually write out types
tuliren Jun 28, 2021
e89122d
Bump version
tuliren Jul 1, 2021
ebf5cc4
Merge branch 'master' into liren/mysql-destination-normalization
tuliren Jul 1, 2021
f94b1c8
Check in unit test for mysql name transformer
tuliren Jul 1, 2021
9310950
Fix type conversion
tuliren Jul 1, 2021
1c68996
Use json_value to extract scalar json fields
tuliren Jul 1, 2021
4e2a0dd
Move dbt-mysql to Dockerfile (#4459)
ChristopheDuong Jul 1, 2021
5c49cc3
Format code
tuliren Jul 1, 2021
46cab59
Check in mysql dbt output
tuliren Jul 1, 2021
aefc479
Remove unnecessary quote
tuliren Jul 1, 2021
84e6e1c
Update mysql equality test to match 0.19.0
tuliren Jul 1, 2021
0202369
Check in schema_test update
tuliren Jul 1, 2021
967e1d8
Update readme
tuliren Jul 4, 2021
73a1d25
Merge branch 'master' into liren/mysql-destination-normalization
tuliren Jul 4, 2021
f972e4b
Bump base normalization version
tuliren Jul 4, 2021
289ba24
Update document
tuliren Jul 4, 2021
1 change: 1 addition & 0 deletions airbyte-integrations/bases/base-normalization/README.md
@@ -54,6 +54,7 @@ allowed characters, if quotes are needed or not, and the length limitations:
- [postgres](../../../docs/integrations/destinations/postgres.md)
- [redshift](../../../docs/integrations/destinations/redshift.md)
- [snowflake](../../../docs/integrations/destinations/snowflake.md)
- [mysql](../../../docs/integrations/destinations/mysql.md)

Rules about truncations, for example for both of these strings which are too long for the postgres 64 limit:
- `Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iiii`
1 change: 1 addition & 0 deletions airbyte-integrations/bases/base-normalization/build.gradle
@@ -25,6 +25,7 @@ task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs
dependsOn ':airbyte-integrations:connectors:destination-postgres:airbyteDocker'
dependsOn ':airbyte-integrations:connectors:destination-redshift:airbyteDocker'
dependsOn ':airbyte-integrations:connectors:destination-snowflake:airbyteDocker'
dependsOn ':airbyte-integrations:connectors:destination-mysql:airbyteDocker'
}

integrationTest.dependsOn("customIntegrationTestPython")
@@ -28,6 +28,10 @@
) as _airbyte_nested_data
{%- endmacro %}

{% macro mysql__cross_join_unnest(stream_name, array_col) -%}
left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{% macro redshift__cross_join_unnest(stream_name, array_col) -%}
left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}
@@ -97,3 +101,37 @@ joined as (
where numbers.generated_number <= json_array_length({{ column_col }}, true)
)
{%- endmacro %}

{% macro mysql__unnest_cte(table_name, stream_name, column_col) -%}
{%- if not execute -%}
{{ return('') }}
{% endif %}

{%- call statement('max_json_array_length', fetch_result=True) -%}
with max_value as (
select max(json_length({{ column_col }})) as max_number_of_items
from {{ ref(table_name) }}
)
select
case when max_number_of_items is not null and max_number_of_items > 1
then max_number_of_items
else 1 end as max_number_of_items
from max_value
{%- endcall -%}

{%- set max_length = load_result('max_json_array_length') -%}
with numbers as (
{{ dbt_utils.generate_series(max_length["data"][0][0]) }}
),
joined as (
select
_airbyte_{{ stream_name }}_hashid as _airbyte_hashid,
{# -- json_extract(column_col, '$[i]') as _airbyte_nested_data #}
json_extract({{ column_col }}, concat("$[", numbers.generated_number - 1, "]")) as _airbyte_nested_data
from {{ ref(table_name) }}
cross join numbers
-- only generate the number of records in the cross join that corresponds
-- to the number of items in {{ table_name }}.{{ column_col }}
where numbers.generated_number <= json_length({{ column_col }})
)
{%- endmacro %}
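For orientation: this macro queries the warehouse at compile time for the longest array in the column, builds a numbers CTE of that size, and cross-joins it against the table to index into each array; mysql__cross_join_unnest above then simply left-joins the expanded rows back to the parent stream. A rough sketch of the rendered SQL for a hypothetical stream children with a JSON array column data, assuming the longest array held 3 items (all names here are illustrative, not from this PR):

    with numbers as (
        -- dbt_utils.generate_series(3)
        select 1 as generated_number union all select 2 union all select 3
    ),
    joined as (
        select
            _airbyte_children_hashid as _airbyte_hashid,
            json_extract(data, concat("$[", numbers.generated_number - 1, "]")) as _airbyte_nested_data
        from test_normalization.children
        cross join numbers
        -- keep only as many rows per record as that record's array has items
        where numbers.generated_number <= json_length(data)
    )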
@@ -19,3 +19,11 @@
{% macro snowflake__type_json() %}
variant
{% endmacro %}

{%- macro mysql__type_json() -%}
json
{%- endmacro -%}

{%- macro mysql__type_string() -%}
char
{%- endmacro -%}
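Note that these dbt type macros resolve to CAST targets, and MySQL's CAST() accepts only a fixed set of target types (CHAR, SIGNED, JSON, DECIMAL, ...) — presumably why char is used here rather than varchar. A minimal sketch of the SQL this yields (table and column names invented for illustration):

    select
        cast(_airbyte_data as json) as raw_json,
        cast(name as char) as name_str
    from some_raw_table;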
@@ -0,0 +1,3 @@
{% macro mysql__except() %}
{% do exceptions.warn("MySQL does not support EXCEPT operator") %}
{% endmacro %}
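Since this macro emits a warning and no SQL, anything that relied on EXCEPT needs a MySQL-specific rewrite — which is what the custom equality test later in this diff does with NOT IN. An illustrative sketch (made-up table names; MySQL itself only gained EXCEPT much later, in 8.0.31):

    -- standard SQL, rejected by the MySQL versions targeted here:
    -- select id from table_a except select id from table_b;
    -- MySQL-compatible equivalent:
    select id from table_a where (id) not in (select id from table_b);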
@@ -2,8 +2,9 @@
Adapter Macros for the following functions:
- Bigquery: JSON_EXTRACT(json_string_expr, json_path_format) -> https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions
- Snowflake: JSON_EXTRACT_PATH_TEXT( <column_identifier> , '<path_name>' ) -> https://docs.snowflake.com/en/sql-reference/functions/json_extract_path_text.html
- Redshift: json_extract_path_text('json_string', 'path_elem' [,'path_elem'[, ] ] [, null_if_invalid ] ) -> https://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html
- Redshift: json_extract_path_text('json_string', 'path_elem' [,'path_elem'[, ...] ] [, null_if_invalid ] ) -> https://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html
- Postgres: json_extract_path_text(<from_json>, 'path' [, 'path' [, ...]]) -> https://www.postgresql.org/docs/12/functions-json.html
- MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html
#}

{# format_json_path -------------------------------------------------- #}
@@ -23,6 +24,11 @@
{{ "'" ~ json_path_list|join("','") ~ "'" }}
{%- endmacro %}

{% macro mysql__format_json_path(json_path_list) -%}
{# -- '$."x"."y"."z"' #}
{{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }}
{%- endmacro %}
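Concretely, this renders the JSON path as a single SQL string literal, e.g. format_json_path(["answer"]) becomes '$."answer"'. One caveat: for a multi-element list the join(".") produces one quoted key such as '$."x.y.z"' rather than the nested '$."x"."y"."z"' shown in the comment; normalization appears to pass single, already-flattened field names, so in practice the two coincide. For example:

    -- json_extract(_airbyte_data, format_json_path(["answer"])) renders as:
    json_extract(_airbyte_data, '$."answer"')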

{% macro redshift__format_json_path(json_path_list) -%}
{{ "'" ~ json_path_list|join("','") ~ "'" }}
{%- endmacro %}
@@ -49,6 +55,10 @@
jsonb_extract_path({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro mysql__json_extract(json_column, json_path_list) -%}
json_extract({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro redshift__json_extract(json_column, json_path_list) -%}
case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{%- endmacro %}
@@ -75,6 +85,10 @@
jsonb_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro mysql__json_extract_scalar(json_column, json_path_list) -%}
json_extract({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}
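A subtlety worth flagging: MySQL's json_extract returns a JSON fragment rather than an SQL scalar, so extracted strings keep their surrounding double quotes — not ideal for a scalar macro. A later commit in this PR ("Use json_value to extract scalar json fields") switches to JSON_VALUE, which unquotes scalars. Roughly, with an illustrative input:

    -- given _airbyte_data = '{"name": "foo"}'
    select json_extract(_airbyte_data, '$."name"');  -- returns '"foo"' (JSON fragment, quotes kept)
    select json_value(_airbyte_data, '$."name"');    -- returns 'foo' (scalar; MySQL 8.0.21+)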

{% macro redshift__json_extract_scalar(json_column, json_path_list) -%}
case when json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) != '' then json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true) end
{%- endmacro %}
@@ -101,6 +115,10 @@
jsonb_extract_path({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro mysql__json_extract_array(json_column, json_path_list) -%}
json_extract({{ json_column }}, {{ format_json_path(json_path_list) }})
{%- endmacro %}

{% macro redshift__json_extract_array(json_column, json_path_list) -%}
json_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}, true)
{%- endmacro %}
@@ -38,3 +38,8 @@
{% macro redshift__cast_to_boolean(field) -%}
cast(decode({{ field }}, 'true', '1', 'false', '0')::integer as boolean)
{%- endmacro %}

{# cast_to_bigint ------------------------------------------------- #}
{% macro mysql__type_bigint() %}
signed
{% endmacro %}
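As with char above, signed is a CAST target: MySQL has no CAST(... AS BIGINT) and spells 64-bit integer casts as CAST(expr AS SIGNED). For instance (hypothetical column):

    select cast(json_extract(_airbyte_data, '$."id"') as signed) as id
    from some_raw_table;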
@@ -0,0 +1,58 @@
{#
-- Adapted from https://github.com/fishtown-analytics/dbt-utils/blob/master/macros/schema_tests/equality.sql
-- This is needed because MySQL does not support the EXCEPT operator!
#}

{% macro mysql__test_equality(model, compare_model, compare_columns=None) %}

{% set set_diff %}
count(*) + coalesce(abs(
sum(case when which_diff = 'a_minus_b' then 1 else 0 end) -
sum(case when which_diff = 'b_minus_a' then 1 else 0 end)
), 0)
{% endset %}

{{ config(fail_calc = set_diff) }}

{%- if not execute -%}
{{ return('') }}
{% endif %}

{%- do dbt_utils._is_relation(model, 'test_equality') -%}

{%- if not compare_columns -%}
{%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
{%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}
{%- endif -%}

{% set compare_cols_csv = compare_columns | join(', ') %}

with a as (
select * from {{ model }}
),

b as (
select * from {{ compare_model }}
),

a_minus_b as (
select {{ compare_cols_csv }} from a
where ({{ compare_cols_csv }}) not in
(select {{ compare_cols_csv }} from b)
),

b_minus_a as (
select {{ compare_cols_csv }} from b
where ({{ compare_cols_csv }}) not in
(select {{ compare_cols_csv }} from a)
),

unioned as (
select 'a_minus_b' as which_diff from a_minus_b
union all
select 'b_minus_a' as which_diff from b_minus_a
)

select * from unioned

{% endmacro %}
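To see how the fail_calc above behaves: if model has one row that compare_model lacks, a_minus_b yields a single 'a_minus_b' row and b_minus_a none, so count(*) = 1 and abs(1 - 0) = 1 — fail_calc is 2 and the test fails; when the relations match, unioned is empty, both terms evaluate to 0, and the test passes. The tuple NOT IN is the stand-in for EXCEPT; for two hypothetical models compared on (id, currency) it renders along these lines:

    select id, currency from a
    where (id, currency) not in (select id, currency from b)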
@@ -42,12 +42,17 @@
class DbtIntegrationTest(object):
def __init__(self):
self.target_schema = "test_normalization"
self.container_name = "test_normalization_db_" + self.random_string(3)
self.container_prefix = f"test_normalization_db_{self.random_string(3)}"
self.db_names = ["postgres", "mysql"]

@staticmethod
def random_string(length: int) -> str:
return "".join(random.choice(string.ascii_lowercase) for i in range(length))

def setup_db(self):
self.setup_postgres_db()
self.setup_mysql_db()

def setup_postgres_db(self):
print("Starting localhost postgres container for tests")
port = self.find_free_port()
@@ -64,7 +69,7 @@ def setup_postgres_db(self):
"run",
"--rm",
"--name",
f"{self.container_name}",
f"{self.container_prefix}_postgres",
"-e",
f"POSTGRES_USER={config['username']}",
"-e",
@@ -81,6 +86,42 @@
with open("../secrets/postgres.json", "w") as fh:
fh.write(json.dumps(config))

def setup_mysql_db(self):
print("Starting localhost mysql container for tests")
port = self.find_free_port()
config = {
"type": "mysql",
"host": "localhost",
"port": port,
"database": self.target_schema,
"username": "root",
"password": "",
}
commands = [
"docker",
"run",
"--rm",
"--name",
f"{self.container_prefix}_mysql",
"-e",
"MYSQL_ALLOW_EMPTY_PASSWORD=yes",
"-e",
"MYSQL_INITDB_SKIP_TZINFO=yes",
"-e",
f"MYSQL_DATABASE={config['database']}",
"-p",
f"{config['port']}:3306",
"-d",
"mysql",
]
print("Executing: ", " ".join(commands))
subprocess.call(commands)

if not os.path.exists("../secrets"):
os.makedirs("../secrets")
with open("../secrets/mysql.json", "w") as fh:
fh.write(json.dumps(config))

@staticmethod
def find_free_port():
"""
@@ -92,12 +133,13 @@ def find_free_port():
s.close()
return addr[1]

def tear_down_postgres_db(self):
print("Stopping localhost postgres container for tests")
try:
subprocess.call(["docker", "kill", f"{self.container_name}"])
except Exception as e:
print(f"WARN: Exception while shutting down postgres db: {e}")
def tear_down_db(self):
for db_name in self.db_names:
print(f"Stopping localhost {db_name} container for tests")
try:
subprocess.call(["docker", "kill", f"{self.container_prefix}_{db_name}"])
except Exception as e:
print(f"WARN: Exception while shutting down {db_name}: {e}")

@staticmethod
def change_current_test_dir(request):
@@ -22,5 +22,23 @@
{ "#quote: true in postgres": "quote: true" }
],
"snowflake": [{ "HKD@SPÉÇIÄL & CHARACTERS": "HKD@spéçiäl & characters" }],
"redshift": []
"redshift": [],
"mysql": [
{
"nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data": "nested_stream_with_co__ion_double_array_data"
},
{
"nested_stream_with_complex_columns_resulting_into_long_names_partition_data": "nested_stream_with_co___names_partition_data"
},
{
"nested_stream_with_complex_columns_resulting_into_long_names_partition": "nested_stream_with_co___long_names_partition"
},
{
"'nested_stream_with_complex_columns_resulting_into_long_names'": "'nested_stream_with_co__lting_into_long_names'"
},
{
"'non_nested_stream_without_namespace_resulting_into_long_names'": "'non_nested_stream_wit__lting_into_long_names'"
},
{ "#quote: true in mysql": "quote: true" }
]
}
@@ -4,11 +4,11 @@ models:
- name: exchange_rate
tests:
- dbt_utils.equality:
description: check_streams_are_equal
In this integration test, we are sending the same records to both streams
exchange_rate and dedup_exchange_rate.
The SCD table of dedup_exchange_rate in append_dedup mode should therefore mirror
the final table with append or overwrite mode from exchange_rate.
# description: check_streams_are_equal
# In this integration test, we are sending the same records to both streams
# exchange_rate and dedup_exchange_rate.
# The SCD table of dedup_exchange_rate in append_dedup mode should therefore mirror
# the final table with append or overwrite mode from exchange_rate.
compare_model: ref('dedup_exchange_rate_scd')
compare_columns:
- id
@@ -42,10 +42,10 @@
@pytest.fixture(scope="module", autouse=True)
def before_all_tests(request):
dbt_test_utils.change_current_test_dir(request)
dbt_test_utils.setup_postgres_db()
dbt_test_utils.setup_db()
os.environ["PATH"] = os.path.abspath("../.venv/bin/") + ":" + os.environ["PATH"]
yield
dbt_test_utils.tear_down_postgres_db()
dbt_test_utils.tear_down_db()
for folder in temporary_folders:
print(f"Deleting temporary test folder {folder}")
shutil.rmtree(folder, ignore_errors=True)
@@ -81,6 +81,7 @@ def test_destination_supported_limits(integration_type: DestinationType, column_
"Operation failed because soft limit on objects of type 'Column' per table was exceeded.",
),
("Redshift", 1665, "target lists can have at most 1664 entries"),
# MySQL allows up to 4096 columns, so this limit is not worth testing
],
)
def test_destination_failure_over_limits(integration_type: str, column_count: int, expected_exception_message: str, setup_test_path):
@@ -47,10 +47,10 @@
@pytest.fixture(scope="module", autouse=True)
def before_all_tests(request):
dbt_test_utils.change_current_test_dir(request)
dbt_test_utils.setup_postgres_db()
dbt_test_utils.setup_db()
os.environ["PATH"] = os.path.abspath("../.venv/bin/") + ":" + os.environ["PATH"]
yield
dbt_test_utils.tear_down_postgres_db()
dbt_test_utils.tear_down_db()
for folder in temporary_folders:
print(f"Deleting temporary test folder {folder}")
shutil.rmtree(folder, ignore_errors=True)
@@ -75,7 +75,6 @@ def setup_test_path(request):
),
)
@pytest.mark.parametrize("destination_type", list(DestinationType))
# @pytest.mark.parametrize("destination_type", [DestinationType.POSTGRES])
def test_normalization(destination_type: DestinationType, test_resource_name: str, setup_test_path):
print("Testing normalization")
integration_type = destination_type.value
@@ -31,6 +31,7 @@ class DestinationType(Enum):
POSTGRES = "postgres"
REDSHIFT = "redshift"
SNOWFLAKE = "snowflake"
MYSQL = "mysql"

@classmethod
def from_string(cls, string_value: str) -> "DestinationType":