diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json index 8daf1cf6cd2e..aac58b6e8ee7 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/079d5540-f236-4294-ba7c-ade8fd918496.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "079d5540-f236-4294-ba7c-ade8fd918496", "name": "BigQuery (denormalized typed struct)", "dockerRepository": "airbyte/destination-bigquery-denormalized", - "dockerImageTag": "0.2.1", + "dockerImageTag": "0.2.2", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery", "icon": "bigquery.svg" } diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json index 490be0c83a79..1f67b317fd09 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/22f6c74f-5699-40ff-833c-4a879ea40133.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "22f6c74f-5699-40ff-833c-4a879ea40133", "name": "BigQuery", "dockerRepository": "airbyte/destination-bigquery", - "dockerImageTag": "0.4.1", + "dockerImageTag": "0.6.1", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/bigquery", "icon": "bigquery.svg" } diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json index b321e5e97a76..0c5a32eba6cf 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_DESTINATION_DEFINITION/424892c4-daac-4491-b35d-c6688ba547ba.json @@ -2,7 +2,7 @@ "destinationDefinitionId": "424892c4-daac-4491-b35d-c6688ba547ba", "name": "Snowflake", "dockerRepository": "airbyte/destination-snowflake", - "dockerImageTag": "0.3.22", + "dockerImageTag": "0.3.23", "documentationUrl": "https://docs.airbyte.io/integrations/destinations/snowflake", "icon": "snowflake.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 9987b85ae1da..13373d49aeac 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -13,13 +13,13 @@ - name: BigQuery destinationDefinitionId: 22f6c74f-5699-40ff-833c-4a879ea40133 dockerRepository: airbyte/destination-bigquery - dockerImageTag: 0.5.1 + dockerImageTag: 0.6.1 documentationUrl: https://docs.airbyte.io/integrations/destinations/bigquery icon: bigquery.svg - name: BigQuery (denormalized typed struct) destinationDefinitionId: 079d5540-f236-4294-ba7c-ade8fd918496 dockerRepository: airbyte/destination-bigquery-denormalized - dockerImageTag: 0.2.1 + dockerImageTag: 0.2.2 documentationUrl: 
https://docs.airbyte.io/integrations/destinations/bigquery icon: bigquery.svg - name: Cassandra @@ -179,7 +179,7 @@ - name: Snowflake destinationDefinitionId: 424892c4-daac-4491-b35d-c6688ba547ba dockerRepository: airbyte/destination-snowflake - dockerImageTag: 0.3.22 + dockerImageTag: 0.3.23 documentationUrl: https://docs.airbyte.io/integrations/destinations/snowflake icon: snowflake.svg - name: MariaDB ColumnStore diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 57ce3b322403..6ce5873b624e 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -176,7 +176,7 @@ supportsDBT: false supported_destination_sync_modes: - "append" -- dockerImage: "airbyte/destination-bigquery:0.5.1" +- dockerImage: "airbyte/destination-bigquery:0.6.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" connectionSpecification: @@ -307,6 +307,16 @@ type: "string" examples: - "data_sync/test" + part_size_mb: + title: "Block Size (MB) for GCS multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ \ It limits the memory usage when writing. Larger values will allow\ \ to upload a bigger files and improve the speed, but consumes\ \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + examples: + - 5 keep_files_in_gcs-bucket: type: "string" description: "This upload method is supposed to temporary store records\ @@ -354,7 +364,7 @@ - "overwrite" - "append" - "append_dedup" -- dockerImage: "airbyte/destination-bigquery-denormalized:0.2.1" +- dockerImage: "airbyte/destination-bigquery-denormalized:0.2.2" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/bigquery" connectionSpecification: @@ -474,6 +484,18 @@ type: "string" examples: - "data_sync/test" + part_size_mb: + title: "Block Size (MB) for GCS multipart upload" + description: "This is the size of a \"Part\" being buffered in memory.\ \ It limits the memory usage when writing. Larger values will allow\ \ to upload a bigger files and improve the speed, but consumes\ \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." + type: "integer" + default: 5 + minimum: 5 + maximum: 525 + examples: + - 5 keep_files_in_gcs-bucket: type: "string" description: "This upload method is supposed to temporary store records\ @@ -3549,6 +3571,8 @@ \ more memory. Allowed values: min=5MB, max=525MB Default: 5MB." type: "integer" default: 5 + minimum: 5 + maximum: 525 examples: - 5 - title: "CSV: Comma-Separated Values" @@ -3722,7 +3746,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-snowflake:0.3.22" +- dockerImage: "airbyte/destination-snowflake:0.3.23" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" connectionSpecification: @@ -3895,6 +3919,21 @@ title: "S3 Access Key" airbyte_secret: true order: 4 + part_size: + type: "integer" + default: 5 + examples: + - 5 + description: "Optional. Increase this if syncing tables larger than\ \ 100GB. Only relevant for COPY. Files are streamed to S3 in parts.\ \ This determines the size of each part, in MBs. As S3 has a limit\ \ of 10,000 parts per file, part size affects the table size. 
This\ + \ is 10MB by default, resulting in a default limit of 100GB tables.\ + \ Note, a larger part size will result in larger memory requirements.\ + \ A rule of thumb is to multiply the part size by 10 to get the\ + \ memory requirement. Modify this with care." + title: "Stream Part Size" + order: 5 - title: "GCS Staging" additionalProperties: false description: "Writes large batches of records to a file, uploads the file\ diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile index ca6f9213a062..3d08312ff01c 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.2.1 +LABEL io.airbyte.version=0.2.2 LABEL io.airbyte.name=airbyte/destination-bigquery-denormalized diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json index 5fcd57490590..829f6c3199a0 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/main/resources/spec.json @@ -118,6 +118,15 @@ "type": "string", "examples": ["data_sync/test"] }, + "part_size_mb": { + "title": "Block Size (MB) for GCS multipart upload", + "description": "This is the size of a \"Part\" being buffered in memory. It limits the memory usage when writing. Larger values will allow to upload a bigger files and improve the speed, but consumes more memory. Allowed values: min=5MB, max=525MB Default: 5MB.", + "type": "integer", + "default": 5, + "minimum": 5, + "maximum": 525, + "examples": [5] + }, "keep_files_in_gcs-bucket": { "type": "string", "description": "This upload method is supposed to temporary store records in GCS bucket. 
What do you want to do with data in GCS bucket when migration has finished?", diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGscDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGscDestinationAcceptanceTest.java index 34261da17837..ffaddde9ef33 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGscDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGscDestinationAcceptanceTest.java @@ -202,6 +202,7 @@ protected void setup(final TestDestinationEnv testEnv) throws Exception { .put(BigQueryConsts.METHOD, BigQueryConsts.GCS_STAGING) .put(BigQueryConsts.GCS_BUCKET_NAME, gcsConfigFromSecretFile.get(BigQueryConsts.GCS_BUCKET_NAME)) .put(BigQueryConsts.GCS_BUCKET_PATH, gcsConfigFromSecretFile.get(BigQueryConsts.GCS_BUCKET_PATH).asText() + System.currentTimeMillis()) + .put(BigQueryConsts.PART_SIZE, gcsConfigFromSecretFile.get(BigQueryConsts.PART_SIZE)) .put(BigQueryConsts.CREDENTIAL, credential) .build()); diff --git a/airbyte-integrations/connectors/destination-bigquery/Dockerfile b/airbyte-integrations/connectors/destination-bigquery/Dockerfile index 7e59e7c0f2e5..d199d0e47652 100644 --- a/airbyte-integrations/connectors/destination-bigquery/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery/Dockerfile @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.6.0-rc1 +LABEL io.airbyte.version=0.6.1 LABEL io.airbyte.name=airbyte/destination-bigquery diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryConsts.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryConsts.java index 01ebd59a24b9..12510288c81e 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryConsts.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryConsts.java @@ -23,6 +23,7 @@ public class BigQueryConsts { public static final String FORMAT = "format"; public static final String KEEP_GCS_FILES = "keep_files_in_gcs-bucket"; public static final String KEEP_GCS_FILES_VAL = "Keep all tmp files in GCS"; + public static final String PART_SIZE = "part_size_mb"; // tests public static final String BIGQUERY_BASIC_CONFIG = "basic_bigquery_config"; diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java index c4dbde0a84e9..fd19e8861e19 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java +++ 
b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java @@ -135,7 +135,8 @@ public static JsonNode getGcsJsonNodeConfig(final JsonNode config) { .put(BigQueryConsts.CREDENTIAL, loadingMethod.get(BigQueryConsts.CREDENTIAL)) .put(BigQueryConsts.FORMAT, Jsons.deserialize("{\n" + " \"format_type\": \"CSV\",\n" - + " \"flattening\": \"No flattening\"\n" + + " \"flattening\": \"No flattening\",\n" + + " \"part_size_mb\": \"" + loadingMethod.get(BigQueryConsts.PART_SIZE) + "\"\n" + "}")) .build()); @@ -152,7 +153,8 @@ public static JsonNode getGcsAvroJsonNodeConfig(final JsonNode config) { .put(BigQueryConsts.CREDENTIAL, loadingMethod.get(BigQueryConsts.CREDENTIAL)) .put(BigQueryConsts.FORMAT, Jsons.deserialize("{\n" + " \"format_type\": \"AVRO\",\n" - + " \"flattening\": \"No flattening\"\n" + + " \"flattening\": \"No flattening\",\n" + + " \"part_size_mb\": \"" + loadingMethod.get(BigQueryConsts.PART_SIZE) + "\"\n" + "}")) .build()); diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json index a92dc705469f..879d4c12c639 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/resources/spec.json @@ -125,6 +125,15 @@ "type": "string", "examples": ["data_sync/test"] }, + "part_size_mb": { + "title": "Block Size (MB) for GCS multipart upload", + "description": "This is the size of a \"Part\" being buffered in memory. It limits the memory usage when writing. Larger values will allow to upload a bigger files and improve the speed, but consumes more memory. Allowed values: min=5MB, max=525MB Default: 5MB.", + "type": "integer", + "default": 5, + "minimum": 5, + "maximum": 525, + "examples": [5] + }, "keep_files_in_gcs-bucket": { "type": "string", "description": "This upload method is supposed to temporary store records in GCS bucket. 
What do you want to do with data in GCS bucket when migration has finished?", diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryGcsDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryGcsDestinationAcceptanceTest.java index 58d2d76e1200..2b966877e6a0 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryGcsDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryGcsDestinationAcceptanceTest.java @@ -45,6 +45,7 @@ protected void setup(final TestDestinationEnv testEnv) throws Exception { .put(BigQueryConsts.METHOD, BigQueryConsts.GCS_STAGING) .put(BigQueryConsts.GCS_BUCKET_NAME, gcsConfigFromSecretFile.get(BigQueryConsts.GCS_BUCKET_NAME)) .put(BigQueryConsts.GCS_BUCKET_PATH, gcsConfigFromSecretFile.get(BigQueryConsts.GCS_BUCKET_PATH).asText() + System.currentTimeMillis()) + .put(BigQueryConsts.PART_SIZE, gcsConfigFromSecretFile.get(BigQueryConsts.PART_SIZE)) .put(BigQueryConsts.CREDENTIAL, credential) .build()); diff --git a/airbyte-integrations/connectors/destination-snowflake/Dockerfile b/airbyte-integrations/connectors/destination-snowflake/Dockerfile index 224372736d04..d14960abccd2 100644 --- a/airbyte-integrations/connectors/destination-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/destination-snowflake/Dockerfile @@ -18,5 +18,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -LABEL io.airbyte.version=0.3.22 +LABEL io.airbyte.version=0.3.23 LABEL io.airbyte.name=airbyte/destination-snowflake diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json index deaedfe55676..80580b6bedb3 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json @@ -170,6 +170,16 @@ "title": "S3 Access Key", "airbyte_secret": true, "order": 4 + }, + "part_size": { + "type": "integer", + "default": 5, + "examples": [ + 5 + ], + "description": "Optional. Increase this if syncing tables larger than 100GB. Only relevant for COPY. Files are streamed to S3 in parts. This determines the size of each part, in MBs. As S3 has a limit of 10,000 parts per file, part size affects the table size. This is 10MB by default, resulting in a default limit of 100GB tables. Note, a larger part size will result in larger memory requirements. A rule of thumb is to multiply the part size by 10 to get the memory requirement. Modify this with care.", + "title": "Stream Part Size", + "order": 5 } } }, diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md index 95ac4cb43ac9..eb1b08ce81fe 100644 --- a/docs/integrations/destinations/bigquery.md +++ b/docs/integrations/destinations/bigquery.md @@ -109,6 +109,7 @@ There are 2 available options to upload data to BigQuery `Standard` and `GCS Sta This is the recommended configuration for uploading data to BigQuery. 
It works by first uploading all the data to a [GCS](https://cloud.google.com/storage) bucket, then ingesting the data to BigQuery. To configure GCS Staging, you'll need the following parameters: * **GCS Bucket Name** * **GCS Bucket Path** +* **Block Size (MB) for GCS multipart upload** * **GCS Bucket Keep files after migration** * See [this](https://cloud.google.com/storage/docs/creating-buckets) for instructions on how to create a GCS bucket. * **HMAC Key Access ID** @@ -145,6 +146,7 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into | Version | Date | Pull Request | Subject | |:--------| :--- | :--- | :--- | +| 0.6.1 | 2021-12-22 | [\#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration to UI for GCS staging | | 0.6.0 | 2021-12-17 | [\#8788](https://github.com/airbytehq/airbyte/issues/8788) | BigQuery/BiqQuery denorm Destinations : Add possibility to use different types of GCS files | | 0.5.1 | 2021-12-16 | [\#8816](https://github.com/airbytehq/airbyte/issues/8816) | Update dataset locations | | 0.5.0 | 2021-10-26 | [\#7240](https://github.com/airbytehq/airbyte/issues/7240) | Output partitioned/clustered tables | @@ -161,6 +163,7 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into | Version | Date | Pull Request | Subject | |:--------| :--- | :--- | :--- | +| 0.2.2 | 2021-12-22 | [\#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration to UI for GCS staging | | 0.2.1 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | | 0.2.0 | 2021-12-17 | [\#8788](https://github.com/airbytehq/airbyte/pull/8788) | BigQuery/BiqQuery denorm Destinations : Add possibility to use different types of GCS files | | 0.1.11 | 2021-12-16 | [\#8816](https://github.com/airbytehq/airbyte/issues/8816) | Update dataset locations | diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index 917db1d691e1..a47603701c74 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -196,6 +196,7 @@ Finally, you need to add read/write permissions to your bucket with that email. | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | +| 0.3.23 | 2021-12-22 | [#9039](https://github.com/airbytehq/airbyte/pull/9039) | Added part_size configuration in UI for S3 loading method | | 0.3.22 | 2021-12-21 | [#9006](https://github.com/airbytehq/airbyte/pull/9006) | Updated jdbc schema naming to follow Snowflake Naming Conventions | | 0.3.21 | 2021-12-15 | [#8781](https://github.com/airbytehq/airbyte/pull/8781) | Updated check method to verify permissions to create/drop stage for internal staging; compatibility fix for Java 17 | | 0.3.20 | 2021-12-10 | [#8562](https://github.com/airbytehq/airbyte/pull/8562) | Moving classes around for better dependency management; compatibility fix for Java 17 |
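The `part_size_mb` property added to both BigQuery spec.json files is a plain integer with `default: 5`, `minimum: 5`, and `maximum: 525`. A minimal sketch, assuming a hypothetical helper name and plain Jackson parsing, of how a connector could read and clamp such a value from its `loading_method` config node; this is illustrative only, not the connectors' actual code:

```java
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class PartSizeConfigSketch {

  // Bounds mirror the spec.json additions: minimum 5 MB, maximum 525 MB, default 5 MB.
  private static final int DEFAULT_PART_SIZE_MB = 5;
  private static final int MIN_PART_SIZE_MB = 5;
  private static final int MAX_PART_SIZE_MB = 525;

  // Hypothetical helper: resolve part_size_mb from the loading_method config node.
  static int resolvePartSizeMb(final JsonNode loadingMethod) {
    if (loadingMethod == null || !loadingMethod.hasNonNull("part_size_mb")) {
      return DEFAULT_PART_SIZE_MB;
    }
    final int requested = loadingMethod.get("part_size_mb").asInt(DEFAULT_PART_SIZE_MB);
    // Clamp into the range advertised by the spec instead of failing the sync.
    return Math.max(MIN_PART_SIZE_MB, Math.min(MAX_PART_SIZE_MB, requested));
  }

  public static void main(String[] args) throws Exception {
    final JsonNode loadingMethod = new ObjectMapper()
        .readTree("{\"method\":\"GCS Staging\",\"part_size_mb\":25}");
    System.out.println(resolvePartSizeMb(loadingMethod)); // prints 25
  }
}
```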
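The Snowflake `part_size` description encodes two rules of thumb: S3 allows at most 10,000 parts per object, so the part size caps the size of a staged file, and memory use is roughly ten times the part size. Note that the description's "10MB by default, resulting in a default limit of 100GB tables" does not match the spec's `default: 5`; with 5 MB parts the same arithmetic gives roughly 50 GB. A small worked example of that arithmetic (class name and output formatting are made up for illustration):

```java
public class PartSizeBudgetSketch {

  // S3 multipart uploads allow at most 10,000 parts per object (per the spec description).
  private static final long MAX_PARTS_PER_FILE = 10_000L;

  public static void main(String[] args) {
    final long partSizeMb = 5;                                   // spec default
    final long maxFileSizeMb = partSizeMb * MAX_PARTS_PER_FILE;  // 50,000 MB, about 50 GB
    final long roughMemoryMb = partSizeMb * 10;                  // rule of thumb from the description

    System.out.printf("part size      : %d MB%n", partSizeMb);
    System.out.printf("max staged file: ~%d GB%n", maxFileSizeMb / 1_000);
    System.out.printf("approx. memory : ~%d MB%n", roughMemoryMb);
  }
}
```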
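In the diff, `BigQueryUtils.getGcsJsonNodeConfig` and `getGcsAvroJsonNodeConfig` splice `part_size_mb` into a JSON string by concatenation, which embeds the value as a quoted string and produces the text "null" when the option is absent from `loading_method`. A hedged alternative sketch that builds the same `format` node with Jackson's `ObjectNode`; the class and method names here are assumptions for illustration, not Airbyte code:

```java
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;

public class GcsFormatNodeSketch {

  private static final ObjectMapper MAPPER = new ObjectMapper();

  // Builds the "format" sub-config assembled via string concatenation in the diff;
  // ObjectNode keeps part_size_mb numeric and simply omits it when not configured.
  static JsonNode buildCsvFormat(final JsonNode loadingMethod) {
    final ObjectNode format = MAPPER.createObjectNode();
    format.put("format_type", "CSV");
    format.put("flattening", "No flattening");
    if (loadingMethod.hasNonNull("part_size_mb")) {
      format.put("part_size_mb", loadingMethod.get("part_size_mb").asInt());
    }
    return format;
  }

  public static void main(String[] args) throws Exception {
    final JsonNode loadingMethod = MAPPER.readTree("{\"part_size_mb\": 5}");
    // prints {"format_type":"CSV","flattening":"No flattening","part_size_mb":5}
    System.out.println(buildCsvFormat(loadingMethod));
  }
}
```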