From 3eb3367cc3eaf86ef6aa7eef11948653b0356b44 Mon Sep 17 00:00:00 2001 From: Serhii Chvaliuk Date: Thu, 17 Feb 2022 16:52:20 +0200 Subject: [PATCH 01/14] =?UTF-8?q?=F0=9F=90=9B=20Source=20Quickbooks:=20upd?= =?UTF-8?q?ate=20label=20`Quickbooks`=20->=20`QuickBooks`=20(#10346)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Quickbooks -> QuickBooks Signed-off-by: Sergey Chvalyuk --- .../init/src/main/resources/seed/source_definitions.yaml | 4 ++-- .../init/src/main/resources/seed/source_specs.yaml | 6 +++--- airbyte-integrations/builds.md | 2 +- .../connectors/source-quickbooks-singer/Dockerfile | 2 +- .../connectors/source-quickbooks-singer/README.md | 4 ++-- .../connectors/source-quickbooks-singer/setup.py | 2 +- .../source_quickbooks_singer/spec.json | 4 ++-- docs/SUMMARY.md | 2 +- docs/integrations/README.md | 2 +- docs/integrations/sources/quickbooks.md | 7 ++++--- docs/project-overview/changelog/connectors.md | 4 ++-- 11 files changed, 20 insertions(+), 19 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 94acc51aa9fd..b4827791603a 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -596,10 +596,10 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/qualaroo icon: qualaroo.svg sourceType: api -- name: Quickbooks +- name: QuickBooks sourceDefinitionId: 29b409d9-30a5-4cc8-ad50-886eb846fea3 dockerRepository: airbyte/source-quickbooks-singer - dockerImageTag: 0.1.4 + dockerImageTag: 0.1.5 documentationUrl: https://docs.airbyte.io/integrations/sources/quickbooks icon: qb.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 417f3a2d6c38..8ac57614d197 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -6266,12 +6266,12 @@ oauthFlowOutputParameters: - - "token" - - "key" -- dockerImage: "airbyte/source-quickbooks-singer:0.1.4" +- dockerImage: "airbyte/source-quickbooks-singer:0.1.5" spec: - documentationUrl: "https://docsurl.com" + documentationUrl: "https://docs.airbyte.com/integrations/sources/quickbooks" connectionSpecification: $schema: "http://json-schema.org/draft-07/schema#" - title: "Source Quickbooks Singer Spec" + title: "Source QuickBooks Singer Spec" type: "object" required: - "client_id" diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index 50deaeffa517..719f1eee0cf4 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -80,7 +80,7 @@ | CockroachDb | [![source-cockroachdb](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-cockroachdb%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-cockroachdb) | | Confluence | [![source-confluence](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-confluence%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-confluence) | | Qualaroo | [![source-qualaroo](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-qualaroo%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-qualaroo) | 
-| Quickbooks | [![source-quickbooks-singer](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-quickbooks-singer%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-quickbooks-singer) | +| QuickBooks | [![source-quickbooks-singer](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-quickbooks-singer%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-quickbooks-singer) | | Recharge | [![source-recharge](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-recharge%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-recharge) | | Recurly | [![source-recurly](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-recurly%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-recurly) | | Redshift | [![source-redshift](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-redshift%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-redshift) | diff --git a/airbyte-integrations/connectors/source-quickbooks-singer/Dockerfile b/airbyte-integrations/connectors/source-quickbooks-singer/Dockerfile index 0eb9718307ed..25f96add8815 100644 --- a/airbyte-integrations/connectors/source-quickbooks-singer/Dockerfile +++ b/airbyte-integrations/connectors/source-quickbooks-singer/Dockerfile @@ -36,6 +36,6 @@ COPY source_quickbooks_singer ./source_quickbooks_singer ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.4 +LABEL io.airbyte.version=0.1.5 LABEL io.airbyte.name=airbyte/source-quickbooks-singer diff --git a/airbyte-integrations/connectors/source-quickbooks-singer/README.md b/airbyte-integrations/connectors/source-quickbooks-singer/README.md index fcc8899976ce..6e24eadfe14c 100644 --- a/airbyte-integrations/connectors/source-quickbooks-singer/README.md +++ b/airbyte-integrations/connectors/source-quickbooks-singer/README.md @@ -1,6 +1,6 @@ -# Source Quickbooks Singer +# Source QuickBooks Singer -This is the repository for the Quickbooks source connector, based on a Singer tap. +This is the repository for the QuickBooks source connector, based on a Singer tap. For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/sources/quickbooks). 
## Local development diff --git a/airbyte-integrations/connectors/source-quickbooks-singer/setup.py b/airbyte-integrations/connectors/source-quickbooks-singer/setup.py index 3294cc4d584d..d315cf961061 100644 --- a/airbyte-integrations/connectors/source-quickbooks-singer/setup.py +++ b/airbyte-integrations/connectors/source-quickbooks-singer/setup.py @@ -16,7 +16,7 @@ setup( name="source_quickbooks_singer", - description="Source implementation for Quickbooks, built on the Singer tap implementation.", + description="Source implementation for QuickBooks, built on the Singer tap implementation.", author="Airbyte", author_email="contact@airbyte.io", packages=find_packages(), diff --git a/airbyte-integrations/connectors/source-quickbooks-singer/source_quickbooks_singer/spec.json b/airbyte-integrations/connectors/source-quickbooks-singer/source_quickbooks_singer/spec.json index e022bc7606e3..8c03de47fa50 100644 --- a/airbyte-integrations/connectors/source-quickbooks-singer/source_quickbooks_singer/spec.json +++ b/airbyte-integrations/connectors/source-quickbooks-singer/source_quickbooks_singer/spec.json @@ -1,8 +1,8 @@ { - "documentationUrl": "https://docsurl.com", + "documentationUrl": "https://docs.airbyte.com/integrations/sources/quickbooks", "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Source Quickbooks Singer Spec", + "title": "Source QuickBooks Singer Spec", "type": "object", "required": [ "client_id", diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 296b1bff137d..1ec08533dc71 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -132,7 +132,7 @@ * [PostHog](integrations/sources/posthog.md) * [PrestaShop](integrations/sources/presta-shop.md) * [Qualaroo](integrations/sources/qualaroo.md) - * [Quickbooks](integrations/sources/quickbooks.md) + * [QuickBooks](integrations/sources/quickbooks.md) * [Recharge](integrations/sources/recharge.md) * [Recurly](integrations/sources/recurly.md) * [Redshift](integrations/sources/redshift.md) diff --git a/docs/integrations/README.md b/docs/integrations/README.md index 7ef4847fa54c..9ab58c61afb2 100644 --- a/docs/integrations/README.md +++ b/docs/integrations/README.md @@ -100,7 +100,7 @@ Airbyte uses a grading system for connectors to help users understand what to ex | [PostHog](sources/posthog.md) | Beta | | [PrestaShop](sources/presta-shop.md) | Beta | | [Qualaroo](sources/qualaroo.md) | Beta | -| [Quickbooks](sources/quickbooks.md) | Beta | +| [QuickBooks](sources/quickbooks.md) | Beta | | [Recharge](sources/recharge.md) | Beta | | [Recurly](sources/recurly.md) | Beta | | [Redshift](sources/redshift.md) | Certified | diff --git a/docs/integrations/sources/quickbooks.md b/docs/integrations/sources/quickbooks.md index 6b74b618db23..863481bec6c2 100644 --- a/docs/integrations/sources/quickbooks.md +++ b/docs/integrations/sources/quickbooks.md @@ -1,10 +1,10 @@ -# Quickbooks +# QuickBooks ## Overview -The Quickbooks source supports both Full Refresh and Incremental syncs. You can choose if this connector will copy only the new or updated data, or all rows in the tables and columns you set up for replication, every time a sync is run. +The QuickBooks source supports both Full Refresh and Incremental syncs. You can choose if this connector will copy only the new or updated data, or all rows in the tables and columns you set up for replication, every time a sync is run. -This source wraps the [Singer Quickbooks Tap](https://github.com/singer-io/tap-quickbooks). 
+This source wraps the [Singer QuickBooks Tap](https://github.com/singer-io/tap-quickbooks). ### Output schema @@ -78,6 +78,7 @@ The easiest way to get these credentials is by using Quickbook's [OAuth 2.0 play | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| `0.1.5` | 2022-02-17 | [10346](https://github.com/airbytehq/airbyte/pull/10346) | Update label `Quickbooks` -> `QuickBooks` | | `0.1.4` | 2021-12-20 | [8960](https://github.com/airbytehq/airbyte/pull/8960) | Update connector fields title/description | | `0.1.3` | 2021-08-10 | [4986](https://github.com/airbytehq/airbyte/pull/4986) | Using number data type for decimal fields instead string | | `0.1.2` | 2021-07-06 | [4539](https://github.com/airbytehq/airbyte/pull/4539) | Add `AIRBYTE_ENTRYPOINT` for Kubernetes support | diff --git a/docs/project-overview/changelog/connectors.md b/docs/project-overview/changelog/connectors.md index e5fe2effb927..f07565693ec8 100644 --- a/docs/project-overview/changelog/connectors.md +++ b/docs/project-overview/changelog/connectors.md @@ -281,7 +281,7 @@ New features: Bug fixes: -* **Quickbooks** source: Now uses the number data type for decimal fields. +* **QuickBooks** source: Now uses the number data type for decimal fields. * **HubSpot** source: Fixed `empty string` inside of the `number` and `float` datatypes. * **GitHub** source: Validation fixed on non-required fields. * **BigQuery** destination: Now supports processing of arrays of records properly. @@ -591,7 +591,7 @@ Progress on connectors: * [**Zendesk Talk**](https://docs.airbyte.io/integrations/sources/zendesk-talk) * [**Iterable**](https://docs.airbyte.io/integrations/sources/iterable) -* [**Quickbooks**](https://docs.airbyte.io/integrations/sources/quickbooks) +* [**QuickBooks**](https://docs.airbyte.io/integrations/sources/quickbooks) Other progress on connectors: From a56c4fb11fab9a1f6d305f157b31e607ed19ded7 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 17 Feb 2022 07:12:41 -0800 Subject: [PATCH 02/14] add "internal use" caveat to interval configuration comments (#10390) --- .../src/main/java/io/airbyte/config/Configs.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java index a41d56162237..52329f6554ec 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java @@ -297,35 +297,35 @@ public interface Configs { /** * Define the interval for checking for a Kubernetes pod status for a worker of an unspecified type. * - * In seconds if specified by environment variable. + * In seconds if specified by environment variable. Airbyte internal use only. */ Duration getDefaultWorkerStatusCheckInterval(); /** * Define the interval for checking for "get spec" Kubernetes pod statuses. * - * In seconds if specified by environment variable. + * In seconds if specified by environment variable. Airbyte internal use only. */ Duration getSpecWorkerStatusCheckInterval(); /** * Define the interval for checking for "check connection" Kubernetes pod statuses. * - * In seconds if specified by environment variable. + * In seconds if specified by environment variable. Airbyte internal use only. */ Duration getCheckWorkerStatusCheckInterval(); /** * Define the interval for checking for "discover" Kubernetes pod statuses. * - * In seconds if specified by environment variable. 
+ * In seconds if specified by environment variable. Airbyte internal use only. */ Duration getDiscoverWorkerStatusCheckInterval(); /** * Define the interval for checking for "replication" Kubernetes pod statuses. * - * In seconds if specified by environment variable. + * In seconds if specified by environment variable. Airbyte internal use only. */ Duration getReplicationWorkerStatusCheckInterval(); From fe1eb8dd003067b7ce617ec6dd367a6f2763791c Mon Sep 17 00:00:00 2001 From: Malik Diarra Date: Thu, 17 Feb 2022 08:15:57 -0800 Subject: [PATCH 03/14] Add persistence function for discovered schema (#10326) - Add functions to persist/edit/delete ACTOR_CATALOG and ACTOR_CATALOG_FETCH_EVENT in ConfigPersistence - Add high level operation in ConfigRepository --- .../airbyte/bootloader/BootloaderAppTest.java | 4 +- .../java/io/airbyte/config/ConfigSchema.java | 3 + .../main/resources/types/ActorCatalog.yaml | 20 ++ .../types/ActorCatalogFetchEvent.yaml | 27 +++ .../config/persistence/ConfigRepository.java | 95 ++++++++ .../DatabaseConfigPersistence.java | 210 ++++++++++++++++++ ...baseConfigPersistenceE2EReadWriteTest.java | 40 +++- .../airbyte/config/persistence/MockData.java | 40 ++++ ...8_001__AddActorCatalogMetadataColumns.java | 47 ++++ .../configs_database/schema_dump.txt | 3 + ...1__AddActorCatalogMetadataColumnsTest.java | 123 ++++++++++ 11 files changed, 609 insertions(+), 3 deletions(-) create mode 100644 airbyte-config/models/src/main/resources/types/ActorCatalog.yaml create mode 100644 airbyte-config/models/src/main/resources/types/ActorCatalogFetchEvent.yaml create mode 100644 airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_35_28_001__AddActorCatalogMetadataColumns.java create mode 100644 airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_35_28_001__AddActorCatalogMetadataColumnsTest.java diff --git a/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java b/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java index 8045a5f6dd0d..1234256f023f 100644 --- a/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java +++ b/airbyte-bootloader/src/test/java/io/airbyte/bootloader/BootloaderAppTest.java @@ -80,7 +80,7 @@ void testBootloaderAppBlankDb() throws Exception { mockedConfigs.getConfigDatabaseUrl()) .getAndInitialize(); val configsMigrator = new ConfigsDatabaseMigrator(configDatabase, this.getClass().getName()); - assertEquals("0.35.26.001", configsMigrator.getLatestMigration().getVersion().getVersion()); + assertEquals("0.35.28.001", configsMigrator.getLatestMigration().getVersion().getVersion()); val jobsPersistence = new DefaultJobPersistence(jobDatabase); assertEquals(version, jobsPersistence.getVersion().get()); @@ -112,7 +112,7 @@ void testIsLegalUpgradePredicate() { @Test void testPostLoadExecutionExecutes() throws Exception { - var testTriggered = new AtomicBoolean(); + final var testTriggered = new AtomicBoolean(); val container = new PostgreSQLContainer<>("postgres:13-alpine") .withDatabaseName("public") diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/ConfigSchema.java b/airbyte-config/models/src/main/java/io/airbyte/config/ConfigSchema.java index 2a11f656ebe8..4d2296a0cbde 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/ConfigSchema.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/ConfigSchema.java @@ -60,6 +60,9 @@ public enum ConfigSchema implements AirbyteConfig { 
STANDARD_SYNC_SUMMARY("StandardSyncSummary.yaml", StandardSyncSummary.class), + ACTOR_CATALOG("ActorCatalog.yaml", ActorCatalog.class), + ACTOR_CATALOG_FETCH_EVENT("ActorCatalogFetchEvent.yaml", ActorCatalogFetchEvent.class), + // worker STANDARD_SYNC_INPUT("StandardSyncInput.yaml", StandardSyncInput.class), NORMALIZATION_INPUT("NormalizationInput.yaml", NormalizationInput.class), diff --git a/airbyte-config/models/src/main/resources/types/ActorCatalog.yaml b/airbyte-config/models/src/main/resources/types/ActorCatalog.yaml new file mode 100644 index 000000000000..a925ee891cdf --- /dev/null +++ b/airbyte-config/models/src/main/resources/types/ActorCatalog.yaml @@ -0,0 +1,20 @@ +--- +"$schema": http://json-schema.org/draft-07/schema# +"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/AttemptFailureSummary.yaml +title: ActorCatalog +description: Catalog of an actor. +type: object +additionalProperties: false +required: + - id + - catalog + - catalogHash +properties: + id: + type: string + format: uuid + catalog: + type: object + existingJavaType: com.fasterxml.jackson.databind.JsonNode + catalogHash: + type: string diff --git a/airbyte-config/models/src/main/resources/types/ActorCatalogFetchEvent.yaml b/airbyte-config/models/src/main/resources/types/ActorCatalogFetchEvent.yaml new file mode 100644 index 000000000000..bd2b5206c62b --- /dev/null +++ b/airbyte-config/models/src/main/resources/types/ActorCatalogFetchEvent.yaml @@ -0,0 +1,27 @@ +--- +"$schema": http://json-schema.org/draft-07/schema# +"$id": https://github.com/airbytehq/airbyte/blob/master/airbyte-config/models/src/main/resources/types/AttemptFailureSummary.yaml +title: ActorCatalogFetchEvent +description: Link actor to their actual catalog +type: object +additionalProperties: false +required: + - id + - actorCatalogId + - actorId + - configHash + - connectorVersion +properties: + id: + type: string + format: uuid + actorId: + type: string + format: uuid + actorCatalogId: + type: string + format: uuid + configHash: + type: string + connectorVersion: + type: string diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java index 051729193057..e8cbe0417bc9 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java @@ -5,9 +5,14 @@ package io.airbyte.config.persistence; import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Charsets; +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.Exceptions; import io.airbyte.commons.lang.MoreBooleans; +import io.airbyte.config.ActorCatalog; +import io.airbyte.config.ActorCatalogFetchEvent; import io.airbyte.config.AirbyteConfig; import io.airbyte.config.ConfigSchema; import io.airbyte.config.DestinationConnection; @@ -25,6 +30,7 @@ import io.airbyte.config.persistence.split_secrets.SecretsHelpers; import io.airbyte.config.persistence.split_secrets.SecretsHydrator; import io.airbyte.config.persistence.split_secrets.SplitSecretConfig; +import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.ConnectorSpecification; import io.airbyte.validation.json.JsonSchemaValidator; import 
io.airbyte.validation.json.JsonValidationException; @@ -551,6 +557,95 @@ public void updateConnectionState(final UUID connectionId, final State state) th } } + public Optional getSourceCatalog(final UUID sourceId, + final String configurationHash, + final String connectorVersion) + throws JsonValidationException, IOException { + for (final ActorCatalogFetchEvent event : listActorCatalogFetchEvents()) { + if (event.getConnectorVersion().equals(connectorVersion) + && event.getConfigHash().equals(configurationHash) + && event.getActorId().equals(sourceId)) { + return getCatalogById(event.getActorCatalogId()); + } + } + return Optional.empty(); + } + + public List listActorCatalogFetchEvents() + throws JsonValidationException, IOException { + final List actorCatalogFetchEvents = new ArrayList<>(); + + for (final ActorCatalogFetchEvent event : persistence.listConfigs(ConfigSchema.ACTOR_CATALOG_FETCH_EVENT, + ActorCatalogFetchEvent.class)) { + actorCatalogFetchEvents.add(event); + } + return actorCatalogFetchEvents; + } + + public Optional getCatalogById(final UUID catalogId) + throws IOException { + try { + return Optional.of(persistence.getConfig(ConfigSchema.ACTOR_CATALOG, catalogId.toString(), + ActorCatalog.class)); + } catch (final ConfigNotFoundException e) { + return Optional.empty(); + } catch (final JsonValidationException e) { + throw new IllegalStateException(e); + } + } + + public Optional findExistingCatalog(final ActorCatalog actorCatalog) + throws JsonValidationException, IOException { + for (final ActorCatalog fetchedCatalog : listActorCatalogs()) { + if (actorCatalog.getCatalogHash().equals(fetchedCatalog.getCatalogHash())) { + return Optional.of(fetchedCatalog); + } + } + return Optional.empty(); + } + + public List listActorCatalogs() + throws JsonValidationException, IOException { + final List actorCatalogs = new ArrayList<>(); + + for (final ActorCatalog event : persistence.listConfigs(ConfigSchema.ACTOR_CATALOG, + ActorCatalog.class)) { + actorCatalogs.add(event); + } + return actorCatalogs; + } + + public void writeCatalog(final AirbyteCatalog catalog, + final UUID sourceId, + final String configurationHash, + final String connectorVersion) + throws JsonValidationException, IOException { + final HashFunction hashFunction = Hashing.murmur3_32_fixed(); + final String catalogHash = hashFunction.hashBytes(Jsons.serialize(catalog).getBytes( + Charsets.UTF_8)).toString(); + ActorCatalog actorCatalog = new ActorCatalog() + .withCatalog(Jsons.jsonNode(catalog)) + .withId(UUID.randomUUID()) + .withCatalogHash(catalogHash); + final Optional existingCatalog = findExistingCatalog(actorCatalog); + if (existingCatalog.isPresent()) { + actorCatalog = existingCatalog.get(); + } else { + persistence.writeConfig(ConfigSchema.ACTOR_CATALOG, + actorCatalog.getId().toString(), + actorCatalog); + } + final ActorCatalogFetchEvent actorCatalogFetchEvent = new ActorCatalogFetchEvent() + .withActorCatalogId(actorCatalog.getId()) + .withId(UUID.randomUUID()) + .withConfigHash(configurationHash) + .withConnectorVersion(connectorVersion) + .withActorId(sourceId); + persistence.writeConfig(ConfigSchema.ACTOR_CATALOG_FETCH_EVENT, + actorCatalogFetchEvent.getId().toString(), + actorCatalogFetchEvent); + } + /** * Converts between a dumpConfig() output and a replaceAllConfigs() input, by deserializing the * string/jsonnode into the AirbyteConfig, Stream<Object<AirbyteConfig.getClassName()>> diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java 
b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java index b761358eae48..f761909bf9c8 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java @@ -5,6 +5,8 @@ package io.airbyte.config.persistence; import static io.airbyte.db.instance.configs.jooq.Tables.ACTOR; +import static io.airbyte.db.instance.configs.jooq.Tables.ACTOR_CATALOG; +import static io.airbyte.db.instance.configs.jooq.Tables.ACTOR_CATALOG_FETCH_EVENT; import static io.airbyte.db.instance.configs.jooq.Tables.ACTOR_DEFINITION; import static io.airbyte.db.instance.configs.jooq.Tables.ACTOR_OAUTH_PARAMETER; import static io.airbyte.db.instance.configs.jooq.Tables.CONNECTION; @@ -23,6 +25,8 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.util.MoreIterators; import io.airbyte.commons.version.AirbyteVersion; +import io.airbyte.config.ActorCatalog; +import io.airbyte.config.ActorCatalogFetchEvent; import io.airbyte.config.AirbyteConfig; import io.airbyte.config.ConfigSchema; import io.airbyte.config.ConfigWithMetadata; @@ -114,6 +118,10 @@ public T getConfig(final AirbyteConfig configType, final String configId, fi return (T) getStandardSync(configId); } else if (configType == ConfigSchema.STANDARD_SYNC_STATE) { return (T) getStandardSyncState(configId); + } else if (configType == ConfigSchema.ACTOR_CATALOG) { + return (T) getActorCatalog(configId); + } else if (configType == ConfigSchema.ACTOR_CATALOG_FETCH_EVENT) { + return (T) getActorCatalogFetchEvent(configId); } else { throw new IllegalArgumentException("Unknown Config Type " + configType); } @@ -181,6 +189,18 @@ private StandardSyncState getStandardSyncState(final String configId) throws IOE return result.get(0).getConfig(); } + private ActorCatalog getActorCatalog(final String configId) throws IOException, ConfigNotFoundException { + final List> result = listActorCatalogWithMetadata(Optional.of(UUID.fromString(configId))); + validate(configId, result, ConfigSchema.ACTOR_CATALOG); + return result.get(0).getConfig(); + } + + private ActorCatalogFetchEvent getActorCatalogFetchEvent(final String configId) throws IOException, ConfigNotFoundException { + final List> result = listActorCatalogFetchEventWithMetadata(Optional.of(UUID.fromString(configId))); + validate(configId, result, ConfigSchema.ACTOR_CATALOG_FETCH_EVENT); + return result.get(0).getConfig(); + } + private List connectionOperationIds(final UUID connectionId) throws IOException { final Result result = database.query(ctx -> ctx.select(asterisk()) .from(CONNECTION_OPERATION) @@ -243,6 +263,10 @@ public ConfigWithMetadata getConfigWithMetadata(final AirbyteConfig confi return (ConfigWithMetadata) validateAndReturn(configId, listStandardSyncWithMetadata(configIdOpt), configType); } else if (configType == ConfigSchema.STANDARD_SYNC_STATE) { return (ConfigWithMetadata) validateAndReturn(configId, listStandardSyncStateWithMetadata(configIdOpt), configType); + } else if (configType == ConfigSchema.ACTOR_CATALOG) { + return (ConfigWithMetadata) validateAndReturn(configId, listActorCatalogWithMetadata(configIdOpt), configType); + } else if (configType == ConfigSchema.ACTOR_CATALOG_FETCH_EVENT) { + return (ConfigWithMetadata) validateAndReturn(configId, listActorCatalogFetchEventWithMetadata(configIdOpt), configType); } else { throw new IllegalArgumentException("Unknown Config Type " + 
configType); } @@ -271,6 +295,10 @@ public List> listConfigsWithMetadata(final AirbyteConf listStandardSyncWithMetadata().forEach(c -> configWithMetadata.add((ConfigWithMetadata) c)); } else if (configType == ConfigSchema.STANDARD_SYNC_STATE) { listStandardSyncStateWithMetadata().forEach(c -> configWithMetadata.add((ConfigWithMetadata) c)); + } else if (configType == ConfigSchema.ACTOR_CATALOG) { + listActorCatalogWithMetadata().forEach(c -> configWithMetadata.add((ConfigWithMetadata) c)); + } else if (configType == ConfigSchema.ACTOR_CATALOG_FETCH_EVENT) { + listActorCatalogFetchEventWithMetadata().forEach(c -> configWithMetadata.add((ConfigWithMetadata) c)); } else { throw new IllegalArgumentException("Unknown Config Type " + configType); } @@ -672,6 +700,72 @@ private StandardSyncState buildStandardSyncState(final Record record) { .withState(Jsons.deserialize(record.get(STATE.STATE_).data(), State.class)); } + private List> listActorCatalogWithMetadata() throws IOException { + return listActorCatalogWithMetadata(Optional.empty()); + } + + private List> listActorCatalogWithMetadata(final Optional configId) throws IOException { + final Result result = database.query(ctx -> { + final SelectJoinStep query = ctx.select(asterisk()).from(ACTOR_CATALOG); + if (configId.isPresent()) { + return query.where(ACTOR_CATALOG.ID.eq(configId.get())).fetch(); + } + return query.fetch(); + }); + final List> actorCatalogs = new ArrayList<>(); + for (final Record record : result) { + final ActorCatalog actorCatalog = buildActorCatalog(record); + actorCatalogs.add(new ConfigWithMetadata<>( + record.get(ACTOR_CATALOG.ID).toString(), + ConfigSchema.ACTOR_CATALOG.name(), + record.get(ACTOR_CATALOG.CREATED_AT).toInstant(), + record.get(ACTOR_CATALOG.MODIFIED_AT).toInstant(), + actorCatalog)); + } + return actorCatalogs; + } + + private ActorCatalog buildActorCatalog(final Record record) { + return new ActorCatalog() + .withId(record.get(ACTOR_CATALOG.ID)) + .withCatalog(Jsons.deserialize(record.get(ACTOR_CATALOG.CATALOG).toString())) + .withCatalogHash(record.get(ACTOR_CATALOG.CATALOG_HASH)); + } + + private List> listActorCatalogFetchEventWithMetadata() throws IOException { + return listActorCatalogFetchEventWithMetadata(Optional.empty()); + } + + private List> listActorCatalogFetchEventWithMetadata(final Optional configId) throws IOException { + final Result result = database.query(ctx -> { + final SelectJoinStep query = ctx.select(asterisk()).from(ACTOR_CATALOG_FETCH_EVENT); + if (configId.isPresent()) { + return query.where(ACTOR_CATALOG_FETCH_EVENT.ID.eq(configId.get())).fetch(); + } + return query.fetch(); + }); + final List> actorCatalogFetchEvents = new ArrayList<>(); + for (final Record record : result) { + final ActorCatalogFetchEvent actorCatalogFetchEvent = buildActorCatalogFetchEvent(record); + actorCatalogFetchEvents.add(new ConfigWithMetadata<>( + record.get(ACTOR_CATALOG_FETCH_EVENT.ID).toString(), + ConfigSchema.ACTOR_CATALOG_FETCH_EVENT.name(), + record.get(ACTOR_CATALOG_FETCH_EVENT.CREATED_AT).toInstant(), + record.get(ACTOR_CATALOG_FETCH_EVENT.MODIFIED_AT).toInstant(), + actorCatalogFetchEvent)); + } + return actorCatalogFetchEvents; + } + + private ActorCatalogFetchEvent buildActorCatalogFetchEvent(final Record record) { + return new ActorCatalogFetchEvent() + .withId(record.get(ACTOR_CATALOG_FETCH_EVENT.ID)) + .withActorCatalogId(record.get(ACTOR_CATALOG_FETCH_EVENT.ACTOR_CATALOG_ID)) + .withConfigHash(record.get(ACTOR_CATALOG_FETCH_EVENT.CONFIG_HASH)) + 
.withConnectorVersion(record.get(ACTOR_CATALOG_FETCH_EVENT.ACTOR_VERSION)) + .withActorId(record.get(ACTOR_CATALOG_FETCH_EVENT.ACTOR_ID)); + } + @Override public void writeConfig(final AirbyteConfig configType, final String configId, final T config) throws JsonValidationException, IOException { if (configType == ConfigSchema.STANDARD_WORKSPACE) { @@ -694,6 +788,10 @@ public void writeConfig(final AirbyteConfig configType, final String configI writeStandardSync(Collections.singletonList((StandardSync) config)); } else if (configType == ConfigSchema.STANDARD_SYNC_STATE) { writeStandardSyncState(Collections.singletonList((StandardSyncState) config)); + } else if (configType == ConfigSchema.ACTOR_CATALOG) { + writeActorCatalog(Collections.singletonList((ActorCatalog) config)); + } else if (configType == ConfigSchema.ACTOR_CATALOG_FETCH_EVENT) { + writeActorCatalogFetchEvent(Collections.singletonList((ActorCatalogFetchEvent) config)); } else { throw new IllegalArgumentException("Unknown Config Type " + configType); } @@ -1200,6 +1298,76 @@ private void writeStandardSyncState(final List configs, final }); } + private void writeActorCatalog(final List configs) throws IOException { + database.transaction(ctx -> { + writeActorCatalog(configs, ctx); + return null; + }); + } + + private void writeActorCatalog(final List configs, final DSLContext ctx) { + final OffsetDateTime timestamp = OffsetDateTime.now(); + configs.forEach((actorCatalog) -> { + final boolean isExistingConfig = ctx.fetchExists(select() + .from(ACTOR_CATALOG) + .where(ACTOR_CATALOG.ID.eq(actorCatalog.getId()))); + + if (isExistingConfig) { + ctx.update(ACTOR_CATALOG) + .set(ACTOR_CATALOG.CATALOG, JSONB.valueOf(Jsons.serialize(actorCatalog.getCatalog()))) + .set(ACTOR_CATALOG.CATALOG_HASH, actorCatalog.getCatalogHash()) + .set(ACTOR_CATALOG.MODIFIED_AT, timestamp) + .where(ACTOR_CATALOG.ID.eq(actorCatalog.getId())) + .execute(); + } else { + ctx.insertInto(ACTOR_CATALOG) + .set(ACTOR_CATALOG.ID, actorCatalog.getId()) + .set(ACTOR_CATALOG.CATALOG, JSONB.valueOf(Jsons.serialize(actorCatalog.getCatalog()))) + .set(ACTOR_CATALOG.CATALOG_HASH, actorCatalog.getCatalogHash()) + .set(ACTOR_CATALOG.CREATED_AT, timestamp) + .set(ACTOR_CATALOG.MODIFIED_AT, timestamp) + .execute(); + } + }); + } + + private void writeActorCatalogFetchEvent(final List configs) throws IOException { + database.transaction(ctx -> { + writeActorCatalogFetchEvent(configs, ctx); + return null; + }); + } + + private void writeActorCatalogFetchEvent(final List configs, final DSLContext ctx) { + final OffsetDateTime timestamp = OffsetDateTime.now(); + configs.forEach((actorCatalogFetchEvent) -> { + final boolean isExistingConfig = ctx.fetchExists(select() + .from(ACTOR_CATALOG_FETCH_EVENT) + .where(ACTOR_CATALOG_FETCH_EVENT.ID.eq(actorCatalogFetchEvent.getId()))); + + if (isExistingConfig) { + ctx.update(ACTOR_CATALOG_FETCH_EVENT) + .set(ACTOR_CATALOG_FETCH_EVENT.CONFIG_HASH, actorCatalogFetchEvent.getConfigHash()) + .set(ACTOR_CATALOG_FETCH_EVENT.ACTOR_CATALOG_ID, actorCatalogFetchEvent.getActorCatalogId()) + .set(ACTOR_CATALOG_FETCH_EVENT.ACTOR_ID, actorCatalogFetchEvent.getActorId()) + .set(ACTOR_CATALOG_FETCH_EVENT.ACTOR_VERSION, actorCatalogFetchEvent.getConnectorVersion()) + .set(ACTOR_CATALOG_FETCH_EVENT.MODIFIED_AT, timestamp) + .where(ACTOR_CATALOG_FETCH_EVENT.ID.eq(actorCatalogFetchEvent.getId())) + .execute(); + } else { + ctx.insertInto(ACTOR_CATALOG_FETCH_EVENT) + .set(ACTOR_CATALOG_FETCH_EVENT.ID, actorCatalogFetchEvent.getId()) + 
.set(ACTOR_CATALOG_FETCH_EVENT.CONFIG_HASH, actorCatalogFetchEvent.getConfigHash()) + .set(ACTOR_CATALOG_FETCH_EVENT.ACTOR_CATALOG_ID, actorCatalogFetchEvent.getActorCatalogId()) + .set(ACTOR_CATALOG_FETCH_EVENT.ACTOR_ID, actorCatalogFetchEvent.getActorId()) + .set(ACTOR_CATALOG_FETCH_EVENT.ACTOR_VERSION, actorCatalogFetchEvent.getConnectorVersion()) + .set(ACTOR_CATALOG_FETCH_EVENT.CREATED_AT, timestamp) + .set(ACTOR_CATALOG_FETCH_EVENT.MODIFIED_AT, timestamp) + .execute(); + } + }); + } + @Override public void writeConfigs(final AirbyteConfig configType, final Map configs) throws IOException, JsonValidationException { if (configType == ConfigSchema.STANDARD_WORKSPACE) { @@ -1222,6 +1390,10 @@ public void writeConfigs(final AirbyteConfig configType, final Map (StandardSync) c).collect(Collectors.toList())); } else if (configType == ConfigSchema.STANDARD_SYNC_STATE) { writeStandardSyncState(configs.values().stream().map(c -> (StandardSyncState) c).collect(Collectors.toList())); + } else if (configType == ConfigSchema.ACTOR_CATALOG) { + writeActorCatalog(configs.values().stream().map(c -> (ActorCatalog) c).collect(Collectors.toList())); + } else if (configType == ConfigSchema.ACTOR_CATALOG_FETCH_EVENT) { + writeActorCatalogFetchEvent(configs.values().stream().map(c -> (ActorCatalogFetchEvent) c).collect(Collectors.toList())); } else { throw new IllegalArgumentException("Unknown Config Type " + configType); } @@ -1249,6 +1421,10 @@ public void deleteConfig(final AirbyteConfig configType, final String configId) deleteStandardSync(configId); } else if (configType == ConfigSchema.STANDARD_SYNC_STATE) { deleteConfig(STATE, STATE.CONNECTION_ID, UUID.fromString(configId)); + } else if (configType == ConfigSchema.ACTOR_CATALOG) { + deleteConfig(ACTOR_CATALOG, ACTOR_CATALOG.ID, UUID.fromString(configId)); + } else if (configType == ConfigSchema.ACTOR_CATALOG_FETCH_EVENT) { + deleteConfig(ACTOR_CATALOG_FETCH_EVENT, ACTOR_CATALOG_FETCH_EVENT.ID, UUID.fromString(configId)); } else { throw new IllegalArgumentException("Unknown Config Type " + configType); } @@ -1304,6 +1480,8 @@ public void replaceAllConfigs(final Map> configs, final ctx.truncate(CONNECTION).restartIdentity().cascade().execute(); ctx.truncate(CONNECTION_OPERATION).restartIdentity().cascade().execute(); ctx.truncate(STATE).restartIdentity().cascade().execute(); + ctx.truncate(ACTOR_CATALOG).restartIdentity().cascade().execute(); + ctx.truncate(ACTOR_CATALOG_FETCH_EVENT).restartIdentity().cascade().execute(); if (configs.containsKey(ConfigSchema.STANDARD_WORKSPACE)) { configs.get(ConfigSchema.STANDARD_WORKSPACE).map(c -> (StandardWorkspace) c) @@ -1383,6 +1561,22 @@ public void replaceAllConfigs(final Map> configs, final LOGGER.warn(ConfigSchema.STANDARD_SYNC_STATE + " not found"); } + if (configs.containsKey(ConfigSchema.ACTOR_CATALOG)) { + configs.get(ConfigSchema.ACTOR_CATALOG).map(c -> (ActorCatalog) c) + .forEach(c -> writeActorCatalog(Collections.singletonList(c), ctx)); + originalConfigs.remove(ConfigSchema.ACTOR_CATALOG); + } else { + LOGGER.warn(ConfigSchema.ACTOR_CATALOG + " not found"); + } + + if (configs.containsKey(ConfigSchema.ACTOR_CATALOG_FETCH_EVENT)) { + configs.get(ConfigSchema.ACTOR_CATALOG_FETCH_EVENT).map(c -> (ActorCatalogFetchEvent) c) + .forEach(c -> writeActorCatalogFetchEvent(Collections.singletonList(c), ctx)); + originalConfigs.remove(ConfigSchema.ACTOR_CATALOG_FETCH_EVENT); + } else { + LOGGER.warn(ConfigSchema.ACTOR_CATALOG_FETCH_EVENT + " not found"); + } + if (!originalConfigs.isEmpty()) { 
originalConfigs.forEach(c -> LOGGER.warn("Unknown Config " + c + " ignored")); } @@ -1479,6 +1673,22 @@ public Map> dumpConfigs() throws IOException { .map(ConfigWithMetadata::getConfig) .map(Jsons::jsonNode)); } + final List> actorCatalogWithMetadata = listActorCatalogWithMetadata(); + if (!standardSyncStateWithMetadata.isEmpty()) { + result.put(ConfigSchema.ACTOR_CATALOG.name(), + standardSyncStateWithMetadata + .stream() + .map(ConfigWithMetadata::getConfig) + .map(Jsons::jsonNode)); + } + final List> actorCatalogFetchEventWithMetadata = listActorCatalogFetchEventWithMetadata(); + if (!standardSyncStateWithMetadata.isEmpty()) { + result.put(ConfigSchema.ACTOR_CATALOG_FETCH_EVENT.name(), + standardSyncStateWithMetadata + .stream() + .map(ConfigWithMetadata::getConfig) + .map(Jsons::jsonNode)); + } return result; } diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceE2EReadWriteTest.java b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceE2EReadWriteTest.java index 12352c65123f..99ed6b445969 100644 --- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceE2EReadWriteTest.java +++ b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/DatabaseConfigPersistenceE2EReadWriteTest.java @@ -10,6 +10,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.spy; +import io.airbyte.config.ActorCatalog; +import io.airbyte.config.ActorCatalogFetchEvent; import io.airbyte.config.ConfigSchema; import io.airbyte.config.DestinationConnection; import io.airbyte.config.DestinationOAuthParameter; @@ -62,22 +64,25 @@ public void test() throws JsonValidationException, IOException, ConfigNotFoundEx standardSyncOperation(); standardSync(); standardSyncState(); + standardActorCatalog(); deletion(); } private void deletion() throws ConfigNotFoundException, IOException, JsonValidationException { - // Deleting the workspace should delete everything except for definitions + // Deleting the workspace should delete everything except for definitions and catalogs configPersistence.deleteConfig(ConfigSchema.STANDARD_WORKSPACE, MockData.standardWorkspace().getWorkspaceId().toString()); assertTrue(configPersistence.listConfigs(ConfigSchema.STANDARD_SYNC_STATE, StandardSyncState.class).isEmpty()); assertTrue(configPersistence.listConfigs(ConfigSchema.STANDARD_SYNC, StandardSync.class).isEmpty()); assertTrue(configPersistence.listConfigs(ConfigSchema.STANDARD_SYNC_OPERATION, StandardSyncOperation.class).isEmpty()); assertTrue(configPersistence.listConfigs(ConfigSchema.DESTINATION_CONNECTION, SourceConnection.class).isEmpty()); assertTrue(configPersistence.listConfigs(ConfigSchema.STANDARD_WORKSPACE, StandardWorkspace.class).isEmpty()); + assertTrue(configPersistence.listConfigs(ConfigSchema.ACTOR_CATALOG_FETCH_EVENT, ActorCatalogFetchEvent.class).isEmpty()); assertFalse(configPersistence.listConfigs(ConfigSchema.SOURCE_OAUTH_PARAM, SourceOAuthParameter.class).isEmpty()); assertFalse(configPersistence.listConfigs(ConfigSchema.DESTINATION_OAUTH_PARAM, DestinationOAuthParameter.class).isEmpty()); assertFalse(configPersistence.listConfigs(ConfigSchema.STANDARD_SOURCE_DEFINITION, StandardSourceDefinition.class).isEmpty()); assertFalse(configPersistence.listConfigs(ConfigSchema.STANDARD_DESTINATION_DEFINITION, StandardDestinationDefinition.class).isEmpty()); + 
assertFalse(configPersistence.listConfigs(ConfigSchema.ACTOR_CATALOG, ActorCatalog.class).isEmpty()); for (final SourceOAuthParameter sourceOAuthParameter : MockData.sourceOauthParameters()) { configPersistence.deleteConfig(ConfigSchema.SOURCE_OAUTH_PARAM, sourceOAuthParameter.getOauthParameterId().toString()); @@ -99,6 +104,12 @@ private void deletion() throws ConfigNotFoundException, IOException, JsonValidat .deleteConfig(ConfigSchema.STANDARD_DESTINATION_DEFINITION, standardDestinationDefinition.getDestinationDefinitionId().toString()); } assertTrue(configPersistence.listConfigs(ConfigSchema.STANDARD_DESTINATION_DEFINITION, StandardDestinationDefinition.class).isEmpty()); + + for (final ActorCatalog actorCatalog : MockData.actorCatalogs()) { + configPersistence + .deleteConfig(ConfigSchema.ACTOR_CATALOG, actorCatalog.getId().toString()); + } + assertTrue(configPersistence.listConfigs(ConfigSchema.ACTOR_CATALOG, ActorCatalog.class).isEmpty()); } private void standardSyncState() throws JsonValidationException, IOException, ConfigNotFoundException { @@ -259,4 +270,31 @@ private void standardWorkspace() throws JsonValidationException, IOException, Co assertTrue(standardWorkspaces.contains(MockData.standardWorkspace())); } + public void standardActorCatalog() throws JsonValidationException, IOException, ConfigNotFoundException { + + for (final ActorCatalog actorCatalog : MockData.actorCatalogs()) { + configPersistence.writeConfig(ConfigSchema.ACTOR_CATALOG, actorCatalog.getId().toString(), actorCatalog); + final ActorCatalog retrievedActorCatalog = configPersistence.getConfig( + ConfigSchema.ACTOR_CATALOG, actorCatalog.getId().toString(), ActorCatalog.class); + assertEquals(actorCatalog, retrievedActorCatalog); + } ; + final List actorCatalogs = configPersistence + .listConfigs(ConfigSchema.ACTOR_CATALOG, ActorCatalog.class); + assertEquals(MockData.actorCatalogs().size(), actorCatalogs.size()); + assertThat(MockData.actorCatalogs()).hasSameElementsAs(actorCatalogs); + + for (final ActorCatalogFetchEvent actorCatalogFetchEvent : MockData.actorCatalogFetchEvents()) { + configPersistence.writeConfig(ConfigSchema.ACTOR_CATALOG_FETCH_EVENT, + actorCatalogFetchEvent.getId().toString(), actorCatalogFetchEvent); + final ActorCatalogFetchEvent retrievedActorCatalogFetchEvent = configPersistence.getConfig( + ConfigSchema.ACTOR_CATALOG_FETCH_EVENT, actorCatalogFetchEvent.getId().toString(), + ActorCatalogFetchEvent.class); + assertEquals(actorCatalogFetchEvent, retrievedActorCatalogFetchEvent); + } + final List actorCatalogFetchEvents = configPersistence + .listConfigs(ConfigSchema.ACTOR_CATALOG_FETCH_EVENT, ActorCatalogFetchEvent.class); + assertEquals(MockData.actorCatalogFetchEvents().size(), actorCatalogFetchEvents.size()); + assertThat(MockData.actorCatalogFetchEvents()).hasSameElementsAs(actorCatalogFetchEvents); + } + } diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/MockData.java b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/MockData.java index 33d8fb253309..c7aba3991602 100644 --- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/MockData.java +++ b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/MockData.java @@ -6,6 +6,8 @@ import com.google.common.collect.Lists; import io.airbyte.commons.json.Jsons; +import io.airbyte.config.ActorCatalog; +import io.airbyte.config.ActorCatalogFetchEvent; import io.airbyte.config.DestinationConnection; import 
io.airbyte.config.DestinationOAuthParameter; import io.airbyte.config.JobSyncConfig.NamespaceDefinitionType; @@ -68,6 +70,12 @@ public class MockData { private static final UUID SOURCE_OAUTH_PARAMETER_ID_2 = UUID.randomUUID(); private static final UUID DESTINATION_OAUTH_PARAMETER_ID_1 = UUID.randomUUID(); private static final UUID DESTINATION_OAUTH_PARAMETER_ID_2 = UUID.randomUUID(); + private static final UUID ACTOR_CATALOG_ID_1 = UUID.randomUUID(); + private static final UUID ACTOR_CATALOG_ID_2 = UUID.randomUUID(); + private static final UUID ACTOR_CATALOG_ID_3 = UUID.randomUUID(); + private static final UUID ACTOR_CATALOG_FETCH_EVENT_ID_1 = UUID.randomUUID(); + private static final UUID ACTOR_CATALOG_FETCH_EVENT_ID_2 = UUID.randomUUID(); + private static final Instant NOW = Instant.parse("2021-12-15T20:30:40.00Z"); public static StandardWorkspace standardWorkspace() { @@ -341,6 +349,38 @@ public static List standardSyncStates() { return Arrays.asList(standardSyncState1, standardSyncState2, standardSyncState3, standardSyncState4); } + public static List actorCatalogs() { + final ActorCatalog actorCatalog1 = new ActorCatalog() + .withId(ACTOR_CATALOG_ID_1) + .withCatalog(Jsons.deserialize("{}")) + .withCatalogHash("TESTHASH"); + final ActorCatalog actorCatalog2 = new ActorCatalog() + .withId(ACTOR_CATALOG_ID_2) + .withCatalog(Jsons.deserialize("{}")) + .withCatalogHash("12345"); + final ActorCatalog actorCatalog3 = new ActorCatalog() + .withId(ACTOR_CATALOG_ID_3) + .withCatalog(Jsons.deserialize("{}")) + .withCatalogHash("SomeOtherHash"); + return Arrays.asList(actorCatalog1, actorCatalog2, actorCatalog3); + } + + public static List actorCatalogFetchEvents() { + final ActorCatalogFetchEvent actorCatalogFetchEvent1 = new ActorCatalogFetchEvent() + .withId(ACTOR_CATALOG_FETCH_EVENT_ID_1) + .withActorCatalogId(ACTOR_CATALOG_ID_1) + .withActorId(SOURCE_ID_1) + .withConfigHash("CONFIG_HASH") + .withConnectorVersion("1.0.0"); + final ActorCatalogFetchEvent actorCatalogFetchEvent2 = new ActorCatalogFetchEvent() + .withId(ACTOR_CATALOG_FETCH_EVENT_ID_2) + .withActorCatalogId(ACTOR_CATALOG_ID_2) + .withActorId(SOURCE_ID_2) + .withConfigHash("1394") + .withConnectorVersion("1.2.0"); + return Arrays.asList(actorCatalogFetchEvent1); + } + public static Instant now() { return NOW; } diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_35_28_001__AddActorCatalogMetadataColumns.java b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_35_28_001__AddActorCatalogMetadataColumns.java new file mode 100644 index 000000000000..2c9fe64a0daa --- /dev/null +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_35_28_001__AddActorCatalogMetadataColumns.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.db.instance.configs.migrations; + +import static org.jooq.impl.DSL.currentOffsetDateTime; + +import com.google.common.annotations.VisibleForTesting; +import java.time.OffsetDateTime; +import org.flywaydb.core.api.migration.BaseJavaMigration; +import org.flywaydb.core.api.migration.Context; +import org.jooq.DSLContext; +import org.jooq.Field; +import org.jooq.impl.DSL; +import org.jooq.impl.SQLDataType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class V0_35_28_001__AddActorCatalogMetadataColumns extends BaseJavaMigration { + + private static final Logger LOGGER = LoggerFactory.getLogger( + V0_35_28_001__AddActorCatalogMetadataColumns.class); + + @Override + public void migrate(final Context context) throws Exception { + LOGGER.info("Running migration: {}", this.getClass().getSimpleName()); + + final DSLContext ctx = DSL.using(context.getConnection()); + migrate(ctx); + } + + @VisibleForTesting + public static void migrate(final DSLContext ctx) { + final Field createdAt = + DSL.field("created_at", SQLDataType.TIMESTAMPWITHTIMEZONE.nullable(false).defaultValue(currentOffsetDateTime())); + final Field modifiedAt = + DSL.field("modified_at", SQLDataType.TIMESTAMPWITHTIMEZONE.nullable(false).defaultValue(currentOffsetDateTime())); + ctx.alterTable("actor_catalog") + .addIfNotExists(modifiedAt).execute(); + ctx.alterTable("actor_catalog_fetch_event") + .addIfNotExists(createdAt).execute(); + ctx.alterTable("actor_catalog_fetch_event") + .addIfNotExists(modifiedAt).execute(); + } + +} diff --git a/airbyte-db/lib/src/main/resources/configs_database/schema_dump.txt b/airbyte-db/lib/src/main/resources/configs_database/schema_dump.txt index d7f92c4cfa4a..5512808ea0d5 100644 --- a/airbyte-db/lib/src/main/resources/configs_database/schema_dump.txt +++ b/airbyte-db/lib/src/main/resources/configs_database/schema_dump.txt @@ -20,6 +20,7 @@ create table "public"."actor_catalog"( "catalog" jsonb not null, "catalog_hash" varchar(32) not null, "created_at" timestamptz(35) not null, + "modified_at" timestamptz(35) not null default null, constraint "actor_catalog_pkey" primary key ("id") ); @@ -29,6 +30,8 @@ create table "public"."actor_catalog_fetch_event"( "actor_id" uuid not null, "config_hash" varchar(32) not null, "actor_version" varchar(256) not null, + "created_at" timestamptz(35) not null default null, + "modified_at" timestamptz(35) not null default null, constraint "actor_catalog_fetch_event_pkey" primary key ("id") ); diff --git a/airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_35_28_001__AddActorCatalogMetadataColumnsTest.java b/airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_35_28_001__AddActorCatalogMetadataColumnsTest.java new file mode 100644 index 000000000000..7038326b98d4 --- /dev/null +++ b/airbyte-db/lib/src/test/java/io/airbyte/db/instance/configs/migrations/V0_35_28_001__AddActorCatalogMetadataColumnsTest.java @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.db.instance.configs.migrations; + +import io.airbyte.db.Database; +import io.airbyte.db.instance.configs.AbstractConfigsDatabaseTest; +import io.airbyte.db.instance.configs.migrations.V0_32_8_001__AirbyteConfigDatabaseDenormalization.ActorType; +import java.io.IOException; +import java.sql.SQLException; +import java.time.OffsetDateTime; +import java.util.UUID; +import org.jooq.DSLContext; +import org.jooq.JSONB; +import org.jooq.impl.DSL; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class V0_35_28_001__AddActorCatalogMetadataColumnsTest extends AbstractConfigsDatabaseTest { + + @Test + public void test() throws SQLException, IOException { + + final Database database = getDatabase(); + final DSLContext context = DSL.using(database.getDataSource().getConnection()); + V0_32_8_001__AirbyteConfigDatabaseDenormalization.migrate(context); + V0_35_26_001__PersistDiscoveredCatalog.migrate(context); + V0_35_28_001__AddActorCatalogMetadataColumns.migrate(context); + assertCanInsertSchemaDataWithMetadata(context); + } + + private void assertCanInsertSchemaDataWithMetadata(final DSLContext ctx) { + Assertions.assertDoesNotThrow(() -> { + final UUID catalogId = UUID.randomUUID(); + final UUID actorId = UUID.randomUUID(); + final UUID actorDefinitionId = UUID.randomUUID(); + final UUID workspaceId = UUID.randomUUID(); + + ctx.insertInto(DSL.table("workspace")) + .columns( + DSL.field("id"), + DSL.field("name"), + DSL.field("slug"), + DSL.field("initial_setup_complete")) + .values( + workspaceId, + "base workspace", + "base_workspace", + true) + .execute(); + ctx.insertInto(DSL.table("actor_definition")) + .columns( + DSL.field("id"), + DSL.field("name"), + DSL.field("docker_repository"), + DSL.field("docker_image_tag"), + DSL.field("actor_type"), + DSL.field("spec")) + .values( + actorDefinitionId, + "Jenkins", + "farosai/airbyte-jenkins-source", + "0.1.23", + ActorType.source, + JSONB.valueOf("{}")) + .execute(); + ctx.insertInto(DSL.table("actor")) + .columns( + DSL.field("id"), + DSL.field("workspace_id"), + DSL.field("actor_definition_id"), + DSL.field("name"), + DSL.field("configuration"), + DSL.field("actor_type"), + DSL.field("created_at"), + DSL.field("updated_at")) + .values( + actorId, + workspaceId, + actorDefinitionId, + "JenkinsConnection", + JSONB.valueOf("{}"), + ActorType.source, + OffsetDateTime.now(), + OffsetDateTime.now()) + .execute(); + ctx.insertInto(DSL.table("actor_catalog")) + .columns( + DSL.field("id"), + DSL.field("catalog"), + DSL.field("catalog_hash"), + DSL.field("created_at"), + DSL.field("modified_at")) + .values( + catalogId, + JSONB.valueOf("{}"), + "", + OffsetDateTime.now(), + OffsetDateTime.now()) + .execute(); + ctx.insertInto(DSL.table("actor_catalog_fetch_event")) + .columns( + DSL.field("id"), + DSL.field("actor_catalog_id"), + DSL.field("actor_id"), + DSL.field("config_hash"), + DSL.field("actor_version"), + DSL.field("created_at"), + DSL.field("modified_at")) + .values( + UUID.randomUUID(), + catalogId, + actorId, + "HASHVALUE", + "2.0.1", + OffsetDateTime.now(), + OffsetDateTime.now()) + .execute(); + }); + } + +} From 3db3e8818729058fdeb6a004f2ed2cbd5a0b54f2 Mon Sep 17 00:00:00 2001 From: girarda Date: Thu, 17 Feb 2022 10:58:50 -0800 Subject: [PATCH 04/14] =?UTF-8?q?=F0=9F=8E=89=20Destination=20MySQL:=20Add?= =?UTF-8?q?=20jdbc=5Furl=5Fparams=20support=20for=20optional=20JDBC=20para?= =?UTF-8?q?meters=20(#10362)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit * pass through jdbc params * fail if contains verifyServerCertificate * do the same for all ssl params * delete dead file * slight refactor * new method * remove default value * error message * rename * update as per comments * Update exception message * Bump version * extract to method * Update doc * Revert "Update doc" This reverts commit 097906fe64f81f03f80d7a2e16ee295e06e7e6e7. * Update doc * delete dead code * update doc * Throw exception with better error message * Add missing test * Use MoreMaps::merge * Add missing tests * camel case * Allow colliding parameters if values are equal * Remove trailing & * Throw IllegalArgumentException * extract to constants * Bump version in seed * Update destination specs --- .../seed/destination_definitions.yaml | 2 +- .../resources/seed/destination_specs.yaml | 9 +- .../connectors/destination-mysql/Dockerfile | 2 +- .../destination/mysql/MySQLDestination.java | 123 ++++++++++++---- .../src/main/resources/spec.json | 22 ++- .../SshMySQLDestinationAcceptanceTest.java | 27 ++-- .../mysql/MySQLDestinationTest.java | 136 ++++++++++++++++++ docs/integrations/destinations/mysql.md | 11 ++ 8 files changed, 288 insertions(+), 44 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-mysql/src/test/java/io/airbyte/integrations/destination/mysql/MySQLDestinationTest.java diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index b5cb2c24bce3..3c2582aff4cb 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -126,7 +126,7 @@ - name: MySQL destinationDefinitionId: ca81ee7c-3163-4246-af40-094cc31e5e42 dockerRepository: airbyte/destination-mysql - dockerImageTag: 0.1.16 + dockerImageTag: 0.1.17 documentationUrl: https://docs.airbyte.io/integrations/destinations/mysql icon: mysql.svg - name: Oracle diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 9920382af6ab..27c4e373924d 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -2464,7 +2464,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-mysql:0.1.16" +- dockerImage: "airbyte/destination-mysql:0.1.17" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/mysql" connectionSpecification: @@ -2515,6 +2515,13 @@ type: "boolean" default: true order: 5 + jdbc_url_params: + description: "Additional properties to pass to the JDBC URL string when\ + \ connecting to the database formatted as 'key=value' pairs separated\ + \ by the symbol '&'. (example: key1=value1&key2=value2&key3=value3)." 
+ title: "JDBC URL Params" + type: "string" + order: 6 tunnel_method: type: "object" title: "SSH Tunnel Method" diff --git a/airbyte-integrations/connectors/destination-mysql/Dockerfile b/airbyte-integrations/connectors/destination-mysql/Dockerfile index ac835aba62d6..b137bdd41e32 100644 --- a/airbyte-integrations/connectors/destination-mysql/Dockerfile +++ b/airbyte-integrations/connectors/destination-mysql/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-mysql COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.16 +LABEL io.airbyte.version=0.1.17 LABEL io.airbyte.name=airbyte/destination-mysql diff --git a/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/MySQLDestination.java b/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/MySQLDestination.java index 7bc53a2beb54..ec2016a41dfc 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/MySQLDestination.java +++ b/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/MySQLDestination.java @@ -6,7 +6,9 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Streams; import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.map.MoreMaps; import io.airbyte.db.Databases; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.base.Destination; @@ -16,21 +18,42 @@ import io.airbyte.integrations.destination.mysql.MySQLSqlOperations.VersionCompatibility; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; -import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class MySQLDestination extends AbstractJdbcDestination implements Destination { private static final Logger LOGGER = LoggerFactory.getLogger(MySQLDestination.class); - public static final List HOST_KEY = List.of("host"); - public static final List PORT_KEY = List.of("port"); + + public static final String DATABASE_KEY = "database"; + public static final String HOST_KEY = "host"; + public static final String JDBC_URL_KEY = "jdbc_url"; + public static final String JDBC_URL_PARAMS_KEY = "jdbc_url_params"; + public static final String PASSWORD_KEY = "password"; + public static final String PORT_KEY = "port"; + public static final String SSL_KEY = "ssl"; + public static final String USERNAME_KEY = "username"; public static final String DRIVER_CLASS = "com.mysql.cj.jdbc.Driver"; + + static final Map SSL_JDBC_PARAMETERS = ImmutableMap.of( + "useSSL", "true", + "requireSSL", "true", + "verifyServerCertificate", "false" + ); + static final Map DEFAULT_JDBC_PARAMETERS = ImmutableMap.of( + "zeroDateTimeBehavior", "convertToNull" + ); public static Destination sshWrappedDestination() { - return new SshWrappedDestination(new MySQLDestination(), HOST_KEY, PORT_KEY); + return new SshWrappedDestination(new MySQLDestination(), List.of(HOST_KEY), List.of(PORT_KEY)); } @Override @@ -38,7 +61,7 @@ public AirbyteConnectionStatus check(final JsonNode config) { try (final JdbcDatabase database = getDatabase(config)) { final MySQLSqlOperations mySQLSqlOperations = 
(MySQLSqlOperations) getSqlOperations(); - final String outputSchema = getNamingResolver().getIdentifier(config.get("database").asText()); + final String outputSchema = getNamingResolver().getIdentifier(config.get(DATABASE_KEY).asText()); attemptSQLCreateAndDropTableOperations(outputSchema, database, getNamingResolver(), mySQLSqlOperations); @@ -69,49 +92,99 @@ protected JdbcDatabase getDatabase(final JsonNode config) { final JsonNode jdbcConfig = toJdbcConfig(config); return Databases.createJdbcDatabase( - jdbcConfig.get("username").asText(), - jdbcConfig.has("password") ? jdbcConfig.get("password").asText() : null, - jdbcConfig.get("jdbc_url").asText(), + jdbcConfig.get(USERNAME_KEY).asText(), + jdbcConfig.has(PASSWORD_KEY) ? jdbcConfig.get(PASSWORD_KEY).asText() : null, + jdbcConfig.get(JDBC_URL_KEY).asText(), getDriverClass(), "allowLoadLocalInfile=true"); } @Override public JsonNode toJdbcConfig(final JsonNode config) { - final List additionalParameters = new ArrayList<>(); - - if (!config.has("ssl") || config.get("ssl").asBoolean()) { - additionalParameters.add("useSSL=true"); - additionalParameters.add("requireSSL=true"); - additionalParameters.add("verifyServerCertificate=false"); - } + final List additionalParameters = getAdditionalParameters(config); final StringBuilder jdbcUrl = new StringBuilder(String.format("jdbc:mysql://%s:%s/%s", - config.get("host").asText(), - config.get("port").asText(), - config.get("database").asText())); + config.get(HOST_KEY).asText(), + config.get(PORT_KEY).asText(), + config.get(DATABASE_KEY).asText())); // zero dates by default cannot be parsed into java date objects (they will throw an error) // in addition, users don't always have agency in fixing them e.g: maybe they don't own the database // and can't // remove zero date values. 
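    // (for example, MySQL Connector/J would otherwise throw when it tries to map a zero date such as
    // '0000-00-00 00:00:00' to a java.sql.Timestamp)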
// since zero dates are placeholders, we convert them to null by default - jdbcUrl.append("?zeroDateTimeBehavior=convertToNull"); if (!additionalParameters.isEmpty()) { - jdbcUrl.append("&"); - additionalParameters.forEach(x -> jdbcUrl.append(x).append("&")); + jdbcUrl.append("?"); + jdbcUrl.append(String.join("&", additionalParameters)); } final ImmutableMap.Builder configBuilder = ImmutableMap.builder() - .put("username", config.get("username").asText()) - .put("jdbc_url", jdbcUrl.toString()); + .put(USERNAME_KEY, config.get(USERNAME_KEY).asText()) + .put(JDBC_URL_KEY, jdbcUrl.toString()); - if (config.has("password")) { - configBuilder.put("password", config.get("password").asText()); + if (config.has(PASSWORD_KEY)) { + configBuilder.put(PASSWORD_KEY, config.get(PASSWORD_KEY).asText()); } return Jsons.jsonNode(configBuilder.build()); } + private List getAdditionalParameters(final JsonNode config) { + final Map customParameters = getCustomJdbcParameters(config); + + if (useSSL(config)) { + return convertToJdbcStrings(customParameters, MoreMaps.merge(DEFAULT_JDBC_PARAMETERS, SSL_JDBC_PARAMETERS)); + } else { + return convertToJdbcStrings(customParameters, DEFAULT_JDBC_PARAMETERS); + } + } + + private List convertToJdbcStrings(final Map customParameters, final Map defaultParametersMap) { + assertCustomParametersDontOverwriteDefaultParameters(customParameters, defaultParametersMap); + return Streams.concat(Stream.of(customParameters, defaultParametersMap)) + .map(Map::entrySet) + .flatMap(Collection::stream) + .map(entry -> formatParameter(entry.getKey(), entry.getValue())) + .collect(Collectors.toList()); + } + + private void assertCustomParametersDontOverwriteDefaultParameters(final Map customParameters, + final Map defaultParameters) { + for (final String key : defaultParameters.keySet()) { + if (customParameters.containsKey(key) && !Objects.equals(customParameters.get(key), defaultParameters.get(key))) { + throw new IllegalArgumentException("Cannot overwrite default JDBC parameter " + key); + } + } + } + + private Map getCustomJdbcParameters(final JsonNode config) { + final Map parameters = new HashMap<>(); + if (config.has(JDBC_URL_PARAMS_KEY)) { + final String jdbcParams = config.get(JDBC_URL_PARAMS_KEY).asText(); + if (!jdbcParams.isBlank()) { + final String[] keyValuePairs = jdbcParams.split("&"); + for (final String kv : keyValuePairs) { + final String[] split = kv.split("="); + if (split.length == 2) { + parameters.put(split[0], split[1]); + } else { + throw new IllegalArgumentException( + "jdbc_url_params must be formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3). 
Got " + + jdbcParams); + } + } + } + } + return parameters; + } + + private boolean useSSL(final JsonNode config) { + return !config.has(SSL_KEY) || config.get(SSL_KEY).asBoolean(); + } + + static String formatParameter(final String key, final String value) { + return String.format("%s=%s", key, value); + } + public static void main(final String[] args) throws Exception { final Destination destination = MySQLDestination.sshWrappedDestination(); LOGGER.info("starting destination: {}", MySQLDestination.class); diff --git a/airbyte-integrations/connectors/destination-mysql/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-mysql/src/main/resources/spec.json index e705623e1f89..212dd541d84a 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-mysql/src/main/resources/spec.json @@ -3,12 +3,20 @@ "supportsIncremental": true, "supportsNormalization": true, "supportsDBT": true, - "supported_destination_sync_modes": ["overwrite", "append"], + "supported_destination_sync_modes": [ + "overwrite", + "append" + ], "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", "title": "MySQL Destination Spec", "type": "object", - "required": ["host", "port", "username", "database"], + "required": [ + "host", + "port", + "username", + "database" + ], "additionalProperties": true, "properties": { "host": { @@ -24,7 +32,9 @@ "minimum": 0, "maximum": 65536, "default": 3306, - "examples": ["3306"], + "examples": [ + "3306" + ], "order": 1 }, "database": { @@ -52,6 +62,12 @@ "type": "boolean", "default": true, "order": 5 + }, + "jdbc_url_params": { + "description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3).", + "title": "JDBC URL Params", + "type": "string", + "order": 6 } } } diff --git a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java index 409737eaf9de..0d1c1f18d1f7 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-mysql/src/test-integration/java/io/airbyte/integrations/destination/mysql/SshMySQLDestinationAcceptanceTest.java @@ -25,12 +25,13 @@ import org.apache.commons.lang3.RandomStringUtils; /** - * Abstract class that allows us to avoid duplicating testing logic for testing SSH with a key file - * or with a password. + * Abstract class that allows us to avoid duplicating testing logic for testing SSH with a key file or with a password. 
*/ public abstract class SshMySQLDestinationAcceptanceTest extends DestinationAcceptanceTest { private final ExtendedNameTransformer namingResolver = new MySQLNameTransformer(); + private final List HOST_KEY = List.of(MySQLDestination.HOST_KEY); + private final List PORT_KEY = List.of(MySQLDestination.PORT_KEY); private String schemaName; public abstract Path getConfigFilePath(); @@ -60,9 +61,9 @@ protected JsonNode getFailCheckConfig() { @Override protected List retrieveRecords(final TestDestinationEnv env, - final String streamName, - final String namespace, - final JsonNode streamSchema) + final String streamName, + final String namespace, + final JsonNode streamSchema) throws Exception { return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namespace) .stream() @@ -87,8 +88,8 @@ protected boolean implementsNamespaces() { @Override protected List retrieveNormalizedRecords(final TestDestinationEnv env, - final String streamName, - final String namespace) + final String streamName, + final String namespace) throws Exception { final var tableName = namingResolver.getIdentifier(streamName); final String schema = namespace != null ? namingResolver.getIdentifier(namespace) : namingResolver.getIdentifier(schemaName); @@ -121,8 +122,8 @@ private List retrieveRecordsFromTable(final String tableName, final St final var schema = schemaName == null ? this.schemaName : schemaName; return SshTunnel.sshWrap( getConfig(), - MySQLDestination.HOST_KEY, - MySQLDestination.PORT_KEY, + HOST_KEY, + PORT_KEY, (CheckedFunction, Exception>) mangledConfig -> getDatabaseFromConfig(mangledConfig) .query( ctx -> ctx @@ -140,8 +141,8 @@ protected void setup(final TestDestinationEnv testEnv) throws Exception { final var config = getConfig(); SshTunnel.sshWrap( config, - MySQLDestination.HOST_KEY, - MySQLDestination.PORT_KEY, + HOST_KEY, + PORT_KEY, mangledConfig -> { getDatabaseFromConfig(mangledConfig).query(ctx -> ctx.fetch(String.format("CREATE DATABASE %s;", schemaName))); }); @@ -151,8 +152,8 @@ protected void setup(final TestDestinationEnv testEnv) throws Exception { protected void tearDown(final TestDestinationEnv testEnv) throws Exception { SshTunnel.sshWrap( getConfig(), - MySQLDestination.HOST_KEY, - MySQLDestination.PORT_KEY, + HOST_KEY, + PORT_KEY, mangledConfig -> { getDatabaseFromConfig(mangledConfig).query(ctx -> ctx.fetch(String.format("DROP DATABASE %s", schemaName))); }); diff --git a/airbyte-integrations/connectors/destination-mysql/src/test/java/io/airbyte/integrations/destination/mysql/MySQLDestinationTest.java b/airbyte-integrations/connectors/destination-mysql/src/test/java/io/airbyte/integrations/destination/mysql/MySQLDestinationTest.java new file mode 100644 index 000000000000..aad36914cf40 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mysql/src/test/java/io/airbyte/integrations/destination/mysql/MySQLDestinationTest.java @@ -0,0 +1,136 @@ +package io.airbyte.integrations.destination.mysql; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.spy; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.map.MoreMaps; +import java.util.Map; +import java.util.Map.Entry; +import org.junit.jupiter.api.Test; + +public class MySQLDestinationTest { + + private MySQLDestination 
getDestination() { + final MySQLDestination result = spy(MySQLDestination.class); + return result; + } + + private JsonNode buildConfigNoJdbcParameters() { + final JsonNode config = Jsons.jsonNode(ImmutableMap.of( + "host", "localhost", + "port", 1337, + "username", "user", + "database", "db" + )); + return config; + } + + private JsonNode buildConfigWithExtraJdbcParameters(final String extraParam) { + final JsonNode config = Jsons.jsonNode(ImmutableMap.of( + "host", "localhost", + "port", 1337, + "username", "user", + "database", "db", + "jdbc_url_params", extraParam + )); + return config; + } + + private JsonNode buildConfigWithExtraJdbcParametersWithNoSsl(final String extraParam) { + final JsonNode config = Jsons.jsonNode(ImmutableMap.of( + "host", "localhost", + "port", 1337, + "username", "user", + "database", "db", + "ssl", false, + "jdbc_url_params", extraParam + )); + return config; + } + + private JsonNode buildConfigNoExtraJdbcParametersWithoutSsl() { + final JsonNode config = Jsons.jsonNode(ImmutableMap.of( + "host", "localhost", + "port", 1337, + "username", "user", + "database", "db", + "ssl", false + )); + return config; + } + + @Test + void testNoExtraParams() { + final JsonNode jdbcConfig = getDestination().toJdbcConfig(buildConfigNoJdbcParameters()); + final String url = jdbcConfig.get("jdbc_url").asText(); + assertEquals("jdbc:mysql://localhost:1337/db?verifyServerCertificate=false&zeroDateTimeBehavior=convertToNull&requireSSL=true&useSSL=true", url); + } + + @Test + void testEmptyExtraParams() { + final JsonNode jdbcConfig = getDestination().toJdbcConfig(buildConfigWithExtraJdbcParameters("")); + final String url = jdbcConfig.get("jdbc_url").asText(); + assertEquals("jdbc:mysql://localhost:1337/db?verifyServerCertificate=false&zeroDateTimeBehavior=convertToNull&requireSSL=true&useSSL=true", url); + } + + @Test + void testExtraParams() { + final String extraParam = "key1=value1&key2=value2&key3=value3"; + final JsonNode jdbcConfig = getDestination().toJdbcConfig(buildConfigWithExtraJdbcParameters(extraParam)); + final String url = jdbcConfig.get("jdbc_url").asText(); + assertEquals( + "jdbc:mysql://localhost:1337/db?key1=value1&key2=value2&key3=value3&verifyServerCertificate=false&zeroDateTimeBehavior=convertToNull&requireSSL=true&useSSL=true", + url); + } + + @Test + void testExtraParamsWithDefaultParameter() { + final Map allDefaultParameters = MoreMaps.merge(MySQLDestination.SSL_JDBC_PARAMETERS, + MySQLDestination.DEFAULT_JDBC_PARAMETERS); + for (final Entry entry : allDefaultParameters.entrySet()) { + final String identicalParameter = MySQLDestination.formatParameter(entry.getKey(), entry.getValue()); + final String overridingParameter = MySQLDestination.formatParameter(entry.getKey(), "DIFFERENT_VALUE"); + + // Do not throw an exception if the values are equal + assertDoesNotThrow(() -> + getDestination().toJdbcConfig(buildConfigWithExtraJdbcParameters(identicalParameter)).get("jdbc_url").asText() + ); + // Throw an exception if the values are different + assertThrows(IllegalArgumentException.class, () -> + getDestination().toJdbcConfig(buildConfigWithExtraJdbcParameters(overridingParameter)) + ); + } + } + + @Test + void testExtraParameterNoSsl() { + final String extraParam = "key1=value1&key2=value2&key3=value3"; + final JsonNode jdbcConfig = getDestination().toJdbcConfig(buildConfigWithExtraJdbcParametersWithNoSsl(extraParam)); + final String url = jdbcConfig.get("jdbc_url").asText(); + assertEquals( + 
"jdbc:mysql://localhost:1337/db?key1=value1&key2=value2&key3=value3&zeroDateTimeBehavior=convertToNull", + url); + } + + @Test + void testNoExtraParameterNoSsl() { + final JsonNode jdbcConfig = getDestination().toJdbcConfig(buildConfigNoExtraJdbcParametersWithoutSsl()); + final String url = jdbcConfig.get("jdbc_url").asText(); + assertEquals( + "jdbc:mysql://localhost:1337/db?zeroDateTimeBehavior=convertToNull", + url); + } + + @Test + void testInvalidExtraParam() { + final String extraParam = "key1=value1&sdf&"; + assertThrows(IllegalArgumentException.class, () -> { + getDestination().toJdbcConfig(buildConfigWithExtraJdbcParameters(extraParam)); + }); + } +} diff --git a/docs/integrations/destinations/mysql.md b/docs/integrations/destinations/mysql.md index b85993fd8b7b..35486908b507 100644 --- a/docs/integrations/destinations/mysql.md +++ b/docs/integrations/destinations/mysql.md @@ -59,6 +59,16 @@ You should now have all the requirements needed to configure MySQL as a destinat * **Username** * **Password** * **Database** +* **jdbc_url_params** (Optional) + +### Default JDBC URL Parameters + +The following JDBC URL parameters are set by Airbyte and cannot be overridden by the `jdbc_url_params` field: + +* `useSSL=true` (unless `ssl` is set to false) +* `requireSSL=true` (unless `ssl` is set to false) +* `verifyServerCertificate=false` (unless `ssl` is set to false) +* `zeroDateTimeBehavior=convertToNull` ## Known Limitations @@ -95,6 +105,7 @@ Using this feature requires additional configuration, when creating the destinat | Version | Date | Pull Request | Subject | |:--------| :--- | :--- | :--- | +| 0.1.17 | 2022-02-16 | [10362](https://github.com/airbytehq/airbyte/pull/10362) | Add jdbc_url_params support for optional JDBC parameters | | 0.1.16 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | | 0.1.15 | 2021-12-01 | [8371](https://github.com/airbytehq/airbyte/pull/8371) | Fixed incorrect handling "\n" in ssh key | | 0.1.14 | 2021-11-08 | [#7719](https://github.com/airbytehq/airbyte/pull/7719) | Improve handling of wide rows by buffering records based on their byte size rather than their count | From c9bd5e9bbd7fab6d49b135943702539142757e7f Mon Sep 17 00:00:00 2001 From: VitaliiMaltsev <39538064+VitaliiMaltsev@users.noreply.github.com> Date: Thu, 17 Feb 2022 21:16:09 +0200 Subject: [PATCH 05/14] Destination Snowflake Execute COPY in parallel (#10212) * fix for jdk 17 * add parallel chunk copy S3 * add parallel chunk copy GCS * fixed checkstyle * refactoring * add unit tests * updated CHANGELOG * fixed S3 bucket path generation * refactoring * refactoring * fixed compilation error after merge * add multitheading into S3 and GCS stream copiers * fixed checkstyle * fixed checkstyle * update parallel copy with CompletableFuture * refactoring * add javadoc * bump version * update destination_specs.yaml Co-authored-by: vmaltsev --- .../seed/destination_definitions.yaml | 2 +- .../resources/seed/destination_specs.yaml | 2 +- .../jdbc/copy/gcs/GcsStreamCopier.java | 28 +++++--- .../jdbc/copy/s3/S3StreamCopier.java | 8 +-- .../destination-snowflake/Dockerfile | 4 +- .../snowflake/SnowflakeGcsStreamCopier.java | 48 +++++++++++-- .../SnowflakeParallelCopyStreamCopier.java | 56 +++++++++++++++ .../snowflake/SnowflakeS3StreamCopier.java | 56 ++++++++++++--- .../SnowflakeGCSStreamCopierTest.java | 68 +++++++++++++++++++ .../SnowflakeS3StreamCopierTest.java | 19 ++++-- .../source_smartsheets/spec.json | 3 +- 
docs/integrations/destinations/snowflake.md | 1 + 12 files changed, 251 insertions(+), 44 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeParallelCopyStreamCopier.java create mode 100644 airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeGCSStreamCopierTest.java diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 3c2582aff4cb..d055a53ed021 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -185,7 +185,7 @@ - name: Snowflake destinationDefinitionId: 424892c4-daac-4491-b35d-c6688ba547ba dockerRepository: airbyte/destination-snowflake - dockerImageTag: 0.4.12 + dockerImageTag: 0.4.13 documentationUrl: https://docs.airbyte.io/integrations/destinations/snowflake icon: snowflake.svg - name: MariaDB ColumnStore diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 27c4e373924d..dd5f3182b639 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -3825,7 +3825,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-snowflake:0.4.12" +- dockerImage: "airbyte/destination-snowflake:0.4.13" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" connectionSpecification: diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.java index d7f5956b5c29..1c1f7d06ce60 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/gcs/GcsStreamCopier.java @@ -10,6 +10,7 @@ import com.google.cloud.storage.BlobInfo; import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageOptions; +import com.google.common.annotations.VisibleForTesting; import io.airbyte.commons.json.Jsons; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.destination.ExtendedNameTransformer; @@ -50,21 +51,20 @@ public abstract class GcsStreamCopier implements StreamCopier { // QUERY_TIMEOUT when // the records from the file are copied to the staging table. 
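  // MAX_PARTS_PER_FILE bounds how many record batches are appended to a single staging file before the
  // StagingFilenameGenerator rolls over to a new one.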
public static final int MAX_PARTS_PER_FILE = 1000; - + protected final GcsConfig gcsConfig; + protected final String tmpTableName; + protected final String schemaName; + protected final String streamName; + protected final JdbcDatabase db; + protected final Set gcsStagingFiles = new HashSet<>(); + protected final String stagingFolder; + protected StagingFilenameGenerator filenameGenerator; private final Storage storageClient; - private final GcsConfig gcsConfig; - private final String tmpTableName; private final DestinationSyncMode destSyncMode; - private final String schemaName; - private final String streamName; - private final JdbcDatabase db; private final ExtendedNameTransformer nameTransformer; private final SqlOperations sqlOperations; - private final Set gcsStagingFiles = new HashSet<>(); private final HashMap channels = new HashMap<>(); private final HashMap csvPrinters = new HashMap<>(); - private final String stagingFolder; - protected StagingFilenameGenerator filenameGenerator; public GcsStreamCopier(final String stagingFolder, final DestinationSyncMode destSyncMode, @@ -234,6 +234,16 @@ public static Storage getStorageClient(final GcsConfig gcsConfig) throws IOExcep .getService(); } + @VisibleForTesting + public String getTmpTableName() { + return tmpTableName; + } + + @VisibleForTesting + public Set getGcsStagingFiles() { + return gcsStagingFiles; + } + public abstract void copyGcsCsvFileIntoTable(JdbcDatabase database, String gcsFileLocation, String schema, diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java index 94b02fa4da44..cde82c6ec2a4 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java @@ -41,16 +41,16 @@ public abstract class S3StreamCopier implements StreamCopier { protected final AmazonS3 s3Client; protected final S3DestinationConfig s3Config; protected final String tmpTableName; - private final DestinationSyncMode destSyncMode; protected final String schemaName; protected final String streamName; protected final JdbcDatabase db; + protected final ConfiguredAirbyteStream configuredAirbyteStream; + protected final String stagingFolder; + protected final Map stagingWritersByFile = new HashMap<>(); + private final DestinationSyncMode destSyncMode; private final ExtendedNameTransformer nameTransformer; private final SqlOperations sqlOperations; - private final ConfiguredAirbyteStream configuredAirbyteStream; private final Timestamp uploadTime; - protected final String stagingFolder; - protected final Map stagingWritersByFile = new HashMap<>(); protected final Set activeStagingWriterFileNames = new HashSet<>(); protected final Set stagingFileNames = new LinkedHashSet<>(); private final boolean purgeStagingData; diff --git a/airbyte-integrations/connectors/destination-snowflake/Dockerfile b/airbyte-integrations/connectors/destination-snowflake/Dockerfile index ef9d102b78ac..5fb9f66fb850 100644 --- a/airbyte-integrations/connectors/destination-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/destination-snowflake/Dockerfile @@ -18,8 +18,8 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar 
xf ${APPLICATION}.tar --strip-components=1 -ENV APPLICATION_VERSION 0.4.12 +ENV APPLICATION_VERSION 0.4.13 ENV ENABLE_SENTRY true -LABEL io.airbyte.version=0.4.12 +LABEL io.airbyte.version=0.4.13 LABEL io.airbyte.name=airbyte/destination-snowflake diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeGcsStreamCopier.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeGcsStreamCopier.java index 41fca1d5fc63..61a0d7d35724 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeGcsStreamCopier.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeGcsStreamCopier.java @@ -4,7 +4,11 @@ package io.airbyte.integrations.destination.snowflake; +import static io.airbyte.integrations.destination.snowflake.SnowflakeS3StreamCopier.MAX_FILES_PER_COPY; + import com.google.cloud.storage.Storage; +import com.google.common.collect.Lists; +import io.airbyte.commons.lang.Exceptions; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.destination.jdbc.SqlOperations; @@ -13,8 +17,14 @@ import io.airbyte.integrations.destination.jdbc.copy.gcs.GcsStreamCopier; import io.airbyte.protocol.models.DestinationSyncMode; import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SnowflakeGcsStreamCopier extends GcsStreamCopier implements SnowflakeParallelCopyStreamCopier { -public class SnowflakeGcsStreamCopier extends GcsStreamCopier { + private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeGcsStreamCopier.class); public SnowflakeGcsStreamCopier(final String stagingFolder, final DestinationSyncMode destSyncMode, @@ -30,6 +40,35 @@ public SnowflakeGcsStreamCopier(final String stagingFolder, this.filenameGenerator = stagingFilenameGenerator; } + @Override + public void copyStagingFileToTemporaryTable() throws Exception { + List> partitions = Lists.partition(new ArrayList<>(gcsStagingFiles), MAX_FILES_PER_COPY); + LOGGER.info("Starting parallel copy to tmp table: {} in destination for stream: {}, schema: {}. 
Chunks count {}", tmpTableName, streamName, + schemaName, partitions.size()); + + copyFilesInParallel(partitions); + LOGGER.info("Copy to tmp table {} in destination for stream {} complete.", tmpTableName, streamName); + } + + @Override + public void copyIntoStage(List files) { + + final var copyQuery = String.format( + "COPY INTO %s.%s FROM '%s' storage_integration = gcs_airbyte_integration " + + " file_format = (type = csv field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"') " + + "files = (" + generateFilesList(files) + " );", + schemaName, + tmpTableName, + generateBucketPath()); + + Exceptions.toRuntime(() -> db.execute(copyQuery)); + } + + @Override + public String generateBucketPath() { + return "gcs://" + gcsConfig.getBucketName() + "/" + stagingFolder + "/" + schemaName + "/"; + } + @Override public void copyGcsCsvFileIntoTable(final JdbcDatabase database, final String gcsFileLocation, @@ -37,13 +76,8 @@ public void copyGcsCsvFileIntoTable(final JdbcDatabase database, final String tableName, final GcsConfig gcsConfig) throws SQLException { - final var copyQuery = String.format( - "COPY INTO %s.%s FROM '%s' storage_integration = gcs_airbyte_integration file_format = (type = csv field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"');", - schema, - tableName, - gcsFileLocation); + throw new RuntimeException("Snowflake GCS Stream Copier should not copy individual files without use of a parallel copy"); - database.execute(copyQuery); } } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeParallelCopyStreamCopier.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeParallelCopyStreamCopier.java new file mode 100644 index 000000000000..abd495f26b8c --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeParallelCopyStreamCopier.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.snowflake; + +import java.util.List; +import java.util.StringJoiner; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; + +interface SnowflakeParallelCopyStreamCopier { + + /** + * Generates list of staging files. See more + * https://docs.snowflake.com/en/user-guide/data-load-considerations-load.html#lists-of-files + */ + default String generateFilesList(List files) { + StringJoiner joiner = new StringJoiner(","); + files.forEach(filename -> joiner.add("'" + filename.substring(filename.lastIndexOf("/") + 1) + "'")); + return joiner.toString(); + } + + /** + * Executes async copying of staging files.This method should block until the copy/upload has + * completed. 
+ */ + default void copyFilesInParallel(List> partitions) { + ExecutorService executorService = Executors.newFixedThreadPool(5); + List> futures = partitions.stream() + .map(partition -> CompletableFuture.runAsync(() -> copyIntoStage(partition), executorService)) + .collect(Collectors.toList()); + + try { + // wait until all futures ready + CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join(); + } catch (Exception e) { + throw new RuntimeException("Failed to copy files from stage to tmp table {}" + e); + } finally { + executorService.shutdown(); + } + } + + /** + * Copies staging files to the temporary table using statement + */ + void copyIntoStage(List files); + + /** + * Generates full bucket/container path to staging files + */ + String generateBucketPath(); + +} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java index d25e00a7675c..5a7d414aa63a 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java @@ -6,23 +6,33 @@ import com.amazonaws.services.s3.AmazonS3; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; +import io.airbyte.commons.lang.Exceptions; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.destination.jdbc.SqlOperations; import io.airbyte.integrations.destination.jdbc.copy.s3.S3CopyConfig; import io.airbyte.integrations.destination.jdbc.copy.s3.S3StreamCopier; import io.airbyte.integrations.destination.s3.S3DestinationConfig; +import io.airbyte.integrations.destination.s3.util.S3OutputPathHelper; import io.airbyte.protocol.models.ConfiguredAirbyteStream; import java.sql.SQLException; import java.sql.Timestamp; import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -public class SnowflakeS3StreamCopier extends S3StreamCopier { +public class SnowflakeS3StreamCopier extends S3StreamCopier implements SnowflakeParallelCopyStreamCopier { + + private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeS3StreamCopier.class); // From https://docs.aws.amazon.com/redshift/latest/dg/t_loading-tables-from-s3.html // "Split your load data files so that the files are about equal size, between 1 MB and 1 GB after // compression" public static final int MAX_PARTS_PER_FILE = 4; + public static final int MAX_FILES_PER_COPY = 1000; public SnowflakeS3StreamCopier(final String stagingFolder, final String schema, @@ -54,6 +64,7 @@ public SnowflakeS3StreamCopier(final String stagingFolder, final SqlOperations sqlOperations, final Timestamp uploadTime, final ConfiguredAirbyteStream configuredAirbyteStream) { + super(stagingFolder, schema, client, @@ -66,6 +77,38 @@ public SnowflakeS3StreamCopier(final String stagingFolder, MAX_PARTS_PER_FILE); } + @Override + public void copyStagingFileToTemporaryTable() throws Exception { + List> partitions = Lists.partition(new ArrayList<>(stagingWritersByFile.keySet()), MAX_FILES_PER_COPY); + LOGGER.info("Starting parallel copy 
to tmp table: {} in destination for stream: {}, schema: {}. Chunks count {}", tmpTableName, streamName, + schemaName, partitions.size()); + + copyFilesInParallel(partitions); + LOGGER.info("Copy to tmp table {} in destination for stream {} complete.", tmpTableName, streamName); + } + + @Override + public void copyIntoStage(List files) { + final var copyQuery = String.format( + "COPY INTO %s.%s FROM '%s' " + + "CREDENTIALS=(aws_key_id='%s' aws_secret_key='%s') " + + "file_format = (type = csv field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"') " + + "files = (" + generateFilesList(files) + " );", + schemaName, + tmpTableName, + generateBucketPath(), + s3Config.getAccessKeyId(), + s3Config.getSecretAccessKey()); + + Exceptions.toRuntime(() -> db.execute(copyQuery)); + } + + @Override + public String generateBucketPath() { + return "s3://" + s3Config.getBucketName() + "/" + + S3OutputPathHelper.getOutputPrefix(s3Config.getBucketPath(), configuredAirbyteStream.getStream()) + "/"; + } + @Override public void copyS3CsvFileIntoTable(final JdbcDatabase database, final String s3FileLocation, @@ -73,17 +116,8 @@ public void copyS3CsvFileIntoTable(final JdbcDatabase database, final String tableName, final S3DestinationConfig s3Config) throws SQLException { - final var copyQuery = String.format( - "COPY INTO %s.%s FROM '%s' " - + "CREDENTIALS=(aws_key_id='%s' aws_secret_key='%s') " - + "file_format = (type = csv field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"');", - schema, - tableName, - s3FileLocation, - s3Config.getAccessKeyId(), - s3Config.getSecretAccessKey()); + throw new RuntimeException("Snowflake Stream Copier should not copy individual files without use of a parallel copy"); - database.execute(copyQuery); } } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeGCSStreamCopierTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeGCSStreamCopierTest.java new file mode 100644 index 000000000000..d0b3cf3fa36b --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeGCSStreamCopierTest.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.snowflake; + +import static io.airbyte.integrations.destination.snowflake.SnowflakeS3StreamCopier.MAX_PARTS_PER_FILE; +import static org.mockito.Mockito.RETURNS_DEEP_STUBS; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + +import com.google.cloud.storage.Storage; +import com.google.common.collect.Lists; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.destination.ExtendedNameTransformer; +import io.airbyte.integrations.destination.jdbc.SqlOperations; +import io.airbyte.integrations.destination.jdbc.StagingFilenameGenerator; +import io.airbyte.integrations.destination.jdbc.copy.gcs.GcsConfig; +import io.airbyte.protocol.models.DestinationSyncMode; +import java.util.ArrayList; +import java.util.List; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class SnowflakeGCSStreamCopierTest { + + private JdbcDatabase db; + private SnowflakeGcsStreamCopier copier; + + @BeforeEach + public void setup() { + Storage storageClient = mock(Storage.class, RETURNS_DEEP_STUBS); + db = mock(JdbcDatabase.class); + SqlOperations sqlOperations = mock(SqlOperations.class); + + copier = new SnowflakeGcsStreamCopier( + "fake-staging-folder", + DestinationSyncMode.OVERWRITE, + "fake-schema", + "fake-stream", + storageClient, + db, + new GcsConfig("fake-project-id", "fake-bucket-name", "fake-credentials"), + new ExtendedNameTransformer(), + sqlOperations, + new StagingFilenameGenerator("fake-stream", 256L)); + } + + @Test + public void copiesCorrectFilesToTable() throws Exception { + for (int i = 0; i < MAX_PARTS_PER_FILE + 1; i++) { + copier.prepareStagingFile(); + } + + copier.copyStagingFileToTemporaryTable(); + List> partition = Lists.partition(new ArrayList<>(copier.getGcsStagingFiles()), 1000); + for (List files : partition) { + verify(db).execute(String.format( + "COPY INTO fake-schema.%s FROM '%s' storage_integration = gcs_airbyte_integration " + + " file_format = (type = csv field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"') " + + "files = (" + copier.generateFilesList(files) + " );", + copier.getTmpTableName(), + copier.generateBucketPath())); + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java index 77913d82e6fa..7c3879edca57 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java @@ -10,6 +10,7 @@ import static org.mockito.Mockito.verify; import com.amazonaws.services.s3.AmazonS3Client; +import com.google.common.collect.Lists; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.destination.ExtendedNameTransformer; import io.airbyte.integrations.destination.jdbc.SqlOperations; @@ -20,6 +21,8 @@ import io.airbyte.protocol.models.DestinationSyncMode; import java.sql.Timestamp; import java.time.Instant; +import java.util.ArrayList; +import java.util.List; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -76,13 +79,15 @@ public void copiesCorrectFilesToTable() throws 
Exception { } copier.copyStagingFileToTemporaryTable(); - - for (String fileName : copier.getStagingWritersByFile().keySet()) { - verify(db).execute(String.format("COPY INTO fake-schema.%s FROM " - + "'s3://fake-bucket/%s'" - + " CREDENTIALS=(aws_key_id='fake-access-key-id' aws_secret_key='fake-secret-access-key') " - + "file_format = (type = csv field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"');", - copier.getTmpTableName(), fileName)); + List> partition = Lists.partition(new ArrayList<>(copier.getStagingWritersByFile().keySet()), 1000); + for (List files : partition) { + verify(db).execute(String.format( + "COPY INTO fake-schema.%s FROM '%s' " + + "CREDENTIALS=(aws_key_id='fake-access-key-id' aws_secret_key='fake-secret-access-key') " + + "file_format = (type = csv field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"') " + + "files = (" + copier.generateFilesList(files) + " );", + copier.getTmpTableName(), + copier.generateBucketPath())); } } diff --git a/airbyte-integrations/connectors/source-smartsheets/source_smartsheets/spec.json b/airbyte-integrations/connectors/source-smartsheets/source_smartsheets/spec.json index 63f28fb2cb68..57876a9a81a1 100644 --- a/airbyte-integrations/connectors/source-smartsheets/source_smartsheets/spec.json +++ b/airbyte-integrations/connectors/source-smartsheets/source_smartsheets/spec.json @@ -50,8 +50,7 @@ "complete_oauth_server_output_specification": { "type": "object", "additionalProperties": false, - "properties": { - } + "properties": {} } } } diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index d6ab229ed517..98f5814956fb 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -224,6 +224,7 @@ Finally, you need to add read/write permissions to your bucket with that email. | Version | Date | Pull Request | Subject | |:--------|:-----------| :----- | :------ | +| 0.4.13 | 2022-02-16 | [\#10212](https://github.com/airbytehq/airbyte/pull/10212) | Execute COPY command in parallel for S3 and GCS staging | | 0.4.12 | 2022-02-15 | [\#10342](https://github.com/airbytehq/airbyte/pull/10342) | Use connection pool, and fix connection leak. | | 0.4.11 | 2022-02-14 | [\#9920](https://github.com/airbytehq/airbyte/pull/9920) | Updated the size of staging files for S3 staging. Also, added closure of S3 writers to staging files when data has been written to an staging file. | | 0.4.10 | 2022-02-14 | [\#10297](https://github.com/airbytehq/airbyte/pull/10297) | Halve the record buffer size to reduce memory consumption. 
| From 049a11b2bcfbd3eb7bc49ac1803c0fe69bb5aa33 Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Thu, 17 Feb 2022 12:55:35 -0800 Subject: [PATCH 06/14] =?UTF-8?q?=F0=9F=8E=89=20Snowflake=20destination:?= =?UTF-8?q?=20reduce=20memory=20footprint=20(#10394)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add detailed logging for flushing * Log sentry transaction event id * Adjust logging * Log memory usage * Add jvm monitoring * Remove log * Remove port 9010 * Remove host network mode * Sample record size * Remove profiling code * Add unit tests * Use average estimation * Rename variable * Format code * Bump version * Revert unnecessary change * Update doc * Fix format * Bump version in seed --- .../airbyte_cdk/sources/streams/core.py | 2 +- .../io/airbyte/commons/bytes/ByteUtils.java | 23 ------ .../airbyte/commons/bytes/ByteUtilsTest.java | 24 ------ .../seed/destination_definitions.yaml | 2 +- .../resources/seed/destination_specs.yaml | 2 +- .../integrations/base/IntegrationRunner.java | 1 + .../BufferedStreamConsumer.java | 18 ++--- .../RecordSizeEstimator.java | 79 +++++++++++++++++++ .../BufferedStreamConsumerTest.java | 3 +- .../RecordSizeEstimatorTest.java | 65 +++++++++++++++ .../destination-snowflake/Dockerfile | 4 +- .../SnowflakeStagingSqlOperations.java | 2 +- .../workers/DefaultReplicationWorker.java | 1 + .../workers/process/DockerProcessFactory.java | 1 + docs/integrations/destinations/snowflake.md | 1 + 15 files changed, 164 insertions(+), 64 deletions(-) delete mode 100644 airbyte-commons/src/main/java/io/airbyte/commons/bytes/ByteUtils.java delete mode 100644 airbyte-commons/src/test/java/io/airbyte/commons/bytes/ByteUtilsTest.java create mode 100644 airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordSizeEstimator.java create mode 100644 airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordSizeEstimatorTest.java diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/core.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/core.py index fd6b32324128..f419c4422749 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/core.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/core.py @@ -171,7 +171,7 @@ def state_checkpoint_interval(self) -> Optional[int]: """ return None - @deprecated(version='0.1.49', reason="You should use explicit state property instead, see IncrementalMixin docs.") + @deprecated(version="0.1.49", reason="You should use explicit state property instead, see IncrementalMixin docs.") def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]): """Override to extract state from the latest record. Needed to implement incremental sync. diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/bytes/ByteUtils.java b/airbyte-commons/src/main/java/io/airbyte/commons/bytes/ByteUtils.java deleted file mode 100644 index 1ba95a5b735d..000000000000 --- a/airbyte-commons/src/main/java/io/airbyte/commons/bytes/ByteUtils.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.commons.bytes; - -import java.nio.charset.StandardCharsets; - -public class ByteUtils { - - /** - * Encodes this String into a sequence of bytes using the given charset. UTF-8 is based on 8-bit - * code units. Each character is encoded as 1 to 4 bytes. 
The first 128 Unicode code points are - * encoded as 1 byte in UTF-8. - * - * @param s - string where charset length will be counted - * @return length of bytes for charset - */ - public static long getSizeInBytesForUTF8CharSet(String s) { - return s.getBytes(StandardCharsets.UTF_8).length; - } - -} diff --git a/airbyte-commons/src/test/java/io/airbyte/commons/bytes/ByteUtilsTest.java b/airbyte-commons/src/test/java/io/airbyte/commons/bytes/ByteUtilsTest.java deleted file mode 100644 index f8f4c1d8d9bd..000000000000 --- a/airbyte-commons/src/test/java/io/airbyte/commons/bytes/ByteUtilsTest.java +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2021 Airbyte, Inc., all rights reserved. - */ - -package io.airbyte.commons.bytes; - -import static org.junit.jupiter.api.Assertions.*; - -import java.nio.charset.StandardCharsets; -import org.apache.commons.lang3.RandomStringUtils; -import org.junit.jupiter.api.Test; - -class ByteUtilsTest { - - @Test - public void testIt() { - for (int i = 1; i < 1000; i++) { - String s = RandomStringUtils.random(i); - // for now the formula is just hardcoded to str length * 2 - assertEquals(s.getBytes(StandardCharsets.UTF_8).length, ByteUtils.getSizeInBytesForUTF8CharSet(s), "The bytes length should be equal."); - } - } - -} diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index d055a53ed021..ffdedd382768 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -185,7 +185,7 @@ - name: Snowflake destinationDefinitionId: 424892c4-daac-4491-b35d-c6688ba547ba dockerRepository: airbyte/destination-snowflake - dockerImageTag: 0.4.13 + dockerImageTag: 0.4.14 documentationUrl: https://docs.airbyte.io/integrations/destinations/snowflake icon: snowflake.svg - name: MariaDB ColumnStore diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index dd5f3182b639..9ebd2407a385 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -3825,7 +3825,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-snowflake:0.4.13" +- dockerImage: "airbyte/destination-snowflake:0.4.14" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" connectionSpecification: diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/IntegrationRunner.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/IntegrationRunner.java index f1cf6074daed..fa007180851c 100644 --- a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/IntegrationRunner.java +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/IntegrationRunner.java @@ -85,6 +85,7 @@ public void run(final String[] args) throws Exception { integration.getClass().getSimpleName(), parsed.getCommand().toString(), true); + LOGGER.info("Sentry transaction event: {}", transaction.getEventId()); try { runInternal(transaction, parsed); transaction.finish(SpanStatus.OK); diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java 
b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java index d670455e7b45..58dd939a7cce 100644 --- a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java @@ -6,7 +6,6 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.base.Preconditions; -import io.airbyte.commons.bytes.ByteUtils; import io.airbyte.commons.concurrency.VoidCallable; import io.airbyte.commons.functional.CheckedConsumer; import io.airbyte.commons.functional.CheckedFunction; @@ -85,6 +84,7 @@ public class BufferedStreamConsumer extends FailureTrackingAirbyteMessageConsume private final Map streamToIgnoredRecordCount; private final Consumer outputRecordCollector; private final long maxQueueSizeInBytes; + private final RecordSizeEstimator recordSizeEstimator; private long bufferSizeInBytes; private Map> streamBuffer; private String fileName; @@ -128,6 +128,7 @@ public BufferedStreamConsumer(final Consumer outputRecordCollect this.bufferSizeInBytes = 0; this.streamToIgnoredRecordCount = new HashMap<>(); this.streamBuffer = new HashMap<>(); + this.recordSizeEstimator = new RecordSizeEstimator(); } @Override @@ -158,13 +159,9 @@ protected void acceptTracked(final AirbyteMessage message) throws Exception { return; } - // TODO use a more efficient way to compute bytes that doesn't require double serialization (records - // are serialized again when writing to - // the destination - final long messageSizeInBytes = ByteUtils.getSizeInBytesForUTF8CharSet(Jsons.serialize(recordMessage.getData())); + final long messageSizeInBytes = recordSizeEstimator.getEstimatedByteSize(recordMessage); if (bufferSizeInBytes + messageSizeInBytes > maxQueueSizeInBytes) { - LOGGER.info("Flushing buffer..."); - flushQueueToDestination(bufferSizeInBytes); + flushQueueToDestination(); bufferSizeInBytes = 0; } @@ -180,9 +177,12 @@ protected void acceptTracked(final AirbyteMessage message) throws Exception { } - private void flushQueueToDestination(long bufferSizeInBytes) throws Exception { + private void flushQueueToDestination() throws Exception { + LOGGER.info("Flushing buffer: {} bytes", bufferSizeInBytes); + AirbyteSentry.executeWithTracing("FlushBuffer", () -> { for (final Map.Entry> entry : streamBuffer.entrySet()) { + LOGGER.info("Flushing {}: {} records", entry.getKey().getName(), entry.getValue().size()); recordWriter.accept(entry.getKey(), entry.getValue()); if (checkAndRemoveRecordWriter != null) { fileName = checkAndRemoveRecordWriter.apply(entry.getKey(), fileName); @@ -215,7 +215,7 @@ protected void close(final boolean hasFailed) throws Exception { LOGGER.error("executing on failed close procedure."); } else { LOGGER.info("executing on success close procedure."); - flushQueueToDestination(bufferSizeInBytes); + flushQueueToDestination(); } try { diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordSizeEstimator.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordSizeEstimator.java new file mode 100644 index 000000000000..cceada786996 --- /dev/null +++ 
b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordSizeEstimator.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.buffered_stream_consumer; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.annotations.VisibleForTesting; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import java.util.HashMap; +import java.util.Map; + +/** + * This class estimate the byte size of the record message. To reduce memory footprint, 1) it + * assumes that a character is always four bytes, and 2) it only performs a sampling every N + * records. The size of the samples are averaged together to protect the estimation against + * outliers. + */ +public class RecordSizeEstimator { + + // by default, perform one estimation for every 20 records + private static final int DEFAULT_SAMPLE_BATCH_SIZE = 20; + + // latest estimated record message size for each stream + private final Map streamRecordSizeEstimation; + // number of record messages until next real sampling for each stream + private final Map streamSampleCountdown; + // number of record messages + private final int sampleBatchSize; + + /** + * The estimator will perform a real calculation once per sample batch. The size of the batch is + * determined by {@code sampleBatchSize}. + */ + public RecordSizeEstimator(final int sampleBatchSize) { + this.streamRecordSizeEstimation = new HashMap<>(); + this.streamSampleCountdown = new HashMap<>(); + this.sampleBatchSize = sampleBatchSize; + } + + public RecordSizeEstimator() { + this(DEFAULT_SAMPLE_BATCH_SIZE); + } + + public long getEstimatedByteSize(final AirbyteRecordMessage recordMessage) { + final String stream = recordMessage.getStream(); + final Integer countdown = streamSampleCountdown.get(stream); + + // this is a new stream; initialize its estimation + if (countdown == null) { + final long byteSize = getStringByteSize(recordMessage.getData()); + streamRecordSizeEstimation.put(stream, byteSize); + streamSampleCountdown.put(stream, sampleBatchSize - 1); + return byteSize; + } + + // this stream needs update; compute a new estimation + if (countdown <= 0) { + final long prevMeanByteSize = streamRecordSizeEstimation.get(stream); + final long currentByteSize = getStringByteSize(recordMessage.getData()); + final long newMeanByteSize = prevMeanByteSize / 2 + currentByteSize / 2; + streamRecordSizeEstimation.put(stream, newMeanByteSize); + streamSampleCountdown.put(stream, sampleBatchSize - 1); + return newMeanByteSize; + } + + // this stream does not need update; return current estimation + streamSampleCountdown.put(stream, countdown - 1); + return streamRecordSizeEstimation.get(stream); + } + + @VisibleForTesting + static long getStringByteSize(final JsonNode data) { + // assume UTF-8 encoding, and each char is 4 bytes long + return Jsons.serialize(data).length() * 4L; + } + +} diff --git a/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java index ee52350bc198..fea5e1c5d7e5 100644 --- a/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java +++ 
b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java @@ -16,7 +16,6 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import io.airbyte.commons.bytes.ByteUtils; import io.airbyte.commons.concurrency.VoidCallable; import io.airbyte.commons.functional.CheckedConsumer; import io.airbyte.commons.functional.CheckedFunction; @@ -316,7 +315,7 @@ private static List generateRecords(final long targetSizeInBytes long bytesCounter = 0; for (int i = 0;; i++) { JsonNode payload = Jsons.jsonNode(ImmutableMap.of("id", RandomStringUtils.randomAlphabetic(7), "name", "human " + String.format("%8d", i))); - long sizeInBytes = ByteUtils.getSizeInBytesForUTF8CharSet(Jsons.serialize(payload)); + long sizeInBytes = RecordSizeEstimator.getStringByteSize(payload); bytesCounter += sizeInBytes; AirbyteMessage airbyteMessage = new AirbyteMessage() .withType(Type.RECORD) diff --git a/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordSizeEstimatorTest.java b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordSizeEstimatorTest.java new file mode 100644 index 000000000000..64e21425ef7b --- /dev/null +++ b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordSizeEstimatorTest.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.buffered_stream_consumer; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import org.junit.jupiter.api.Test; + +class RecordSizeEstimatorTest { + + private static final JsonNode DATA_0 = Jsons.deserialize("{}"); + private static final JsonNode DATA_1 = Jsons.deserialize("{ \"field1\": true }"); + private static final JsonNode DATA_2 = Jsons.deserialize("{ \"field1\": 10000 }"); + private static final long DATA_0_SIZE = RecordSizeEstimator.getStringByteSize(DATA_0); + private static final long DATA_1_SIZE = RecordSizeEstimator.getStringByteSize(DATA_1); + private static final long DATA_2_SIZE = RecordSizeEstimator.getStringByteSize(DATA_2); + + @Test + public void testPeriodicSampling() { + // the estimate performs a size sampling every 3 records + final RecordSizeEstimator sizeEstimator = new RecordSizeEstimator(3); + final String stream = "stream"; + final AirbyteRecordMessage record0 = new AirbyteRecordMessage().withStream(stream).withData(DATA_0); + final AirbyteRecordMessage record1 = new AirbyteRecordMessage().withStream(stream).withData(DATA_1); + final AirbyteRecordMessage record2 = new AirbyteRecordMessage().withStream(stream).withData(DATA_2); + + // sample record message 1 + final long firstEstimation = DATA_1_SIZE; + assertEquals(firstEstimation, sizeEstimator.getEstimatedByteSize(record1)); + // next two calls return the first sampling result + assertEquals(firstEstimation, sizeEstimator.getEstimatedByteSize(record0)); + assertEquals(firstEstimation, sizeEstimator.getEstimatedByteSize(record0)); + + // sample record message 2 + final long secondEstimation = firstEstimation / 2 + DATA_2_SIZE / 2; + assertEquals(secondEstimation, 
sizeEstimator.getEstimatedByteSize(record2)); + // next two calls return the second sampling result + assertEquals(secondEstimation, sizeEstimator.getEstimatedByteSize(record0)); + assertEquals(secondEstimation, sizeEstimator.getEstimatedByteSize(record0)); + + // sample record message 1 + final long thirdEstimation = secondEstimation / 2 + DATA_1_SIZE / 2; + assertEquals(thirdEstimation, sizeEstimator.getEstimatedByteSize(record1)); + // next two calls return the first sampling result + assertEquals(thirdEstimation, sizeEstimator.getEstimatedByteSize(record0)); + assertEquals(thirdEstimation, sizeEstimator.getEstimatedByteSize(record0)); + } + + @Test + public void testDifferentEstimationPerStream() { + final RecordSizeEstimator sizeEstimator = new RecordSizeEstimator(); + final AirbyteRecordMessage record0 = new AirbyteRecordMessage().withStream("stream1").withData(DATA_0); + final AirbyteRecordMessage record1 = new AirbyteRecordMessage().withStream("stream2").withData(DATA_1); + final AirbyteRecordMessage record2 = new AirbyteRecordMessage().withStream("stream3").withData(DATA_2); + assertEquals(DATA_0_SIZE, sizeEstimator.getEstimatedByteSize(record0)); + assertEquals(DATA_1_SIZE, sizeEstimator.getEstimatedByteSize(record1)); + assertEquals(DATA_2_SIZE, sizeEstimator.getEstimatedByteSize(record2)); + } + +} diff --git a/airbyte-integrations/connectors/destination-snowflake/Dockerfile b/airbyte-integrations/connectors/destination-snowflake/Dockerfile index 5fb9f66fb850..5b0e807bd81b 100644 --- a/airbyte-integrations/connectors/destination-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/destination-snowflake/Dockerfile @@ -18,8 +18,8 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar RUN tar xf ${APPLICATION}.tar --strip-components=1 -ENV APPLICATION_VERSION 0.4.13 +ENV APPLICATION_VERSION 0.4.14 ENV ENABLE_SENTRY true -LABEL io.airbyte.version=0.4.13 +LABEL io.airbyte.version=0.4.14 LABEL io.airbyte.name=airbyte/destination-snowflake diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java index e11a5ec96b43..6ef030b42c5b 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeStagingSqlOperations.java @@ -26,7 +26,7 @@ public void insertRecordsInternal(final JdbcDatabase database, final List records, final String schemaName, final String stage) { - LOGGER.info("actual size of batch for staging: {}", records.size()); + LOGGER.info("Writing {} records to {}", records.size(), stage); if (records.isEmpty()) { return; diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/DefaultReplicationWorker.java b/airbyte-workers/src/main/java/io/airbyte/workers/DefaultReplicationWorker.java index b3ffca261565..66aee63c62df 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/DefaultReplicationWorker.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/DefaultReplicationWorker.java @@ -301,6 +301,7 @@ private static Runnable getReplicationRunnable(final AirbyteSource source, } } } + LOGGER.info("Total records read: {}", recordsRead); try { destination.notifyEndOfStream(); 
} catch (final Exception e) { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java b/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java index 6184690b5ad1..709a12372567 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/process/DockerProcessFactory.java @@ -101,6 +101,7 @@ public Process create(final String jobId, IOs.writeFile(jobRoot, file.getKey(), file.getValue()); } + LOGGER.info("Creating docker job ID: {}", jobId); final List cmd = Lists.newArrayList( "docker", "run", diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index 98f5814956fb..aa39e657f2b7 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -224,6 +224,7 @@ Finally, you need to add read/write permissions to your bucket with that email. | Version | Date | Pull Request | Subject | |:--------|:-----------| :----- | :------ | +| 0.4.14 | 2022-02-17 | [\#10394](https://github.com/airbytehq/airbyte/pull/10394) | Reduce memory footprint. | | 0.4.13 | 2022-02-16 | [\#10212](https://github.com/airbytehq/airbyte/pull/10212) | Execute COPY command in parallel for S3 and GCS staging | | 0.4.12 | 2022-02-15 | [\#10342](https://github.com/airbytehq/airbyte/pull/10342) | Use connection pool, and fix connection leak. | | 0.4.11 | 2022-02-14 | [\#9920](https://github.com/airbytehq/airbyte/pull/9920) | Updated the size of staging files for S3 staging. Also, added closure of S3 writers to staging files when data has been written to an staging file. | From 1ce230de2e8c74ace04479be1a70cf94090c0581 Mon Sep 17 00:00:00 2001 From: Malik Diarra Date: Thu, 17 Feb 2022 13:52:05 -0800 Subject: [PATCH 07/14] Explicit set final variables in BootloaderApp (#10422) --- .../java/io/airbyte/bootloader/BootloaderApp.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/airbyte-bootloader/src/main/java/io/airbyte/bootloader/BootloaderApp.java b/airbyte-bootloader/src/main/java/io/airbyte/bootloader/BootloaderApp.java index 147ea9c3420f..e1879919c5d0 100644 --- a/airbyte-bootloader/src/main/java/io/airbyte/bootloader/BootloaderApp.java +++ b/airbyte-bootloader/src/main/java/io/airbyte/bootloader/BootloaderApp.java @@ -55,10 +55,10 @@ public class BootloaderApp { private final Configs configs; private Runnable postLoadExecution; - private FeatureFlags featureFlags; + private final FeatureFlags featureFlags; @VisibleForTesting - public BootloaderApp(Configs configs, FeatureFlags featureFlags) { + public BootloaderApp(final Configs configs, final FeatureFlags featureFlags) { this.configs = configs; this.featureFlags = featureFlags; } @@ -71,7 +71,7 @@ public BootloaderApp(Configs configs, FeatureFlags featureFlags) { * @param configs * @param postLoadExecution */ - public BootloaderApp(Configs configs, Runnable postLoadExecution, FeatureFlags featureFlags) { + public BootloaderApp(final Configs configs, final Runnable postLoadExecution, final FeatureFlags featureFlags) { this.configs = configs; this.postLoadExecution = postLoadExecution; this.featureFlags = featureFlags; @@ -87,7 +87,7 @@ public BootloaderApp() { final ConfigPersistence configPersistence = DatabaseConfigPersistence.createWithValidation(configDatabase); configPersistence.loadData(YamlSeedConfigPersistence.getDefault()); LOGGER.info("Loaded seed data.."); - } catch (IOException e) { + 
} catch (final IOException e) { e.printStackTrace(); } }; @@ -134,7 +134,7 @@ public void load() throws Exception { LOGGER.info("Finished bootstrapping Airbyte environment.."); } - public static void main(String[] args) throws Exception { + public static void main(final String[] args) throws Exception { final var bootloader = new BootloaderApp(); bootloader.load(); } @@ -168,7 +168,8 @@ private static void createWorkspaceIfNoneExists(final ConfigRepository configRep configRepository.writeStandardWorkspace(workspace); } - private static void assertNonBreakingMigration(JobPersistence jobPersistence, AirbyteVersion airbyteVersion) throws IOException { + private static void assertNonBreakingMigration(final JobPersistence jobPersistence, final AirbyteVersion airbyteVersion) + throws IOException { // version in the database when the server main method is called. may be empty if this is the first // time the server is started. LOGGER.info("Checking illegal upgrade.."); @@ -221,7 +222,7 @@ private static void runFlywayMigration(final Configs configs, final Database con private static void cleanupZombies(final JobPersistence jobPersistence) throws IOException { final Configs configs = new EnvConfigs(); - WorkflowClient wfClient = + final WorkflowClient wfClient = WorkflowClient.newInstance(WorkflowServiceStubs.newInstance( WorkflowServiceStubsOptions.newBuilder().setTarget(configs.getTemporalHost()).build())); for (final Job zombieJob : jobPersistence.listJobsWithStatus(JobStatus.RUNNING)) { From a66d8be03a89b3322ab23518abb0e00d7b4ce366 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 17 Feb 2022 15:14:51 -0800 Subject: [PATCH 08/14] continue workflows on restarts (#10294) * fix normalization output processing in container orchestrator * add full scheduler v2 acceptance tests * speed up tests * fixes * clean up * wip handle worker restarts * only downtime during sync test not passing * commit temp * mostly cleaned up * add attempt count check * remove todo * switch all pending checks to running checks * use ++ * Update airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java Co-authored-by: Charles * Update airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/LauncherWorker.java Co-authored-by: Charles * add more context * remove unused arg * test on CI that no_retry is insufficient * revert back to orchestrator retry * test for retry logic * remove fialing test and switch back activity config to just no retry Co-authored-by: Charles --- .../ContainerOrchestratorApp.java | 13 +- airbyte-tests/build.gradle | 2 + .../test/acceptance/AcceptanceTests.java | 209 +++++++++++++++++- .../workers/temporal/TemporalUtils.java | 30 +++ .../ConnectionManagerWorkflowImpl.java | 6 +- .../workers/temporal/sync/LauncherWorker.java | 45 ++-- .../workers/temporal/TemporalUtilsTest.java | 127 ++++++++++- tools/bin/acceptance_test_kube_schedulerv2.sh | 2 +- 8 files changed, 412 insertions(+), 22 deletions(-) diff --git a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java index 501c1c4818aa..64149a0c5a04 100644 --- a/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java +++ b/airbyte-container-orchestrator/src/main/java/io/airbyte/container_orchestrator/ContainerOrchestratorApp.java @@ -53,6 +53,8 @@ @Slf4j public class 
ContainerOrchestratorApp { + public static final int MAX_SECONDS_TO_WAIT_FOR_FILE_COPY = 60; + private final String application; private final Map envMap; private final JobRunConfig jobRunConfig; @@ -155,10 +157,17 @@ public static void main(final String[] args) { // wait for config files to be copied final var successFile = Path.of(KubePodProcess.CONFIG_DIR, KubePodProcess.SUCCESS_FILE_NAME); + int secondsWaited = 0; - while (!successFile.toFile().exists()) { + while (!successFile.toFile().exists() && secondsWaited < MAX_SECONDS_TO_WAIT_FOR_FILE_COPY) { log.info("Waiting for config file transfers to complete..."); Thread.sleep(1000); + secondsWaited++; + } + + if (!successFile.toFile().exists()) { + log.error("Config files did not transfer within the maximum amount of time ({} seconds)!", MAX_SECONDS_TO_WAIT_FOR_FILE_COPY); + System.exit(1); } final var applicationName = JobOrchestrator.readApplicationName(); @@ -169,7 +178,7 @@ public static void main(final String[] args) { final var app = new ContainerOrchestratorApp(applicationName, envMap, jobRunConfig, kubePodInfo); app.run(); } catch (Throwable t) { - log.info("Orchestrator failed...", t); + log.error("Orchestrator failed...", t); // otherwise the pod hangs on closing System.exit(1); } diff --git a/airbyte-tests/build.gradle b/airbyte-tests/build.gradle index 6f2a572c9bc1..da90487a69f0 100644 --- a/airbyte-tests/build.gradle +++ b/airbyte-tests/build.gradle @@ -36,7 +36,9 @@ configurations { dependencies { implementation project(':airbyte-api') + implementation project(':airbyte-container-orchestrator') + implementation 'io.fabric8:kubernetes-client:5.3.1' implementation 'org.testcontainers:testcontainers:1.15.3' acceptanceTestsImplementation project(':airbyte-api') diff --git a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java index 3d8af6b62682..e7703f41bc7d 100644 --- a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java +++ b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java @@ -74,10 +74,13 @@ import io.airbyte.commons.lang.MoreBooleans; import io.airbyte.commons.resources.MoreResources; import io.airbyte.commons.util.MoreProperties; +import io.airbyte.container_orchestrator.ContainerOrchestratorApp; import io.airbyte.db.Database; import io.airbyte.db.Databases; import io.airbyte.test.airbyte_test_container.AirbyteTestContainer; import io.airbyte.test.utils.PostgreSQLContainerHelper; +import io.fabric8.kubernetes.client.DefaultKubernetesClient; +import io.fabric8.kubernetes.client.KubernetesClient; import java.io.File; import java.io.IOException; import java.net.Inet4Address; @@ -98,6 +101,9 @@ import java.util.Map; import java.util.Set; import java.util.UUID; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import org.jooq.JSONB; import org.jooq.Record; @@ -111,9 +117,12 @@ import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestMethodOrder; +import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.condition.DisabledIfEnvironmentVariable; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.slf4j.MDC; import org.testcontainers.containers.PostgreSQLContainer; import 
org.testcontainers.utility.MountableFile; @@ -140,6 +149,7 @@ public class AcceptanceTests { private static final Charset UTF8 = StandardCharsets.UTF_8; private static final boolean IS_KUBE = System.getenv().containsKey("KUBE"); + private static final boolean IS_CONTAINER_ORCHESTRATOR = System.getenv().containsKey("CONTAINER_ORCHESTRATOR"); private static final boolean IS_MINIKUBE = System.getenv().containsKey("IS_MINIKUBE"); private static final boolean IS_GKE = System.getenv().containsKey("IS_GKE"); private static final boolean USE_EXTERNAL_DEPLOYMENT = @@ -175,6 +185,8 @@ public class AcceptanceTests { private static FeatureFlags featureFlags; + private static KubernetesClient kubernetesClient = null; + @SuppressWarnings("UnstableApiUsage") @BeforeAll public static void init() throws URISyntaxException, IOException, InterruptedException { @@ -189,6 +201,10 @@ public static void init() throws URISyntaxException, IOException, InterruptedExc sourcePsql.start(); } + if (IS_KUBE) { + kubernetesClient = new DefaultKubernetesClient(); + } + // by default use airbyte deployment governed by a test container. if (!USE_EXTERNAL_DEPLOYMENT) { LOGGER.info("Using deployment of airbyte managed by test containers."); @@ -231,6 +247,7 @@ public void setup() throws ApiException, URISyntaxException, SQLException, IOExc // work in whatever default workspace is present. workspaceId = apiClient.getWorkspaceApi().listWorkspaces().getWorkspaces().get(0).getWorkspaceId(); + LOGGER.info("workspaceId = " + workspaceId); // log which connectors are being used. final SourceDefinitionRead sourceDef = apiClient.getSourceDefinitionApi() @@ -500,7 +517,7 @@ public void testCancelSync() throws Exception { waitForTemporalWorkflow(connectionId); } final JobInfoRead connectionSyncRead = apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); - waitForJob(apiClient.getJobsApi(), connectionSyncRead.getJob(), Set.of(JobStatus.PENDING)); + waitForJob(apiClient.getJobsApi(), connectionSyncRead.getJob(), Set.of(JobStatus.RUNNING)); final var resp = apiClient.getJobsApi().cancelJob(new JobIdRequestBody().id(connectionSyncRead.getJob().getId())); assertEquals(JobStatus.CANCELLED, resp.getJob().getStatus()); @@ -970,6 +987,196 @@ public void testFailureTimeout() throws Exception { } } + @Test + @Order(18) + @EnabledIfEnvironmentVariable(named = "CONTAINER_ORCHESTRATOR", + matches = "true") + public void testDowntimeDuringSync() throws Exception { + final String connectionName = "test-connection"; + final UUID sourceId = createPostgresSource().getSourceId(); + final UUID destinationId = createDestination().getDestinationId(); + final UUID operationId = createOperation().getOperationId(); + final AirbyteCatalog catalog = discoverSourceSchema(sourceId); + final SyncMode syncMode = SyncMode.FULL_REFRESH; + final DestinationSyncMode destinationSyncMode = DestinationSyncMode.OVERWRITE; + catalog.getStreams().forEach(s -> s.getConfig().syncMode(syncMode).destinationSyncMode(destinationSyncMode)); + final UUID connectionId = + createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); + + // Avoid Race condition with the new scheduler + if (featureFlags.usesNewScheduler()) { + waitForTemporalWorkflow(connectionId); + } + + final JobInfoRead connectionSyncRead = apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); + + Thread.sleep(5000); + + LOGGER.info("Scaling down workers..."); + 
kubernetesClient.apps().deployments().inNamespace("default").withName("airbyte-worker").scale(0); + Thread.sleep(1000); + + LOGGER.info("Scaling up workers..."); + kubernetesClient.apps().deployments().inNamespace("default").withName("airbyte-worker").scale(1); + + waitForSuccessfulJob(apiClient.getJobsApi(), connectionSyncRead.getJob()); + assertSourceAndDestinationDbInSync(false); + + final long numAttempts = apiClient.getJobsApi() + .getJobInfo(new JobIdRequestBody().id(connectionSyncRead.getJob().getId())) + .getAttempts() + .size(); + + // it should be able to accomplish the resume without an additional attempt! + assertEquals(1, numAttempts); + } + + @Test + @Order(19) + @EnabledIfEnvironmentVariable(named = "CONTAINER_ORCHESTRATOR", + matches = "true") + public void testCancelSyncWithInterruption() throws Exception { + final String connectionName = "test-connection"; + final UUID sourceId = createPostgresSource().getSourceId(); + final UUID destinationId = createDestination().getDestinationId(); + final UUID operationId = createOperation().getOperationId(); + final AirbyteCatalog catalog = discoverSourceSchema(sourceId); + final SyncMode syncMode = SyncMode.FULL_REFRESH; + final DestinationSyncMode destinationSyncMode = DestinationSyncMode.OVERWRITE; + catalog.getStreams().forEach(s -> s.getConfig().syncMode(syncMode).destinationSyncMode(destinationSyncMode)); + final UUID connectionId = + createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); + + // Avoid Race condition with the new scheduler + if (featureFlags.usesNewScheduler()) { + waitForTemporalWorkflow(connectionId); + } + final JobInfoRead connectionSyncRead = apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); + waitForJob(apiClient.getJobsApi(), connectionSyncRead.getJob(), Set.of(JobStatus.RUNNING)); + + Thread.sleep(5000); + + kubernetesClient.apps().deployments().inNamespace("default").withName("airbyte-worker").scale(0); + Thread.sleep(1000); + kubernetesClient.apps().deployments().inNamespace("default").withName("airbyte-worker").scale(1); + + final var resp = apiClient.getJobsApi().cancelJob(new JobIdRequestBody().id(connectionSyncRead.getJob().getId())); + assertEquals(JobStatus.CANCELLED, resp.getJob().getStatus()); + } + + @Test + @Order(20) + @Timeout(value = 5, + unit = TimeUnit.MINUTES) + @EnabledIfEnvironmentVariable(named = "CONTAINER_ORCHESTRATOR", + matches = "true") + public void testCuttingOffPodBeforeFilesTransfer() throws Exception { + final String connectionName = "test-connection"; + final UUID sourceId = createPostgresSource().getSourceId(); + final UUID destinationId = createDestination().getDestinationId(); + final UUID operationId = createOperation().getOperationId(); + final AirbyteCatalog catalog = discoverSourceSchema(sourceId); + final SyncMode syncMode = SyncMode.FULL_REFRESH; + final DestinationSyncMode destinationSyncMode = DestinationSyncMode.OVERWRITE; + catalog.getStreams().forEach(s -> s.getConfig().syncMode(syncMode).destinationSyncMode(destinationSyncMode)); + + LOGGER.info("Creating connection..."); + final UUID connectionId = + createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); + + LOGGER.info("Waiting for connection to be available in Temporal..."); + // Avoid Race condition with the new scheduler + if (featureFlags.usesNewScheduler()) { + waitForTemporalWorkflow(connectionId); + } + + LOGGER.info("Run manual 
sync..."); + final JobInfoRead connectionSyncRead = apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); + + LOGGER.info("Waiting for job to run..."); + waitForJob(apiClient.getJobsApi(), connectionSyncRead.getJob(), Set.of(JobStatus.RUNNING)); + + LOGGER.info("Scale down workers..."); + kubernetesClient.apps().deployments().inNamespace("default").withName("airbyte-worker").scale(0); + + LOGGER.info("Wait for worker scale down..."); + Thread.sleep(1000); + + LOGGER.info("Scale up workers..."); + kubernetesClient.apps().deployments().inNamespace("default").withName("airbyte-worker").scale(1); + + LOGGER.info("Waiting for worker timeout..."); + Thread.sleep(ContainerOrchestratorApp.MAX_SECONDS_TO_WAIT_FOR_FILE_COPY * 1000 + 1000); + + LOGGER.info("Waiting for job to retry and succeed..."); + waitForSuccessfulJob(apiClient.getJobsApi(), connectionSyncRead.getJob()); + } + + @Test + @Order(21) + @Timeout(value = 5, + unit = TimeUnit.MINUTES) + @EnabledIfEnvironmentVariable(named = "CONTAINER_ORCHESTRATOR", + matches = "true") + public void testCancelSyncWhenCancelledWhenWorkerIsNotRunning() throws Exception { + final String connectionName = "test-connection"; + final UUID sourceId = createPostgresSource().getSourceId(); + final UUID destinationId = createDestination().getDestinationId(); + final UUID operationId = createOperation().getOperationId(); + final AirbyteCatalog catalog = discoverSourceSchema(sourceId); + final SyncMode syncMode = SyncMode.FULL_REFRESH; + final DestinationSyncMode destinationSyncMode = DestinationSyncMode.OVERWRITE; + catalog.getStreams().forEach(s -> s.getConfig().syncMode(syncMode).destinationSyncMode(destinationSyncMode)); + + LOGGER.info("Creating connection..."); + final UUID connectionId = + createConnection(connectionName, sourceId, destinationId, List.of(operationId), catalog, null).getConnectionId(); + + LOGGER.info("Waiting for connection to be available in Temporal..."); + // Avoid Race condition with the new scheduler + if (featureFlags.usesNewScheduler()) { + waitForTemporalWorkflow(connectionId); + } + + LOGGER.info("Run manual sync..."); + final JobInfoRead connectionSyncRead = apiClient.getConnectionApi().syncConnection(new ConnectionIdRequestBody().connectionId(connectionId)); + + LOGGER.info("Waiting for job to run..."); + waitForJob(apiClient.getJobsApi(), connectionSyncRead.getJob(), Set.of(JobStatus.RUNNING)); + + LOGGER.info("Waiting for job to run a little..."); + Thread.sleep(5000); + + LOGGER.info("Scale down workers..."); + kubernetesClient.apps().deployments().inNamespace("default").withName("airbyte-worker").scale(0); + + LOGGER.info("Waiting for worker shutdown..."); + Thread.sleep(2000); + + LOGGER.info("Starting background cancellation request..."); + final var pool = Executors.newSingleThreadExecutor(); + final var mdc = MDC.getCopyOfContextMap(); + final Future resp = + pool.submit(() -> { + MDC.setContextMap(mdc); + try { + final JobInfoRead jobInfoRead = apiClient.getJobsApi().cancelJob(new JobIdRequestBody().id(connectionSyncRead.getJob().getId())); + LOGGER.info("jobInfoRead = " + jobInfoRead); + return jobInfoRead; + } catch (ApiException e) { + LOGGER.error("Failed to read from api", e); + throw e; + } + }); + Thread.sleep(2000); + + LOGGER.info("Scaling up workers..."); + kubernetesClient.apps().deployments().inNamespace("default").withName("airbyte-worker").scale(1); + + LOGGER.info("Waiting for cancellation to go into effect..."); + assertEquals(JobStatus.CANCELLED, 
resp.get().getJob().getStatus()); + } + private AirbyteCatalog discoverSourceSchema(final UUID sourceId) throws ApiException { return apiClient.getSourceApi().discoverSchemaForSource(new SourceIdRequestBody().sourceId(sourceId)).getCatalog(); } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalUtils.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalUtils.java index a8adecb6d26d..ad8e8021ae6d 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalUtils.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalUtils.java @@ -35,6 +35,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; import org.apache.commons.lang3.time.DurationFormatUtils; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -236,4 +237,33 @@ public static T withBackgroundHeartbeat(Callable callable) { } } + public static T withBackgroundHeartbeat(final AtomicReference cancellationCallbackRef, final Callable callable) { + final ScheduledExecutorService scheduledExecutor = Executors.newSingleThreadScheduledExecutor(); + + try { + scheduledExecutor.scheduleAtFixedRate(() -> { + final CancellationHandler cancellationHandler = new CancellationHandler.TemporalCancellationHandler(); + + cancellationHandler.checkAndHandleCancellation(() -> { + if (cancellationCallbackRef != null) { + final Runnable cancellationCallback = cancellationCallbackRef.get(); + if (cancellationCallback != null) { + cancellationCallback.run(); + } + } + }); + }, 0, SEND_HEARTBEAT_INTERVAL.toSeconds(), TimeUnit.SECONDS); + + return callable.call(); + } catch (final ActivityCompletionException e) { + LOGGER.warn("Job either timed out or was cancelled."); + throw new RuntimeException(e); + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + LOGGER.info("Stopping temporal heartbeating..."); + scheduledExecutor.shutdown(); + } + } + } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java index a158d1ce1a0b..8901bb70f81d 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java @@ -139,8 +139,10 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr ChildWorkflowOptions.newBuilder() .setWorkflowId("sync_" + maybeJobId.get()) .setTaskQueue(TemporalJobType.CONNECTION_UPDATER.name()) - // This will cancel the child workflow when the parent is terminated - .setParentClosePolicy(ParentClosePolicy.PARENT_CLOSE_POLICY_TERMINATE) + // we want to send a signal to the child workflow, not kill it, so we can choose how to handle it. + // this is necessary to be able to retry (think "resume") the orchestrator run in an activity and + // propagate cancellation to that level. 
+ .setParentClosePolicy(ParentClosePolicy.PARENT_CLOSE_POLICY_REQUEST_CANCEL) .build()); final UUID connectionId = connectionUpdaterInput.getConnectionId(); diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/LauncherWorker.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/LauncherWorker.java index 573c19e974a5..f4f0d815325c 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/LauncherWorker.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/sync/LauncherWorker.java @@ -20,6 +20,7 @@ import io.airbyte.workers.temporal.TemporalUtils; import io.fabric8.kubernetes.api.model.DeletionPropagation; import io.fabric8.kubernetes.api.model.Pod; +import io.fabric8.kubernetes.client.KubernetesClientException; import java.nio.file.Path; import java.time.Duration; import java.util.HashMap; @@ -28,6 +29,7 @@ import java.util.UUID; import java.util.concurrent.CancellationException; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; @@ -76,7 +78,9 @@ public LauncherWorker(final UUID connectionId, @Override public OUTPUT run(INPUT input, Path jobRoot) throws WorkerException { - return TemporalUtils.withBackgroundHeartbeat(() -> { + final AtomicBoolean isCanceled = new AtomicBoolean(false); + final AtomicReference cancellationCallback = new AtomicReference<>(null); + return TemporalUtils.withBackgroundHeartbeat(cancellationCallback, () -> { try { final Map envMap = System.getenv().entrySet().stream() .filter(entry -> OrchestratorConstants.ENV_VARS_TO_TRANSFER.contains(entry.getKey())) @@ -104,8 +108,6 @@ public OUTPUT run(INPUT input, Path jobRoot) throws WorkerException { final var podNameAndJobPrefix = podNamePrefix + "-job-" + jobRunConfig.getJobId() + "-attempt-"; final var podName = podNameAndJobPrefix + jobRunConfig.getAttemptId(); - killRunningPodsForConnection(podName); - final var kubePodInfo = new KubePodInfo(containerOrchestratorConfig.namespace(), podName); process = new AsyncOrchestratorPodProcess( @@ -117,12 +119,30 @@ public OUTPUT run(INPUT input, Path jobRoot) throws WorkerException { containerOrchestratorConfig.containerOrchestratorImage(), containerOrchestratorConfig.googleApplicationCredentials()); + cancellationCallback.set(() -> { + // When cancelled, try to set to true. + // Only proceed if value was previously false, so we only have one cancellation going. at a time + if (!isCanceled.getAndSet(true)) { + log.info("Trying to cancel async pod process."); + process.destroy(); + } + }); + + // only kill running pods and create process if it is not already running. if (process.getDocStoreStatus().equals(AsyncKubePodStatus.NOT_STARTED)) { - process.create( - allLabels, - resourceRequirements, - fileMap, - portMap); + log.info("Creating " + podName + " for attempt number: " + jobRunConfig.getAttemptId()); + killRunningPodsForConnection(); + + try { + process.create( + allLabels, + resourceRequirements, + fileMap, + portMap); + } catch (KubernetesClientException e) { + throw new WorkerException( + "Failed to create pod " + podName + ", pre-existing pod exists which didn't advance out of the NOT_STARTED state."); + } } // this waitFor can resume if the activity is re-run @@ -159,10 +179,8 @@ public OUTPUT run(INPUT input, Path jobRoot) throws WorkerException { * It is imperative that we do not run multiple replications, normalizations, syncs, etc. at the * same time. 
Our best bet is to kill everything that is labelled with the connection id and wait * until no more pods exist with that connection id. - * - * @param podNameToKeep a nullable name of a pod we don't want to delete. */ - private void killRunningPodsForConnection(String podNameToKeep) { + private void killRunningPodsForConnection() { final var client = containerOrchestratorConfig.kubernetesClient(); // delete all pods with the connection id label @@ -175,7 +193,6 @@ private void killRunningPodsForConnection(String podNameToKeep) { log.info("Attempting to delete pods: " + getPodNames(runningPods).toString()); runningPods.stream() .parallel() - .filter(pod -> !pod.getMetadata().getName().equals(podNameToKeep)) .forEach(kubePod -> client.resource(kubePod).withPropagationPolicy(DeletionPropagation.FOREGROUND).delete()); log.info("Waiting for deletion..."); @@ -215,13 +232,13 @@ public void cancel() { } log.debug("Closing sync runner process"); - killRunningPodsForConnection(null); + killRunningPodsForConnection(); if (process.hasExited()) { log.info("Successfully cancelled process."); } else { // try again - killRunningPodsForConnection(null); + killRunningPodsForConnection(); if (process.hasExited()) { log.info("Successfully cancelled process."); diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalUtilsTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalUtilsTest.java index 4e80f22e7f03..9e35ccbadda7 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalUtilsTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/TemporalUtilsTest.java @@ -5,14 +5,14 @@ package io.airbyte.workers.temporal; import static io.airbyte.workers.temporal.TemporalUtils.getTemporalClientWhenConnected; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.*; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import io.airbyte.commons.concurrency.VoidCallable; +import io.airbyte.workers.WorkerException; import io.temporal.activity.ActivityCancellationType; import io.temporal.activity.ActivityInterface; import io.temporal.activity.ActivityMethod; @@ -22,6 +22,7 @@ import io.temporal.api.workflow.v1.WorkflowExecutionInfo; import io.temporal.api.workflowservice.v1.DescribeNamespaceResponse; import io.temporal.client.WorkflowClient; +import io.temporal.client.WorkflowFailedException; import io.temporal.client.WorkflowOptions; import io.temporal.serviceclient.WorkflowServiceStubs; import io.temporal.testing.TestWorkflowEnvironment; @@ -34,6 +35,8 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; import org.apache.commons.lang3.tuple.ImmutablePair; import org.junit.jupiter.api.Test; @@ -125,6 +128,50 @@ public void testWaitThatTimesOut() { }); } + @Test + public void testRuntimeExceptionOnHeartbeatWrapper() { + final TestWorkflowEnvironment testEnv = TestWorkflowEnvironment.newInstance(); + final Worker worker = testEnv.newWorker(TASK_QUEUE); + worker.registerWorkflowImplementationTypes(TestFailingWorkflow.WorkflowImpl.class); + final WorkflowClient client = 
testEnv.getWorkflowClient(); + final AtomicInteger timesReachedEnd = new AtomicInteger(0); + worker.registerActivitiesImplementations(new TestFailingWorkflow.Activity1Impl(timesReachedEnd)); + testEnv.start(); + + final TestFailingWorkflow workflowStub = + client.newWorkflowStub(TestFailingWorkflow.class, WorkflowOptions.newBuilder().setTaskQueue(TASK_QUEUE).build()); + + // test runtime first + assertThrows(RuntimeException.class, () -> { + workflowStub.run("runtime"); + }); + + // we should never retry enough to reach the end + assertEquals(0, timesReachedEnd.get()); + } + + @Test + public void testWorkerExceptionOnHeartbeatWrapper() { + final TestWorkflowEnvironment testEnv = TestWorkflowEnvironment.newInstance(); + final Worker worker = testEnv.newWorker(TASK_QUEUE); + worker.registerWorkflowImplementationTypes(TestFailingWorkflow.WorkflowImpl.class); + final WorkflowClient client = testEnv.getWorkflowClient(); + final AtomicInteger timesReachedEnd = new AtomicInteger(0); + worker.registerActivitiesImplementations(new TestFailingWorkflow.Activity1Impl(timesReachedEnd)); + testEnv.start(); + + final TestFailingWorkflow workflowStub = + client.newWorkflowStub(TestFailingWorkflow.class, WorkflowOptions.newBuilder().setTaskQueue(TASK_QUEUE).build()); + + // throws workerexception wrapped in a WorkflowFailedException + assertThrows(WorkflowFailedException.class, () -> { + workflowStub.run("worker"); + }); + + // we should never retry enough to reach the end + assertEquals(0, timesReachedEnd.get()); + } + @WorkflowInterface public interface TestWorkflow { @@ -190,4 +237,80 @@ public void activity() { } + @WorkflowInterface + public interface TestFailingWorkflow { + + @WorkflowMethod + String run(String arg); + + class WorkflowImpl implements TestFailingWorkflow { + + private static final Logger LOGGER = LoggerFactory.getLogger(WorkflowImpl.class); + + final ActivityOptions options = ActivityOptions.newBuilder() + .setScheduleToCloseTimeout(Duration.ofMinutes(30)) + .setStartToCloseTimeout(Duration.ofMinutes(30)) + .setScheduleToStartTimeout(Duration.ofMinutes(30)) + .setCancellationType(ActivityCancellationType.WAIT_CANCELLATION_COMPLETED) + .setRetryOptions(TemporalUtils.NO_RETRY) + .setHeartbeatTimeout(Duration.ofSeconds(1)) + .build(); + + private final Activity1 activity1 = Workflow.newActivityStub(Activity1.class, options); + + @Override + public String run(final String arg) { + + LOGGER.info("workflow before activity 1"); + activity1.activity(arg); + LOGGER.info("workflow after all activities"); + + return "completed"; + } + + } + + @ActivityInterface + interface Activity1 { + + @ActivityMethod + void activity(String arg); + + } + + class Activity1Impl implements Activity1 { + + private static final Logger LOGGER = LoggerFactory.getLogger(TestWorkflow.Activity1Impl.class); + private static final String ACTIVITY1 = "activity1"; + + private final AtomicInteger timesReachedEnd; + + public Activity1Impl(final AtomicInteger timesReachedEnd) { + this.timesReachedEnd = timesReachedEnd; + } + + public void activity(String arg) { + LOGGER.info("before: {}", ACTIVITY1); + TemporalUtils.withBackgroundHeartbeat(new AtomicReference<>(null), () -> { + if (timesReachedEnd.get() == 0) { + if (arg.equals("runtime")) { + throw new RuntimeException("failed"); + } else if (arg.equals("timeout")) { + Thread.sleep(10000); + return null; + } else { + throw new WorkerException("failed"); + } + } else { + return null; + } + }); + timesReachedEnd.incrementAndGet(); + LOGGER.info("before: {}", ACTIVITY1); + } + 
+ } + + } + } diff --git a/tools/bin/acceptance_test_kube_schedulerv2.sh b/tools/bin/acceptance_test_kube_schedulerv2.sh index cf4fe87f6c76..83ee4f3cc1d5 100755 --- a/tools/bin/acceptance_test_kube_schedulerv2.sh +++ b/tools/bin/acceptance_test_kube_schedulerv2.sh @@ -80,4 +80,4 @@ if [ -n "$CI" ]; then fi echo "Running e2e tests via gradle..." -KUBE=true NEW_SCHEDULER=true SUB_BUILD=PLATFORM USE_EXTERNAL_DEPLOYMENT=true ./gradlew :airbyte-tests:acceptanceTests --scan +KUBE=true NEW_SCHEDULER=true SUB_BUILD=PLATFORM USE_EXTERNAL_DEPLOYMENT=true CONTAINER_ORCHESTRATOR=true ./gradlew :airbyte-tests:acceptanceTests --scan From 07e2232025e8fe83f433579b462263dfd337c4fd Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Thu, 17 Feb 2022 15:17:03 -0800 Subject: [PATCH 09/14] Track const config values in analytics (#10120) --- .../java/io/airbyte/commons/json/Jsons.java | 61 +++++++- .../persistence/job_tracker/JobTracker.java | 134 ++++++++++++++---- .../src/main/resources/example_config.json | 21 +++ .../main/resources/example_config_schema.json | 67 +++++++++ .../job_tracker/JobTrackerTest.java | 73 ++++++++-- 5 files changed, 316 insertions(+), 40 deletions(-) create mode 100644 airbyte-scheduler/persistence/src/main/resources/example_config_schema.json diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/json/Jsons.java b/airbyte-commons/src/main/java/io/airbyte/commons/json/Jsons.java index 6afcda2f7488..96235a646ab7 100644 --- a/airbyte-commons/src/main/java/io/airbyte/commons/json/Jsons.java +++ b/airbyte-commons/src/main/java/io/airbyte/commons/json/Jsons.java @@ -4,6 +4,9 @@ package io.airbyte.commons.json; +import static java.util.Collections.singletonMap; +import static java.util.stream.Collectors.toMap; + import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.core.util.DefaultPrettyPrinter; @@ -19,9 +22,12 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; import java.util.Set; import java.util.function.BiConsumer; @@ -198,8 +204,59 @@ public static int getIntOrZero(final JsonNode json, final List keys) { } /** - * By the Jackson DefaultPrettyPrinter prints objects with an extra space as follows: {"name" : - * "airbyte"}. We prefer {"name": "airbyte"}. + * Flattens an ObjectNode, or dumps it into a {null: value} map if it's not an object. + */ + public static Map flatten(final JsonNode node) { + if (node.isObject()) { + final Map output = new HashMap<>(); + for (final Iterator> it = node.fields(); it.hasNext(); ) { + final Entry entry = it.next(); + final String field = entry.getKey(); + final JsonNode value = entry.getValue(); + mergeMaps(output, field, flatten(value)); + } + return output; + } else { + final Object value; + if (node.isBoolean()) { + value = node.asBoolean(); + } else if (node.isLong()) { + value = node.asLong(); + } else if (node.isInt()) { + value = node.asInt(); + } else if (node.isDouble()) { + value = node.asDouble(); + } else if (node.isValueNode() && !node.isNull()) { + value = node.asText(); + } else { + // Fallback handling for e.g. arrays + value = node.toString(); + } + return singletonMap(null, value); + } + } + + /** + * Prepend all keys in subMap with prefix, then merge that map into originalMap. + *

+ * If subMap contains a null key, then instead it is replaced with prefix. I.e. {null: value} is treated as {prefix: value} when merging into + * originalMap. + */ + public static void mergeMaps(final Map originalMap, final String prefix, final Map subMap) { + originalMap.putAll(subMap.entrySet().stream().collect(toMap( + e -> { + final String key = e.getKey(); + if (key != null) { + return prefix + "." + key; + } else { + return prefix; + } + }, + Entry::getValue))); + } + + /** + * By the Jackson DefaultPrettyPrinter prints objects with an extra space as follows: {"name" : "airbyte"}. We prefer {"name": "airbyte"}. */ private static class JsonPrettyPrinter extends DefaultPrettyPrinter { diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/JobTracker.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/JobTracker.java index c2624e22a1dd..56cbbd806824 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/JobTracker.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/JobTracker.java @@ -4,13 +4,17 @@ package io.airbyte.scheduler.persistence.job_tracker; +import static java.util.Collections.emptyMap; +import static java.util.Collections.singletonMap; + import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableMap.Builder; import io.airbyte.analytics.TrackingClient; +import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.Exceptions; import io.airbyte.commons.map.MoreMaps; import io.airbyte.config.JobConfig; @@ -28,12 +32,13 @@ import io.airbyte.scheduler.models.Job; import io.airbyte.scheduler.persistence.JobPersistence; import io.airbyte.scheduler.persistence.WorkspaceHelper; +import io.airbyte.validation.json.JsonSchemaValidator; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; -import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Map; +import java.util.Map.Entry; import java.util.UUID; public class JobTracker { @@ -50,6 +55,8 @@ public enum JobState { public static final String OPERATION = "operation."; public static final String SET = "set"; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private final ConfigRepository configRepository; private final JobPersistence jobPersistence; private final WorkspaceHelper workspaceHelper; @@ -118,8 +125,10 @@ public void trackSync(final Job job, final JobState jobState) { Preconditions.checkArgument(allowedJob, "Job type " + configType + " is not allowed!"); final long jobId = job.getId(); final UUID connectionId = UUID.fromString(job.getScope()); - final UUID sourceDefinitionId = configRepository.getSourceDefinitionFromConnection(connectionId).getSourceDefinitionId(); - final UUID destinationDefinitionId = configRepository.getDestinationDefinitionFromConnection(connectionId).getDestinationDefinitionId(); + final StandardSourceDefinition sourceDefinition = configRepository.getSourceDefinitionFromConnection(connectionId); + final UUID sourceDefinitionId = sourceDefinition.getSourceDefinitionId(); + final StandardDestinationDefinition destinationDefinition = 
configRepository.getDestinationDefinitionFromConnection(connectionId); + final UUID destinationDefinitionId = destinationDefinition.getDestinationDefinitionId(); final Map jobMetadata = generateJobMetadata(String.valueOf(jobId), configType, job.getAttemptsCount()); final Map jobAttemptMetadata = generateJobAttemptMetadata(job.getId(), jobState); @@ -127,7 +136,10 @@ public void trackSync(final Job job, final JobState jobState) { final Map destinationDefMetadata = generateDestinationDefinitionMetadata(destinationDefinitionId); final Map syncMetadata = generateSyncMetadata(connectionId); final Map stateMetadata = generateStateMetadata(jobState); - final Map syncConfigMetadata = generateSyncConfigMetadata(job.getConfig()); + final Map syncConfigMetadata = generateSyncConfigMetadata( + job.getConfig(), + sourceDefinition.getSpec().getConnectionSpecification(), + destinationDefinition.getSpec().getConnectionSpecification()); final UUID workspaceId = workspaceHelper.getWorkspaceForJobIdIgnoreExceptions(jobId); track(workspaceId, @@ -142,18 +154,20 @@ public void trackSync(final Job job, final JobState jobState) { }); } - private Map generateSyncConfigMetadata(final JobConfig config) { + private Map generateSyncConfigMetadata(final JobConfig config, + final JsonNode sourceConfigSchema, + final JsonNode destinationConfigSchema) { if (config.getConfigType() == ConfigType.SYNC) { final JsonNode sourceConfiguration = config.getSync().getSourceConfiguration(); final JsonNode destinationConfiguration = config.getSync().getDestinationConfiguration(); - final Map sourceMetadata = configToMetadata(CONFIG + ".source", sourceConfiguration); - final Map destinationMetadata = configToMetadata(CONFIG + ".destination", destinationConfiguration); + final Map sourceMetadata = configToMetadata(CONFIG + ".source", sourceConfiguration, sourceConfigSchema); + final Map destinationMetadata = configToMetadata(CONFIG + ".destination", destinationConfiguration, destinationConfigSchema); final Map catalogMetadata = getCatalogMetadata(config.getSync().getConfiguredAirbyteCatalog()); return MoreMaps.merge(sourceMetadata, destinationMetadata, catalogMetadata); } else { - return Collections.emptyMap(); + return emptyMap(); } } @@ -168,30 +182,94 @@ private Map getCatalogMetadata(final ConfiguredAirbyteCatalog ca return output; } - protected static Map configToMetadata(final String jsonPath, final JsonNode config) { + /** + * Flattens a config into a map. Uses the schema to determine which fields are const (i.e. + * non-sensitive). Non-const, non-boolean values are replaced with {@link #SET} to avoid leaking + * potentially-sensitive information. + *

+ * anyOf/allOf schemas are treated as non-const values. These aren't (currently) used in config + * schemas anyway. + * + * @param jsonPath A prefix to add to all the keys in the returned map, with a period (`.`) + * separator + * @param schema The JSON schema that {@code config} conforms to + */ + protected static Map configToMetadata(final String jsonPath, final JsonNode config, final JsonNode schema) { + final Map metadata = configToMetadata(config, schema); + // Prepend all the keys with the root jsonPath + // But leave the values unchanged final Map output = new HashMap<>(); + Jsons.mergeMaps(output, jsonPath, metadata); + return output; + } + + /** + * Does the actually interesting bits of configToMetadata. If config is an object, returns a + * flattened map. If config is _not_ an object (i.e. it's a primitive string/number/etc, or it's an + * array) then returns a map of {null: toMetadataValue(config)}. + */ + private static Map configToMetadata(final JsonNode config, final JsonNode schema) { + if (schema.hasNonNull("const")) { + // If this schema is a const, then just dump it into a map: + // * If it's an object, flatten it + // * Otherwise, do some basic conversions to value-ish data. + // It would be a weird thing to declare const: null, but in that case we don't want to report null + // anyway, so explicitly use hasNonNull. + return Jsons.flatten(config); + } else if (schema.has("oneOf")) { + // If this schema is a oneOf, then find the first sub-schema which the config matches + // and use that sub-schema to convert the config to a map + final JsonSchemaValidator validator = new JsonSchemaValidator(); + for (final Iterator it = schema.get("oneOf").elements(); it.hasNext();) { + final JsonNode subSchema = it.next(); + if (validator.test(subSchema, config)) { + return configToMetadata(config, subSchema); + } + } + // If we didn't match any of the subschemas, then something is wrong. Bail out silently. + return emptyMap(); + } else if (config.isObject()) { + // If the schema is not a oneOf, but the config is an object (i.e. the schema has "type": "object") + // then we need to recursively convert each field of the object to a map. + final Map output = new HashMap<>(); + final JsonNode maybeProperties = schema.get("properties"); + + // If additionalProperties is not set, or it's a boolean, then there's no schema for additional properties. Use the accept-all schema. + // Otherwise, it's an actual schema. + final JsonNode maybeAdditionalProperties = schema.get("additionalProperties"); + final JsonNode additionalPropertiesSchema; + if (maybeAdditionalProperties == null || maybeAdditionalProperties.isBoolean()) { + additionalPropertiesSchema = OBJECT_MAPPER.createObjectNode(); + } else { + additionalPropertiesSchema = maybeAdditionalProperties; + } - if (config.isObject()) { - final ObjectNode node = (ObjectNode) config; - for (final Iterator> it = node.fields(); it.hasNext();) { - final var entry = it.next(); - final var field = entry.getKey(); - final var fieldJsonPath = jsonPath + "." 
+ field; - final var child = entry.getValue(); - - if (child.isBoolean()) { - output.put(fieldJsonPath, child.asBoolean()); - } else if (!child.isNull()) { - if (child.isObject()) { - output.putAll(configToMetadata(fieldJsonPath, child)); - } else if (!child.isTextual() || (child.isTextual() && !child.asText().isEmpty())) { - output.put(fieldJsonPath, SET); - } + for (final Iterator> it = config.fields(); it.hasNext(); ) { + final Entry entry = it.next(); + final String field = entry.getKey(); + final JsonNode value = entry.getValue(); + + final JsonNode propertySchema; + if (maybeProperties != null && maybeProperties.hasNonNull(field)) { + // If this property is explicitly declared, then use its schema + propertySchema = maybeProperties.get(field); + } else { + // otherwise, use the additionalProperties schema + propertySchema = additionalPropertiesSchema; } + + Jsons.mergeMaps(output, field, configToMetadata(value, propertySchema)); } + return output; + } else if (config.isBoolean()) { + return singletonMap(null, config.asBoolean()); + } else if ((!config.isTextual() && !config.isNull()) || (config.isTextual() && !config.asText().isEmpty())) { + // This is either non-textual (e.g. integer, array, etc) or non-empty text + return singletonMap(null, SET); + } else { + // Otherwise, this is an empty string, so just ignore it + return emptyMap(); } - - return output; } private Map generateSyncMetadata(final UUID connectionId) throws ConfigNotFoundException, IOException, JsonValidationException { diff --git a/airbyte-scheduler/persistence/src/main/resources/example_config.json b/airbyte-scheduler/persistence/src/main/resources/example_config.json index 11a101d190c6..c3f2f31fc16f 100644 --- a/airbyte-scheduler/persistence/src/main/resources/example_config.json +++ b/airbyte-scheduler/persistence/src/main/resources/example_config.json @@ -5,6 +5,27 @@ "empty_string": "", "null_value": null, "one_of": { + "type_key": "foo", "some_key": 100 + }, + "const_object": { + "sub_key": "bar", + "sub_array": [1, 2, 3], + "sub_object": { + "sub_sub_key": "baz" + } + }, + "const_null": null, + "additionalPropertiesUnset": { + "foo": "bar" + }, + "additionalPropertiesBoolean": { + "foo": "bar" + }, + "additionalPropertiesSchema": { + "foo": 42 + }, + "additionalPropertiesConst": { + "foo": 42 } } diff --git a/airbyte-scheduler/persistence/src/main/resources/example_config_schema.json b/airbyte-scheduler/persistence/src/main/resources/example_config_schema.json new file mode 100644 index 000000000000..27ceeb106721 --- /dev/null +++ b/airbyte-scheduler/persistence/src/main/resources/example_config_schema.json @@ -0,0 +1,67 @@ +{ + "type": "object", + "properties": { + "username": { + "type": "string" + }, + "password": { + "type": "string" + }, + "has_ssl": { + "type": "boolean" + }, + "empty_string": { + "type": "string" + }, + "null_value": { + "type": "null" + }, + "one_of": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "type_key": { + "const": "foo" + }, + "some_key": { + "type": "integer" + } + } + } + ] + }, + "const_object": { + "const": { + "sub_key": "bar", + "sub_array": [1, 2, 3], + "sub_object": { + "sub_sub_key": "baz" + } + } + }, + "const_null": { + "const": null + }, + "additionalPropertiesUnset": { + "type": "object" + }, + "additionalPropertiesBoolean": { + "type": "object", + "additionalProperties": true + }, + "additionalPropertiesSchema": { + "type": "object", + "additionalProperties": { + "type": "integer" + } + }, + "additionalPropertiesConst": { + "type": 
"object", + "additionalProperties": { + "const": 42 + } + } + } +} diff --git a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_tracker/JobTrackerTest.java b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_tracker/JobTrackerTest.java index 3e5bf9a0b0cd..c57b58550757 100644 --- a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_tracker/JobTrackerTest.java +++ b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/job_tracker/JobTrackerTest.java @@ -11,7 +11,9 @@ import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; +import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableMap; import io.airbyte.analytics.TrackingClient; import io.airbyte.commons.json.Jsons; @@ -36,6 +38,7 @@ import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.ConnectorSpecification; import io.airbyte.protocol.models.DestinationSyncMode; import io.airbyte.protocol.models.SyncMode; import io.airbyte.scheduler.models.Attempt; @@ -57,6 +60,8 @@ class JobTrackerTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final UUID WORKSPACE_ID = UUID.randomUUID(); private static final String WORKSPACE_NAME = "WORKSPACE_TEST"; private static final UUID JOB_ID = UUID.randomUUID(); @@ -100,6 +105,38 @@ class JobTrackerTest { .put("operation_count", 0) .build(); + private static final ConnectorSpecification SOURCE_SPEC; + private static final ConnectorSpecification DESTINATION_SPEC; + + static { + try { + SOURCE_SPEC = new ConnectorSpecification().withConnectionSpecification(OBJECT_MAPPER.readTree( + """ + { + "type": "object", + "properties": { + "key": { + "type": "string" + } + } + } + """)); + DESTINATION_SPEC = new ConnectorSpecification().withConnectionSpecification(OBJECT_MAPPER.readTree( + """ + { + "type": "object", + "properties": { + "key": { + "type": "boolean" + } + } + } + """)); + } catch (final JsonProcessingException e) { + throw new RuntimeException(e); + } + } + private ConfigRepository configRepository; private JobPersistence jobPersistence; @@ -259,13 +296,25 @@ void testConfigToMetadata() throws IOException { final String configJson = MoreResources.readResource("example_config.json"); final JsonNode config = Jsons.deserialize(configJson); - final Map expected = ImmutableMap.of( - JobTracker.CONFIG + ".username", JobTracker.SET, - JobTracker.CONFIG + ".has_ssl", false, - JobTracker.CONFIG + ".password", JobTracker.SET, - JobTracker.CONFIG + ".one_of.some_key", JobTracker.SET); + final String schemaJson = MoreResources.readResource("example_config_schema.json"); + final JsonNode schema = Jsons.deserialize(schemaJson); + + final Map expected = new ImmutableMap.Builder() + .put(JobTracker.CONFIG + ".username", JobTracker.SET) + .put(JobTracker.CONFIG + ".has_ssl", false) + .put(JobTracker.CONFIG + ".password", JobTracker.SET) + .put(JobTracker.CONFIG + ".one_of.type_key", "foo") + .put(JobTracker.CONFIG + ".one_of.some_key", JobTracker.SET) + .put(JobTracker.CONFIG + ".const_object.sub_key", "bar") + .put(JobTracker.CONFIG + ".const_object.sub_array", "[1,2,3]") + .put(JobTracker.CONFIG + 
".const_object.sub_object.sub_sub_key", "baz") + .put(JobTracker.CONFIG + ".additionalPropertiesUnset.foo", JobTracker.SET) + .put(JobTracker.CONFIG + ".additionalPropertiesBoolean.foo", JobTracker.SET) + .put(JobTracker.CONFIG + ".additionalPropertiesSchema.foo", JobTracker.SET) + .put(JobTracker.CONFIG + ".additionalPropertiesConst.foo", 42) + .build(); - final Map actual = JobTracker.configToMetadata(JobTracker.CONFIG, config); + final Map actual = JobTracker.configToMetadata(JobTracker.CONFIG, config, schema); assertEquals(expected, actual); } @@ -305,26 +354,30 @@ private Job getJobMock(final ConfigType configType, final long jobId) throws Con .withSourceDefinitionId(UUID1) .withName(SOURCE_DEF_NAME) .withDockerRepository(CONNECTOR_REPOSITORY) - .withDockerImageTag(CONNECTOR_VERSION)); + .withDockerImageTag(CONNECTOR_VERSION) + .withSpec(SOURCE_SPEC)); when(configRepository.getDestinationDefinitionFromConnection(CONNECTION_ID)) .thenReturn(new StandardDestinationDefinition() .withDestinationDefinitionId(UUID2) .withName(DESTINATION_DEF_NAME) .withDockerRepository(CONNECTOR_REPOSITORY) - .withDockerImageTag(CONNECTOR_VERSION)); + .withDockerImageTag(CONNECTOR_VERSION) + .withSpec(DESTINATION_SPEC)); when(configRepository.getStandardSourceDefinition(UUID1)) .thenReturn(new StandardSourceDefinition() .withSourceDefinitionId(UUID1) .withName(SOURCE_DEF_NAME) .withDockerRepository(CONNECTOR_REPOSITORY) - .withDockerImageTag(CONNECTOR_VERSION)); + .withDockerImageTag(CONNECTOR_VERSION) + .withSpec(SOURCE_SPEC)); when(configRepository.getStandardDestinationDefinition(UUID2)) .thenReturn(new StandardDestinationDefinition() .withDestinationDefinitionId(UUID2) .withName(DESTINATION_DEF_NAME) .withDockerRepository(CONNECTOR_REPOSITORY) - .withDockerImageTag(CONNECTOR_VERSION)); + .withDockerImageTag(CONNECTOR_VERSION) + .withSpec(DESTINATION_SPEC)); final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( new ConfiguredAirbyteStream() From 33cd51f6688d118b8d02c66e906e66b8ba9b4f4e Mon Sep 17 00:00:00 2001 From: LiRen Tu Date: Thu, 17 Feb 2022 15:27:13 -0800 Subject: [PATCH 10/14] Add back jdbc database comment (#10444) --- .../src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java index 1ef8aebca048..b1280568d5f4 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java @@ -113,6 +113,8 @@ public Stream query(final CheckedFunction Date: Thu, 17 Feb 2022 15:28:14 -0800 Subject: [PATCH 11/14] Add a maximum page size and use the count instead of the list (#10443) * Add a maximum page size and use the count instead of the list * Fix typo --- .../io/airbyte/workers/temporal/TemporalClient.java | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java index a27579320536..ff83e94e1337 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/TemporalClient.java @@ -31,7 +31,6 @@ import io.airbyte.workers.temporal.scheduling.state.WorkflowState; import 
io.airbyte.workers.temporal.spec.SpecWorkflow; import io.airbyte.workers.temporal.sync.SyncWorkflow; -import io.temporal.api.workflow.v1.WorkflowExecutionInfo; import io.temporal.api.workflowservice.v1.ListOpenWorkflowExecutionsRequest; import io.temporal.api.workflowservice.v1.ListOpenWorkflowExecutionsResponse; import io.temporal.client.BatchRequest; @@ -40,7 +39,6 @@ import java.io.IOException; import java.nio.file.Path; import java.util.HashSet; -import java.util.List; import java.util.Optional; import java.util.Set; import java.util.UUID; @@ -65,6 +63,8 @@ public class TemporalClient { */ private static final int DELAY_BETWEEN_QUERY_MS = 10; + private static final int MAXIMUM_SEARCH_PAGE_SIZE = 50; + public static TemporalClient production(final String temporalHost, final Path workspaceRoot, final Configs configs) { final WorkflowServiceStubs temporalService = TemporalUtils.createTemporalService(temporalHost); return new TemporalClient(WorkflowClient.newInstance(temporalService), workspaceRoot, temporalService, configs); @@ -454,14 +454,15 @@ public boolean isWorkflowRunning(final String workflowName) { ListOpenWorkflowExecutionsRequest openWorkflowExecutionsRequest = ListOpenWorkflowExecutionsRequest.newBuilder() .setNamespace(client.getOptions().getNamespace()) + .setMaximumPageSize(MAXIMUM_SEARCH_PAGE_SIZE) .build(); do { final ListOpenWorkflowExecutionsResponse listOpenWorkflowExecutionsRequest = service.blockingStub().listOpenWorkflowExecutions(openWorkflowExecutionsRequest); - final List workflowExecutionInfos = listOpenWorkflowExecutionsRequest.getExecutionsList().stream() + final long matchingWorkflowCount = listOpenWorkflowExecutionsRequest.getExecutionsList().stream() .filter((workflowExecutionInfo -> workflowExecutionInfo.getExecution().getWorkflowId().equals(workflowName))) - .collect(Collectors.toList()); - if (!workflowExecutionInfos.isEmpty()) { + .count(); + if (matchingWorkflowCount != 0) { return true; } token = listOpenWorkflowExecutionsRequest.getNextPageToken(); @@ -470,6 +471,7 @@ public boolean isWorkflowRunning(final String workflowName) { ListOpenWorkflowExecutionsRequest.newBuilder() .setNamespace(client.getOptions().getNamespace()) .setNextPageToken(token) + .setMaximumPageSize(MAXIMUM_SEARCH_PAGE_SIZE) .build(); } while (token != null && token.size() > 0); From a85c02ab7667689e6594d0933d601dac02ee24c2 Mon Sep 17 00:00:00 2001 From: Jared Rhizor Date: Thu, 17 Feb 2022 15:28:28 -0800 Subject: [PATCH 12/14] run format (#10445) --- .../src/main/java/io/airbyte/commons/json/Jsons.java | 9 +++++---- .../scheduler/persistence/job_tracker/JobTracker.java | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/airbyte-commons/src/main/java/io/airbyte/commons/json/Jsons.java b/airbyte-commons/src/main/java/io/airbyte/commons/json/Jsons.java index 96235a646ab7..e063d053adae 100644 --- a/airbyte-commons/src/main/java/io/airbyte/commons/json/Jsons.java +++ b/airbyte-commons/src/main/java/io/airbyte/commons/json/Jsons.java @@ -209,7 +209,7 @@ public static int getIntOrZero(final JsonNode json, final List keys) { public static Map flatten(final JsonNode node) { if (node.isObject()) { final Map output = new HashMap<>(); - for (final Iterator> it = node.fields(); it.hasNext(); ) { + for (final Iterator> it = node.fields(); it.hasNext();) { final Entry entry = it.next(); final String field = entry.getKey(); final JsonNode value = entry.getValue(); @@ -239,8 +239,8 @@ public static Map flatten(final JsonNode node) { /** * Prepend all keys in subMap with 
prefix, then merge that map into originalMap. *

- * If subMap contains a null key, then instead it is replaced with prefix. I.e. {null: value} is treated as {prefix: value} when merging into - * originalMap. + * If subMap contains a null key, then instead it is replaced with prefix. I.e. {null: value} is + * treated as {prefix: value} when merging into originalMap. */ public static void mergeMaps(final Map originalMap, final String prefix, final Map subMap) { originalMap.putAll(subMap.entrySet().stream().collect(toMap( @@ -256,7 +256,8 @@ public static void mergeMaps(final Map originalMap, final String } /** - * By the Jackson DefaultPrettyPrinter prints objects with an extra space as follows: {"name" : "airbyte"}. We prefer {"name": "airbyte"}. + * By the Jackson DefaultPrettyPrinter prints objects with an extra space as follows: {"name" : + * "airbyte"}. We prefer {"name": "airbyte"}. */ private static class JsonPrettyPrinter extends DefaultPrettyPrinter { diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/JobTracker.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/JobTracker.java index 56cbbd806824..573b0b4b20d0 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/JobTracker.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/JobTracker.java @@ -234,7 +234,8 @@ private static Map configToMetadata(final JsonNode config, final final Map output = new HashMap<>(); final JsonNode maybeProperties = schema.get("properties"); - // If additionalProperties is not set, or it's a boolean, then there's no schema for additional properties. Use the accept-all schema. + // If additionalProperties is not set, or it's a boolean, then there's no schema for additional + // properties. Use the accept-all schema. // Otherwise, it's an actual schema. 
final JsonNode maybeAdditionalProperties = schema.get("additionalProperties"); final JsonNode additionalPropertiesSchema; @@ -244,7 +245,7 @@ private static Map configToMetadata(final JsonNode config, final additionalPropertiesSchema = maybeAdditionalProperties; } - for (final Iterator> it = config.fields(); it.hasNext(); ) { + for (final Iterator> it = config.fields(); it.hasNext();) { final Entry entry = it.next(); final String field = entry.getKey(); final JsonNode value = entry.getValue(); From 6bc9240c9ced31768d1a6af5894e76ba213acb1b Mon Sep 17 00:00:00 2001 From: Octavia Squidington III <90398440+octavia-squidington-iii@users.noreply.github.com> Date: Fri, 18 Feb 2022 01:11:39 +0100 Subject: [PATCH 13/14] Bump Airbyte version from 0.35.30-alpha to 0.35.31-alpha (#10446) Co-authored-by: jrhizor --- .bumpversion.cfg | 2 +- .env | 2 +- airbyte-bootloader/Dockerfile | 4 ++-- airbyte-container-orchestrator/Dockerfile | 6 +++--- airbyte-scheduler/app/Dockerfile | 4 ++-- airbyte-server/Dockerfile | 4 ++-- airbyte-webapp/package-lock.json | 4 ++-- airbyte-webapp/package.json | 2 +- airbyte-workers/Dockerfile | 4 ++-- charts/airbyte/Chart.yaml | 2 +- charts/airbyte/README.md | 10 +++++----- charts/airbyte/values.yaml | 10 +++++----- docs/operator-guides/upgrading-airbyte.md | 2 +- kube/overlays/stable-with-resource-limits/.env | 2 +- .../stable-with-resource-limits/kustomization.yaml | 12 ++++++------ kube/overlays/stable/.env | 2 +- kube/overlays/stable/kustomization.yaml | 12 ++++++------ 17 files changed, 42 insertions(+), 42 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index bdd89dbddc35..86d7a2397917 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.35.30-alpha +current_version = 0.35.31-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? 
diff --git a/.env b/.env index fd704d457e47..d430f68db5b1 100644 --- a/.env +++ b/.env @@ -10,7 +10,7 @@ ### SHARED ### -VERSION=0.35.30-alpha +VERSION=0.35.31-alpha # When using the airbyte-db via default docker image CONFIG_ROOT=/data diff --git a/airbyte-bootloader/Dockerfile b/airbyte-bootloader/Dockerfile index 4c93931beb04..7e3c7f662135 100644 --- a/airbyte-bootloader/Dockerfile +++ b/airbyte-bootloader/Dockerfile @@ -5,6 +5,6 @@ ENV APPLICATION airbyte-bootloader WORKDIR /app -ADD bin/${APPLICATION}-0.35.30-alpha.tar /app +ADD bin/${APPLICATION}-0.35.31-alpha.tar /app -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.30-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-container-orchestrator/Dockerfile b/airbyte-container-orchestrator/Dockerfile index 39125e8dacc7..d219e4035edf 100644 --- a/airbyte-container-orchestrator/Dockerfile +++ b/airbyte-container-orchestrator/Dockerfile @@ -26,12 +26,12 @@ RUN echo "deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] htt RUN apt-get update && apt-get install -y kubectl ENV APPLICATION airbyte-container-orchestrator -ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.30-alpha/bin/${APPLICATION}" +ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.31-alpha/bin/${APPLICATION}" WORKDIR /app # Move orchestrator app -ADD bin/${APPLICATION}-0.35.30-alpha.tar /app +ADD bin/${APPLICATION}-0.35.31-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.30-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 45cb20e1bb8d..3e82fe27d813 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD bin/${APPLICATION}-0.35.30-alpha.tar /app +ADD bin/${APPLICATION}-0.35.31-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.30-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index b8bf5681f775..b0231e906607 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD bin/${APPLICATION}-0.35.30-alpha.tar /app +ADD bin/${APPLICATION}-0.35.31-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.30-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.31-alpha/bin/${APPLICATION}"] diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 5907d33c02b7..96e2081a9e5e 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,12 +1,12 @@ { "name": "airbyte-webapp", - "version": "0.35.30-alpha", + "version": "0.35.31-alpha", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "airbyte-webapp", - "version": "0.35.30-alpha", + "version": "0.35.31-alpha", "dependencies": { "@fortawesome/fontawesome-svg-core": "^1.2.36", "@fortawesome/free-brands-svg-icons": "^5.15.4", diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 
a0fe9cb24e0d..24401cf8c5af 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.35.30-alpha", + "version": "0.35.31-alpha", "private": true, "engines": { "node": ">=16.0.0" diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index 7c694cf2610c..ed059d2b6aae 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -30,7 +30,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD bin/${APPLICATION}-0.35.30-alpha.tar /app +ADD bin/${APPLICATION}-0.35.31-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.30-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.31-alpha/bin/${APPLICATION}"] diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index 7770d53b600b..40122bbac4d9 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.35.30-alpha" +appVersion: "0.35.31-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 150fed477d2c..1fe29507b949 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -29,7 +29,7 @@ | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.30-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.31-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.service.type` | The service type to use for the webapp service | `ClusterIP` | | `webapp.service.port` | The service port to expose the webapp on | `80` | @@ -55,7 +55,7 @@ | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.30-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.31-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -86,7 +86,7 @@ | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.30-alpha` | +| `server.image.tag` | The airbyte server image tag. 
Defaults to the chart's AppVersion | `0.35.31-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | | `server.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -120,7 +120,7 @@ | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.30-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.31-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | | `worker.livenessProbe.initialDelaySeconds` | Initial delay seconds for livenessProbe | `30` | @@ -148,7 +148,7 @@ | ----------------------------- | -------------------------------------------------------------------- | -------------------- | | `bootloader.image.repository` | The repository to use for the airbyte bootloader image. | `airbyte/bootloader` | | `bootloader.image.pullPolicy` | the pull policy to use for the airbyte bootloader image | `IfNotPresent` | -| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.30-alpha` | +| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.31-alpha` | ### Temporal parameters diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index 7bf032492cf0..dc9b1c7d7ef2 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -43,7 +43,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.35.30-alpha + tag: 0.35.31-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -145,7 +145,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.35.30-alpha + tag: 0.35.31-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -260,7 +260,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.35.30-alpha + tag: 0.35.31-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -377,7 +377,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.35.30-alpha + tag: 0.35.31-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## @@ -471,7 +471,7 @@ bootloader: image: repository: airbyte/bootloader pullPolicy: IfNotPresent - tag: 0.35.30-alpha + tag: 0.35.31-alpha ## @param bootloader.podAnnotations [object] Add extra annotations to the bootloader pod ## diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md index 6bb3621718ed..a7753e77e21a 100644 --- a/docs/operator-guides/upgrading-airbyte.md +++ b/docs/operator-guides/upgrading-airbyte.md @@ -101,7 +101,7 @@ If you are upgrading from \(i.e. your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. 
```bash - docker run --rm -v /tmp:/config airbyte/migration:0.35.30-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.35.31-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index 5d5840ff0c35..ad0cfd94c9de 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.30-alpha +AIRBYTE_VERSION=0.35.31-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index ced0168c33e9..63be90234fae 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: airbyte/bootloader - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: airbyte/scheduler - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: airbyte/server - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: airbyte/webapp - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: airbyte/worker - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index 5d5840ff0c35..ad0cfd94c9de 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.30-alpha +AIRBYTE_VERSION=0.35.31-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index f28b587a6c35..bce418da26e8 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: airbyte/bootloader - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: airbyte/scheduler - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: airbyte/server - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: airbyte/webapp - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: airbyte/worker - newTag: 0.35.30-alpha + newTag: 0.35.31-alpha - name: temporalio/auto-setup newTag: 1.7.0 From ef673c5695aeaddc24394d9f17224963e0b1233d Mon Sep 17 00:00:00 2001 From: Davin Chia Date: Fri, 18 Feb 2022 16:06:11 +0800 Subject: [PATCH 14/14] Inject check connection resource (#10410) Make it possible to set resource limits specifically for Check Connection. This helps speed up the Check Connection operation for Java based connectors. After this PR is merged, I will do an OSS release and make the required Helm changes in Cloud. 
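
As a rough sketch of how the new knobs could be used once released, an operator might set them in a Docker Compose style `.env` file. The variable names below come from this change; the values and the `.env` placement are illustrative assumptions only, and each `CHECK_*` setting falls back to the corresponding `JOB_MAIN_CONTAINER_*` value when unset:

```bash
# Illustrative values only; tune per deployment.
# Unset CHECK_* variables fall back to the matching JOB_MAIN_CONTAINER_* setting.
CHECK_JOB_MAIN_CONTAINER_CPU_REQUEST=0.5
CHECK_JOB_MAIN_CONTAINER_CPU_LIMIT=1
CHECK_JOB_MAIN_CONTAINER_MEMORY_REQUEST=500Mi
CHECK_JOB_MAIN_CONTAINER_MEMORY_LIMIT=1Gi
```

Keeping these separate from the sync-job defaults lets Check Connection request more generous (or tighter) resources without changing what replication jobs ask for.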
--- .../main/java/io/airbyte/config/Configs.java | 24 +++ .../java/io/airbyte/config/EnvConfigs.java | 25 +++ .../io/airbyte/config/EnvConfigsTest.java | 191 ++++++++++++------ .../io/airbyte/workers/WorkerConfigs.java | 8 +- 4 files changed, 185 insertions(+), 63 deletions(-) diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java index 52329f6554ec..4fc366ba5263 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java @@ -238,6 +238,30 @@ public interface Configs { Map getJobDefaultEnvMap(); // Jobs - Kube only + /** + * Define the check job container's minimum CPU request. Defaults to + * {@link #getJobMainContainerCpuRequest()} if not set. Internal-use only. + */ + String getCheckJobMainContainerCpuRequest(); + + /** + * Define the check job container's maximum CPU usage. Defaults to + * {@link #getJobMainContainerCpuLimit()} if not set. Internal-use only. + */ + String getCheckJobMainContainerCpuLimit(); + + /** + * Define the job container's minimum RAM usage. Defaults to + * {@link #getJobMainContainerMemoryRequest()} if not set. Internal-use only. + */ + String getCheckJobMainContainerMemoryRequest(); + + /** + * Define the job container's maximum RAM usage. Defaults to + * {@link #getJobMainContainerMemoryLimit()} if not set. Internal-use only. + */ + String getCheckJobMainContainerMemoryLimit(); + /** * Define one or more Job pod tolerations. Tolerations are separated by ';'. Each toleration * contains k=v pairs mentioning some/all of key, effect, operator and value and separated by `,`. diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java index 958d62447c58..01fafea21e6c 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java @@ -124,6 +124,11 @@ public class EnvConfigs implements Configs { private static final String DISCOVER_WORKER_STATUS_CHECK_INTERVAL = "DISCOVER_WORKER_STATUS_CHECK_INTERVAL"; private static final String REPLICATION_WORKER_STATUS_CHECK_INTERVAL = "REPLICATION_WORKER_STATUS_CHECK_INTERVAL"; + static final String CHECK_JOB_MAIN_CONTAINER_CPU_REQUEST = "CHECK_JOB_MAIN_CONTAINER_CPU_REQUEST"; + static final String CHECK_JOB_MAIN_CONTAINER_CPU_LIMIT = "CHECK_JOB_MAIN_CONTAINER_CPU_LIMIT"; + static final String CHECK_JOB_MAIN_CONTAINER_MEMORY_REQUEST = "CHECK_JOB_MAIN_CONTAINER_MEMORY_REQUEST"; + static final String CHECK_JOB_MAIN_CONTAINER_MEMORY_LIMIT = "CHECK_JOB_MAIN_CONTAINER_MEMORY_LIMIT"; + // defaults private static final String DEFAULT_SPEC_CACHE_BUCKET = "io-airbyte-cloud-spec-cache"; public static final String DEFAULT_JOB_KUBE_NAMESPACE = "default"; @@ -612,6 +617,26 @@ public Map getJobDefaultEnvMap() { .collect(Collectors.toMap(key -> key.replace(JOB_DEFAULT_ENV_PREFIX, ""), getEnv)); } + @Override + public String getCheckJobMainContainerCpuRequest() { + return getEnvOrDefault(CHECK_JOB_MAIN_CONTAINER_CPU_REQUEST, getJobMainContainerCpuRequest()); + } + + @Override + public String getCheckJobMainContainerCpuLimit() { + return getEnvOrDefault(CHECK_JOB_MAIN_CONTAINER_CPU_LIMIT, getJobMainContainerCpuLimit()); + } + + @Override + public String getCheckJobMainContainerMemoryRequest() { + return getEnvOrDefault(CHECK_JOB_MAIN_CONTAINER_MEMORY_REQUEST, 
getJobMainContainerMemoryRequest()); + } + + @Override + public String getCheckJobMainContainerMemoryLimit() { + return getEnvOrDefault(CHECK_JOB_MAIN_CONTAINER_MEMORY_LIMIT, getJobMainContainerMemoryLimit()); + } + @Override public LogConfigs getLogConfigs() { return logConfigs; diff --git a/airbyte-config/models/src/test/java/io/airbyte/config/EnvConfigsTest.java b/airbyte-config/models/src/test/java/io/airbyte/config/EnvConfigsTest.java index 51336f54b784..0841a94ad799 100644 --- a/airbyte-config/models/src/test/java/io/airbyte/config/EnvConfigsTest.java +++ b/airbyte-config/models/src/test/java/io/airbyte/config/EnvConfigsTest.java @@ -4,13 +4,16 @@ package io.airbyte.config; +import static org.junit.jupiter.api.Assertions.*; + import io.airbyte.commons.version.AirbyteVersion; import java.nio.file.Paths; import java.util.HashMap; import java.util.List; import java.util.Map; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; class EnvConfigsTest { @@ -26,158 +29,158 @@ void setUp() { @Test void ensureGetEnvBehavior() { - Assertions.assertNull(System.getenv("MY_RANDOM_VAR_1234")); + assertNull(System.getenv("MY_RANDOM_VAR_1234")); } @Test void testAirbyteRole() { envMap.put(EnvConfigs.AIRBYTE_ROLE, null); - Assertions.assertNull(config.getAirbyteRole()); + assertNull(config.getAirbyteRole()); envMap.put(EnvConfigs.AIRBYTE_ROLE, "dev"); - Assertions.assertEquals("dev", config.getAirbyteRole()); + assertEquals("dev", config.getAirbyteRole()); } @Test void testAirbyteVersion() { envMap.put(EnvConfigs.AIRBYTE_VERSION, null); - Assertions.assertThrows(IllegalArgumentException.class, () -> config.getAirbyteVersion()); + assertThrows(IllegalArgumentException.class, () -> config.getAirbyteVersion()); envMap.put(EnvConfigs.AIRBYTE_VERSION, "dev"); - Assertions.assertEquals(new AirbyteVersion("dev"), config.getAirbyteVersion()); + assertEquals(new AirbyteVersion("dev"), config.getAirbyteVersion()); } @Test void testWorkspaceRoot() { envMap.put(EnvConfigs.WORKSPACE_ROOT, null); - Assertions.assertThrows(IllegalArgumentException.class, () -> config.getWorkspaceRoot()); + assertThrows(IllegalArgumentException.class, () -> config.getWorkspaceRoot()); envMap.put(EnvConfigs.WORKSPACE_ROOT, "abc/def"); - Assertions.assertEquals(Paths.get("abc/def"), config.getWorkspaceRoot()); + assertEquals(Paths.get("abc/def"), config.getWorkspaceRoot()); } @Test void testLocalRoot() { envMap.put(EnvConfigs.LOCAL_ROOT, null); - Assertions.assertThrows(IllegalArgumentException.class, () -> config.getLocalRoot()); + assertThrows(IllegalArgumentException.class, () -> config.getLocalRoot()); envMap.put(EnvConfigs.LOCAL_ROOT, "abc/def"); - Assertions.assertEquals(Paths.get("abc/def"), config.getLocalRoot()); + assertEquals(Paths.get("abc/def"), config.getLocalRoot()); } @Test void testConfigRoot() { envMap.put(EnvConfigs.CONFIG_ROOT, null); - Assertions.assertThrows(IllegalArgumentException.class, () -> config.getConfigRoot()); + assertThrows(IllegalArgumentException.class, () -> config.getConfigRoot()); envMap.put(EnvConfigs.CONFIG_ROOT, "a/b"); - Assertions.assertEquals(Paths.get("a/b"), config.getConfigRoot()); + assertEquals(Paths.get("a/b"), config.getConfigRoot()); } @Test void testGetDatabaseUser() { envMap.put(EnvConfigs.DATABASE_USER, null); - Assertions.assertThrows(IllegalArgumentException.class, () -> config.getDatabaseUser()); + 
assertThrows(IllegalArgumentException.class, () -> config.getDatabaseUser()); envMap.put(EnvConfigs.DATABASE_USER, "user"); - Assertions.assertEquals("user", config.getDatabaseUser()); + assertEquals("user", config.getDatabaseUser()); } @Test void testGetDatabasePassword() { envMap.put(EnvConfigs.DATABASE_PASSWORD, null); - Assertions.assertThrows(IllegalArgumentException.class, () -> config.getDatabasePassword()); + assertThrows(IllegalArgumentException.class, () -> config.getDatabasePassword()); envMap.put(EnvConfigs.DATABASE_PASSWORD, "password"); - Assertions.assertEquals("password", config.getDatabasePassword()); + assertEquals("password", config.getDatabasePassword()); } @Test void testGetDatabaseUrl() { envMap.put(EnvConfigs.DATABASE_URL, null); - Assertions.assertThrows(IllegalArgumentException.class, () -> config.getDatabaseUrl()); + assertThrows(IllegalArgumentException.class, () -> config.getDatabaseUrl()); envMap.put(EnvConfigs.DATABASE_URL, "url"); - Assertions.assertEquals("url", config.getDatabaseUrl()); + assertEquals("url", config.getDatabaseUrl()); } @Test void testGetWorkspaceDockerMount() { envMap.put(EnvConfigs.WORKSPACE_DOCKER_MOUNT, null); envMap.put(EnvConfigs.WORKSPACE_ROOT, "abc/def"); - Assertions.assertEquals("abc/def", config.getWorkspaceDockerMount()); + assertEquals("abc/def", config.getWorkspaceDockerMount()); envMap.put(EnvConfigs.WORKSPACE_DOCKER_MOUNT, "root"); envMap.put(EnvConfigs.WORKSPACE_ROOT, "abc/def"); - Assertions.assertEquals("root", config.getWorkspaceDockerMount()); + assertEquals("root", config.getWorkspaceDockerMount()); envMap.put(EnvConfigs.WORKSPACE_DOCKER_MOUNT, null); envMap.put(EnvConfigs.WORKSPACE_ROOT, null); - Assertions.assertThrows(IllegalArgumentException.class, () -> config.getWorkspaceDockerMount()); + assertThrows(IllegalArgumentException.class, () -> config.getWorkspaceDockerMount()); } @Test void testGetLocalDockerMount() { envMap.put(EnvConfigs.LOCAL_DOCKER_MOUNT, null); envMap.put(EnvConfigs.LOCAL_ROOT, "abc/def"); - Assertions.assertEquals("abc/def", config.getLocalDockerMount()); + assertEquals("abc/def", config.getLocalDockerMount()); envMap.put(EnvConfigs.LOCAL_DOCKER_MOUNT, "root"); envMap.put(EnvConfigs.LOCAL_ROOT, "abc/def"); - Assertions.assertEquals("root", config.getLocalDockerMount()); + assertEquals("root", config.getLocalDockerMount()); envMap.put(EnvConfigs.LOCAL_DOCKER_MOUNT, null); envMap.put(EnvConfigs.LOCAL_ROOT, null); - Assertions.assertThrows(IllegalArgumentException.class, () -> config.getLocalDockerMount()); + assertThrows(IllegalArgumentException.class, () -> config.getLocalDockerMount()); } @Test void testDockerNetwork() { envMap.put(EnvConfigs.DOCKER_NETWORK, null); - Assertions.assertEquals("host", config.getDockerNetwork()); + assertEquals("host", config.getDockerNetwork()); envMap.put(EnvConfigs.DOCKER_NETWORK, "abc"); - Assertions.assertEquals("abc", config.getDockerNetwork()); + assertEquals("abc", config.getDockerNetwork()); } @Test void testTrackingStrategy() { envMap.put(EnvConfigs.TRACKING_STRATEGY, null); - Assertions.assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); + assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); envMap.put(EnvConfigs.TRACKING_STRATEGY, "abc"); - Assertions.assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); + assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); envMap.put(EnvConfigs.TRACKING_STRATEGY, "logging"); - 
Assertions.assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); + assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); envMap.put(EnvConfigs.TRACKING_STRATEGY, "segment"); - Assertions.assertEquals(Configs.TrackingStrategy.SEGMENT, config.getTrackingStrategy()); + assertEquals(Configs.TrackingStrategy.SEGMENT, config.getTrackingStrategy()); envMap.put(EnvConfigs.TRACKING_STRATEGY, "LOGGING"); - Assertions.assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); + assertEquals(Configs.TrackingStrategy.LOGGING, config.getTrackingStrategy()); } @Test void testworkerKubeTolerations() { envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, null); - Assertions.assertEquals(config.getJobKubeTolerations(), List.of()); + assertEquals(config.getJobKubeTolerations(), List.of()); envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, ";;;"); - Assertions.assertEquals(config.getJobKubeTolerations(), List.of()); + assertEquals(config.getJobKubeTolerations(), List.of()); envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, "key=k,value=v;"); - Assertions.assertEquals(config.getJobKubeTolerations(), List.of()); + assertEquals(config.getJobKubeTolerations(), List.of()); envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, "key=airbyte-server,operator=Exists,effect=NoSchedule"); - Assertions.assertEquals(config.getJobKubeTolerations(), List.of(new TolerationPOJO("airbyte-server", "NoSchedule", null, "Exists"))); + assertEquals(config.getJobKubeTolerations(), List.of(new TolerationPOJO("airbyte-server", "NoSchedule", null, "Exists"))); envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, "key=airbyte-server,operator=Equals,value=true,effect=NoSchedule"); - Assertions.assertEquals(config.getJobKubeTolerations(), List.of(new TolerationPOJO("airbyte-server", "NoSchedule", "true", "Equals"))); + assertEquals(config.getJobKubeTolerations(), List.of(new TolerationPOJO("airbyte-server", "NoSchedule", "true", "Equals"))); envMap.put(EnvConfigs.JOB_KUBE_TOLERATIONS, "key=airbyte-server,operator=Exists,effect=NoSchedule;key=airbyte-server,operator=Equals,value=true,effect=NoSchedule"); - Assertions.assertEquals(config.getJobKubeTolerations(), List.of( + assertEquals(config.getJobKubeTolerations(), List.of( new TolerationPOJO("airbyte-server", "NoSchedule", null, "Exists"), new TolerationPOJO("airbyte-server", "NoSchedule", "true", "Equals"))); } @@ -185,78 +188,148 @@ void testworkerKubeTolerations() { @Test void testJobKubeNodeSelectors() { envMap.put(EnvConfigs.JOB_KUBE_NODE_SELECTORS, null); - Assertions.assertFalse(config.getJobKubeNodeSelectors().isPresent()); + assertFalse(config.getJobKubeNodeSelectors().isPresent()); envMap.put(EnvConfigs.JOB_KUBE_NODE_SELECTORS, ",,,"); - Assertions.assertFalse(config.getJobKubeNodeSelectors().isPresent()); + assertFalse(config.getJobKubeNodeSelectors().isPresent()); envMap.put(EnvConfigs.JOB_KUBE_NODE_SELECTORS, "key=k,,;$%&^#"); - Assertions.assertEquals(config.getJobKubeNodeSelectors().get(), Map.of("key", "k")); + assertEquals(config.getJobKubeNodeSelectors().get(), Map.of("key", "k")); envMap.put(EnvConfigs.JOB_KUBE_NODE_SELECTORS, "one=two"); - Assertions.assertEquals(config.getJobKubeNodeSelectors().get(), Map.of("one", "two")); + assertEquals(config.getJobKubeNodeSelectors().get(), Map.of("one", "two")); envMap.put(EnvConfigs.JOB_KUBE_NODE_SELECTORS, "airbyte=server,something=nothing"); - Assertions.assertEquals(config.getJobKubeNodeSelectors().get(), Map.of("airbyte", "server", "something", "nothing")); + 
assertEquals(config.getJobKubeNodeSelectors().get(), Map.of("airbyte", "server", "something", "nothing")); } @Test void testSpecKubeNodeSelectors() { envMap.put(EnvConfigs.SPEC_JOB_KUBE_NODE_SELECTORS, null); - Assertions.assertFalse(config.getSpecJobKubeNodeSelectors().isPresent()); + assertFalse(config.getSpecJobKubeNodeSelectors().isPresent()); envMap.put(EnvConfigs.SPEC_JOB_KUBE_NODE_SELECTORS, ",,,"); - Assertions.assertFalse(config.getSpecJobKubeNodeSelectors().isPresent()); + assertFalse(config.getSpecJobKubeNodeSelectors().isPresent()); envMap.put(EnvConfigs.SPEC_JOB_KUBE_NODE_SELECTORS, "key=k,,;$%&^#"); - Assertions.assertEquals(config.getSpecJobKubeNodeSelectors().get(), Map.of("key", "k")); + assertEquals(config.getSpecJobKubeNodeSelectors().get(), Map.of("key", "k")); envMap.put(EnvConfigs.SPEC_JOB_KUBE_NODE_SELECTORS, "one=two"); - Assertions.assertEquals(config.getSpecJobKubeNodeSelectors().get(), Map.of("one", "two")); + assertEquals(config.getSpecJobKubeNodeSelectors().get(), Map.of("one", "two")); envMap.put(EnvConfigs.SPEC_JOB_KUBE_NODE_SELECTORS, "airbyte=server,something=nothing"); - Assertions.assertEquals(config.getSpecJobKubeNodeSelectors().get(), Map.of("airbyte", "server", "something", "nothing")); + assertEquals(config.getSpecJobKubeNodeSelectors().get(), Map.of("airbyte", "server", "something", "nothing")); } @Test void testCheckKubeNodeSelectors() { envMap.put(EnvConfigs.CHECK_JOB_KUBE_NODE_SELECTORS, null); - Assertions.assertFalse(config.getCheckJobKubeNodeSelectors().isPresent()); + assertFalse(config.getCheckJobKubeNodeSelectors().isPresent()); envMap.put(EnvConfigs.CHECK_JOB_KUBE_NODE_SELECTORS, ",,,"); - Assertions.assertFalse(config.getCheckJobKubeNodeSelectors().isPresent()); + assertFalse(config.getCheckJobKubeNodeSelectors().isPresent()); envMap.put(EnvConfigs.CHECK_JOB_KUBE_NODE_SELECTORS, "key=k,,;$%&^#"); - Assertions.assertEquals(config.getCheckJobKubeNodeSelectors().get(), Map.of("key", "k")); + assertEquals(config.getCheckJobKubeNodeSelectors().get(), Map.of("key", "k")); envMap.put(EnvConfigs.CHECK_JOB_KUBE_NODE_SELECTORS, "one=two"); - Assertions.assertEquals(config.getCheckJobKubeNodeSelectors().get(), Map.of("one", "two")); + assertEquals(config.getCheckJobKubeNodeSelectors().get(), Map.of("one", "two")); envMap.put(EnvConfigs.CHECK_JOB_KUBE_NODE_SELECTORS, "airbyte=server,something=nothing"); - Assertions.assertEquals(config.getCheckJobKubeNodeSelectors().get(), Map.of("airbyte", "server", "something", "nothing")); + assertEquals(config.getCheckJobKubeNodeSelectors().get(), Map.of("airbyte", "server", "something", "nothing")); } @Test void testDiscoverKubeNodeSelectors() { envMap.put(EnvConfigs.DISCOVER_JOB_KUBE_NODE_SELECTORS, null); - Assertions.assertFalse(config.getDiscoverJobKubeNodeSelectors().isPresent()); + assertFalse(config.getDiscoverJobKubeNodeSelectors().isPresent()); envMap.put(EnvConfigs.DISCOVER_JOB_KUBE_NODE_SELECTORS, ",,,"); - Assertions.assertFalse(config.getDiscoverJobKubeNodeSelectors().isPresent()); + assertFalse(config.getDiscoverJobKubeNodeSelectors().isPresent()); envMap.put(EnvConfigs.DISCOVER_JOB_KUBE_NODE_SELECTORS, "key=k,,;$%&^#"); - Assertions.assertEquals(config.getDiscoverJobKubeNodeSelectors().get(), Map.of("key", "k")); + assertEquals(config.getDiscoverJobKubeNodeSelectors().get(), Map.of("key", "k")); envMap.put(EnvConfigs.DISCOVER_JOB_KUBE_NODE_SELECTORS, "one=two"); - Assertions.assertEquals(config.getDiscoverJobKubeNodeSelectors().get(), Map.of("one", "two")); + 
assertEquals(config.getDiscoverJobKubeNodeSelectors().get(), Map.of("one", "two")); envMap.put(EnvConfigs.DISCOVER_JOB_KUBE_NODE_SELECTORS, "airbyte=server,something=nothing"); - Assertions.assertEquals(config.getDiscoverJobKubeNodeSelectors().get(), Map.of("airbyte", "server", "something", "nothing")); + assertEquals(config.getDiscoverJobKubeNodeSelectors().get(), Map.of("airbyte", "server", "something", "nothing")); + } + + @Nested + @DisplayName("CheckJobResourceSettings") + public class CheckJobResourceSettings { + + @Test + @DisplayName("should default to JobMainCpuRequest if not set") + void testCpuRequestDefaultToJobMainCpuRequest() { + envMap.put(EnvConfigs.CHECK_JOB_MAIN_CONTAINER_CPU_REQUEST, null); + envMap.put(EnvConfigs.JOB_MAIN_CONTAINER_CPU_REQUEST, "1"); + assertEquals("1", config.getCheckJobMainContainerCpuRequest()); + } + + @Test + @DisplayName("checkJobCpuRequest should take precedent if set") + void testCheckJobCpuRequestTakePrecedentIfSet() { + envMap.put(EnvConfigs.CHECK_JOB_MAIN_CONTAINER_CPU_REQUEST, "1"); + envMap.put(EnvConfigs.JOB_MAIN_CONTAINER_CPU_REQUEST, "2"); + assertEquals("1", config.getCheckJobMainContainerCpuRequest()); + } + + @Test + @DisplayName("should default to JobMainCpuLimit if not set") + void testCpuLimitDefaultToJobMainCpuLimit() { + envMap.put(EnvConfigs.CHECK_JOB_MAIN_CONTAINER_CPU_LIMIT, null); + envMap.put(EnvConfigs.JOB_MAIN_CONTAINER_CPU_LIMIT, "1"); + assertEquals("1", config.getCheckJobMainContainerCpuLimit()); + } + + @Test + @DisplayName("checkJobCpuLimit should take precedent if set") + void testCheckJobCpuLimitTakePrecedentIfSet() { + envMap.put(EnvConfigs.CHECK_JOB_MAIN_CONTAINER_CPU_LIMIT, "1"); + envMap.put(EnvConfigs.JOB_MAIN_CONTAINER_CPU_LIMIT, "2"); + assertEquals("1", config.getCheckJobMainContainerCpuLimit()); + } + + @Test + @DisplayName("should default to JobMainMemoryRequest if not set") + void testMemoryRequestDefaultToJobMainMemoryRequest() { + envMap.put(EnvConfigs.CHECK_JOB_MAIN_CONTAINER_MEMORY_REQUEST, null); + envMap.put(EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_REQUEST, "1"); + assertEquals("1", config.getCheckJobMainContainerMemoryRequest()); + } + + @Test + @DisplayName("checkJobMemoryRequest should take precedent if set") + void testCheckJobMemoryRequestTakePrecedentIfSet() { + envMap.put(EnvConfigs.CHECK_JOB_MAIN_CONTAINER_MEMORY_REQUEST, "1"); + envMap.put(EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_REQUEST, "2"); + assertEquals("1", config.getCheckJobMainContainerMemoryRequest()); + } + + @Test + @DisplayName("should default to JobMainMemoryLimit if not set") + void testMemoryLimitDefaultToJobMainMemoryLimit() { + envMap.put(EnvConfigs.CHECK_JOB_MAIN_CONTAINER_MEMORY_LIMIT, null); + envMap.put(EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_LIMIT, "1"); + assertEquals("1", config.getCheckJobMainContainerMemoryLimit()); + } + + @Test + @DisplayName("checkJobMemoryLimit should take precedent if set") + void testCheckJobMemoryLimitTakePrecedentIfSet() { + envMap.put(EnvConfigs.CHECK_JOB_MAIN_CONTAINER_MEMORY_LIMIT, "1"); + envMap.put(EnvConfigs.JOB_MAIN_CONTAINER_MEMORY_LIMIT, "2"); + assertEquals("1", config.getCheckJobMainContainerMemoryLimit()); + } + } @Test void testEmptyEnvMapRetrieval() { - Assertions.assertEquals(Map.of(), config.getJobDefaultEnvMap()); + assertEquals(Map.of(), config.getJobDefaultEnvMap()); } @Test @@ -265,7 +338,7 @@ void testEnvMapRetrieval() { envMap.put(EnvConfigs.JOB_DEFAULT_ENV_PREFIX + "ENV2", "VAL\"2WithQuotesand$ymbols"); final var expected = Map.of("ENV1", "VAL1", "ENV2", 
"VAL\"2WithQuotesand$ymbols"); - Assertions.assertEquals(expected, config.getJobDefaultEnvMap()); + assertEquals(expected, config.getJobDefaultEnvMap()); } } diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerConfigs.java b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerConfigs.java index 10d85682f73e..a8556a184a8b 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerConfigs.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerConfigs.java @@ -88,10 +88,10 @@ public static WorkerConfigs buildCheckWorkerConfigs(final Configs configs) { return new WorkerConfigs( configs.getWorkerEnvironment(), new ResourceRequirements() - .withCpuRequest(configs.getJobMainContainerCpuRequest()) - .withCpuLimit(configs.getJobMainContainerCpuLimit()) - .withMemoryRequest(configs.getJobMainContainerMemoryRequest()) - .withMemoryLimit(configs.getJobMainContainerMemoryLimit()), + .withCpuRequest(configs.getCheckJobMainContainerCpuRequest()) + .withCpuLimit(configs.getCheckJobMainContainerCpuLimit()) + .withMemoryRequest(configs.getCheckJobMainContainerMemoryRequest()) + .withMemoryLimit(configs.getCheckJobMainContainerMemoryLimit()), configs.getJobKubeTolerations(), nodeSelectors, configs.getJobKubeMainContainerImagePullSecret(),