diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 2df9ee4866e7..dbb8de43cf1c 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.35.55-alpha +current_version = 0.35.59-alpha commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-[a-z]+)? diff --git a/.env b/.env index bc072208783a..5d263e087944 100644 --- a/.env +++ b/.env @@ -10,7 +10,7 @@ ### SHARED ### -VERSION=0.35.55-alpha +VERSION=0.35.59-alpha # When using the airbyte-db via default docker image CONFIG_ROOT=/data @@ -91,3 +91,4 @@ MAX_DISCOVER_WORKERS=5 ### FEATURE FLAGS ### NEW_SCHEDULER=false +AUTO_DISABLE_FAILING_CONNECTIONS=false diff --git a/.github/actions/start-aws-runner/action.yml b/.github/actions/start-aws-runner/action.yml index 635bb7f5cbc0..22776cb24410 100644 --- a/.github/actions/start-aws-runner/action.yml +++ b/.github/actions/start-aws-runner/action.yml @@ -8,8 +8,8 @@ inputs: github-token: required: true ec2-image-id: - # github-self-hosted-runner-ubuntu-20-100g-disk - default: "ami-0ccd67e0abd945eec" + # github-self-hosted-runner-ubuntu-20-100g-disk-with-cypress-deps + default: "ami-08927c058921b27f4" required: true ec2-instance-type: default: "c5.2xlarge" diff --git a/.github/workflows/gke-kube-test-command.yml b/.github/workflows/gke-kube-test-command.yml index 849713687dc2..31d77f8ac40e 100644 --- a/.github/workflows/gke-kube-test-command.yml +++ b/.github/workflows/gke-kube-test-command.yml @@ -76,7 +76,7 @@ jobs: sudo apt-get install socat - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@master + uses: google-github-actions/setup-gcloud@v0 with: project_id: ${{ secrets.GKE_TEST_PROJECT_ID }} service_account_key: ${{ secrets.GKE_TEST_SA_KEY }} diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index b98c4402407e..2b9204b3a8f5 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -22,11 +22,18 @@ jobs: # The output of this job is used to trigger the following builds. changes: name: "Detect Modified Files" + # The filtering action does not deal with well scheduled events so skip to avoid errors. + # See https://github.com/dorny/paths-filter/issues/100 for more info. + # This is okay this workflow is only scheduled on master, where we want to build everything + # so filtering is not required. Use always() in each start block to force the start task. + if: github.event_name != 'schedule' runs-on: ubuntu-latest outputs: backend: ${{ steps.filter.outputs.backend }} build: ${{ steps.filter.outputs.build }} cli: ${{ steps.filter.outputs.cli }} + connectors: ${{ steps.filter.outputs.connectors }} + db: ${{ steps.filter.outputs.db }} frontend: ${{ steps.filter.outputs.frontend }} steps: - name: Checkout Airbyte @@ -34,13 +41,22 @@ jobs: - uses: dorny/paths-filter@v2 id: filter with: + # Note, the following glob expression within a filters are ORs. filters: | backend: - - 'airbyte-**/**' + - 'airbyte-!(cdk|integrations|webapp|webapp-e2e-tests)/**' build: - '.github/**' + - 'buildSrc/**' + - 'tools/**' cli: - - 'airbyte-cli/**' + - 'airbyte-api/**' + - 'octavia-cli/**' + connectors: + - 'airbyte-cdk/**' + - 'airbyte-integrations/**' + db: + - 'airbyte-db/**' frontend: - 'airbyte-webapp/**' - 'airbyte-webapp-e2e-tests/**' @@ -55,10 +71,87 @@ jobs: # - run: | # echo '${{ toJSON(needs) }}' - # Gradle Build (Connectors Base) + ## BUILDS + octavia-cli-build: + needs: changes + runs-on: ubuntu-latest + # Because scheduled builds on master require us to skip the changes job. 
Use always() to force this to run on master. + if: needs.changes.outputs.cli == 'true' || needs.changes.outputs.build == 'true' || (always() && github.ref == 'refs/heads/master') + name: "Octavia CLI: Build" + timeout-minutes: 90 + steps: + - name: Checkout Airbyte + uses: actions/checkout@v2 + + - name: Cache Build Artifacts + uses: ./.github/actions/cache-build-artifacts + with: + cache-key: ${{ secrets.CACHE_VERSION }} + cache-python: "false" + + - uses: actions/setup-java@v1 + with: + java-version: "17" + + # octavia-cli install and testing requires Python. + # We use 3.8 in this project because 3.7 is not supported on Apple M1. + - uses: actions/setup-python@v2 + with: + python-version: "3.8" + + - name: Set up CI Gradle Properties + run: | + mkdir -p ~/.gradle/ + cat > ~/.gradle/gradle.properties < ~/.gradle/gradle.properties < ~/.gradle/gradle.properties </dev/null 2>&1; do - sleep 1 - done - - sudo apt-get update && sudo apt-get install -y libgtk2.0-0 libgtk-3-0 libgbm-dev libnotify-dev libgconf-2-4 libnss3 libxss1 libasound2 libxtst6 xauth xvfb - - - name: Set up CI Gradle Properties - run: | - mkdir -p ~/.gradle/ - cat > ~/.gradle/gradle.properties <COPY INTO table to upload\ + \ the file. Recommended for large production workloads for better speed\ + \ and scalability." + required: + - "method" + - "azure_blob_storage_account_name" + - "azure_blob_storage_container_name" + - "azure_blob_storage_sas_token" + properties: + method: + type: "string" + enum: + - "Azure Blob Staging" + default: "Azure Blob Staging" + order: 0 + azure_blob_storage_endpoint_domain_name: + title: "Endpoint Domain Name" + type: "string" + default: "blob.core.windows.net" + description: "This is Azure Blob Storage endpoint domain name. Leave\ + \ default value (or leave it empty if run container from command\ + \ line) to use Microsoft native from example." + examples: + - "blob.core.windows.net" + order: 1 + azure_blob_storage_account_name: + title: "Azure Blob Storage Account Name" + type: "string" + description: "The account's name of the Azure Blob Storage." + examples: + - "airbyte5storage" + order: 2 + azure_blob_storage_container_name: + title: "Azure blob storage container (Bucket) Name" + type: "string" + description: "The name of the Azure blob storage container. *This\ + \ needs to coincide with the container specified in the Snowflake\ + \ Storage Integration and Snowflake Azure External Stage (see description\ + \ of 'Snowflake Azure External Stage' for details" + examples: + - "airbytetestcontainername" + order: 3 + azure_blob_storage_sas_token: + title: "SAS Token" + type: "string" + airbyte_secret: true + description: "Shared access signature(SAS) token to grant Snowflake\ + \ limited access to objects in your storage account. 
See more https://docs.snowflake.com/en/user-guide/data-load-azure-config.html#option-2-generating-a-sas-token" + examples: + - "?sv=2016-05-31&ss=b&srt=sco&sp=rwdl&se=2018-06-27T10:05:50Z&st=2017-06-27T02:05:50Z&spr=https,http&sig=bgqQwoXwxzuD2GJfagRg7VOS8hzNr3QLT7rhS8OFRLQ%3D" + order: 4 supportsIncremental: true supportsNormalization: true supportsDBT: true diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index c57846e6acad..dc8f9564ac04 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -116,6 +116,13 @@ documentationUrl: https://docs.airbyte.io/integrations/sources/chargebee icon: chargebee.svg sourceType: api +- name: Chargify + sourceDefinitionId: 9b2d3607-7222-4709-9fa2-c2abdebbdd88 + dockerRepository: airbyte/source-chargify + dockerImageTag: 0.1.0 + documentationUrl: https://docs.airbyte.io/integrations/sources/chargify + icon: chargify.svg + sourceType: api - name: Chartmogul sourceDefinitionId: b6604cbd-1b12-4c08-8767-e140d0fb0877 dockerRepository: airbyte/source-chartmogul @@ -259,7 +266,7 @@ - name: Google Ads sourceDefinitionId: 253487c0-2246-43ba-a21f-5116b20a2c50 dockerRepository: airbyte/source-google-ads - dockerImageTag: 0.1.28 + dockerImageTag: 0.1.29 documentationUrl: https://docs.airbyte.io/integrations/sources/google-ads icon: google-adwords.svg sourceType: api @@ -328,7 +335,7 @@ - name: HubSpot sourceDefinitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c dockerRepository: airbyte/source-hubspot - dockerImageTag: 0.1.47 + dockerImageTag: 0.1.50 documentationUrl: https://docs.airbyte.io/integrations/sources/hubspot icon: hubspot.svg sourceType: api @@ -349,7 +356,7 @@ - name: Intercom sourceDefinitionId: d8313939-3782-41b0-be29-b3ca20d8dd3a dockerRepository: airbyte/source-intercom - dockerImageTag: 0.1.13 + dockerImageTag: 0.1.15 documentationUrl: https://docs.airbyte.io/integrations/sources/intercom icon: intercom.svg sourceType: api @@ -697,7 +704,7 @@ - name: Shopify sourceDefinitionId: 9da77001-af33-4bcd-be46-6252bf9342b9 dockerRepository: airbyte/source-shopify - dockerImageTag: 0.1.35 + dockerImageTag: 0.1.36 documentationUrl: https://docs.airbyte.io/integrations/sources/shopify icon: shopify.svg sourceType: api @@ -752,7 +759,7 @@ - name: Stripe sourceDefinitionId: e094cb9a-26de-4645-8761-65c0c425d1de dockerRepository: airbyte/source-stripe - dockerImageTag: 0.1.29 + dockerImageTag: 0.1.30 documentationUrl: https://docs.airbyte.io/integrations/sources/stripe icon: stripe.svg sourceType: api @@ -836,7 +843,7 @@ - name: Zendesk Support sourceDefinitionId: 79c1aa37-dae3-42ae-b333-d1c105477715 dockerRepository: airbyte/source-zendesk-support - dockerImageTag: 0.2.0 + dockerImageTag: 0.2.2 documentationUrl: https://docs.airbyte.io/integrations/sources/zendesk-support icon: zendesk.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index f0d960c48bf2..c617c5bfdb62 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -1000,6 +1000,29 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] +- dockerImage: "airbyte/source-chargify:0.1.0" + spec: + documentationUrl: "https://docs.airbyte.io/integrations/sources/chargify" + 
connectionSpecification: + $schema: "http://json-schema.org/draft-07/schema#" + title: "Chargify Spec" + type: "object" + required: + - "api_key" + - "domain" + additionalProperties: false + properties: + api_key: + type: "string" + description: "Chargify API Key." + airbyte_secret: true + domain: + type: "string" + description: "Chargify domain. Normally this domain follows the following\ + \ format companyname.chargify.com" + supportsNormalization: false + supportsDBT: false + supported_destination_sync_modes: [] - dockerImage: "airbyte/source-chartmogul:0.1.1" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/chartmogul" @@ -2507,7 +2530,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-google-ads:0.1.28" +- dockerImage: "airbyte/source-google-ads:0.1.29" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/google-ads" connectionSpecification: @@ -3331,7 +3354,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-hubspot:0.1.47" +- dockerImage: "airbyte/source-hubspot:0.1.50" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/hubspot" connectionSpecification: @@ -3376,7 +3399,7 @@ client_id: title: "Client ID" description: "The Client ID of your HubSpot developer application.\ - \ See our docs if you need help finding this id." type: "string" examples: @@ -3384,7 +3407,7 @@ client_secret: title: "Client Secret" description: "The Client Secret of your HubSpot developer application.\ - \ See our docs if you need help finding this secret." type: "string" examples: @@ -3393,7 +3416,7 @@ refresh_token: title: "Refresh Token" description: "Refresh Token to renew the expired Access Token. See\ - \ our docs if you need help generating the token." type: "string" examples: @@ -3416,7 +3439,7 @@ order: 0 api_key: title: "API key" - description: "HubSpot API Key. See our docs if you need help finding this key." type: "string" airbyte_secret: true @@ -3565,7 +3588,7 @@ oauthFlowInitParameters: [] oauthFlowOutputParameters: - - "access_token" -- dockerImage: "airbyte/source-intercom:0.1.13" +- dockerImage: "airbyte/source-intercom:0.1.15" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/intercom" connectionSpecification: @@ -7407,7 +7430,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-shopify:0.1.35" +- dockerImage: "airbyte/source-shopify:0.1.36" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/shopify" connectionSpecification: @@ -8091,7 +8114,7 @@ type: "string" path_in_connector_config: - "client_secret" -- dockerImage: "airbyte/source-stripe:0.1.29" +- dockerImage: "airbyte/source-stripe:0.1.30" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/stripe" connectionSpecification: @@ -8112,7 +8135,7 @@ order: 0 client_secret: type: "string" - title: "Client Secret" + title: "Secret Key" description: "Stripe API key (usually starts with 'sk_live_'; find yours\ \ here)." airbyte_secret: true @@ -8133,7 +8156,8 @@ minimum: 0 description: "When set, the connector will always reload data from the past\ \ N days, where N is the value set here. This is useful if your data is\ - \ updated after creation." + \ updated after creation. 
More info here" order: 3 supportsNormalization: false supportsDBT: false @@ -8882,7 +8906,7 @@ path_in_connector_config: - "credentials" - "client_secret" -- dockerImage: "airbyte/source-zendesk-support:0.2.0" +- dockerImage: "airbyte/source-zendesk-support:0.2.2" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-support" connectionSpecification: @@ -8892,8 +8916,7 @@ required: - "start_date" - "subdomain" - - "auth_method" - additionalProperties: false + additionalProperties: true properties: start_date: type: "string" @@ -8906,25 +8929,49 @@ pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" subdomain: type: "string" - description: "The subdomain for your Zendesk Support" - auth_method: + title: "Subdomain" + description: "Identifier of your Zendesk Subdomain, like: https://{MY_SUBDOMAIN}.zendesk.com/,\ + \ where MY_SUBDOMAIN is the value of your subdomain" + credentials: title: "Authorization Method" type: "object" - default: "api_token" - description: "Zendesk service provides 2 auth method: API token and OAuth2.\ - \ Now only the first one is available. Another one will be added in the\ - \ future." + description: "Zendesk service provides two authentication methods. Choose\ + \ between: `OAuth2.0` or `API token`." oneOf: + - title: "OAuth2.0" + type: "object" + required: + - "access_token" + additionalProperties: true + properties: + credentials: + type: "string" + const: "oauth2.0" + enum: + - "oauth2.0" + default: "oauth2.0" + order: 0 + access_token: + type: "string" + title: "Access Token" + description: "The value of the API token generated. See the docs\ + \ for more information." + airbyte_secret: true - title: "API Token" type: "object" required: - "email" - "api_token" - additionalProperties: false + additionalProperties: true properties: - auth_method: + credentials: type: "string" const: "api_token" + enum: + - "api_token" + default: "api_token" + order: 0 email: title: "Email" type: "string" @@ -8936,25 +8983,55 @@ https://docs.airbyte.io/integrations/sources/zendesk-support\">docs\ \ for more information." airbyte_secret: true - - title: "OAuth2.0" - type: "object" - required: - - "access_token" - additionalProperties: false - properties: - auth_method: - type: "string" - const: "access_token" - access_token: - title: "Access Token" - type: "string" - description: "The value of the Access token generated. See the docs for more information." 
- airbyte_secret: true supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] + advanced_auth: + auth_flow_type: "oauth2.0" + predicate_key: + - "credentials" + - "credentials" + predicate_value: "oauth2.0" + oauth_config_specification: + oauth_user_input_from_connector_config_specification: + type: "object" + additionalProperties: false + properties: + subdomain: + type: "string" + path_in_connector_config: + - "subdomain" + complete_oauth_output_specification: + type: "object" + additionalProperties: false + properties: + access_token: + type: "string" + path_in_connector_config: + - "credentials" + - "access_token" + complete_oauth_server_input_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + client_secret: + type: "string" + complete_oauth_server_output_specification: + type: "object" + additionalProperties: false + properties: + client_id: + type: "string" + path_in_connector_config: + - "credentials" + - "client_id" + client_secret: + type: "string" + path_in_connector_config: + - "credentials" + - "client_secret" - dockerImage: "airbyte/source-zendesk-talk:0.1.3" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/zendesk-talk" diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java index 142014a34e99..fee3209c6119 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/Configs.java @@ -249,6 +249,18 @@ public interface Configs { */ Map getJobDefaultEnvMap(); + /** + * Defines the number of consecutive job failures required before a connection is auto-disabled if + * the AUTO_DISABLE_FAILING_CONNECTIONS flag is set to true. + */ + int getMaxFailedJobsInARowBeforeConnectionDisable(); + + /** + * Defines the required number of days with only failed jobs before a connection is auto-disabled if + * the AUTO_DISABLE_FAILING_CONNECTIONS flag is set to true. + */ + int getMaxDaysOfOnlyFailedJobsBeforeConnectionDisable(); + // Jobs - Kube only /** * Define the check job container's minimum CPU request. 
Defaults to diff --git a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java index 69a8a29bfe1b..90ff7d68b1e1 100644 --- a/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java +++ b/airbyte-config/models/src/main/java/io/airbyte/config/EnvConfigs.java @@ -121,6 +121,9 @@ public class EnvConfigs implements Configs { private static final String SHOULD_RUN_SYNC_WORKFLOWS = "SHOULD_RUN_SYNC_WORKFLOWS"; private static final String SHOULD_RUN_CONNECTION_MANAGER_WORKFLOWS = "SHOULD_RUN_CONNECTION_MANAGER_WORKFLOWS"; + private static final String MAX_FAILED_JOBS_IN_A_ROW_BEFORE_CONNECTION_DISABLE = "MAX_FAILED_JOBS_IN_A_ROW_BEFORE_CONNECTION_DISABLE"; + private static final String MAX_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE = "MAX_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE"; + // job-type-specific overrides public static final String SPEC_JOB_KUBE_NODE_SELECTORS = "SPEC_JOB_KUBE_NODE_SELECTORS"; public static final String CHECK_JOB_KUBE_NODE_SELECTORS = "CHECK_JOB_KUBE_NODE_SELECTORS"; @@ -178,6 +181,9 @@ public class EnvConfigs implements Configs { public static final int DEFAULT_TEMPORAL_HISTORY_RETENTION_IN_DAYS = 30; + public static final int DEFAULT_FAILED_JOBS_IN_A_ROW_BEFORE_CONNECTION_DISABLE = 100; + public static final int DEFAULT_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE = 14; + private final Function getEnv; private final Supplier> getAllEnvKeys; private final LogConfigs logConfigs; @@ -661,6 +667,16 @@ public Map getJobDefaultEnvMap() { return MoreMaps.merge(jobPrefixedEnvMap, jobSharedEnvMap); } + @Override + public int getMaxFailedJobsInARowBeforeConnectionDisable() { + return getEnvOrDefault(MAX_FAILED_JOBS_IN_A_ROW_BEFORE_CONNECTION_DISABLE, DEFAULT_FAILED_JOBS_IN_A_ROW_BEFORE_CONNECTION_DISABLE); + } + + @Override + public int getMaxDaysOfOnlyFailedJobsBeforeConnectionDisable() { + return getEnvOrDefault(MAX_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE, DEFAULT_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE); + } + @Override public String getCheckJobMainContainerCpuRequest() { return getEnvOrDefault(CHECK_JOB_MAIN_CONTAINER_CPU_REQUEST, getJobMainContainerCpuRequest()); diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java index 4f88e2f7b5f4..bedcea2c6d2f 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/ConfigRepository.java @@ -9,10 +9,12 @@ import static io.airbyte.db.instance.configs.jooq.Tables.ACTOR_CATALOG_FETCH_EVENT; import static io.airbyte.db.instance.configs.jooq.Tables.CONNECTION; import static io.airbyte.db.instance.configs.jooq.Tables.CONNECTION_OPERATION; +import static io.airbyte.db.instance.configs.jooq.Tables.WORKSPACE; import static org.jooq.impl.DSL.asterisk; import com.fasterxml.jackson.databind.JsonNode; import com.google.common.base.Charsets; +import com.google.common.collect.Sets; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; import io.airbyte.commons.json.Jsons; @@ -88,13 +90,22 @@ public StandardWorkspace getStandardWorkspace(final UUID workspaceId, final bool } public Optional getWorkspaceBySlugOptional(final String slug, final boolean includeTombstone) - throws JsonValidationException, IOException 
{ - for (final StandardWorkspace workspace : listStandardWorkspaces(includeTombstone)) { - if (workspace.getSlug().equals(slug)) { - return Optional.of(workspace); - } + throws IOException { + final Result result; + if (includeTombstone) { + result = database.query(ctx -> ctx.select(WORKSPACE.asterisk()) + .from(WORKSPACE) + .where(WORKSPACE.SLUG.eq(slug))).fetch(); + } else { + result = database.query(ctx -> ctx.select(WORKSPACE.asterisk()) + .from(WORKSPACE) + .where(WORKSPACE.SLUG.eq(slug)).andNot(WORKSPACE.TOMBSTONE)).fetch(); } - return Optional.empty(); + + if (result.size() == 0) { + return Optional.empty(); + } + return Optional.of(DbConverter.buildStandardWorkspace(result.get(0))); } public StandardWorkspace getWorkspaceBySlug(final String slug, final boolean includeTombstone) @@ -426,6 +437,44 @@ public List listStandardSyncOperations() throws IOExcepti return persistence.listConfigs(ConfigSchema.STANDARD_SYNC_OPERATION, StandardSyncOperation.class); } + /** + * Updates {@link io.airbyte.db.instance.configs.jooq.tables.ConnectionOperation} records for the + * given {@code connectionId}. + * + * @param connectionId ID of the associated connection to update operations for + * @param newOperationIds Set of all operationIds that should be associated to the connection + * @throws IOException + */ + public void updateConnectionOperationIds(final UUID connectionId, final Set newOperationIds) throws IOException { + database.transaction(ctx -> { + final Set existingOperationIds = ctx + .selectFrom(CONNECTION_OPERATION) + .where(CONNECTION_OPERATION.CONNECTION_ID.eq(connectionId)) + .fetchSet(CONNECTION_OPERATION.OPERATION_ID); + + final Set existingOperationIdsToKeep = Sets.intersection(existingOperationIds, newOperationIds); + + // DELETE existing connection_operation records that aren't in the input list + final Set operationIdsToDelete = Sets.difference(existingOperationIds, existingOperationIdsToKeep); + + ctx.deleteFrom(CONNECTION_OPERATION) + .where(CONNECTION_OPERATION.CONNECTION_ID.eq(connectionId)) + .and(CONNECTION_OPERATION.OPERATION_ID.in(operationIdsToDelete)) + .execute(); + + // INSERT connection_operation records that are in the input list and don't yet exist + final Set operationIdsToAdd = Sets.difference(newOperationIds, existingOperationIdsToKeep); + + operationIdsToAdd.forEach(operationId -> ctx + .insertInto(CONNECTION_OPERATION) + .columns(CONNECTION_OPERATION.ID, CONNECTION_OPERATION.CONNECTION_ID, CONNECTION_OPERATION.OPERATION_ID) + .values(UUID.randomUUID(), connectionId, operationId) + .execute()); + + return null; + }); + } + public SourceOAuthParameter getSourceOAuthParams(final UUID SourceOAuthParameterId) throws JsonValidationException, IOException, ConfigNotFoundException { return persistence.getConfig(ConfigSchema.SOURCE_OAUTH_PARAM, SourceOAuthParameterId.toString(), SourceOAuthParameter.class); diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java index 1d6379a34ca9..904996e67fbf 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DatabaseConfigPersistence.java @@ -33,7 +33,6 @@ import io.airbyte.config.ConfigWithMetadata; import io.airbyte.config.DestinationConnection; import io.airbyte.config.DestinationOAuthParameter; -import 
io.airbyte.config.Notification; import io.airbyte.config.OperatorDbt; import io.airbyte.config.OperatorNormalization; import io.airbyte.config.SourceConnection; @@ -317,12 +316,7 @@ private List> listStandardWorkspaceWithMet final List> standardWorkspaces = new ArrayList<>(); for (final Record record : result) { - final List notificationList = new ArrayList<>(); - final List fetchedNotifications = Jsons.deserialize(record.get(WORKSPACE.NOTIFICATIONS).data(), List.class); - for (final Object notification : fetchedNotifications) { - notificationList.add(Jsons.convertValue(notification, Notification.class)); - } - final StandardWorkspace workspace = buildStandardWorkspace(record, notificationList); + final StandardWorkspace workspace = DbConverter.buildStandardWorkspace(record); standardWorkspaces.add(new ConfigWithMetadata<>( record.get(WORKSPACE.ID).toString(), ConfigSchema.STANDARD_WORKSPACE.name(), @@ -333,24 +327,6 @@ private List> listStandardWorkspaceWithMet return standardWorkspaces; } - private StandardWorkspace buildStandardWorkspace(final Record record, final List notificationList) { - return new StandardWorkspace() - .withWorkspaceId(record.get(WORKSPACE.ID)) - .withName(record.get(WORKSPACE.NAME)) - .withSlug(record.get(WORKSPACE.SLUG)) - .withInitialSetupComplete(record.get(WORKSPACE.INITIAL_SETUP_COMPLETE)) - .withCustomerId(record.get(WORKSPACE.CUSTOMER_ID)) - .withEmail(record.get(WORKSPACE.EMAIL)) - .withAnonymousDataCollection(record.get(WORKSPACE.ANONYMOUS_DATA_COLLECTION)) - .withNews(record.get(WORKSPACE.SEND_NEWSLETTER)) - .withSecurityUpdates(record.get(WORKSPACE.SEND_SECURITY_UPDATES)) - .withDisplaySetupWizard(record.get(WORKSPACE.DISPLAY_SETUP_WIZARD)) - .withTombstone(record.get(WORKSPACE.TOMBSTONE)) - .withNotifications(notificationList) - .withFirstCompletedSync(record.get(WORKSPACE.FIRST_SYNC_COMPLETE)) - .withFeedbackDone(record.get(WORKSPACE.FEEDBACK_COMPLETE)); - } - private List> listStandardSourceDefinitionWithMetadata() throws IOException { return listStandardSourceDefinitionWithMetadata(Optional.empty()); } diff --git a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DbConverter.java b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DbConverter.java index de8f43ee4554..506f107a11d6 100644 --- a/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DbConverter.java +++ b/airbyte-config/persistence/src/main/java/io/airbyte/config/persistence/DbConverter.java @@ -5,16 +5,20 @@ package io.airbyte.config.persistence; import static io.airbyte.db.instance.configs.jooq.Tables.CONNECTION; +import static io.airbyte.db.instance.configs.jooq.Tables.WORKSPACE; import io.airbyte.commons.enums.Enums; import io.airbyte.commons.json.Jsons; import io.airbyte.config.JobSyncConfig.NamespaceDefinitionType; +import io.airbyte.config.Notification; import io.airbyte.config.ResourceRequirements; import io.airbyte.config.Schedule; import io.airbyte.config.StandardSync; import io.airbyte.config.StandardSync.Status; +import io.airbyte.config.StandardWorkspace; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.UUID; import org.jooq.Record; @@ -42,4 +46,27 @@ public static StandardSync buildStandardSync(final Record record, final List notificationList = new ArrayList<>(); + final List fetchedNotifications = Jsons.deserialize(record.get(WORKSPACE.NOTIFICATIONS).data(), List.class); + for (final Object notification : 
fetchedNotifications) { + notificationList.add(Jsons.convertValue(notification, Notification.class)); + } + return new StandardWorkspace() + .withWorkspaceId(record.get(WORKSPACE.ID)) + .withName(record.get(WORKSPACE.NAME)) + .withSlug(record.get(WORKSPACE.SLUG)) + .withInitialSetupComplete(record.get(WORKSPACE.INITIAL_SETUP_COMPLETE)) + .withCustomerId(record.get(WORKSPACE.CUSTOMER_ID)) + .withEmail(record.get(WORKSPACE.EMAIL)) + .withAnonymousDataCollection(record.get(WORKSPACE.ANONYMOUS_DATA_COLLECTION)) + .withNews(record.get(WORKSPACE.SEND_NEWSLETTER)) + .withSecurityUpdates(record.get(WORKSPACE.SEND_SECURITY_UPDATES)) + .withDisplaySetupWizard(record.get(WORKSPACE.DISPLAY_SETUP_WIZARD)) + .withTombstone(record.get(WORKSPACE.TOMBSTONE)) + .withNotifications(notificationList) + .withFirstCompletedSync(record.get(WORKSPACE.FIRST_SYNC_COMPLETE)) + .withFeedbackDone(record.get(WORKSPACE.FEEDBACK_COMPLETE)); + } + } diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/ConfigRepositoryE2EReadWriteTest.java b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/ConfigRepositoryE2EReadWriteTest.java index a83dd8868054..9811ed108470 100644 --- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/ConfigRepositoryE2EReadWriteTest.java +++ b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/ConfigRepositoryE2EReadWriteTest.java @@ -5,8 +5,11 @@ package io.airbyte.config.persistence; import static io.airbyte.db.instance.configs.jooq.Tables.ACTOR_CATALOG; +import static io.airbyte.db.instance.configs.jooq.Tables.CONNECTION_OPERATION; import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.spy; import io.airbyte.commons.json.Jsons; @@ -30,9 +33,12 @@ import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; import java.sql.SQLException; +import java.util.Collections; import java.util.List; import java.util.Optional; +import java.util.Set; import java.util.UUID; +import java.util.stream.Collectors; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; @@ -156,4 +162,59 @@ public void testListWorkspaceStandardSync() throws IOException { assertThat(MockData.standardSyncs().subList(0, 4)).hasSameElementsAs(syncs); } + @Test + public void testGetWorkspaceBySlug() + throws IOException { + + final StandardWorkspace workspace = MockData.standardWorkspaces().get(0); + final StandardWorkspace tombstonedWorkspace = MockData.standardWorkspaces().get(2); + final Optional retrievedWorkspace = configRepository.getWorkspaceBySlugOptional(workspace.getSlug(), false); + final Optional retrievedTombstonedWorkspaceNoTombstone = + configRepository.getWorkspaceBySlugOptional(tombstonedWorkspace.getSlug(), false); + final Optional retrievedTombstonedWorkspace = configRepository.getWorkspaceBySlugOptional(tombstonedWorkspace.getSlug(), true); + + assertTrue(retrievedWorkspace.isPresent()); + assertEquals(workspace, retrievedWorkspace.get()); + + assertFalse(retrievedTombstonedWorkspaceNoTombstone.isPresent()); + assertTrue(retrievedTombstonedWorkspace.isPresent()); + + assertEquals(tombstonedWorkspace, retrievedTombstonedWorkspace.get()); + } + + @Test + public void 
testUpdateConnectionOperationIds() throws Exception { + final StandardSync sync = MockData.standardSyncs().get(0); + final List existingOperationIds = sync.getOperationIds(); + final UUID connectionId = sync.getConnectionId(); + + // this test only works as intended when there are multiple operationIds + assertTrue(existingOperationIds.size() > 1); + + // first, remove all associated operations + Set expectedOperationIds = Collections.emptySet(); + configRepository.updateConnectionOperationIds(connectionId, expectedOperationIds); + Set actualOperationIds = fetchOperationIdsForConnectionId(connectionId); + assertEquals(expectedOperationIds, actualOperationIds); + + // now, add back one operation + expectedOperationIds = Collections.singleton(existingOperationIds.get(0)); + configRepository.updateConnectionOperationIds(connectionId, expectedOperationIds); + actualOperationIds = fetchOperationIdsForConnectionId(connectionId); + assertEquals(expectedOperationIds, actualOperationIds); + + // finally, remove the first operation while adding back in the rest + expectedOperationIds = existingOperationIds.stream().skip(1).collect(Collectors.toSet()); + configRepository.updateConnectionOperationIds(connectionId, expectedOperationIds); + actualOperationIds = fetchOperationIdsForConnectionId(connectionId); + assertEquals(expectedOperationIds, actualOperationIds); + } + + private Set fetchOperationIdsForConnectionId(final UUID connectionId) throws SQLException { + return database.query(ctx -> ctx + .selectFrom(CONNECTION_OPERATION) + .where(CONNECTION_OPERATION.CONNECTION_ID.eq(connectionId)) + .fetchSet(CONNECTION_OPERATION.OPERATION_ID)); + } + } diff --git a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/MockData.java b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/MockData.java index 3be5c1defa5a..e0e500563ffa 100644 --- a/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/MockData.java +++ b/airbyte-config/persistence/src/test/java/io/airbyte/config/persistence/MockData.java @@ -53,6 +53,7 @@ public class MockData { private static final UUID WORKSPACE_ID_1 = UUID.randomUUID(); private static final UUID WORKSPACE_ID_2 = UUID.randomUUID(); + private static final UUID WORKSPACE_ID_3 = UUID.randomUUID(); private static final UUID WORKSPACE_CUSTOMER_ID = UUID.randomUUID(); private static final UUID SOURCE_DEFINITION_ID_1 = UUID.randomUUID(); private static final UUID SOURCE_DEFINITION_ID_2 = UUID.randomUUID(); @@ -114,7 +115,14 @@ public static List standardWorkspaces() { .withInitialSetupComplete(true) .withTombstone(false); - return Arrays.asList(workspace1, workspace2); + final StandardWorkspace workspace3 = new StandardWorkspace() + .withWorkspaceId(WORKSPACE_ID_3) + .withName("Tombstoned") + .withSlug("tombstoned") + .withInitialSetupComplete(true) + .withTombstone(true); + + return Arrays.asList(workspace1, workspace2, workspace3); } public static List standardSourceDefinitions() { diff --git a/airbyte-container-orchestrator/Dockerfile b/airbyte-container-orchestrator/Dockerfile index 93d25457c6fc..487cf7415cd0 100644 --- a/airbyte-container-orchestrator/Dockerfile +++ b/airbyte-container-orchestrator/Dockerfile @@ -26,12 +26,12 @@ RUN echo "deb [signed-by=/usr/share/keyrings/kubernetes-archive-keyring.gpg] htt RUN apt-get update && apt-get install -y kubectl ENV APPLICATION airbyte-container-orchestrator -ENV AIRBYTE_ENTRYPOINT "/app/${APPLICATION}-0.35.55-alpha/bin/${APPLICATION}" +ENV AIRBYTE_ENTRYPOINT 
"/app/${APPLICATION}-0.35.59-alpha/bin/${APPLICATION}" WORKDIR /app # Move orchestrator app -ADD bin/${APPLICATION}-0.35.55-alpha.tar /app +ADD bin/${APPLICATION}-0.35.59-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.55-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "/app/${APPLICATION}-0.35.59-alpha/bin/${APPLICATION}"] diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/SqlDatabase.java b/airbyte-db/lib/src/main/java/io/airbyte/db/SqlDatabase.java index 3c1d15e56095..9265ffeda026 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/SqlDatabase.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/SqlDatabase.java @@ -11,6 +11,6 @@ public abstract class SqlDatabase extends AbstractDatabase { public abstract void execute(String sql) throws Exception; - public abstract Stream query(String sql, String... params) throws Exception; + public abstract Stream unsafeQuery(String sql, String... params) throws Exception; } diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java b/airbyte-db/lib/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java index a6179564b400..2726458489b3 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/bigquery/BigQueryDatabase.java @@ -91,7 +91,7 @@ public Stream query(final String sql, final QueryParameterValue... par } @Override - public Stream query(final String sql, final String... params) throws Exception { + public Stream unsafeQuery(final String sql, final String... params) throws Exception { final List parameterValueList; if (params == null) parameterValueList = Collections.emptyList(); diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_35_56_001__AddWorkspaceSlugTombstoneIndex.java b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_35_56_001__AddWorkspaceSlugTombstoneIndex.java new file mode 100644 index 000000000000..f7b3729b5fb6 --- /dev/null +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/instance/configs/migrations/V0_35_56_001__AddWorkspaceSlugTombstoneIndex.java @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.db.instance.configs.migrations; + +import org.flywaydb.core.api.migration.BaseJavaMigration; +import org.flywaydb.core.api.migration.Context; +import org.jooq.DSLContext; +import org.jooq.impl.DSL; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class V0_35_56_001__AddWorkspaceSlugTombstoneIndex extends BaseJavaMigration { + + private static final Logger LOGGER = LoggerFactory.getLogger( + V0_35_56_001__AddWorkspaceSlugTombstoneIndex.class); + + @Override + public void migrate(final Context context) throws Exception { + LOGGER.info("Running migration: {}", this.getClass().getSimpleName()); + + final DSLContext ctx = DSL.using(context.getConnection()); + ctx.createIndexIfNotExists("workspace_slug_idx").on("workspace", "slug"); + ctx.createIndexIfNotExists("workspace_tombstone_idx").on("workspace", "tombstone"); + } + +} diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java index b1280568d5f4..8ccf58971770 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/DefaultJdbcDatabase.java @@ -51,18 +51,18 @@ public List bufferedResultSetQuery(final CheckedFunction recordTransform) throws SQLException { try (final Connection connection = dataSource.getConnection(); - final Stream results = toStream(query.apply(connection), recordTransform)) { + final Stream results = toUnsafeStream(query.apply(connection), recordTransform)) { return results.collect(Collectors.toList()); } } @Override @MustBeClosed - public Stream resultSetQuery(final CheckedFunction query, - final CheckedFunction recordTransform) + public Stream unsafeResultSetQuery(final CheckedFunction query, + final CheckedFunction recordTransform) throws SQLException { final Connection connection = dataSource.getConnection(); - return toStream(query.apply(connection), recordTransform) + return toUnsafeStream(query.apply(connection), recordTransform) .onClose(() -> { try { connection.close(); @@ -96,11 +96,11 @@ public DatabaseMetaData getMetaData() throws SQLException { */ @Override @MustBeClosed - public Stream query(final CheckedFunction statementCreator, - final CheckedFunction recordTransform) + public Stream unsafeQuery(final CheckedFunction statementCreator, + final CheckedFunction recordTransform) throws SQLException { final Connection connection = dataSource.getConnection(); - return toStream(statementCreator.apply(connection).executeQuery(), recordTransform) + return toUnsafeStream(statementCreator.apply(connection).executeQuery(), recordTransform) .onClose(() -> { try { LOGGER.info("closing connection"); diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java index 0dd4f183c0b6..000023122d2e 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/JdbcDatabase.java @@ -59,7 +59,8 @@ public void executeWithinTransaction(final List queries) throws SQLExcep } /** - * Map records returned in a result set. + * Map records returned in a result set. It is an "unsafe" stream because the stream must be + * manually closed. Otherwise, there will be a database connection leak. 
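+   * For callers of the public unsafeQuery / unsafeResultSetQuery methods built on this helper, a
+   * minimal usage sketch (illustrative only, not part of this patch; assumes a JdbcDatabase
+   * instance named "database") is to wrap the returned stream in try-with-resources so the
+   * underlying connection is always released:
+   *
+   * <pre>{@code
+   * try (Stream<JsonNode> records = database.unsafeQuery("SELECT * FROM id_and_name;")) {
+   *   List<JsonNode> result = records.collect(Collectors.toList());
+   * } // stream closed here, which releases the database connection
+   * }</pre>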
* * @param resultSet the result set * @param mapper function to make each record of the result set @@ -67,7 +68,7 @@ public void executeWithinTransaction(final List queries) throws SQLExcep * @return stream of records that the result set is mapped to. */ @MustBeClosed - protected static Stream toStream(final ResultSet resultSet, final CheckedFunction mapper) { + protected static Stream toUnsafeStream(final ResultSet resultSet, final CheckedFunction mapper) { return StreamSupport.stream(new Spliterators.AbstractSpliterator<>(Long.MAX_VALUE, Spliterator.ORDERED) { @Override @@ -108,8 +109,8 @@ public abstract List bufferedResultSetQuery(CheckedFunction List bufferedResultSetQuery(CheckedFunction Stream resultSetQuery(CheckedFunction query, - CheckedFunction recordTransform) + public abstract Stream unsafeResultSetQuery(CheckedFunction query, + CheckedFunction recordTransform) throws SQLException; /** * Use a connection to create a {@link PreparedStatement} and map it into a stream. You CANNOT * assume that data will be returned from this method before the entire {@link ResultSet} is * buffered in memory. Review the implementation of the database's JDBC driver or use the - * StreamingJdbcDriver if you need this guarantee. The caller should close the returned stream to - * release the database connection. + * StreamingJdbcDriver if you need this guarantee. It is "unsafe" because the caller should close + * the returned stream to release the database connection. Otherwise, there will be a connection + * leak. * * @param statementCreator create a {@link PreparedStatement} from a {@link Connection}. * @param recordTransform transform each record of that result set into the desired type. do NOT @@ -140,12 +142,12 @@ public abstract Stream resultSetQuery(CheckedFunction Stream query(CheckedFunction statementCreator, - CheckedFunction recordTransform) + public abstract Stream unsafeQuery(CheckedFunction statementCreator, + CheckedFunction recordTransform) throws SQLException; public int queryInt(final String sql, final String... params) throws SQLException { - try (final Stream q = query(c -> { + try (final Stream q = unsafeQuery(c -> { PreparedStatement statement = c.prepareStatement(sql); int i = 1; for (String param : params) { @@ -159,10 +161,14 @@ public int queryInt(final String sql, final String... params) throws SQLExceptio } } + /** + * It is "unsafe" because the caller must manually close the returned stream. Otherwise, there will + * be a database connection leak. + */ @MustBeClosed @Override - public Stream query(final String sql, final String... params) throws SQLException { - return query(connection -> { + public Stream unsafeQuery(final String sql, final String... params) throws SQLException { + return unsafeQuery(connection -> { final PreparedStatement statement = connection.prepareStatement(sql); int i = 1; for (final String param : params) { @@ -174,7 +180,7 @@ public Stream query(final String sql, final String... params) throws S } public ResultSetMetaData queryMetadata(final String sql, final String... 
params) throws SQLException { - try (final Stream q = query(c -> { + try (final Stream q = unsafeQuery(c -> { PreparedStatement statement = c.prepareStatement(sql); int i = 1; for (String param : params) { diff --git a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java index 90df64f6e716..338fec362d83 100644 --- a/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java +++ b/airbyte-db/lib/src/main/java/io/airbyte/db/jdbc/StreamingJdbcDatabase.java @@ -48,15 +48,15 @@ public StreamingJdbcDatabase(final DataSource dataSource, */ @Override @MustBeClosed - public Stream query(final CheckedFunction statementCreator, - final CheckedFunction recordTransform) + public Stream unsafeQuery(final CheckedFunction statementCreator, + final CheckedFunction recordTransform) throws SQLException { try { final Connection connection = dataSource.getConnection(); final PreparedStatement ps = statementCreator.apply(connection); // allow configuration of connection and prepared statement to make streaming possible. jdbcStreamingQueryConfiguration.accept(connection, ps); - return toStream(ps.executeQuery(), recordTransform) + return toUnsafeStream(ps.executeQuery(), recordTransform) .onClose(() -> { try { connection.setAutoCommit(true); diff --git a/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestDefaultJdbcDatabase.java b/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestDefaultJdbcDatabase.java index d6c318a11b45..43448e3d44d0 100644 --- a/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestDefaultJdbcDatabase.java +++ b/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestDefaultJdbcDatabase.java @@ -81,7 +81,7 @@ void testBufferedResultQuery() throws SQLException { @Test void testResultSetQuery() throws SQLException { - final Stream actual = database.resultSetQuery( + final Stream actual = database.unsafeResultSetQuery( connection -> connection.createStatement().executeQuery("SELECT * FROM id_and_name;"), sourceOperations::rowToJson); final List actualAsList = actual.collect(Collectors.toList()); @@ -92,7 +92,7 @@ void testResultSetQuery() throws SQLException { @Test void testQuery() throws SQLException { - final Stream actual = database.query( + final Stream actual = database.unsafeQuery( connection -> connection.prepareStatement("SELECT * FROM id_and_name;"), sourceOperations::rowToJson); diff --git a/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestJdbcUtils.java b/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestJdbcUtils.java index 932636c19651..001bfa1d3a96 100644 --- a/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestJdbcUtils.java +++ b/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestJdbcUtils.java @@ -106,7 +106,7 @@ void testRowToJson() throws SQLException { void testToStream() throws SQLException { try (final Connection connection = dataSource.getConnection()) { final ResultSet rs = connection.createStatement().executeQuery("SELECT * FROM id_and_name;"); - final List actual = JdbcDatabase.toStream(rs, sourceOperations::rowToJson).collect(Collectors.toList()); + final List actual = JdbcDatabase.toUnsafeStream(rs, sourceOperations::rowToJson).collect(Collectors.toList()); assertEquals(RECORDS_AS_JSON, actual); } } diff --git a/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestStreamingJdbcDatabase.java b/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestStreamingJdbcDatabase.java index ab0f50765577..f1b60896081b 100644 --- 
a/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestStreamingJdbcDatabase.java +++ b/airbyte-db/lib/src/test/java/io/airbyte/db/jdbc/TestStreamingJdbcDatabase.java @@ -93,7 +93,7 @@ void testQuery() throws SQLException { // invoked. final AtomicReference connection1 = new AtomicReference<>(); final AtomicReference ps1 = new AtomicReference<>(); - final Stream actual = streamingJdbcDatabase.query( + final Stream actual = streamingJdbcDatabase.unsafeQuery( connection -> { connection1.set(connection); final PreparedStatement ps = connection.prepareStatement("SELECT * FROM id_and_name;"); diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/IntegrationRunner.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/IntegrationRunner.java index 7a777edebe25..bd51efe633c1 100644 --- a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/IntegrationRunner.java +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/base/IntegrationRunner.java @@ -190,7 +190,6 @@ private static void runConsumer(final AirbyteMessageConsumer consumer) throws Ex watchForOrphanThreads( () -> consumeWriteStream(consumer), () -> System.exit(FORCED_EXIT_CODE), - true, INTERRUPT_THREAD_DELAY_MINUTES, TimeUnit.MINUTES, EXIT_THREAD_DELAY_MINUTES, @@ -211,7 +210,6 @@ private static void runConsumer(final AirbyteMessageConsumer consumer) throws Ex @VisibleForTesting static void watchForOrphanThreads(final Procedure runMethod, final Runnable exitHook, - final boolean sentryEnabled, final int interruptTimeDelay, final TimeUnit interruptTimeUnit, final int exitTimeDelay, @@ -249,9 +247,7 @@ static void watchForOrphanThreads(final Procedure runMethod, // So, we schedule an interrupt hook after a fixed time delay instead... scheduledExecutorService.schedule(runningThread::interrupt, interruptTimeDelay, interruptTimeUnit); } - if (!sentryEnabled) { - Sentry.captureMessage(sentryMessageBuilder.toString(), SentryLevel.WARNING); - } + Sentry.captureMessage(sentryMessageBuilder.toString(), SentryLevel.WARNING); scheduledExecutorService.schedule(() -> { if (ThreadUtils.getAllThreads().stream() .anyMatch(runningThread -> !runningThread.isDaemon() && !runningThread.getName().equals(currentThread.getName()))) { diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/NamingConventionTransformer.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/NamingConventionTransformer.java index 1069f6f4c2b2..d68a7cd568cc 100644 --- a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/NamingConventionTransformer.java +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/NamingConventionTransformer.java @@ -21,6 +21,12 @@ public interface NamingConventionTransformer { */ String getIdentifier(String name); + /** + * Handle naming conversions of an input name to output a valid namespace for the desired + * destination. 
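+   * For example (illustrative only; the exact conversion depends on the implementation, such as
+   * the StandardNameTransformer change below), a raw namespace like {@code "my-namespace"} would
+   * typically be converted to {@code "my_namespace"}.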
+ */ + String getNamespace(String namespace); + /** * Same as getIdentifier but returns also the name of the table for storing raw data * diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/StandardNameTransformer.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/StandardNameTransformer.java index 9175ee5e11b2..3b292dbb9c19 100644 --- a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/StandardNameTransformer.java +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/StandardNameTransformer.java @@ -22,6 +22,14 @@ public String getIdentifier(final String name) { return convertStreamName(name); } + /** + * Most destinations have the same naming requirement for namespace and stream names. + */ + @Override + public String getNamespace(final String namespace) { + return convertStreamName(namespace); + } + @Override public String getRawTableName(final String streamName) { return convertStreamName("_airbyte_raw_" + streamName); diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java index 58dd939a7cce..a37498631496 100644 --- a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumer.java @@ -13,14 +13,12 @@ import io.airbyte.integrations.base.AirbyteMessageConsumer; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; import io.airbyte.integrations.base.FailureTrackingAirbyteMessageConsumer; -import io.airbyte.integrations.base.sentry.AirbyteSentry; +import io.airbyte.integrations.destination.record_buffer.BufferingStrategy; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Consumer; @@ -51,7 +49,7 @@ *

* Throughout the lifecycle of the consumer, messages get promoted from buffered to flushed to * committed. A record message when it is received is immediately buffered. When the buffer fills - * up, all buffered records are flushed out of memory using the user-provided recordWriter. When + * up, all buffered records are flushed out of memory using the user-provided recordBuffer. When * this flush happens, a state message is moved from pending to flushed. On close, if the * user-provided onClose function is successful, then the flushed state record is considered * committed and is then emitted. We expect this class to only ever emit either 1 state message (in @@ -63,11 +61,11 @@ * When a record is "flushed" it is moved from the docker container to the destination. By * convention, it is usually placed in some sort of temporary storage on the destination (e.g. a * temporary database or file store). The logic in close handles committing the temporary - * representation data to the final store (e.g. final table). In the case of Copy destinations they - * often have additional temporary stores. The common pattern for copy destination is that flush - * pushes the data into cloud storage and then close copies from cloud storage to a temporary table - * AND then copies from the temporary table into the final table. This abstraction is blind to that - * detail as it implementation detail of how copy destinations implement close. + * representation data to the final store (e.g. final table). In the case of staging destinations + * they often have additional temporary stores. The common pattern for staging destination is that + * flush pushes the data into a staging area in cloud storage and then close copies from staging to + * a temporary table AND then copies from the temporary table into the final table. This abstraction + * is blind to the detail of how staging destinations implement their close. *
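+ * <p>
+ * A minimal construction sketch (illustrative only; {@code InMemoryRecordBufferingStrategy} is an
+ * assumed example implementation of {@link BufferingStrategy} and is not shown in this diff):
+ *
+ * <pre>{@code
+ * AirbyteMessageConsumer consumer = new BufferedStreamConsumer(
+ *     outputRecordCollector,   // Consumer<AirbyteMessage>
+ *     onStart,                 // VoidCallable
+ *     new InMemoryRecordBufferingStrategy(recordWriter, maxQueueSizeInBytes),
+ *     onClose,                 // CheckedConsumer<Boolean, Exception>
+ *     catalog,                 // ConfiguredAirbyteCatalog
+ *     isValidRecord);          // CheckedFunction<JsonNode, Boolean, Exception>
+ * }</pre>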

*/ public class BufferedStreamConsumer extends FailureTrackingAirbyteMessageConsumer implements AirbyteMessageConsumer { @@ -75,19 +73,13 @@ public class BufferedStreamConsumer extends FailureTrackingAirbyteMessageConsume private static final Logger LOGGER = LoggerFactory.getLogger(BufferedStreamConsumer.class); private final VoidCallable onStart; - private final RecordWriter recordWriter; - private final CheckAndRemoveRecordWriter checkAndRemoveRecordWriter; private final CheckedConsumer onClose; private final Set streamNames; private final ConfiguredAirbyteCatalog catalog; private final CheckedFunction isValidRecord; private final Map streamToIgnoredRecordCount; private final Consumer outputRecordCollector; - private final long maxQueueSizeInBytes; - private final RecordSizeEstimator recordSizeEstimator; - private long bufferSizeInBytes; - private Map> streamBuffer; - private String fileName; + private final BufferingStrategy bufferingStrategy; private boolean hasStarted; private boolean hasClosed; @@ -97,38 +89,21 @@ public class BufferedStreamConsumer extends FailureTrackingAirbyteMessageConsume public BufferedStreamConsumer(final Consumer outputRecordCollector, final VoidCallable onStart, - final RecordWriter recordWriter, + final BufferingStrategy bufferingStrategy, final CheckedConsumer onClose, final ConfiguredAirbyteCatalog catalog, - final CheckedFunction isValidRecord, - final long maxQueueSizeInBytes) { - this(outputRecordCollector, onStart, recordWriter, null, onClose, catalog, - isValidRecord, maxQueueSizeInBytes); - } - - public BufferedStreamConsumer(final Consumer outputRecordCollector, - final VoidCallable onStart, - final RecordWriter recordWriter, - final CheckAndRemoveRecordWriter checkAndRemoveRecordWriter, - final CheckedConsumer onClose, - final ConfiguredAirbyteCatalog catalog, - final CheckedFunction isValidRecord, - final long maxQueueSizeInBytes) { + final CheckedFunction isValidRecord) { this.outputRecordCollector = outputRecordCollector; - this.maxQueueSizeInBytes = maxQueueSizeInBytes; this.hasStarted = false; this.hasClosed = false; this.onStart = onStart; - this.recordWriter = recordWriter; - this.checkAndRemoveRecordWriter = checkAndRemoveRecordWriter; this.onClose = onClose; this.catalog = catalog; this.streamNames = AirbyteStreamNameNamespacePair.fromConfiguredCatalog(catalog); this.isValidRecord = isValidRecord; - this.bufferSizeInBytes = 0; this.streamToIgnoredRecordCount = new HashMap<>(); - this.streamBuffer = new HashMap<>(); - this.recordSizeEstimator = new RecordSizeEstimator(); + this.bufferingStrategy = bufferingStrategy; + bufferingStrategy.registerFlushAllEventHook(this::flushQueueToDestination); } @Override @@ -159,16 +134,7 @@ protected void acceptTracked(final AirbyteMessage message) throws Exception { return; } - final long messageSizeInBytes = recordSizeEstimator.getEstimatedByteSize(recordMessage); - if (bufferSizeInBytes + messageSizeInBytes > maxQueueSizeInBytes) { - flushQueueToDestination(); - bufferSizeInBytes = 0; - } - - final List bufferedRecords = streamBuffer.computeIfAbsent(stream, k -> new ArrayList<>()); - bufferedRecords.add(message.getRecord()); - bufferSizeInBytes += messageSizeInBytes; - + bufferingStrategy.addRecord(stream, message); } else if (message.getType() == Type.STATE) { pendingState = message; } else { @@ -177,20 +143,7 @@ protected void acceptTracked(final AirbyteMessage message) throws Exception { } - private void flushQueueToDestination() throws Exception { - LOGGER.info("Flushing buffer: {} bytes", 
bufferSizeInBytes); - - AirbyteSentry.executeWithTracing("FlushBuffer", () -> { - for (final Map.Entry> entry : streamBuffer.entrySet()) { - LOGGER.info("Flushing {}: {} records", entry.getKey().getName(), entry.getValue().size()); - recordWriter.accept(entry.getKey(), entry.getValue()); - if (checkAndRemoveRecordWriter != null) { - fileName = checkAndRemoveRecordWriter.apply(entry.getKey(), fileName); - } - } - }, Map.of("bufferSizeInBytes", bufferSizeInBytes)); - streamBuffer = new HashMap<>(); - + private void flushQueueToDestination() { if (pendingState != null) { lastFlushedState = pendingState; pendingState = null; @@ -215,8 +168,9 @@ protected void close(final boolean hasFailed) throws Exception { LOGGER.error("executing on failed close procedure."); } else { LOGGER.info("executing on success close procedure."); - flushQueueToDestination(); + bufferingStrategy.flushAll(); } + bufferingStrategy.close(); try { // if no state was emitted (i.e. full refresh), if there were still no failures, then we can diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordWriter.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordWriter.java index f8a30bc720a4..28ae42309528 100644 --- a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordWriter.java +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/buffered_stream_consumer/RecordWriter.java @@ -6,12 +6,11 @@ import io.airbyte.commons.functional.CheckedBiConsumer; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; -import io.airbyte.protocol.models.AirbyteRecordMessage; import java.util.List; -public interface RecordWriter extends CheckedBiConsumer, Exception> { +public interface RecordWriter extends CheckedBiConsumer, Exception> { @Override - void accept(AirbyteStreamNameNamespacePair pair, List records) throws Exception; + void accept(AirbyteStreamNameNamespacePair pair, List records) throws Exception; } diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/BaseSerializedBuffer.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/BaseSerializedBuffer.java new file mode 100644 index 000000000000..a405a542894e --- /dev/null +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/BaseSerializedBuffer.java @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.record_buffer; + +import com.google.common.io.CountingOutputStream; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream; +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Base implementation of a {@link SerializableBuffer}. It is composed of a {@link BufferStorage} + * where the actual data is being stored in a serialized format. + * + * Such data format is defined by concrete implementation inheriting from this base abstract class. + * To do so, necessary methods on handling "writer" methods should be defined. 
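For orientation, a rough sketch (not part of the diff) of how a destination might assemble the consumer after this change: the buffering policy moves out of BufferedStreamConsumer and into a pluggable strategy. The factory class name, the lambda bodies and the 25 MB threshold are made up for illustration; the constructor shape, the generic RecordWriter and InMemoryRecordBufferingStrategy come from this change.

import io.airbyte.integrations.destination.buffered_stream_consumer.BufferedStreamConsumer;
import io.airbyte.integrations.destination.buffered_stream_consumer.RecordWriter;
import io.airbyte.integrations.destination.record_buffer.InMemoryRecordBufferingStrategy;
import io.airbyte.protocol.models.AirbyteMessage;
import io.airbyte.protocol.models.AirbyteRecordMessage;
import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
import java.util.function.Consumer;

public class ExampleConsumerFactory {

  // arbitrary example threshold; real destinations pick a value suited to their memory budget
  private static final long MAX_BUFFER_SIZE_BYTES = 25 * 1024 * 1024;

  public static BufferedStreamConsumer create(final Consumer<AirbyteMessage> outputRecordCollector,
                                              final ConfiguredAirbyteCatalog catalog,
                                              final RecordWriter<AirbyteRecordMessage> recordWriter) {
    return new BufferedStreamConsumer(
        outputRecordCollector,
        () -> { /* onStart: e.g. create tmp tables (destination-specific) */ },
        // buffering decisions now live in a pluggable strategy instead of in the consumer itself
        new InMemoryRecordBufferingStrategy(recordWriter, MAX_BUFFER_SIZE_BYTES),
        hasFailed -> { /* onClose: e.g. commit tmp tables to final tables unless hasFailed */ },
        catalog,
        data -> true); // isValidRecord: accept every record in this sketch
  }

}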
This writer would + * take care of converting {@link AirbyteRecordMessage} into the serialized form of the data such as + * it can be stored in the outputStream of the {@link BufferStorage}. + */ +public abstract class BaseSerializedBuffer implements SerializableBuffer { + + private static final Logger LOGGER = LoggerFactory.getLogger(BaseSerializedBuffer.class); + + private final BufferStorage bufferStorage; + private final CountingOutputStream byteCounter; + + private boolean useCompression; + private GzipCompressorOutputStream compressedBuffer; + private InputStream inputStream; + private boolean isStarted; + private boolean isClosed; + + protected BaseSerializedBuffer(final BufferStorage bufferStorage) throws Exception { + this.bufferStorage = bufferStorage; + byteCounter = new CountingOutputStream(bufferStorage.getOutputStream()); + useCompression = true; + compressedBuffer = null; + inputStream = null; + isStarted = false; + isClosed = false; + } + + /** + * Initializes the writer objects such that it can now write to the downstream @param outputStream + */ + protected abstract void createWriter(OutputStream outputStream) throws Exception; + + /** + * Transform the @param recordMessage into a serialized form of the data and writes it to the + * registered OutputStream provided when {@link BaseSerializedBuffer#createWriter} was called. + */ + protected abstract void writeRecord(AirbyteRecordMessage recordMessage) throws IOException; + + /** + * Stops the writer from receiving new data and prepares it for being finalized and converted into + * an InputStream to read from instead. This is used when flushing the buffer into some other + * destination. + */ + protected abstract void closeWriter() throws IOException; + + public SerializableBuffer withCompression(final boolean useCompression) { + if (!isStarted) { + this.useCompression = useCompression; + return this; + } + throw new RuntimeException("Options should be configured before starting to write"); + } + + @Override + public long accept(final AirbyteRecordMessage recordMessage) throws Exception { + if (!isStarted) { + if (useCompression) { + compressedBuffer = new GzipCompressorOutputStream(byteCounter); + createWriter(compressedBuffer); + } else { + createWriter(byteCounter); + } + isStarted = true; + } + if (inputStream == null && !isClosed) { + final long startCount = byteCounter.getCount(); + writeRecord(recordMessage); + return byteCounter.getCount() - startCount; + } else { + throw new IllegalCallerException("Buffer is already closed, it cannot accept more messages"); + } + } + + public String getFilename() throws IOException { + return bufferStorage.getFilename(); + } + + public File getFile() throws IOException { + return bufferStorage.getFile(); + } + + protected InputStream convertToInputStream() throws IOException { + return bufferStorage.convertToInputStream(); + } + + @Override + public InputStream getInputStream() { + return inputStream; + } + + @Override + public void flush() throws IOException { + if (inputStream == null && !isClosed) { + closeWriter(); + if (compressedBuffer != null) { + // we need to close the gzip stream to finish compression and write trailer data. 
+ compressedBuffer.close(); + } + bufferStorage.close(); + inputStream = convertToInputStream(); + LOGGER.info("Finished writing data to {} ({})", getFilename(), FileUtils.byteCountToDisplaySize(byteCounter.getCount())); + } + } + + @Override + public long getByteCount() { + return byteCounter.getCount(); + } + + @Override + public void close() throws Exception { + if (!isClosed) { + inputStream.close(); + bufferStorage.deleteFile(); + isClosed = true; + } + } + + public long getMaxTotalBufferSizeInBytes() { + return bufferStorage.getMaxTotalBufferSizeInBytes(); + } + + public long getMaxPerStreamBufferSizeInBytes() { + return bufferStorage.getMaxPerStreamBufferSizeInBytes(); + } + + public int getMaxConcurrentStreamsInBuffer() { + return bufferStorage.getMaxConcurrentStreamsInBuffer(); + } + +} diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/BufferStorage.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/BufferStorage.java new file mode 100644 index 000000000000..ec2a60498fdf --- /dev/null +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/BufferStorage.java @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.record_buffer; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * This interface abstract the actual object that is used to store incoming data being buffered. It + * could be a file, in-memory or some other objects. + * + * However, in order to be used as part of the {@link SerializableBuffer}, this + * {@link BufferStorage} should implement some methods used to determine how to write into and read + * from the storage once we are done buffering + * + * Some easy methods for manipulating the storage viewed as a file or InputStream are therefore + * required. + * + * Depending on the implementation of the storage medium, it would also determine what storage + * limits are possible. + */ +public interface BufferStorage { + + /** + * Builds a new outputStream on which to write the data for storage. + */ + OutputStream getOutputStream() throws IOException; + + String getFilename() throws IOException; + + File getFile() throws IOException; + + /** + * Once buffering has reached some limits, the storage stream should be turned into an InputStream. + * This method should assume we are not going to write to buffer anymore, and it is safe to convert + * to some other format to be read from now. + */ + InputStream convertToInputStream() throws IOException; + + void close() throws IOException; + + /** + * Cleans-up any file that was produced in the process of buffering (if any were produced) + */ + void deleteFile() throws IOException; + + /* + * Depending on the implementation of the storage, methods below defined reasonable thresholds + * associated with using this kind of buffer storage. + * + * These could also be dynamically configured/tuned at runtime if needed (from user input for + * example?) 
+ */ + + /** + * @return How much storage should be used overall by all buffers + */ + long getMaxTotalBufferSizeInBytes(); + + /** + * @return How much storage should be used for a particular stream at a time before flushing it + */ + long getMaxPerStreamBufferSizeInBytes(); + + /** + * @return How many concurrent buffers can be handled at once in parallel + */ + int getMaxConcurrentStreamsInBuffer(); + +} diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/BufferingStrategy.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/BufferingStrategy.java new file mode 100644 index 000000000000..1488e5dfbd3d --- /dev/null +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/BufferingStrategy.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.record_buffer; + +import io.airbyte.commons.concurrency.VoidCallable; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.AirbyteMessage; + +/** + * High-level interface used by + * {@link io.airbyte.integrations.destination.buffered_stream_consumer.BufferedStreamConsumer} + * + * A Record buffering strategy relies on the capacity available of underlying + * {@link SerializableBuffer} to determine what to do when consuming a new {@link AirbyteMessage} + * into the buffer. It also defines when to flush such buffers and how to empty them once they fill + * up. + * + */ +public interface BufferingStrategy extends AutoCloseable { + + /** + * Add a new message to the buffer while consuming streams + */ + void addRecord(AirbyteStreamNameNamespacePair stream, AirbyteMessage message) throws Exception; + + /** + * Flush buffered messages in a writer from a particular stream + */ + void flushWriter(AirbyteStreamNameNamespacePair stream, SerializableBuffer writer) throws Exception; + + /** + * Flush all writers that were buffering message data so far. + */ + void flushAll() throws Exception; + + /** + * Removes all stream buffers. + */ + void clear() throws Exception; + + /** + * When all buffers are being flushed, we can signal some parent function of this event for further + * processing. + * + * THis install such a hook to be triggered when that happens. + */ + void registerFlushAllEventHook(VoidCallable onFlushAllEventHook); + +} diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/FileBuffer.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/FileBuffer.java new file mode 100644 index 000000000000..92e3407740fb --- /dev/null +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/FileBuffer.java @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
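As a rough illustration of how the pieces above compose (not part of the diff), a concrete buffer only has to provide the three writer hooks of BaseSerializedBuffer; the storage medium is whatever BufferStorage it is given. The JsonLSerializedBuffer name and the one-JSON-object-per-line format are hypothetical.

package io.airbyte.integrations.destination.record_buffer;

import io.airbyte.commons.json.Jsons;
import io.airbyte.protocol.models.AirbyteRecordMessage;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;

public class JsonLSerializedBuffer extends BaseSerializedBuffer {

  private PrintWriter printWriter;

  public JsonLSerializedBuffer(final BufferStorage bufferStorage) throws Exception {
    super(bufferStorage);
  }

  @Override
  protected void createWriter(final OutputStream outputStream) {
    // the stream handed in here is already gzip-wrapped when compression is enabled
    printWriter = new PrintWriter(outputStream, true, StandardCharsets.UTF_8);
  }

  @Override
  protected void writeRecord(final AirbyteRecordMessage recordMessage) {
    // one JSON object per line; the serialized form is entirely up to the subclass
    printWriter.println(Jsons.serialize(recordMessage.getData()));
  }

  @Override
  protected void closeWriter() {
    // flush and release the writer so the buffer can be turned into an InputStream
    printWriter.close();
  }

}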
+ */ + +package io.airbyte.integrations.destination.record_buffer; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.UUID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class FileBuffer implements BufferStorage { + + private static final Logger LOGGER = LoggerFactory.getLogger(FileBuffer.class); + + // The per stream size limit is following recommendations from: + // https://docs.snowflake.com/en/user-guide/data-load-considerations-prepare.html#general-file-sizing-recommendations + // "To optimize the number of parallel operations for a load, + // we recommend aiming to produce data files roughly 100-250 MB (or larger) in size compressed." + public static final long MAX_PER_STREAM_BUFFER_SIZE_BYTES = 200 * 1024 * 1024; // mb + // Other than the per-file size limit, we also limit the total size (which would limit how many + // concurrent streams we can buffer simultaneously too) + // Since this class is storing data on disk, the buffer size limits below are tied to the + // necessary disk storage space. + public static final long MAX_TOTAL_BUFFER_SIZE_BYTES = 1024 * 1024 * 1024; // mb + // we limit number of stream being buffered simultaneously anyway (limit how many files are + // stored/open for writing) + public static final int MAX_CONCURRENT_STREAM_IN_BUFFER = 10; + + private File tempFile; + private OutputStream outputStream; + + public FileBuffer() { + tempFile = null; + outputStream = null; + } + + @Override + public OutputStream getOutputStream() throws IOException { + if (outputStream == null || tempFile == null) { + tempFile = Files.createTempFile(UUID.randomUUID().toString(), ".csv.gz").toFile(); + outputStream = new FileOutputStream(tempFile); + } + return outputStream; + } + + @Override + public String getFilename() throws IOException { + return getFile().getName(); + } + + @Override + public File getFile() throws IOException { + if (tempFile == null) { + getOutputStream(); + } + return tempFile; + } + + @Override + public InputStream convertToInputStream() throws IOException { + return new FileInputStream(getFile()); + } + + @Override + public void close() throws IOException { + outputStream.close(); + } + + @Override + public void deleteFile() throws IOException { + LOGGER.info("Deleting tempFile data {}", getFilename()); + Files.deleteIfExists(getFile().toPath()); + } + + @Override + public long getMaxTotalBufferSizeInBytes() { + return MAX_TOTAL_BUFFER_SIZE_BYTES; + } + + @Override + public long getMaxPerStreamBufferSizeInBytes() { + return MAX_PER_STREAM_BUFFER_SIZE_BYTES; + } + + @Override + public int getMaxConcurrentStreamsInBuffer() { + return MAX_CONCURRENT_STREAM_IN_BUFFER; + } + +} diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/InMemoryBuffer.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/InMemoryBuffer.java new file mode 100644 index 000000000000..26bd0877c253 --- /dev/null +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/InMemoryBuffer.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
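A sketch of the expected accept -> flush -> read-back -> close flow for a buffer backed by the FileBuffer above (the InMemoryBuffer defined next follows the same contract). It is not part of the diff and reuses the hypothetical JsonLSerializedBuffer sketched earlier; the sample record and the upload step are placeholders.

package io.airbyte.integrations.destination.record_buffer;

import io.airbyte.commons.json.Jsons;
import io.airbyte.protocol.models.AirbyteRecordMessage;
import java.io.InputStream;

public class SerializableBufferUsageSketch {

  public static void main(final String[] args) throws Exception {
    final SerializableBuffer buffer = new JsonLSerializedBuffer(new FileBuffer()).withCompression(true);

    // accept() reports how many bytes each record added, so callers can track buffer growth
    final long bytesWritten = buffer.accept(new AirbyteRecordMessage()
        .withStream("users")
        .withEmittedAt(System.currentTimeMillis())
        .withData(Jsons.deserialize("{\"id\": 1}")));
    System.out.println(bytesWritten + " bytes buffered");

    // flush() finalizes the writer (including the gzip trailer) and exposes the data for reading
    buffer.flush();
    try (final InputStream data = buffer.getInputStream()) {
      // hand `data` to the destination-specific upload step, e.g. a staging area
    }

    // close() releases the stream and deletes the temporary file created by the FileBuffer
    buffer.close();
  }

}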
+ */ + +package io.airbyte.integrations.destination.record_buffer; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.UUID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Instead of storing buffered data on disk like the {@link FileBuffer}, this {@link BufferStorage} + * accumulates message data in-memory instead. Thus, a bigger heap size would be required. + */ +public class InMemoryBuffer implements BufferStorage { + + private static final Logger LOGGER = LoggerFactory.getLogger(InMemoryBuffer.class); + + // The per stream size limit is following recommendations from: + // https://docs.snowflake.com/en/user-guide/data-load-considerations-prepare.html#general-file-sizing-recommendations + // "To optimize the number of parallel operations for a load, + // we recommend aiming to produce data files roughly 100-250 MB (or larger) in size compressed." + public static final long MAX_PER_STREAM_BUFFER_SIZE_BYTES = 200 * 1024 * 1024; // mb + // Other than the per-file size limit, we also limit the total size (which would limit how many + // concurrent streams we can buffer simultaneously too) + // Since this class is storing data in memory, the buffer size limits below are tied to the + // necessary RAM space. + public static final long MAX_TOTAL_BUFFER_SIZE_BYTES = 1024 * 1024 * 1024; // mb + // we limit number of stream being buffered simultaneously anyway + public static final int MAX_CONCURRENT_STREAM_IN_BUFFER = 100; + + private final ByteArrayOutputStream byteBuffer = new ByteArrayOutputStream(); + private File tempFile; + private String filename; + + public InMemoryBuffer() { + tempFile = null; + filename = null; + } + + @Override + public OutputStream getOutputStream() { + return byteBuffer; + } + + @Override + public String getFilename() { + if (filename == null) { + filename = UUID.randomUUID().toString(); + } + return filename; + } + + @Override + public File getFile() throws IOException { + if (tempFile == null) { + tempFile = Files.createTempFile(getFilename(), ".csv.gz").toFile(); + } + return tempFile; + } + + @Override + public InputStream convertToInputStream() { + return new ByteArrayInputStream(byteBuffer.toByteArray()); + } + + @Override + public void close() throws IOException { + byteBuffer.close(); + } + + @Override + public void deleteFile() throws IOException { + if (tempFile != null) { + LOGGER.info("Deleting tempFile data {}", getFilename()); + Files.deleteIfExists(tempFile.toPath()); + } + } + + @Override + public long getMaxTotalBufferSizeInBytes() { + return MAX_TOTAL_BUFFER_SIZE_BYTES; + } + + @Override + public long getMaxPerStreamBufferSizeInBytes() { + return MAX_PER_STREAM_BUFFER_SIZE_BYTES; + } + + @Override + public int getMaxConcurrentStreamsInBuffer() { + return MAX_CONCURRENT_STREAM_IN_BUFFER; + } + +} diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/InMemoryRecordBufferingStrategy.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/InMemoryRecordBufferingStrategy.java new file mode 100644 index 000000000000..7a80bc96539f --- /dev/null +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/InMemoryRecordBufferingStrategy.java @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2021 Airbyte, 
Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.record_buffer; + +import io.airbyte.commons.concurrency.VoidCallable; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.base.sentry.AirbyteSentry; +import io.airbyte.integrations.destination.buffered_stream_consumer.CheckAndRemoveRecordWriter; +import io.airbyte.integrations.destination.buffered_stream_consumer.RecordSizeEstimator; +import io.airbyte.integrations.destination.buffered_stream_consumer.RecordWriter; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This is the default implementation of a {@link BufferStorage} to be backward compatible. Data is + * being buffered in a {@link List} as they are being consumed. + * + * This should be deprecated as we slowly move towards using {@link SerializedBufferingStrategy} + * instead. + */ +public class InMemoryRecordBufferingStrategy implements BufferingStrategy { + + private static final Logger LOGGER = LoggerFactory.getLogger(InMemoryRecordBufferingStrategy.class); + + private Map> streamBuffer = new HashMap<>(); + private final RecordWriter recordWriter; + private final CheckAndRemoveRecordWriter checkAndRemoveRecordWriter; + private String fileName; + + private final RecordSizeEstimator recordSizeEstimator; + private final long maxQueueSizeInBytes; + private long bufferSizeInBytes; + private VoidCallable onFlushAllEventHook; + + public InMemoryRecordBufferingStrategy(final RecordWriter recordWriter, + final long maxQueueSizeInBytes) { + this(recordWriter, null, maxQueueSizeInBytes); + } + + public InMemoryRecordBufferingStrategy(final RecordWriter recordWriter, + final CheckAndRemoveRecordWriter checkAndRemoveRecordWriter, + final long maxQueueSizeInBytes) { + this.recordWriter = recordWriter; + this.checkAndRemoveRecordWriter = checkAndRemoveRecordWriter; + + this.maxQueueSizeInBytes = maxQueueSizeInBytes; + this.bufferSizeInBytes = 0; + this.recordSizeEstimator = new RecordSizeEstimator(); + this.onFlushAllEventHook = null; + } + + @Override + public void addRecord(final AirbyteStreamNameNamespacePair stream, final AirbyteMessage message) throws Exception { + final long messageSizeInBytes = recordSizeEstimator.getEstimatedByteSize(message.getRecord()); + if (bufferSizeInBytes + messageSizeInBytes > maxQueueSizeInBytes) { + flushAll(); + bufferSizeInBytes = 0; + } + + final List bufferedRecords = streamBuffer.computeIfAbsent(stream, k -> new ArrayList<>()); + bufferedRecords.add(message.getRecord()); + bufferSizeInBytes += messageSizeInBytes; + } + + @Override + public void flushWriter(final AirbyteStreamNameNamespacePair stream, final SerializableBuffer writer) throws Exception { + LOGGER.info("Flushing single stream {}: {} records", stream, streamBuffer.get(stream).size()); + recordWriter.accept(stream, streamBuffer.get(stream)); + } + + @Override + public void flushAll() throws Exception { + AirbyteSentry.executeWithTracing("FlushBuffer", () -> { + for (final Map.Entry> entry : streamBuffer.entrySet()) { + LOGGER.info("Flushing {}: {} records", entry.getKey().getName(), entry.getValue().size()); + recordWriter.accept(entry.getKey(), entry.getValue()); + if (checkAndRemoveRecordWriter != null) { + fileName = checkAndRemoveRecordWriter.apply(entry.getKey(), fileName); + } + } + 
}, Map.of("bufferSizeInBytes", bufferSizeInBytes)); + close(); + clear(); + + if (onFlushAllEventHook != null) { + onFlushAllEventHook.call(); + } + } + + @Override + public void clear() { + streamBuffer = new HashMap<>(); + } + + @Override + public void registerFlushAllEventHook(final VoidCallable onFlushAllEventHook) { + this.onFlushAllEventHook = onFlushAllEventHook; + } + + @Override + public void close() throws Exception {} + +} diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/SerializableBuffer.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/SerializableBuffer.java new file mode 100644 index 000000000000..ba72b15b5412 --- /dev/null +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/SerializableBuffer.java @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.record_buffer; + +import io.airbyte.protocol.models.AirbyteRecordMessage; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; + +/** + * A {@link SerializableBuffer} is designed to be used as part of a + * {@link SerializedBufferingStrategy}. + * + * It encapsulates the actual implementation of a buffer: both the medium storage (usually defined + * as part of {@link BufferStorage}. and the format of the serialized data when it is written to the + * buffer. + * + * A {@link BaseSerializedBuffer} class is provided, and should be the expected class to derive from + * when implementing a new format of buffer. The storage aspects are normally provided through + * composition of {@link BufferStorage}. + * + */ +public interface SerializableBuffer extends AutoCloseable { + + /** + * Adds a @param recordMessage to the buffer and @return how many bytes were written to the buffer. + */ + long accept(AirbyteRecordMessage recordMessage) throws Exception; + + /** + * Flush a buffer implementation. + */ + void flush() throws Exception; + + /** + * The buffer implementation should be keeping track of how many bytes it accumulated so far. If any + * flush events were triggered, the amount of bytes accumulated would also have been decreased + * accordingly. This method @return such statistics. + */ + long getByteCount(); + + /** + * @return the filename representation of this buffer. + */ + String getFilename() throws IOException; + + /** + * @return a temporary representation as a file of this buffer. + */ + File getFile() throws IOException; + + /** + * @return the InputStream to read data back from this buffer once it is done adding messages to it. + */ + InputStream getInputStream(); + + /* + * Depending on the implementation of the storage, methods below defined reasonable thresholds + * associated with using this kind of buffer implementation. + * + * These could also be dynamically configured/tuned at runtime if needed (from user input for + * example?) 
+ */ + + /** + * @return How much storage should be used overall by all buffers + */ + long getMaxTotalBufferSizeInBytes(); + + /** + * @return How much storage should be used for a particular stream at a time before flushing it + */ + long getMaxPerStreamBufferSizeInBytes(); + + /** + * @return How many concurrent buffers can be handled at once in parallel + */ + int getMaxConcurrentStreamsInBuffer(); + +} diff --git a/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/SerializedBufferingStrategy.java b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/SerializedBufferingStrategy.java new file mode 100644 index 000000000000..f55d3eeb1599 --- /dev/null +++ b/airbyte-integrations/bases/base-java/src/main/java/io/airbyte/integrations/destination/record_buffer/SerializedBufferingStrategy.java @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.record_buffer; + +import io.airbyte.commons.concurrency.VoidCallable; +import io.airbyte.commons.functional.CheckedBiConsumer; +import io.airbyte.commons.functional.CheckedBiFunction; +import io.airbyte.commons.string.Strings; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.base.sentry.AirbyteSentry; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SerializedBufferingStrategy implements BufferingStrategy { + + private static final Logger LOGGER = LoggerFactory.getLogger(SerializedBufferingStrategy.class); + + private final CheckedBiFunction onCreateBuffer; + private final CheckedBiConsumer onStreamFlush; + private VoidCallable onFlushAllEventHook; + + private Map allBuffers = new HashMap<>(); + private long totalBufferSizeInBytes; + private final ConfiguredAirbyteCatalog catalog; + + public SerializedBufferingStrategy(final CheckedBiFunction onCreateBuffer, + final ConfiguredAirbyteCatalog catalog, + final CheckedBiConsumer onStreamFlush) { + this.onCreateBuffer = onCreateBuffer; + this.catalog = catalog; + this.onStreamFlush = onStreamFlush; + this.totalBufferSizeInBytes = 0; + this.onFlushAllEventHook = null; + } + + @Override + public void registerFlushAllEventHook(final VoidCallable onFlushAllEventHook) { + this.onFlushAllEventHook = onFlushAllEventHook; + } + + @Override + public void addRecord(final AirbyteStreamNameNamespacePair stream, final AirbyteMessage message) throws Exception { + + final SerializableBuffer streamBuffer = allBuffers.computeIfAbsent(stream, k -> { + LOGGER.info("Starting a new buffer for stream {} (current state: {} in {} buffers)", + stream.getName(), + FileUtils.byteCountToDisplaySize(totalBufferSizeInBytes), + allBuffers.size()); + try { + return onCreateBuffer.apply(stream, catalog); + } catch (final Exception e) { + LOGGER.error("Failed to create a new buffer for stream {}", stream.getName(), e); + throw new RuntimeException(e); + } + }); + if (streamBuffer == null) { + throw new RuntimeException(String.format("Failed to create/get streamBuffer for stream %s.%s", stream.getNamespace(), stream.getName())); + } + final long actualMessageSizeInBytes = 
streamBuffer.accept(message.getRecord()); + totalBufferSizeInBytes += actualMessageSizeInBytes; + if (totalBufferSizeInBytes >= streamBuffer.getMaxTotalBufferSizeInBytes() + || allBuffers.size() >= streamBuffer.getMaxConcurrentStreamsInBuffer()) { + flushAll(); + totalBufferSizeInBytes = 0; + } else if (streamBuffer.getByteCount() >= streamBuffer.getMaxPerStreamBufferSizeInBytes()) { + flushWriter(stream, streamBuffer); + } + } + + @Override + public void flushWriter(final AirbyteStreamNameNamespacePair stream, final SerializableBuffer writer) throws Exception { + LOGGER.info("Flushing buffer of stream {} ({})", stream.getName(), FileUtils.byteCountToDisplaySize(writer.getByteCount())); + AirbyteSentry.executeWithTracing("FlushBuffer", () -> { + onStreamFlush.accept(stream, writer); + }, Map.of("bufferSizeInBytes", writer.getByteCount())); + totalBufferSizeInBytes -= writer.getByteCount(); + allBuffers.remove(stream); + } + + @Override + public void flushAll() throws Exception { + LOGGER.info("Flushing all {} current buffers ({} in total)", allBuffers.size(), FileUtils.byteCountToDisplaySize(totalBufferSizeInBytes)); + AirbyteSentry.executeWithTracing("FlushBuffer", () -> { + for (final Entry entry : allBuffers.entrySet()) { + LOGGER.info("Flushing buffer of stream {} ({})", entry.getKey().getName(), FileUtils.byteCountToDisplaySize(entry.getValue().getByteCount())); + onStreamFlush.accept(entry.getKey(), entry.getValue()); + } + close(); + clear(); + }, Map.of("bufferSizeInBytes", totalBufferSizeInBytes)); + + if (onFlushAllEventHook != null) { + onFlushAllEventHook.call(); + } + totalBufferSizeInBytes = 0; + } + + @Override + public void clear() throws Exception { + LOGGER.debug("Reset all buffers"); + allBuffers = new HashMap<>(); + } + + @Override + public void close() throws Exception { + final List exceptionsThrown = new ArrayList<>(); + for (final Entry entry : allBuffers.entrySet()) { + try { + LOGGER.info("Closing buffer for stream {}", entry.getKey().getName()); + entry.getValue().close(); + } catch (final Exception e) { + exceptionsThrown.add(e); + LOGGER.error("Exception while closing stream buffer", e); + } + } + if (!exceptionsThrown.isEmpty()) { + throw new RuntimeException(String.format("Exceptions thrown while closing buffers: %s", Strings.join(exceptionsThrown, "\n"))); + } + } + +} diff --git a/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/base/IntegrationRunnerTest.java b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/base/IntegrationRunnerTest.java index d090d6e172c7..1b6f82cbaa7b 100644 --- a/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/base/IntegrationRunnerTest.java +++ b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/base/IntegrationRunnerTest.java @@ -307,7 +307,6 @@ void testInterruptOrphanThreadFailure() { throw new IOException("random error"); }, Assertions::fail, - false, 3, TimeUnit.SECONDS, 10, TimeUnit.SECONDS)); try { @@ -334,7 +333,6 @@ void testNoInterruptOrphanThreadFailure() { throw new IOException("random error"); }, () -> exitCalled.set(true), - false, 3, TimeUnit.SECONDS, 10, TimeUnit.SECONDS)); try { diff --git a/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java index fea5e1c5d7e5..1816b3abf1c4 
100644 --- a/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java +++ b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/buffered_stream_consumer/BufferedStreamConsumerTest.java @@ -21,6 +21,7 @@ import io.airbyte.commons.functional.CheckedFunction; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.destination.record_buffer.InMemoryRecordBufferingStrategy; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; import io.airbyte.protocol.models.AirbyteRecordMessage; @@ -64,7 +65,7 @@ public class BufferedStreamConsumerTest { private BufferedStreamConsumer consumer; private VoidCallable onStart; - private RecordWriter recordWriter; + private RecordWriter recordWriter; private CheckedConsumer onClose; private CheckedFunction isValidRecord; private Consumer outputRecordCollector; @@ -80,11 +81,10 @@ void setup() throws Exception { consumer = new BufferedStreamConsumer( outputRecordCollector, onStart, - recordWriter, + new InMemoryRecordBufferingStrategy(recordWriter, 1_000), onClose, CATALOG, - isValidRecord, - 1_000); + isValidRecord); when(isValidRecord.apply(any())).thenReturn(true); } @@ -163,11 +163,10 @@ void test1StreamWithStateAndThenMoreRecordsSmallerThanBuffer() throws Exception final BufferedStreamConsumer consumer = new BufferedStreamConsumer( outputRecordCollector, onStart, - recordWriter, + new InMemoryRecordBufferingStrategy(recordWriter, 10_000), onClose, CATALOG, - isValidRecord, - 10_000); + isValidRecord); consumer.start(); consumeRecords(consumer, expectedRecordsBatch1); @@ -311,13 +310,14 @@ private static void consumeRecords(final BufferedStreamConsumer consumer, final } private static List generateRecords(final long targetSizeInBytes) { - List output = Lists.newArrayList(); + final List output = Lists.newArrayList(); long bytesCounter = 0; for (int i = 0;; i++) { - JsonNode payload = Jsons.jsonNode(ImmutableMap.of("id", RandomStringUtils.randomAlphabetic(7), "name", "human " + String.format("%8d", i))); - long sizeInBytes = RecordSizeEstimator.getStringByteSize(payload); + final JsonNode payload = + Jsons.jsonNode(ImmutableMap.of("id", RandomStringUtils.randomAlphabetic(7), "name", "human " + String.format("%8d", i))); + final long sizeInBytes = RecordSizeEstimator.getStringByteSize(payload); bytesCounter += sizeInBytes; - AirbyteMessage airbyteMessage = new AirbyteMessage() + final AirbyteMessage airbyteMessage = new AirbyteMessage() .withType(Type.RECORD) .withRecord(new AirbyteRecordMessage() .withStream(STREAM_NAME) diff --git a/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/record_buffer/InMemoryRecordBufferingStrategyTest.java b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/record_buffer/InMemoryRecordBufferingStrategyTest.java new file mode 100644 index 000000000000..27713b7a7860 --- /dev/null +++ b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/record_buffer/InMemoryRecordBufferingStrategyTest.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
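For context on how the SerializedBufferingStrategy introduced above is meant to be wired, here is a sketch (not part of the diff): onCreateBuffer decides which SerializableBuffer backs each stream, and onStreamFlush is the destination-specific upload step. JsonLSerializedBuffer and the staging upload comment are hypothetical; the generic signatures follow the updated tests.

package io.airbyte.integrations.destination.record_buffer;

import io.airbyte.commons.functional.CheckedBiConsumer;
import io.airbyte.commons.functional.CheckedBiFunction;
import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair;
import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;

public class SerializedBufferingStrategyWiringSketch {

  public static BufferingStrategy create(final ConfiguredAirbyteCatalog catalog) {
    // every stream gets its own gzip-compressed, file-backed buffer
    final CheckedBiFunction<AirbyteStreamNameNamespacePair, ConfiguredAirbyteCatalog, SerializableBuffer, Exception> onCreateBuffer =
        (stream, configuredCatalog) -> new JsonLSerializedBuffer(new FileBuffer()).withCompression(true);

    // invoked whenever one stream (or all of them) has to be flushed out of the buffer
    final CheckedBiConsumer<AirbyteStreamNameNamespacePair, SerializableBuffer, Exception> onStreamFlush =
        (stream, writer) -> {
          writer.flush();
          // hypothetical upload step: push writer.getInputStream() to the destination's staging area
        };

    return new SerializedBufferingStrategy(onCreateBuffer, catalog, onStreamFlush);
  }

}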
+ */ + +package io.airbyte.integrations.destination.record_buffer; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.concurrency.VoidCallable; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.destination.buffered_stream_consumer.RecordWriter; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import java.util.List; +import org.junit.jupiter.api.Test; + +public class InMemoryRecordBufferingStrategyTest { + + private static final JsonNode MESSAGE_DATA = Jsons.deserialize("{ \"field1\": 10000 }"); + // MESSAGE_DATA should be 64 bytes long, size the buffer such as it can contain at least 2 message + // instances + private static final int MAX_QUEUE_SIZE_IN_BYTES = 130; + + private final RecordWriter recordWriter = mock(RecordWriter.class); + + @Test + public void testBuffering() throws Exception { + final InMemoryRecordBufferingStrategy buffering = new InMemoryRecordBufferingStrategy(recordWriter, MAX_QUEUE_SIZE_IN_BYTES); + final AirbyteStreamNameNamespacePair stream1 = new AirbyteStreamNameNamespacePair("stream1", "namespace"); + final AirbyteStreamNameNamespacePair stream2 = new AirbyteStreamNameNamespacePair("stream2", null); + final AirbyteMessage message1 = generateMessage(stream1); + final AirbyteMessage message2 = generateMessage(stream2); + final AirbyteMessage message3 = generateMessage(stream2); + final AirbyteMessage message4 = generateMessage(stream2); + final VoidCallable hook = mock(VoidCallable.class); + buffering.registerFlushAllEventHook(hook); + + buffering.addRecord(stream1, message1); + buffering.addRecord(stream2, message2); + // Buffer still has room + verify(hook, times(0)).call(); + + buffering.addRecord(stream2, message3); + // Buffer limit reach, flushing all messages so far before adding the new incoming one + verify(hook, times(1)).call(); + verify(recordWriter, times(1)).accept(stream1, List.of(message1.getRecord())); + verify(recordWriter, times(1)).accept(stream2, List.of(message2.getRecord())); + + buffering.addRecord(stream2, message4); + + // force flush to terminate test + buffering.flushAll(); + verify(hook, times(2)).call(); + verify(recordWriter, times(1)).accept(stream2, List.of(message3.getRecord(), message4.getRecord())); + } + + private static AirbyteMessage generateMessage(final AirbyteStreamNameNamespacePair stream) { + return new AirbyteMessage().withRecord(new AirbyteRecordMessage() + .withStream(stream.getName()) + .withNamespace(stream.getNamespace()) + .withData(MESSAGE_DATA)); + } + +} diff --git a/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/record_buffer/SerializedBufferingStrategyTest.java b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/record_buffer/SerializedBufferingStrategyTest.java new file mode 100644 index 000000000000..67007fe5d20a --- /dev/null +++ b/airbyte-integrations/bases/base-java/src/test/java/io/airbyte/integrations/destination/record_buffer/SerializedBufferingStrategyTest.java @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.record_buffer; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.concurrency.VoidCallable; +import io.airbyte.commons.functional.CheckedBiConsumer; +import io.airbyte.commons.functional.CheckedBiFunction; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class SerializedBufferingStrategyTest { + + private static final JsonNode MESSAGE_DATA = Jsons.deserialize("{ \"field1\": 10000 }"); + private static final String STREAM_1 = "stream1"; + private static final String STREAM_2 = "stream2"; + private static final String STREAM_3 = "stream3"; + private static final String STREAM_4 = "stream4"; + + // we set the limit to hold at most 4 messages of 10b total + private static final long MAX_TOTAL_BUFFER_SIZE_BYTES = 42L; + // we set the limit to hold at most 2 messages of 10b per stream + private static final long MAX_PER_STREAM_BUFFER_SIZE_BYTES = 21L; + + private final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + private final CheckedBiConsumer perStreamFlushHook = + mock(CheckedBiConsumer.class); + private final VoidCallable flushAllHook = mock(VoidCallable.class); + + private final SerializableBuffer recordWriter1 = mock(SerializableBuffer.class); + private final SerializableBuffer recordWriter2 = mock(SerializableBuffer.class); + private final SerializableBuffer recordWriter3 = mock(SerializableBuffer.class); + private final SerializableBuffer recordWriter4 = mock(SerializableBuffer.class); + + @BeforeEach + public void setup() throws Exception { + setupMock(recordWriter1); + setupMock(recordWriter2); + setupMock(recordWriter3); + setupMock(recordWriter4); + } + + private void setupMock(final SerializableBuffer mockObject) throws Exception { + when(mockObject.accept(any())).thenReturn(10L); + when(mockObject.getByteCount()).thenReturn(10L); + when(mockObject.getMaxTotalBufferSizeInBytes()).thenReturn(MAX_TOTAL_BUFFER_SIZE_BYTES); + when(mockObject.getMaxPerStreamBufferSizeInBytes()).thenReturn(MAX_PER_STREAM_BUFFER_SIZE_BYTES); + when(mockObject.getMaxConcurrentStreamsInBuffer()).thenReturn(4); + } + + @Test + public void testPerStreamThresholdFlush() throws Exception { + final SerializedBufferingStrategy buffering = new SerializedBufferingStrategy(onCreateBufferFunction(), catalog, perStreamFlushHook); + final AirbyteStreamNameNamespacePair stream1 = new AirbyteStreamNameNamespacePair(STREAM_1, "namespace"); + final AirbyteStreamNameNamespacePair stream2 = new AirbyteStreamNameNamespacePair(STREAM_2, null); + // To test per stream threshold, we are sending multiple test messages on a single stream + final AirbyteMessage message1 = generateMessage(stream1); + final AirbyteMessage message2 = generateMessage(stream2); + final AirbyteMessage message3 = generateMessage(stream2); + final AirbyteMessage message4 = generateMessage(stream2); + final AirbyteMessage message5 = generateMessage(stream2); + 
buffering.registerFlushAllEventHook(flushAllHook); + + when(recordWriter1.getByteCount()).thenReturn(10L); // one record in recordWriter1 + buffering.addRecord(stream1, message1); + when(recordWriter2.getByteCount()).thenReturn(10L); // one record in recordWriter2 + buffering.addRecord(stream2, message2); + + // Total and per stream Buffers still have room + verify(flushAllHook, times(0)).call(); + verify(perStreamFlushHook, times(0)).accept(stream1, recordWriter1); + verify(perStreamFlushHook, times(0)).accept(stream2, recordWriter2); + + when(recordWriter2.getByteCount()).thenReturn(20L); // second record in recordWriter2 + buffering.addRecord(stream2, message3); + when(recordWriter2.getByteCount()).thenReturn(30L); // third record in recordWriter2 + buffering.addRecord(stream2, message4); + + // The buffer limit is now reached for stream2, flushing that single stream only + verify(flushAllHook, times(0)).call(); + verify(perStreamFlushHook, times(0)).accept(stream1, recordWriter1); + verify(perStreamFlushHook, times(1)).accept(stream2, recordWriter2); + + when(recordWriter2.getByteCount()).thenReturn(10L); // back to one record in recordWriter2 + buffering.addRecord(stream2, message5); + + // force flush to terminate test + buffering.flushAll(); + verify(flushAllHook, times(1)).call(); + verify(perStreamFlushHook, times(1)).accept(stream1, recordWriter1); + verify(perStreamFlushHook, times(2)).accept(stream2, recordWriter2); + } + + @Test + public void testTotalStreamThresholdFlush() throws Exception { + final SerializedBufferingStrategy buffering = new SerializedBufferingStrategy(onCreateBufferFunction(), catalog, perStreamFlushHook); + final AirbyteStreamNameNamespacePair stream1 = new AirbyteStreamNameNamespacePair(STREAM_1, "namespace"); + final AirbyteStreamNameNamespacePair stream2 = new AirbyteStreamNameNamespacePair(STREAM_2, "namespace"); + final AirbyteStreamNameNamespacePair stream3 = new AirbyteStreamNameNamespacePair(STREAM_3, "namespace"); + // To test total stream threshold, we are sending test messages to multiple streams without reaching + // per stream limits + final AirbyteMessage message1 = generateMessage(stream1); + final AirbyteMessage message2 = generateMessage(stream2); + final AirbyteMessage message3 = generateMessage(stream3); + final AirbyteMessage message4 = generateMessage(stream1); + final AirbyteMessage message5 = generateMessage(stream2); + final AirbyteMessage message6 = generateMessage(stream3); + buffering.registerFlushAllEventHook(flushAllHook); + + buffering.addRecord(stream1, message1); + buffering.addRecord(stream2, message2); + // Total and per stream Buffers still have room + verify(flushAllHook, times(0)).call(); + verify(perStreamFlushHook, times(0)).accept(stream1, recordWriter1); + verify(perStreamFlushHook, times(0)).accept(stream2, recordWriter2); + verify(perStreamFlushHook, times(0)).accept(stream3, recordWriter3); + + buffering.addRecord(stream3, message3); + when(recordWriter1.getByteCount()).thenReturn(20L); // second record in recordWriter1 + buffering.addRecord(stream1, message4); + when(recordWriter2.getByteCount()).thenReturn(20L); // second record in recordWriter2 + buffering.addRecord(stream2, message5); + // Buffer limit reached for total streams, flushing all streams + verify(flushAllHook, times(1)).call(); + verify(perStreamFlushHook, times(1)).accept(stream1, recordWriter1); + verify(perStreamFlushHook, times(1)).accept(stream2, recordWriter2); + verify(perStreamFlushHook, times(1)).accept(stream3, recordWriter3); + + 
buffering.addRecord(stream3, message6); + // force flush to terminate test + buffering.flushAll(); + verify(flushAllHook, times(2)).call(); + verify(perStreamFlushHook, times(1)).accept(stream1, recordWriter1); + verify(perStreamFlushHook, times(1)).accept(stream2, recordWriter2); + verify(perStreamFlushHook, times(2)).accept(stream3, recordWriter3); + } + + @Test + public void testConcurrentStreamThresholdFlush() throws Exception { + final SerializedBufferingStrategy buffering = new SerializedBufferingStrategy(onCreateBufferFunction(), catalog, perStreamFlushHook); + final AirbyteStreamNameNamespacePair stream1 = new AirbyteStreamNameNamespacePair(STREAM_1, "namespace1"); + final AirbyteStreamNameNamespacePair stream2 = new AirbyteStreamNameNamespacePair(STREAM_2, "namespace2"); + final AirbyteStreamNameNamespacePair stream3 = new AirbyteStreamNameNamespacePair(STREAM_3, null); + final AirbyteStreamNameNamespacePair stream4 = new AirbyteStreamNameNamespacePair(STREAM_4, null); + // To test concurrent stream threshold, we are sending test messages to multiple streams + final AirbyteMessage message1 = generateMessage(stream1); + final AirbyteMessage message2 = generateMessage(stream2); + final AirbyteMessage message3 = generateMessage(stream3); + final AirbyteMessage message4 = generateMessage(stream4); + final AirbyteMessage message5 = generateMessage(stream1); + buffering.registerFlushAllEventHook(flushAllHook); + + buffering.addRecord(stream1, message1); + buffering.addRecord(stream2, message2); + buffering.addRecord(stream3, message3); + // Total and per stream Buffers still have room + verify(flushAllHook, times(0)).call(); + verify(perStreamFlushHook, times(0)).accept(stream1, recordWriter1); + verify(perStreamFlushHook, times(0)).accept(stream2, recordWriter2); + verify(perStreamFlushHook, times(0)).accept(stream3, recordWriter3); + + buffering.addRecord(stream4, message4); + // Buffer limit reached for concurrent streams, flushing all streams + verify(flushAllHook, times(1)).call(); + verify(perStreamFlushHook, times(1)).accept(stream1, recordWriter1); + verify(perStreamFlushHook, times(1)).accept(stream2, recordWriter2); + verify(perStreamFlushHook, times(1)).accept(stream3, recordWriter3); + verify(perStreamFlushHook, times(1)).accept(stream4, recordWriter4); + + buffering.addRecord(stream1, message5); + // force flush to terminate test + buffering.flushAll(); + verify(flushAllHook, times(2)).call(); + verify(perStreamFlushHook, times(2)).accept(stream1, recordWriter1); + verify(perStreamFlushHook, times(1)).accept(stream2, recordWriter2); + verify(perStreamFlushHook, times(1)).accept(stream3, recordWriter3); + verify(perStreamFlushHook, times(1)).accept(stream4, recordWriter4); + } + + @Test + public void testCreateBufferFailure() { + final SerializedBufferingStrategy buffering = new SerializedBufferingStrategy(onCreateBufferFunction(), catalog, perStreamFlushHook); + final AirbyteStreamNameNamespacePair stream = new AirbyteStreamNameNamespacePair("unknown_stream", "namespace1"); + assertThrows(RuntimeException.class, () -> buffering.addRecord(stream, generateMessage(stream))); + } + + private static AirbyteMessage generateMessage(final AirbyteStreamNameNamespacePair stream) { + return new AirbyteMessage().withRecord(new AirbyteRecordMessage() + .withStream(stream.getName()) + .withNamespace(stream.getNamespace()) + .withData(MESSAGE_DATA)); + } + + private CheckedBiFunction onCreateBufferFunction() { + return (stream, catalog) -> switch (stream.getName()) { + case STREAM_1 -> 
recordWriter1; + case STREAM_2 -> recordWriter2; + case STREAM_3 -> recordWriter3; + case STREAM_4 -> recordWriter4; + default -> null; + }; + } + +} diff --git a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DataArgumentsProvider.java b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DataArgumentsProvider.java index 505771088751..76d33b713f64 100644 --- a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DataArgumentsProvider.java +++ b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DataArgumentsProvider.java @@ -20,6 +20,8 @@ public class DataArgumentsProvider implements ArgumentsProvider { new CatalogMessageTestConfigPair("exchange_rate_catalog.json", "exchange_rate_messages.txt"); public static final CatalogMessageTestConfigPair EDGE_CASE_CONFIG = new CatalogMessageTestConfigPair("edge_case_catalog.json", "edge_case_messages.txt"); + public static final CatalogMessageTestConfigPair NAMESPACE_CONFIG = + new CatalogMessageTestConfigPair("namespace_catalog.json", "namespace_messages.txt"); @Override public Stream provideArguments(final ExtensionContext context) { diff --git a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java index a8fcfaa4f7c7..891c550a7952 100644 --- a/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java +++ b/airbyte-integrations/bases/standard-destination-test/src/main/java/io/airbyte/integrations/standardtest/destination/DestinationAcceptanceTest.java @@ -19,6 +19,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.Exceptions; import io.airbyte.commons.resources.MoreResources; +import io.airbyte.commons.util.MoreIterators; import io.airbyte.commons.util.MoreLists; import io.airbyte.config.EnvConfigs; import io.airbyte.config.JobGetSpecConfig; @@ -27,6 +28,7 @@ import io.airbyte.config.StandardCheckConnectionOutput; import io.airbyte.config.StandardCheckConnectionOutput.Status; import io.airbyte.config.WorkerDestinationConfig; +import io.airbyte.integrations.destination.NamingConventionTransformer; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteMessage.Type; @@ -59,21 +61,27 @@ import java.time.Instant; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Optional; import java.util.Random; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import java.util.stream.Stream; import org.joda.time.DateTime; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtensionContext; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import 
org.junit.jupiter.params.provider.ArgumentsProvider; import org.junit.jupiter.params.provider.ArgumentsSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -376,7 +384,15 @@ public void testSyncWithLargeRecordBatch(final String messagesFilename, final St final List messages = MoreResources.readResource(messagesFilename).lines() .map(record -> Jsons.deserialize(record, AirbyteMessage.class)).collect(Collectors.toList()); - final List largeNumberRecords = Collections.nCopies(400, messages).stream().flatMap(List::stream).collect(Collectors.toList()); + final List largeNumberRecords = Collections + .nCopies(400, messages) + .stream() + .flatMap(List::stream) + // regroup messages per stream + .sorted(Comparator + .comparing(AirbyteMessage::getType) + .thenComparing(message -> message.getType().equals(Type.RECORD) ? message.getRecord().getStream() : message.toString())) + .collect(Collectors.toList()); final JsonNode config = getConfig(); runSyncAndVerifyStateOutput(config, largeNumberRecords, configuredCatalog, false); @@ -851,17 +867,12 @@ void testSyncUsesAirbyteStreamNamespaceIfNotNull() throws Exception { final List messages = MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.messageFile).lines() .map(record -> Jsons.deserialize(record, AirbyteMessage.class)).collect(Collectors.toList()); - messages.forEach( - message -> { - if (message.getRecord() != null) { - message.getRecord().setNamespace(namespace); - } - }); + final List messagesWithNewNamespace = getRecordMessagesWithNewNamespace(messages, namespace); final JsonNode config = getConfig(); final String defaultSchema = getDefaultSchema(config); - runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false); - retrieveRawRecordsAndAssertSameMessages(catalog, messages, defaultSchema); + runSyncAndVerifyStateOutput(config, messagesWithNewNamespace, configuredCatalog, false); + retrieveRawRecordsAndAssertSameMessages(catalog, messagesWithNewNamespace, defaultSchema); } /** @@ -891,24 +902,15 @@ void testSyncWriteSameTableNameDifferentNamespace() throws Exception { final var configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog); - final var ns1Msgs = MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.messageFile).lines() + final var ns1Messages = MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.messageFile).lines() .map(record -> Jsons.deserialize(record, AirbyteMessage.class)).collect(Collectors.toList()); - ns1Msgs.forEach( - message -> { - if (message.getRecord() != null) { - message.getRecord().setNamespace(namespace1); - } - }); - final var ns2Msgs = MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.messageFile).lines() + final var ns1MessagesAtNamespace1 = getRecordMessagesWithNewNamespace(ns1Messages, namespace1); + final var ns2Messages = MoreResources.readResource(DataArgumentsProvider.EXCHANGE_RATE_CONFIG.messageFile).lines() .map(record -> Jsons.deserialize(record, AirbyteMessage.class)).collect(Collectors.toList()); - ns2Msgs.forEach( - message -> { - if (message.getRecord() != null) { - message.getRecord().setNamespace(namespace2); - } - }); - final var allMessages = new ArrayList<>(ns1Msgs); - allMessages.addAll(ns2Msgs); + final var ns2MessagesAtNamespace2 = getRecordMessagesWithNewNamespace(ns2Messages, namespace2); + + final var allMessages = new ArrayList<>(ns1MessagesAtNamespace1); + allMessages.addAll(ns2MessagesAtNamespace2); final JsonNode config = getConfig(); final String defaultSchema = 
getDefaultSchema(config); @@ -916,6 +918,52 @@ void testSyncWriteSameTableNameDifferentNamespace() throws Exception { retrieveRawRecordsAndAssertSameMessages(catalog, allMessages, defaultSchema); } + public static class NamespaceTestCaseProvider implements ArgumentsProvider { + + @Override + public Stream provideArguments(final ExtensionContext context) throws Exception { + final JsonNode testCases = + Jsons.deserialize(MoreResources.readResource("namespace_test_cases.json")); + return MoreIterators.toList(testCases.elements()).stream() + .filter(testCase -> testCase.get("enabled").asBoolean()) + .map(testCase -> Arguments.of( + testCase.get("id").asText(), + testCase.get("namespace").asText(), + testCase.get("normalized").asText())); + } + + } + + @ParameterizedTest + @ArgumentsSource(NamespaceTestCaseProvider.class) + public void testNamespaces(final String testCaseId, final String namespace, final String normalizedNamespace) throws Exception { + final Optional nameTransformer = getNameTransformer(); + nameTransformer.ifPresent(namingConventionTransformer -> assertNamespaceNormalization(testCaseId, normalizedNamespace, + namingConventionTransformer.getNamespace(namespace))); + + if (!implementsNamespaces() || !supportNamespaceTest()) { + return; + } + + final AirbyteCatalog catalog = Jsons.deserialize( + MoreResources.readResource(DataArgumentsProvider.NAMESPACE_CONFIG.catalogFile), AirbyteCatalog.class); + catalog.getStreams().forEach(stream -> stream.setNamespace(namespace)); + final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog); + + final List messages = MoreResources.readResource(DataArgumentsProvider.NAMESPACE_CONFIG.messageFile).lines() + .map(record -> Jsons.deserialize(record, AirbyteMessage.class)).collect(Collectors.toList()); + final List messagesWithNewNamespace = getRecordMessagesWithNewNamespace(messages, namespace); + + final JsonNode config = getConfig(); + try { + runSyncAndVerifyStateOutput(config, messagesWithNewNamespace, configuredCatalog, false); + } catch (final Exception e) { + throw new IOException(String.format( + "[Test Case %s] Destination failed to sync data to namespace %s, see \"namespace_test_cases.json for details\"", + testCaseId, namespace), e); + } + } + /** * In order to launch a source on Kubernetes in a pod, we need to be able to wrap the entrypoint. * The source connector must specify its entrypoint in the AIRBYTE_ENTRYPOINT variable. This test @@ -934,6 +982,32 @@ public void testEntrypointEnvVar() throws Exception { assertFalse(entrypoint.isBlank()); } + /** + * Whether the destination should be tested against different namespaces. + */ + protected boolean supportNamespaceTest() { + return false; + } + + /** + * Set up the name transformer used by a destination to test it against a variety of namespaces. + */ + protected Optional getNameTransformer() { + return Optional.empty(); + } + + /** + * Override this method if the normalized namespace is different from the default one. E.g. BigQuery + * does allow a name starting with a number. So it should change the expected normalized namespace + * when testCaseId = "S3A-1". Find the testCaseId in "namespace_test_cases.json". 
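(Editorial aside.) The test case provider above and the namespace_test_cases.json resource added further below drive the new testNamespaces run. As a rough, standalone illustration of how a case file with that shape can be read and filtered, here is a minimal sketch; it uses plain Jackson rather than Airbyte's Jsons and MoreIterators helpers, and the file path is a placeholder (the acceptance test loads the resource from the classpath instead).

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.nio.file.Files;
import java.nio.file.Path;

public class NamespaceTestCasePreview {

  public static void main(final String[] args) throws Exception {
    // Placeholder path; the acceptance test reads the same file as a classpath resource.
    final JsonNode testCases = new ObjectMapper()
        .readTree(Files.readString(Path.of("namespace_test_cases.json")));

    for (final JsonNode testCase : testCases) {
      // Disabled cases are skipped, mirroring the filter in NamespaceTestCaseProvider.
      if (!testCase.get("enabled").asBoolean()) {
        continue;
      }
      // Each enabled case supplies the test id, the raw namespace, and the expected
      // normalized namespace that the destination's name transformer should produce.
      System.out.printf("%s: %s -> %s%n",
          testCase.get("id").asText(),
          testCase.get("namespace").asText(),
          testCase.get("normalized").asText());
    }
  }

}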
+ */ + protected void assertNamespaceNormalization(final String testCaseId, + final String expectedNormalizedNamespace, + final String actualNormalizedNamespace) { + assertEquals(expectedNormalizedNamespace, actualNormalizedNamespace, + String.format("Test case %s failed; if this is expected, please override assertNamespaceNormalization", testCaseId)); + } + private ConnectorSpecification runSpec() throws WorkerException { return new DefaultGetSpecWorker( workerConfigs, new AirbyteIntegrationLauncher(JOB_ID, JOB_ATTEMPT, getImageName(), processFactory, null)) @@ -1058,12 +1132,12 @@ protected void assertSameMessages(final List expected, .map(AirbyteMessage::getRecord) .peek(recordMessage -> recordMessage.setEmittedAt(null)) .map(recordMessage -> pruneAirbyteInternalFields ? safePrune(recordMessage) : recordMessage) - .map(recordMessage -> recordMessage.getData()) + .map(AirbyteRecordMessage::getData) .collect(Collectors.toList()); final List actualProcessed = actual.stream() .map(recordMessage -> pruneAirbyteInternalFields ? safePrune(recordMessage) : recordMessage) - .map(recordMessage -> recordMessage.getData()) + .map(AirbyteRecordMessage::getData) .collect(Collectors.toList()); assertSameData(expectedProcessed, actualProcessed); @@ -1382,4 +1456,16 @@ private void runAndCheckWithoutNormalization(final List messages retrieveRawRecordsAndAssertSameMessages(catalog, messages, getDefaultSchema(config)); } + /** + * Mutate the input airbyte record message namespace. + */ + private static List getRecordMessagesWithNewNamespace(final List airbyteMessages, final String namespace) { + airbyteMessages.forEach(message -> { + if (message.getRecord() != null) { + message.getRecord().setNamespace(namespace); + } + }); + return airbyteMessages; + } + } diff --git a/airbyte-integrations/bases/standard-destination-test/src/main/resources/namespace_catalog.json b/airbyte-integrations/bases/standard-destination-test/src/main/resources/namespace_catalog.json new file mode 100644 index 000000000000..a361581fcb1a --- /dev/null +++ b/airbyte-integrations/bases/standard-destination-test/src/main/resources/namespace_catalog.json @@ -0,0 +1,14 @@ +{ + "streams": [ + { + "name": "data_stream", + "json_schema": { + "properties": { + "field1": { + "type": "boolean" + } + } + } + } + ] +} diff --git a/airbyte-integrations/bases/standard-destination-test/src/main/resources/namespace_messages.txt b/airbyte-integrations/bases/standard-destination-test/src/main/resources/namespace_messages.txt new file mode 100644 index 000000000000..e40a257741e4 --- /dev/null +++ b/airbyte-integrations/bases/standard-destination-test/src/main/resources/namespace_messages.txt @@ -0,0 +1,2 @@ +{"type": "RECORD", "record": {"stream": "data_stream", "emitted_at": 1602637589000, "data": { "field1" : true }}} +{"type": "STATE", "state": { "data": {"start_date": "2022-08-17"}}} diff --git a/airbyte-integrations/bases/standard-destination-test/src/main/resources/namespace_test_cases.json b/airbyte-integrations/bases/standard-destination-test/src/main/resources/namespace_test_cases.json new file mode 100644 index 000000000000..f9ad2f047859 --- /dev/null +++ b/airbyte-integrations/bases/standard-destination-test/src/main/resources/namespace_test_cases.json @@ -0,0 +1,54 @@ +[ + { + "id": "S1-1", + "description": "namespace are converted to lowercase", + "namespace": "NAMESPACE", + "enabled": false, + "normalized": "namespace", + "comment": "this test case is disabled because it is not critical and we are not ready to change the behavior of 
existing destinations yet" + }, + { + "id": "S2-1", + "description": "namespace allows alphabets, numbers, and underscore", + "namespace": "dest_1001_namespace", + "enabled": true, + "normalized": "dest_1001_namespace" + }, + { + "id": "S2A-1", + "description": "namespace romanization", + "namespace": "namespace_with_spécial_character", + "enabled": true, + "normalized": "namespace_with_special_character" + }, + { + "id": "S2A-2", + "description": "namespace romanization (japanese)", + "namespace": "namespace_こんにちは", + "enabled": false, + "normalized": "namespace_konnichiwa" + }, + { + "id": "S3A-1", + "description": "namespace starting with a number", + "namespace": "99namespace", + "enabled": true, + "normalized": "_99namespace" + }, + { + "id": "S3B-1", + "description": "long namespace (300 characters)", + "namespace": "a_300_characters_looooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo_namespace", + "enabled": false, + "normalized": "", + "comment": "this test case is disabled because it is for future testing only" + }, + { + "id": "S3C-1", + "description": "reserved word", + "namespace": "select", + "enabled": false, + "normalized": "", + "comment": "this test case is disabled because it is for future testing only" + } +] diff --git a/airbyte-integrations/builds.md b/airbyte-integrations/builds.md index ec80010bbb49..536bed04b0fc 100644 --- a/airbyte-integrations/builds.md +++ b/airbyte-integrations/builds.md @@ -20,6 +20,7 @@ | BigQuery | [![source-bigquery](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-bigquery%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-bigquery/) | | Bing Ads | [![source-bing-ads](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-bing-ads%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-bing-ads) | | Chargebee | [![source-chargebee](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-chargebee%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-chargebee/) | +| Chargify | [![source-chargify](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-chargify%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-chargify/) | | Chartmogul | [![source-chartmogul](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-chartmogul%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-chartmogul/) | | Cart.com | [![source-cart](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-cart%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-cart/) | | Close.com | [![source-close-com](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fsource-close-com%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/source-close-com/) | diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile index 2db1049130a2..57afd61f02f9 100644 --- 
a/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/Dockerfile @@ -17,5 +17,5 @@ ENV ENABLE_SENTRY true COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.2.10 +LABEL io.airbyte.version=0.2.11 LABEL io.airbyte.name=airbyte/destination-bigquery-denormalized diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationAcceptanceTest.java index 9b25b368edff..6603f588f937 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationAcceptanceTest.java @@ -4,6 +4,8 @@ package io.airbyte.integrations.destination.bigquery; +import static org.junit.jupiter.api.Assertions.assertEquals; + import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.auth.oauth2.ServiceAccountCredentials; @@ -26,6 +28,7 @@ import io.airbyte.commons.resources.MoreResources; import io.airbyte.commons.string.Strings; import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.integrations.destination.NamingConventionTransformer; import io.airbyte.integrations.destination.StandardNameTransformer; import io.airbyte.integrations.standardtest.destination.DataArgumentsProvider; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; @@ -35,11 +38,14 @@ import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.StreamSupport; @@ -52,11 +58,12 @@ public class BigQueryDenormalizedDestinationAcceptanceTest extends DestinationAcceptanceTest { private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDenormalizedDestinationAcceptanceTest.class); + private static final BigQuerySQLNameTransformer NAME_TRANSFORMER = new BigQuerySQLNameTransformer(); - private static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); + protected static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); private static final String CONFIG_DATASET_ID = "dataset_id"; - private static final String CONFIG_PROJECT_ID = "project_id"; + protected static final String CONFIG_PROJECT_ID = "project_id"; private static final String CONFIG_DATASET_LOCATION = "dataset_location"; private static final String CONFIG_CREDS = "credentials_json"; private static final List AIRBYTE_COLUMNS = List.of(JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_EMITTED_AT); @@ -78,7 +85,7 @@ protected JsonNode getConfig() { } @Override - protected JsonNode getFailCheckConfig() throws Exception { + protected JsonNode 
getFailCheckConfig() { ((ObjectNode) config).put(CONFIG_PROJECT_ID, "fake"); return config; } @@ -93,6 +100,30 @@ protected boolean implementsNamespaces() { return true; } + @Override + protected boolean supportNamespaceTest() { + return true; + } + + @Override + protected Optional getNameTransformer() { + return Optional.of(NAME_TRANSFORMER); + } + + @Override + protected void assertNamespaceNormalization(final String testCaseId, + final String expectedNormalizedNamespace, + final String actualNormalizedNamespace) { + final String message = String.format("Test case %s failed; if this is expected, please override assertNamespaceNormalization", testCaseId); + if (testCaseId.equals("S3A-1")) { + // bigquery allows namespace starting with a number, and prepending underscore + // will hide the dataset, so we don't do it as we do for other destinations + assertEquals("99namespace", actualNormalizedNamespace, message); + } else { + assertEquals(expectedNormalizedNamespace, actualNormalizedNamespace, message); + } + } + @Override protected String getDefaultSchema(final JsonNode config) { return config.get(CONFIG_DATASET_ID).asText(); @@ -173,31 +204,33 @@ private Object getTypedFieldValue(final FieldValueList row, final Field field) { } } - @Override - protected void setup(final TestDestinationEnv testEnv) throws Exception { - if (!Files.exists(CREDENTIALS_PATH)) { - throw new IllegalStateException( - "Must provide path to a big query credentials file. By default {module-root}/" + CREDENTIALS_PATH - + ". Override by setting setting path with the CREDENTIALS_PATH constant."); - } - + protected JsonNode createConfig() throws IOException { final String credentialsJsonString = Files.readString(CREDENTIALS_PATH); - final JsonNode credentialsJson = Jsons.deserialize(credentialsJsonString).get(BigQueryConsts.BIGQUERY_BASIC_CONFIG); final String projectId = credentialsJson.get(CONFIG_PROJECT_ID).asText(); final String datasetLocation = "US"; - final String datasetId = Strings.addRandomSuffix("airbyte_tests", "_", 8); - config = Jsons.jsonNode(ImmutableMap.builder() + return Jsons.jsonNode(ImmutableMap.builder() .put(CONFIG_PROJECT_ID, projectId) .put(CONFIG_CREDS, credentialsJson.toString()) .put(CONFIG_DATASET_ID, datasetId) .put(CONFIG_DATASET_LOCATION, datasetLocation) .build()); + } + + @Override + protected void setup(final TestDestinationEnv testEnv) throws Exception { + if (!Files.exists(CREDENTIALS_PATH)) { + throw new IllegalStateException( + "Must provide path to a big query credentials file. By default {module-root}/" + CREDENTIALS_PATH + + ". 
Override by setting setting path with the CREDENTIALS_PATH constant."); + } + + config = createConfig(); + final ServiceAccountCredentials credentials = ServiceAccountCredentials + .fromStream(new ByteArrayInputStream(config.get(CONFIG_CREDS).asText().getBytes(StandardCharsets.UTF_8))); - final ServiceAccountCredentials credentials = - ServiceAccountCredentials.fromStream(new ByteArrayInputStream(config.get(CONFIG_CREDS).asText().getBytes())); bigquery = BigQueryOptions.newBuilder() .setProjectId(config.get(CONFIG_PROJECT_ID).asText()) .setCredentials(credentials) @@ -224,7 +257,7 @@ protected void tearDown(final TestDestinationEnv testEnv) { tearDownBigQuery(); } - private void tearDownBigQuery() { + protected void tearDownBigQuery() { // allows deletion of a dataset that has contents final BigQuery.DatasetDeleteOption option = BigQuery.DatasetDeleteOption.deleteContents(); diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java index b097048196ee..68943171d67c 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedDestinationTest.java @@ -5,7 +5,18 @@ package io.airbyte.integrations.destination.bigquery; import static io.airbyte.integrations.destination.bigquery.formatter.DefaultBigQueryDenormalizedRecordFormatter.NESTED_ARRAY_FIELD; -import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.*; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getData; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataWithEmptyObjectAndArray; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataWithFormats; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataWithJSONDateTimeFormats; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataWithJSONWithReference; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getDataWithNestedDatetimeInsideNullObject; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchema; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaWithDateTime; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaWithFormats; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaWithInvalidArrayType; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaWithNestedDatetimeInsideNullObject; +import static io.airbyte.integrations.destination.bigquery.util.BigQueryDenormalizedTestDataUtils.getSchemaWithReferenceDefinition; import static org.junit.jupiter.api.Assertions.assertEquals; 
import static org.junit.jupiter.params.provider.Arguments.arguments; @@ -36,6 +47,7 @@ import io.airbyte.protocol.models.SyncMode; import java.io.ByteArrayInputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.time.Instant; @@ -101,7 +113,7 @@ void setup(final TestInfo info) throws IOException { final String projectId = credentialsJson.get(BigQueryConsts.CONFIG_PROJECT_ID).asText(); final ServiceAccountCredentials credentials = - ServiceAccountCredentials.fromStream(new ByteArrayInputStream(credentialsJson.toString().getBytes())); + ServiceAccountCredentials.fromStream(new ByteArrayInputStream(credentialsJson.toString().getBytes(StandardCharsets.UTF_8))); bigquery = BigQueryOptions.newBuilder() .setProjectId(projectId) .setCredentials(credentials) diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGcsDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGcsDestinationAcceptanceTest.java index d4458e157b44..d49bd7f4097d 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGcsDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGcsDestinationAcceptanceTest.java @@ -5,184 +5,19 @@ package io.airbyte.integrations.destination.bigquery; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.google.auth.oauth2.ServiceAccountCredentials; -import com.google.cloud.bigquery.BigQuery; -import com.google.cloud.bigquery.BigQueryOptions; -import com.google.cloud.bigquery.Dataset; -import com.google.cloud.bigquery.DatasetInfo; -import com.google.cloud.bigquery.Field; -import com.google.cloud.bigquery.FieldList; -import com.google.cloud.bigquery.FieldValue; -import com.google.cloud.bigquery.FieldValueList; -import com.google.cloud.bigquery.Job; -import com.google.cloud.bigquery.JobId; -import com.google.cloud.bigquery.JobInfo; -import com.google.cloud.bigquery.QueryJobConfiguration; -import com.google.cloud.bigquery.TableResult; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Maps; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.resources.MoreResources; import io.airbyte.commons.string.Strings; -import io.airbyte.integrations.base.JavaBaseConstants; -import io.airbyte.integrations.destination.StandardNameTransformer; -import io.airbyte.integrations.standardtest.destination.DataArgumentsProvider; -import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; -import io.airbyte.protocol.models.AirbyteCatalog; -import io.airbyte.protocol.models.AirbyteMessage; -import io.airbyte.protocol.models.AirbyteRecordMessage; -import io.airbyte.protocol.models.CatalogHelpers; -import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import java.io.ByteArrayInputStream; +import java.io.IOException; import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.UUID; -import java.util.stream.Collectors; 
-import java.util.stream.StreamSupport; -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ArgumentsSource; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -public class BigQueryDenormalizedGcsDestinationAcceptanceTest extends DestinationAcceptanceTest { - - private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDenormalizedGcsDestinationAcceptanceTest.class); - - private static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); - - private static final String CONFIG_DATASET_ID = "dataset_id"; - private static final String CONFIG_PROJECT_ID = "project_id"; - private static final String CONFIG_DATASET_LOCATION = "dataset_location"; - private static final List AIRBYTE_COLUMNS = List.of(JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_EMITTED_AT); - - private BigQuery bigquery; - private Dataset dataset; - private boolean tornDown; - private JsonNode config; - private final StandardNameTransformer namingResolver = new StandardNameTransformer(); - - @Override - protected String getImageName() { - return "airbyte/destination-bigquery-denormalized:dev"; - } - - @Override - protected JsonNode getConfig() { - return config; - } - - @Override - protected JsonNode getFailCheckConfig() throws Exception { - ((ObjectNode) config).put(CONFIG_PROJECT_ID, "fake"); - return config; - } - - @Override - protected boolean supportsDBT() { - return true; - } - - @Override - protected boolean implementsNamespaces() { - return true; - } - - @Override - protected String getDefaultSchema(final JsonNode config) { - return config.get(CONFIG_DATASET_ID).asText(); - } - - @Override - protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, final String streamName, final String namespace) - throws Exception { - final String tableName = namingResolver.getIdentifier(streamName); - final String schema = namingResolver.getIdentifier(namespace); - return retrieveRecordsFromTable(tableName, schema); - } - - @Override - protected List retrieveRecords(final TestDestinationEnv env, - final String streamName, - final String namespace, - final JsonNode streamSchema) - throws Exception { - return new ArrayList<>(retrieveRecordsFromTable(namingResolver.getIdentifier(streamName), namingResolver.getIdentifier(namespace))); - } - - @Override - protected List resolveIdentifier(final String identifier) { - final List result = new ArrayList<>(); - result.add(identifier); - result.add(namingResolver.getIdentifier(identifier)); - return result; - } - - private List retrieveRecordsFromTable(final String tableName, final String schema) throws InterruptedException { - final QueryJobConfiguration queryConfig = - QueryJobConfiguration - .newBuilder( - String.format("SELECT * FROM `%s`.`%s` order by %s asc;", schema, tableName, - JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) - .setUseLegacySql(false).build(); - - final TableResult queryResults = executeQuery(bigquery, queryConfig).getLeft().getQueryResults(); - final FieldList fields = queryResults.getSchema().getFields(); - - return StreamSupport - .stream(queryResults.iterateAll().spliterator(), false) - .map(row -> { - final Map jsonMap = Maps.newHashMap(); - for (final Field field : fields) { - final Object value = getTypedFieldValue(row, field); - if (!isAirbyteColumn(field.getName()) && value != null) { - jsonMap.put(field.getName(), value); - } - } - return jsonMap; - }) - .map(Jsons::jsonNode) - 
.collect(Collectors.toList()); - } - - private boolean isAirbyteColumn(final String name) { - if (AIRBYTE_COLUMNS.contains(name)) { - return true; - } - return name.startsWith("_airbyte") && name.endsWith("_hashid"); - } - - private Object getTypedFieldValue(final FieldValueList row, final Field field) { - final FieldValue fieldValue = row.get(field.getName()); - if (fieldValue.getValue() != null) { - return switch (field.getType().getStandardType()) { - case FLOAT64, NUMERIC -> fieldValue.getDoubleValue(); - case INT64 -> fieldValue.getNumericValue().intValue(); - case STRING -> fieldValue.getStringValue(); - case BOOL -> fieldValue.getBooleanValue(); - case STRUCT -> fieldValue.getRecordValue().toString(); - default -> fieldValue.getValue(); - }; - } else { - return null; - } - } +public class BigQueryDenormalizedGcsDestinationAcceptanceTest extends BigQueryDenormalizedDestinationAcceptanceTest { @Override - protected void setup(final TestDestinationEnv testEnv) throws Exception { - if (!Files.exists(CREDENTIALS_PATH)) { - throw new IllegalStateException( - "Must provide path to a big query credentials file. By default {module-root}/" + CREDENTIALS_PATH - + ". Override by setting setting path with the CREDENTIALS_PATH constant."); - } + protected JsonNode createConfig() throws IOException { + final String credentialsJsonString = Files.readString(CREDENTIALS_PATH); - final String fullConfigFromSecretFileAsString = Files.readString(CREDENTIALS_PATH); - - final JsonNode fullConfigFromSecretFileJson = Jsons.deserialize(fullConfigFromSecretFileAsString); + final JsonNode fullConfigFromSecretFileJson = Jsons.deserialize(credentialsJsonString); final JsonNode bigqueryConfigFromSecretFile = fullConfigFromSecretFileJson.get(BigQueryConsts.BIGQUERY_BASIC_CONFIG); final JsonNode gcsConfigFromSecretFile = fullConfigFromSecretFileJson.get(BigQueryConsts.GCS_CONFIG); @@ -206,107 +41,13 @@ protected void setup(final TestDestinationEnv testEnv) throws Exception { .put(BigQueryConsts.CREDENTIAL, credential) .build()); - config = Jsons.jsonNode(ImmutableMap.builder() + return Jsons.jsonNode(ImmutableMap.builder() .put(BigQueryConsts.CONFIG_PROJECT_ID, projectId) .put(BigQueryConsts.CONFIG_CREDS, bigqueryConfigFromSecretFile.toString()) .put(BigQueryConsts.CONFIG_DATASET_ID, datasetId) .put(BigQueryConsts.CONFIG_DATASET_LOCATION, datasetLocation) .put(BigQueryConsts.LOADING_METHOD, loadingMethod) .build()); - - final ServiceAccountCredentials credentials = ServiceAccountCredentials - .fromStream(new ByteArrayInputStream(bigqueryConfigFromSecretFile.toString().getBytes())); - - bigquery = BigQueryOptions.newBuilder() - .setProjectId(config.get(CONFIG_PROJECT_ID).asText()) - .setCredentials(credentials) - .build() - .getService(); - - final DatasetInfo datasetInfo = - DatasetInfo.newBuilder(config.get(CONFIG_DATASET_ID).asText()).setLocation(config.get(CONFIG_DATASET_LOCATION).asText()).build(); - dataset = bigquery.create(datasetInfo); - - tornDown = false; - Runtime.getRuntime() - .addShutdownHook( - new Thread( - () -> { - if (!tornDown) { - tearDownBigQuery(); - } - })); - } - - @Override - protected void tearDown(final TestDestinationEnv testEnv) { - // gcs tmp files are supposed to be removed automatically by consumer - tearDownBigQuery(); - } - - private void tearDownBigQuery() { - // allows deletion of a dataset that has contents - final BigQuery.DatasetDeleteOption option = BigQuery.DatasetDeleteOption.deleteContents(); - - final boolean success = bigquery.delete(dataset.getDatasetId(), option); - 
if (success) { - LOGGER.info("BQ Dataset " + dataset + " deleted..."); - } else { - LOGGER.info("BQ Dataset cleanup for " + dataset + " failed!"); - } - - tornDown = true; - } - - // todo (cgardens) - figure out how to share these helpers. they are currently copied from - // BigQueryDestination. - private static ImmutablePair executeQuery(final BigQuery bigquery, final QueryJobConfiguration queryConfig) { - final JobId jobId = JobId.of(UUID.randomUUID().toString()); - final Job queryJob = bigquery.create(JobInfo.newBuilder(queryConfig).setJobId(jobId).build()); - return executeQuery(queryJob); - } - - private static ImmutablePair executeQuery(final Job queryJob) { - final Job completedJob = waitForQuery(queryJob); - if (completedJob == null) { - throw new RuntimeException("Job no longer exists"); - } else if (completedJob.getStatus().getError() != null) { - // You can also look at queryJob.getStatus().getExecutionErrors() for all - // errors, not just the latest one. - return ImmutablePair.of(null, (completedJob.getStatus().getError().toString())); - } - - return ImmutablePair.of(completedJob, null); - } - - private static Job waitForQuery(final Job queryJob) { - try { - return queryJob.waitFor(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - /** - * Verify that the integration successfully writes normalized records successfully (without actually - * running the normalization module) Tests a wide variety of messages an schemas (aspirationally, - * anyway). - */ - @ParameterizedTest - @ArgumentsSource(DataArgumentsProvider.class) - public void testSyncNormalizedWithoutNormalization(final String messagesFilename, final String catalogFilename) throws Exception { - final AirbyteCatalog catalog = Jsons.deserialize(MoreResources.readResource(catalogFilename), AirbyteCatalog.class); - final ConfiguredAirbyteCatalog configuredCatalog = CatalogHelpers.toDefaultConfiguredCatalog(catalog); - final List messages = MoreResources.readResource(messagesFilename).lines() - .map(record -> Jsons.deserialize(record, AirbyteMessage.class)).collect(Collectors.toList()); - - final JsonNode config = getConfig(); - // don't run normalization though - runSyncAndVerifyStateOutput(config, messages, configuredCatalog, false); - - final String defaultSchema = getDefaultSchema(config); - final List actualMessages = retrieveNormalizedRecords(catalog, defaultSchema); - assertSameMessages(messages, actualMessages, true); } } diff --git a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGcsDestinationTest.java b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGcsDestinationTest.java index 032b32d165c4..7654c3941ef0 100644 --- a/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGcsDestinationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery-denormalized/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDenormalizedGcsDestinationTest.java @@ -51,6 +51,7 @@ import io.airbyte.protocol.models.SyncMode; import java.io.ByteArrayInputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.time.Instant; @@ -117,7 +118,7 @@ void setup(final TestInfo 
info) throws IOException { final String projectId = credentialsJson.get(BigQueryConsts.CONFIG_PROJECT_ID).asText(); final ServiceAccountCredentials credentials = - ServiceAccountCredentials.fromStream(new ByteArrayInputStream(credentialsJson.toString().getBytes())); + ServiceAccountCredentials.fromStream(new ByteArrayInputStream(credentialsJson.toString().getBytes(StandardCharsets.UTF_8))); bigquery = BigQueryOptions.newBuilder() .setProjectId(projectId) .setCredentials(credentials) diff --git a/airbyte-integrations/connectors/destination-bigquery/Dockerfile b/airbyte-integrations/connectors/destination-bigquery/Dockerfile index 57925e21f75d..5642239cba1c 100644 --- a/airbyte-integrations/connectors/destination-bigquery/Dockerfile +++ b/airbyte-integrations/connectors/destination-bigquery/Dockerfile @@ -17,5 +17,5 @@ ENV ENABLE_SENTRY true COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.6.11 +LABEL io.airbyte.version=0.6.12 LABEL io.airbyte.name=airbyte/destination-bigquery diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryConsts.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryConsts.java index 12510288c81e..e3dfd07aa5b8 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryConsts.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryConsts.java @@ -25,6 +25,8 @@ public class BigQueryConsts { public static final String KEEP_GCS_FILES_VAL = "Keep all tmp files in GCS"; public static final String PART_SIZE = "part_size_mb"; + public static final String NAMESPACE_PREFIX = "n"; + // tests public static final String BIGQUERY_BASIC_CONFIG = "basic_bigquery_config"; public static final String GCS_CONFIG = "gcs_config"; diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQuerySQLNameTransformer.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQuerySQLNameTransformer.java index 4e8ea1b158ae..10d3ec442274 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQuerySQLNameTransformer.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQuerySQLNameTransformer.java @@ -10,12 +10,35 @@ public class BigQuerySQLNameTransformer extends StandardNameTransformer { @Override public String convertStreamName(final String input) { - String result = super.convertStreamName(input); + if (input == null) { + return null; + } + + final String result = super.convertStreamName(input); if (!result.substring(0, 1).matches("[A-Za-z_]")) { // has to start with a letter or _ - result = "_" + result; + return "_" + result; } return result; } + /** + * BigQuery allows a number to be the first character of a namespace. Datasets that begin with an + * underscore are hidden databases, and we cannot query .INFORMATION_SCHEMA. + * So we append a letter instead of underscore for normalization. 
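(Editorial aside.) As an illustration of the dataset-naming rule described in this comment and implemented by the getNamespace override just below: stream (table) names still receive a leading underscore when they start with a digit, while namespaces keep the digit and only receive the letter prefix when they would otherwise start with an underscore. A minimal sketch of the expected behaviour, assuming the standard transformer preserves underscores; the sample inputs are illustrative only.

import io.airbyte.integrations.destination.bigquery.BigQuerySQLNameTransformer;

public class NamespaceNormalizationPreview {

  public static void main(final String[] args) {
    final BigQuerySQLNameTransformer transformer = new BigQuerySQLNameTransformer();

    // Table names may not start with a digit, so convertStreamName prepends an underscore.
    System.out.println(transformer.convertStreamName("99namespace")); // expected: _99namespace

    // Dataset names may start with a digit, so getNamespace leaves the value unchanged.
    System.out.println(transformer.getNamespace("99namespace")); // expected: 99namespace

    // A namespace that normalizes to a leading underscore would become a hidden dataset,
    // so the letter prefix from BigQueryConsts.NAMESPACE_PREFIX is prepended instead.
    System.out.println(transformer.getNamespace("_hidden_namespace")); // expected: n_hidden_namespace
  }

}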
+ * Reference: https://cloud.google.com/bigquery/docs/datasets#dataset-naming + */ + @Override + public String getNamespace(final String input) { + if (input == null) { + return null; + } + + final String normalizedName = super.convertStreamName(input); + if (!normalizedName.substring(0, 1).matches("[A-Za-z0-9]")) { + return BigQueryConsts.NAMESPACE_PREFIX + normalizedName; + } + return normalizedName; + } + } diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java index 2c7a3dddd583..0f657d9282e2 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/BigQueryUtils.java @@ -46,6 +46,7 @@ public class BigQueryUtils { private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryUtils.class); private static final String BIG_QUERY_DATETIME_FORMAT = "yyyy-MM-dd HH:mm:ss.SSSSSS"; + private static final BigQuerySQLNameTransformer NAME_TRANSFORMER = new BigQuerySQLNameTransformer(); public static ImmutablePair executeQuery(final BigQuery bigquery, final QueryJobConfiguration queryConfig) { final JobId jobId = JobId.of(UUID.randomUUID().toString()); @@ -162,6 +163,9 @@ public static JsonNode getGcsAvroJsonNodeConfig(final JsonNode config) { return gcsJsonNode; } + /** + * @return a default schema name based on the config. + */ public static String getDatasetId(final JsonNode config) { String datasetId = config.get(BigQueryConsts.CONFIG_DATASET_ID).asText(); @@ -233,12 +237,9 @@ public static void transformJsonDateTimeToBigDataFormat(List dateTimeFie } public static String getSchema(final JsonNode config, final ConfiguredAirbyteStream stream) { - final String defaultSchema = getDatasetId(config); final String srcNamespace = stream.getStream().getNamespace(); - if (srcNamespace == null) { - return defaultSchema; - } - return srcNamespace; + final String schemaName = srcNamespace == null ? 
getDatasetId(config) : srcNamespace; + return NAME_TRANSFORMER.getNamespace(schemaName); } public static JobInfo.WriteDisposition getWriteDisposition(final DestinationSyncMode syncMode) { diff --git a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/uploader/BigQueryUploaderFactory.java b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/uploader/BigQueryUploaderFactory.java index 39d45f796f96..9083c5cdc558 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/uploader/BigQueryUploaderFactory.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/main/java/io/airbyte/integrations/destination/bigquery/uploader/BigQueryUploaderFactory.java @@ -29,17 +29,14 @@ import java.sql.Timestamp; import java.util.HashSet; import java.util.Set; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; public class BigQueryUploaderFactory { - private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryUploaderFactory.class); - public static AbstractBigQueryUploader getUploader(final UploaderConfig uploaderConfig) throws IOException { - final String schemaName = - BigQueryUtils.getSchema(uploaderConfig.getConfig(), uploaderConfig.getConfigStream()); + final String schemaName = BigQueryUtils.getSchema( + uploaderConfig.getConfig(), + uploaderConfig.getConfigStream()); final String datasetLocation = BigQueryUtils.getDatasetLocation(uploaderConfig.getConfig()); final Set existingSchemas = new HashSet<>(); diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationAcceptanceTest.java index e5272c974743..806064854746 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationAcceptanceTest.java @@ -4,6 +4,8 @@ package io.airbyte.integrations.destination.bigquery; +import static org.junit.jupiter.api.Assertions.assertEquals; + import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.auth.oauth2.ServiceAccountCredentials; @@ -25,6 +27,7 @@ import io.airbyte.commons.json.Jsons; import io.airbyte.commons.string.Strings; import io.airbyte.integrations.base.JavaBaseConstants; +import io.airbyte.integrations.destination.NamingConventionTransformer; import io.airbyte.integrations.destination.StandardNameTransformer; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; import java.io.ByteArrayInputStream; @@ -35,6 +38,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.StreamSupport; @@ -44,6 +48,7 @@ public class BigQueryDestinationAcceptanceTest extends DestinationAcceptanceTest { + private static final NamingConventionTransformer NAME_TRANSFORMER = new BigQuerySQLNameTransformer(); private static final Logger LOGGER = 
LoggerFactory.getLogger(BigQueryDestinationAcceptanceTest.class); protected static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); @@ -70,7 +75,7 @@ protected JsonNode getConfig() { } @Override - protected JsonNode getFailCheckConfig() throws Exception { + protected JsonNode getFailCheckConfig() { ((ObjectNode) config).put(CONFIG_PROJECT_ID, "fake"); return config; } @@ -90,6 +95,30 @@ protected boolean implementsNamespaces() { return true; } + @Override + protected boolean supportNamespaceTest() { + return true; + } + + @Override + protected Optional getNameTransformer() { + return Optional.of(NAME_TRANSFORMER); + } + + @Override + protected void assertNamespaceNormalization(final String testCaseId, + final String expectedNormalizedNamespace, + final String actualNormalizedNamespace) { + final String message = String.format("Test case %s failed; if this is expected, please override assertNamespaceNormalization", testCaseId); + if (testCaseId.equals("S3A-1")) { + // bigquery allows namespace starting with a number, and prepending underscore + // will hide the dataset, so we don't do it as we do for other destinations + assertEquals("99namespace", actualNormalizedNamespace, message); + } else { + assertEquals(expectedNormalizedNamespace, actualNormalizedNamespace, message); + } + } + @Override protected String getDefaultSchema(final JsonNode config) { return config.get(CONFIG_DATASET_ID).asText(); diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java index fb1089ead4e9..aafe47a63d85 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java +++ b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryDestinationTest.java @@ -34,7 +34,6 @@ import io.airbyte.integrations.base.Destination; import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.NamingConventionTransformer; -import io.airbyte.integrations.destination.StandardNameTransformer; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; import io.airbyte.protocol.models.AirbyteMessage; @@ -73,42 +72,56 @@ class BigQueryDestinationTest { - private static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); + protected static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryDestinationTest.class); + private static final String DATASET_NAME_PREFIX = "bq_dest_integration_test"; - private static final String BIG_QUERY_CLIENT_CHUNK_SIZE = "big_query_client_buffer_size_mb"; + protected static final String DATASET_LOCATION = "EU"; + protected static final String BIG_QUERY_CLIENT_CHUNK_SIZE = "big_query_client_buffer_size_mb"; private static final Instant NOW = Instant.now(); - private static final String USERS_STREAM_NAME = "users"; - private static final String TASKS_STREAM_NAME = "tasks"; - private static final AirbyteMessage MESSAGE_USERS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + protected static final String USERS_STREAM_NAME = "users"; + protected 
static final String TASKS_STREAM_NAME = "tasks"; + protected static final AirbyteMessage MESSAGE_USERS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) .withData(Jsons.jsonNode(ImmutableMap.builder().put("name", "john").put("id", "10").build())) .withEmittedAt(NOW.toEpochMilli())); - private static final AirbyteMessage MESSAGE_USERS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + protected static final AirbyteMessage MESSAGE_USERS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) .withData(Jsons.jsonNode(ImmutableMap.builder().put("name", "susan").put("id", "30").build())) .withEmittedAt(NOW.toEpochMilli())); - private static final AirbyteMessage MESSAGE_TASKS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + protected static final AirbyteMessage MESSAGE_TASKS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(TASKS_STREAM_NAME) .withData(Jsons.jsonNode(ImmutableMap.builder().put("goal", "announce the game.").build())) .withEmittedAt(NOW.toEpochMilli())); - private static final AirbyteMessage MESSAGE_TASKS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + protected static final AirbyteMessage MESSAGE_TASKS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) .withRecord(new AirbyteRecordMessage().withStream(TASKS_STREAM_NAME) .withData(Jsons.jsonNode(ImmutableMap.builder().put("goal", "ship some code.").build())) .withEmittedAt(NOW.toEpochMilli())); - private static final AirbyteMessage MESSAGE_STATE = new AirbyteMessage().withType(AirbyteMessage.Type.STATE) + protected static final AirbyteMessage MESSAGE_STATE = new AirbyteMessage().withType(AirbyteMessage.Type.STATE) .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.builder().put("checkpoint", "now!").build()))); - private static final NamingConventionTransformer NAMING_RESOLVER = new StandardNameTransformer(); + private static final NamingConventionTransformer NAMING_RESOLVER = new BigQuerySQLNameTransformer(); - private JsonNode config; + protected JsonNode config; + protected BigQuery bigquery; + protected Dataset dataset; + protected ConfiguredAirbyteCatalog catalog; + protected boolean tornDown = true; - private BigQuery bigquery; - private Dataset dataset; - private ConfiguredAirbyteCatalog catalog; - - private boolean tornDown = true; + private static Stream datasetIdResetterProvider() { + // parameterized test with two dataset-id patterns: `dataset_id` and `project-id:dataset_id` + return Stream.of( + Arguments.arguments(new DatasetIdResetter(config -> { + })), + Arguments.arguments(new DatasetIdResetter( + config -> { + final String projectId = config.get(BigQueryConsts.CONFIG_PROJECT_ID).asText(); + final String datasetId = config.get(BigQueryConsts.CONFIG_DATASET_ID).asText(); + ((ObjectNode) config).put(BigQueryConsts.CONFIG_DATASET_ID, + String.format("%s:%s", projectId, datasetId)); + }))); + } @BeforeEach void setup(final TestInfo info) throws IOException { @@ -133,8 +146,7 @@ void setup(final TestInfo info) throws IOException { .build() .getService(); - final String datasetId = Strings.addRandomSuffix("111airbyte_tests", "_", 8); - final String datasetLocation = "EU"; + final String datasetId = Strings.addRandomSuffix(DATASET_NAME_PREFIX, "_", 8); MESSAGE_USERS1.getRecord().setNamespace(datasetId); 
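(Editorial aside.) The datasetIdResetterProvider comment above parameterizes the tests over both ways the target dataset can be spelled in the connector config: a bare dataset id, or a project-qualified `project-id:dataset_id`. A rough sketch of the two config shapes, using placeholder project and dataset names rather than the connector's actual config builder:

import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;

public class DatasetIdPatterns {

  public static void main(final String[] args) {
    // Bare pattern: dataset_id stands alone and the project comes from project_id.
    final ObjectNode plain = JsonNodeFactory.instance.objectNode()
        .put("project_id", "my-project") // placeholder project id
        .put("dataset_id", "airbyte_tests_abc123"); // placeholder dataset id

    // Qualified pattern: dataset_id carries the project id, i.e. `project-id:dataset_id`.
    final ObjectNode qualified = plain.deepCopy()
        .put("dataset_id", "my-project:airbyte_tests_abc123");

    System.out.println(plain);
    System.out.println(qualified);
  }

}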
MESSAGE_USERS2.getRecord().setNamespace(datasetId); MESSAGE_TASKS1.getRecord().setNamespace(datasetId); @@ -142,33 +154,33 @@ void setup(final TestInfo info) throws IOException { catalog = new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList( CatalogHelpers.createConfiguredAirbyteStream(USERS_STREAM_NAME, datasetId, - io.airbyte.protocol.models.Field.of("name", JsonSchemaType.STRING), - io.airbyte.protocol.models.Field - .of("id", JsonSchemaType.STRING)) + io.airbyte.protocol.models.Field.of("name", JsonSchemaType.STRING), + io.airbyte.protocol.models.Field + .of("id", JsonSchemaType.STRING)) .withDestinationSyncMode(DestinationSyncMode.APPEND), CatalogHelpers.createConfiguredAirbyteStream(TASKS_STREAM_NAME, datasetId, Field.of("goal", JsonSchemaType.STRING)))); - final DatasetInfo datasetInfo = DatasetInfo.newBuilder(datasetId).setLocation(datasetLocation).build(); + final DatasetInfo datasetInfo = DatasetInfo.newBuilder(datasetId).setLocation(DATASET_LOCATION).build(); dataset = bigquery.create(datasetInfo); config = Jsons.jsonNode(ImmutableMap.builder() .put(BigQueryConsts.CONFIG_PROJECT_ID, projectId) .put(BigQueryConsts.CONFIG_CREDS, credentialsJson.toString()) .put(BigQueryConsts.CONFIG_DATASET_ID, datasetId) - .put(BigQueryConsts.CONFIG_DATASET_LOCATION, datasetLocation) + .put(BigQueryConsts.CONFIG_DATASET_LOCATION, DATASET_LOCATION) .put(BIG_QUERY_CLIENT_CHUNK_SIZE, 10) .build()); tornDown = false; - Runtime.getRuntime() - .addShutdownHook( - new Thread( - () -> { - if (!tornDown) { - tearDownBigQuery(); - } - })); + addShutdownHook(); + } + protected void addShutdownHook() { + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + if (!tornDown) { + tearDownBigQuery(); + } + })); } @AfterEach @@ -180,7 +192,7 @@ void tearDown(final TestInfo info) { tearDownBigQuery(); } - private void tearDownBigQuery() { + protected void tearDownBigQuery() { // allows deletion of a dataset that has contents final BigQuery.DatasetDeleteOption option = BigQuery.DatasetDeleteOption.deleteContents(); @@ -385,7 +397,7 @@ private boolean isTablePartitioned(final BigQuery bigquery, final Dataset datase return false; } - private static class DatasetIdResetter { + protected static class DatasetIdResetter { private final Consumer consumer; @@ -399,17 +411,4 @@ public void accept(final JsonNode config) { } - private static Stream datasetIdResetterProvider() { - // parameterized test with two dataset-id patterns: `dataset_id` and `project-id:dataset_id` - return Stream.of( - Arguments.arguments(new DatasetIdResetter(config -> {})), - Arguments.arguments(new DatasetIdResetter( - config -> { - final String projectId = ((ObjectNode) config).get(BigQueryConsts.CONFIG_PROJECT_ID).asText(); - final String datasetId = ((ObjectNode) config).get(BigQueryConsts.CONFIG_DATASET_ID).asText(); - ((ObjectNode) config).put(BigQueryConsts.CONFIG_DATASET_ID, - String.format("%s:%s", projectId, datasetId)); - }))); - } - } diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryGcsDestinationTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryGcsDestinationTest.java index 766b0f1f8be8..1394968be537 100644 --- a/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryGcsDestinationTest.java +++ 
b/airbyte-integrations/connectors/destination-bigquery/src/test-integration/java/io/airbyte/integrations/destination/bigquery/BigQueryGcsDestinationTest.java @@ -4,105 +4,44 @@ package io.airbyte.integrations.destination.bigquery; -import static java.util.stream.Collectors.toList; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.spy; - import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; import com.amazonaws.services.s3.model.S3ObjectSummary; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.auth.oauth2.ServiceAccountCredentials; -import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryOptions; -import com.google.cloud.bigquery.Dataset; import com.google.cloud.bigquery.DatasetInfo; -import com.google.cloud.bigquery.QueryJobConfiguration; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import io.airbyte.commons.json.Jsons; -import io.airbyte.commons.resources.MoreResources; import io.airbyte.commons.string.Strings; -import io.airbyte.integrations.base.AirbyteMessageConsumer; -import io.airbyte.integrations.base.Destination; -import io.airbyte.integrations.base.JavaBaseConstants; -import io.airbyte.integrations.destination.NamingConventionTransformer; -import io.airbyte.integrations.destination.StandardNameTransformer; import io.airbyte.integrations.destination.gcs.GcsDestinationConfig; import io.airbyte.integrations.destination.gcs.GcsS3Helper; -import io.airbyte.protocol.models.AirbyteConnectionStatus; -import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; -import io.airbyte.protocol.models.AirbyteMessage; -import io.airbyte.protocol.models.AirbyteRecordMessage; -import io.airbyte.protocol.models.AirbyteStateMessage; -import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import io.airbyte.protocol.models.ConfiguredAirbyteStream; -import io.airbyte.protocol.models.ConnectorSpecification; import io.airbyte.protocol.models.Field; import io.airbyte.protocol.models.JsonSchemaType; import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.file.Files; -import java.nio.file.Path; -import java.time.Instant; import java.util.LinkedList; import java.util.List; -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -class BigQueryGcsDestinationTest { +class BigQueryGcsDestinationTest extends BigQueryDestinationTest { private static final Logger LOGGER = LoggerFactory.getLogger(BigQueryGcsDestinationTest.class); - private static final Path CREDENTIALS_PATH = Path.of("secrets/credentials.json"); - - private static final String BIG_QUERY_CLIENT_CHUNK_SIZE = "big_query_client_buffer_size_mb"; - private static final Instant NOW = Instant.now(); - private static final String USERS_STREAM_NAME = "users"; - private static final String TASKS_STREAM_NAME = "tasks"; - 
private static final AirbyteMessage MESSAGE_USERS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) - .withData(Jsons.jsonNode(ImmutableMap.builder().put("name", "john").put("id", "10").build())) - .withEmittedAt(NOW.toEpochMilli())); - private static final AirbyteMessage MESSAGE_USERS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) - .withData(Jsons.jsonNode(ImmutableMap.builder().put("name", "susan").put("id", "30").build())) - .withEmittedAt(NOW.toEpochMilli())); - private static final AirbyteMessage MESSAGE_TASKS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(TASKS_STREAM_NAME) - .withData(Jsons.jsonNode(ImmutableMap.builder().put("goal", "announce the game.").build())) - .withEmittedAt(NOW.toEpochMilli())); - private static final AirbyteMessage MESSAGE_TASKS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) - .withRecord(new AirbyteRecordMessage().withStream(TASKS_STREAM_NAME) - .withData(Jsons.jsonNode(ImmutableMap.builder().put("goal", "ship some code.").build())) - .withEmittedAt(NOW.toEpochMilli())); - private static final AirbyteMessage MESSAGE_STATE = new AirbyteMessage().withType(AirbyteMessage.Type.STATE) - .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.builder().put("checkpoint", "now!").build()))); - - private static final NamingConventionTransformer NAMING_RESOLVER = new StandardNameTransformer(); - private JsonNode config; + private static final String DATASET_NAME_PREFIX = "bq_gcs_dest_integration_test"; - private BigQuery bigquery; private AmazonS3 s3Client; - private Dataset dataset; - private ConfiguredAirbyteCatalog catalog; - - private boolean tornDown = true; + @Override @BeforeEach void setup(final TestInfo info) throws IOException { if (info.getDisplayName().equals("testSpec()")) { @@ -127,8 +66,7 @@ void setup(final TestInfo info) throws IOException { .build() .getService(); - final String datasetId = Strings.addRandomSuffix("airbyte_tests", "_", 8); - final String datasetLocation = "EU"; + final String datasetId = Strings.addRandomSuffix(DATASET_NAME_PREFIX, "_", 8); MESSAGE_USERS1.getRecord().setNamespace(datasetId); MESSAGE_USERS2.getRecord().setNamespace(datasetId); MESSAGE_TASKS1.getRecord().setNamespace(datasetId); @@ -141,7 +79,7 @@ void setup(final TestInfo info) throws IOException { .of("id", JsonSchemaType.STRING)), CatalogHelpers.createConfiguredAirbyteStream(TASKS_STREAM_NAME, datasetId, Field.of("goal", JsonSchemaType.STRING)))); - final DatasetInfo datasetInfo = DatasetInfo.newBuilder(datasetId).setLocation(datasetLocation).build(); + final DatasetInfo datasetInfo = DatasetInfo.newBuilder(datasetId).setLocation(DATASET_LOCATION).build(); dataset = bigquery.create(datasetInfo); final JsonNode credentialFromSecretFile = credentialsGcsJson.get(BigQueryConsts.CREDENTIAL); @@ -163,7 +101,7 @@ void setup(final TestInfo info) throws IOException { .put(BigQueryConsts.CONFIG_PROJECT_ID, projectId) .put(BigQueryConsts.CONFIG_CREDS, credentialsJson.toString()) .put(BigQueryConsts.CONFIG_DATASET_ID, datasetId) - .put(BigQueryConsts.CONFIG_DATASET_LOCATION, datasetLocation) + .put(BigQueryConsts.CONFIG_DATASET_LOCATION, DATASET_LOCATION) .put(BigQueryConsts.LOADING_METHOD, loadingMethod) .put(BIG_QUERY_CLIENT_CHUNK_SIZE, 10) .build()); @@ -173,18 +111,11 @@ void setup(final TestInfo info) throws 
IOException { this.s3Client = GcsS3Helper.getGcsS3Client(gcsDestinationConfig); tornDown = false; - Runtime.getRuntime() - .addShutdownHook( - new Thread( - () -> { - if (!tornDown) { - tearDownBigQuery(); - } - })); - + addShutdownHook(); } @AfterEach + @Override void tearDown(final TestInfo info) { if (info.getDisplayName().equals("testSpec()")) { return; @@ -220,134 +151,12 @@ protected void tearDownGcs() { } } - private void tearDownBigQuery() { - // allows deletion of a dataset that has contents - final BigQuery.DatasetDeleteOption option = BigQuery.DatasetDeleteOption.deleteContents(); - - final boolean success = bigquery.delete(dataset.getDatasetId(), option); - if (success) { - LOGGER.info("BQ Dataset " + dataset + " deleted..."); - } else { - LOGGER.info("BQ Dataset cleanup for " + dataset + " failed!"); - } - - tornDown = true; - } - - @Test - void testSpec() throws Exception { - final ConnectorSpecification actual = new BigQueryDestination().spec(); - final String resourceString = MoreResources.readResource("spec.json"); - final ConnectorSpecification expected = Jsons.deserialize(resourceString, ConnectorSpecification.class); - - assertEquals(expected, actual); - } - - @Test - void testCheckSuccess() { - final AirbyteConnectionStatus actual = new BigQueryDestination().check(config); - final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); - assertEquals(expected, actual); - } - @Test - void testCheckFailure() { - ((ObjectNode) config).put(BigQueryConsts.CONFIG_PROJECT_ID, "fake"); - final AirbyteConnectionStatus actual = new BigQueryDestination().check(config); - final String actualMessage = actual.getMessage(); - LOGGER.info("Checking expected failure message:" + actualMessage); - assertTrue(actualMessage.contains("Access Denied:")); - final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.FAILED).withMessage(""); - assertEquals(expected, actual.withMessage("")); + @Override + void testWritePartitionOverUnpartitioned(final DatasetIdResetter resetDatasetId) throws Exception { + // This test is skipped for GCS staging mode because we load Avro data to BigQuery, but do not + // use the use_avro_logical_types flag to automatically convert the Avro logical timestamp + // type. Therefore, the emission timestamp, which should be used as the partition field, has + // an incorrect type. 
See https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-avro#logical_types } - - @Test - void testWriteSuccess() throws Exception { - final BigQueryDestination destination = new BigQueryDestination(); - final AirbyteMessageConsumer consumer = destination.getConsumer(config, catalog, Destination::defaultOutputRecordCollector); - - consumer.accept(MESSAGE_USERS1); - consumer.accept(MESSAGE_TASKS1); - consumer.accept(MESSAGE_USERS2); - consumer.accept(MESSAGE_TASKS2); - consumer.accept(MESSAGE_STATE); - consumer.close(); - - final List usersActual = retrieveRecords(NAMING_RESOLVER.getRawTableName(USERS_STREAM_NAME)); - final List expectedUsersJson = Lists.newArrayList(MESSAGE_USERS1.getRecord().getData(), MESSAGE_USERS2.getRecord().getData()); - assertEquals(expectedUsersJson.size(), usersActual.size()); - assertTrue(expectedUsersJson.containsAll(usersActual) && usersActual.containsAll(expectedUsersJson)); - - final List tasksActual = retrieveRecords(NAMING_RESOLVER.getRawTableName(TASKS_STREAM_NAME)); - final List expectedTasksJson = Lists.newArrayList(MESSAGE_TASKS1.getRecord().getData(), MESSAGE_TASKS2.getRecord().getData()); - assertEquals(expectedTasksJson.size(), tasksActual.size()); - assertTrue(expectedTasksJson.containsAll(tasksActual) && tasksActual.containsAll(expectedTasksJson)); - - assertTmpTablesNotPresent(catalog.getStreams() - .stream() - .map(ConfiguredAirbyteStream::getStream) - .map(AirbyteStream::getName) - .collect(Collectors.toList())); - } - - @Test - void testWriteFailure() throws Exception { - // hack to force an exception to be thrown from within the consumer. - final AirbyteMessage spiedMessage = spy(MESSAGE_USERS1); - doThrow(new RuntimeException()).when(spiedMessage).getRecord(); - - final AirbyteMessageConsumer consumer = spy(new BigQueryDestination().getConsumer(config, catalog, Destination::defaultOutputRecordCollector)); - - assertThrows(RuntimeException.class, () -> consumer.accept(spiedMessage)); - consumer.accept(MESSAGE_USERS2); - - final List tableNames = catalog.getStreams() - .stream() - .map(ConfiguredAirbyteStream::getStream) - .map(AirbyteStream::getName) - .collect(toList()); - assertTmpTablesNotPresent(catalog.getStreams() - .stream() - .map(ConfiguredAirbyteStream::getStream) - .map(AirbyteStream::getName) - .collect(Collectors.toList())); - // assert that no tables were created. - assertTrue(fetchNamesOfTablesInDb().stream().noneMatch(tableName -> tableNames.stream().anyMatch(tableName::startsWith))); - } - - private Set fetchNamesOfTablesInDb() throws InterruptedException { - final QueryJobConfiguration queryConfig = QueryJobConfiguration - .newBuilder(String.format("SELECT * FROM %s.INFORMATION_SCHEMA.TABLES;", dataset.getDatasetId().getDataset())) - .setUseLegacySql(false) - .build(); - - return StreamSupport - .stream(BigQueryUtils.executeQuery(bigquery, queryConfig).getLeft().getQueryResults().iterateAll().spliterator(), false) - .map(v -> v.get("TABLE_NAME").getStringValue()).collect(Collectors.toSet()); - } - - private void assertTmpTablesNotPresent(final List tableNames) throws InterruptedException { - final Set tmpTableNamePrefixes = tableNames.stream().map(name -> name + "_").collect(Collectors.toSet()); - final Set finalTableNames = tableNames.stream().map(name -> name + "_raw").collect(Collectors.toSet()); - // search for table names that have the tmp table prefix but are not raw tables. 
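// The override above skips testWritePartitionOverUnpartitioned because the GCS staging path
// loads Avro into BigQuery without the use_avro_logical_types flag, so the emitted-at timestamp
// loses its logical type and cannot be used as a partition field. For illustration only, a minimal
// sketch against the google-cloud-bigquery client (not code from this connector) of an Avro load
// job with that flag enabled; the dataset, table, GCS URI and column name below are placeholders.
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.FormatOptions;
import com.google.cloud.bigquery.Job;
import com.google.cloud.bigquery.JobInfo;
import com.google.cloud.bigquery.LoadJobConfiguration;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TimePartitioning;

class AvroLogicalTypesLoadSketch {

  public static void main(final String[] args) throws InterruptedException {
    final BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
    final TableId tableId = TableId.of("my_dataset", "_airbyte_raw_users"); // placeholder names
    final String gcsUri = "gs://my-staging-bucket/path/to/records.avro";    // placeholder URI

    final LoadJobConfiguration loadConfig = LoadJobConfiguration.builder(tableId, gcsUri)
        .setFormatOptions(FormatOptions.avro())
        // Keep Avro logical types (e.g. timestamp-micros) as BigQuery TIMESTAMP columns.
        .setUseAvroLogicalTypes(true)
        // With a real TIMESTAMP column, the table can be partitioned by day on that field.
        .setTimePartitioning(TimePartitioning.newBuilder(TimePartitioning.Type.DAY)
            .setField("_airbyte_emitted_at")
            .build())
        .build();

    final Job job = bigquery.create(JobInfo.of(loadConfig)).waitFor();
    if (job == null || job.getStatus().getError() != null) {
      throw new IllegalStateException("Avro load job did not complete successfully");
    }
  }
}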
- assertTrue(fetchNamesOfTablesInDb() - .stream() - .filter(tableName -> !finalTableNames.contains(tableName)) - .noneMatch(tableName -> tmpTableNamePrefixes.stream().anyMatch(tableName::startsWith))); - } - - private List retrieveRecords(final String tableName) throws Exception { - final QueryJobConfiguration queryConfig = - QueryJobConfiguration.newBuilder(String.format("SELECT * FROM %s.%s;", dataset.getDatasetId().getDataset(), tableName.toLowerCase())) - .setUseLegacySql(false).build(); - - BigQueryUtils.executeQuery(bigquery, queryConfig); - - return StreamSupport - .stream(BigQueryUtils.executeQuery(bigquery, queryConfig).getLeft().getQueryResults().iterateAll().spliterator(), false) - .map(v -> v.get(JavaBaseConstants.COLUMN_NAME_DATA).getStringValue()) - .map(Jsons::deserialize) - .collect(Collectors.toList()); - } - } diff --git a/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQuerySQLNameTransformerTest.java b/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQuerySQLNameTransformerTest.java new file mode 100644 index 000000000000..7817b9d6d2fc --- /dev/null +++ b/airbyte-integrations/connectors/destination-bigquery/src/test/java/io/airbyte/integrations/destination/bigquery/BigQuerySQLNameTransformerTest.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.bigquery; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.Map; +import org.junit.jupiter.api.Test; + +class BigQuerySQLNameTransformerTest { + + private static final BigQuerySQLNameTransformer INSTANCE = new BigQuerySQLNameTransformer(); + private static final Map RAW_TO_NORMALIZED_IDENTIFIERS = Map.of( + "name-space", "name_space", + "spécial_character", "special_character", + "99namespace", "_99namespace", + "*_namespace", "__namespace", + "_namespace", "_namespace"); + + + private static final Map RAW_TO_NORMALIZED_NAMESPACES = Map.of( + "name-space", "name_space", + "spécial_character", "special_character", + // dataset name is allowed to start with a number + "99namespace", "99namespace", + // dataset name starting with an underscore is hidden, so we prepend a letter + "*_namespace", "n__namespace", + "_namespace", "n_namespace"); + + @Test + public void testGetIdentifier() { + assertNull(INSTANCE.getIdentifier(null)); + assertNull(INSTANCE.convertStreamName(null)); + RAW_TO_NORMALIZED_IDENTIFIERS.forEach((raw, normalized) -> { + assertEquals(normalized, INSTANCE.getIdentifier(raw)); + assertEquals(normalized, INSTANCE.convertStreamName(raw)); + }); + } + + @Test + public void testGetNamespace() { + assertNull(INSTANCE.convertStreamName(null)); + RAW_TO_NORMALIZED_NAMESPACES.forEach((raw, normalized) -> { + assertEquals(normalized, INSTANCE.getNamespace(raw)); + }); + } + +} diff --git a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java index f59bfdcaaf5b..d423eb2f9a6d 100644 --- 
a/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationStrictEncryptAcceptanceTest.java @@ -110,7 +110,7 @@ protected List retrieveRecords(TestDestinationEnv testEnv, private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException { final JdbcDatabase jdbcDB = getDatabase(getConfig()); - return jdbcDB.query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, + return jdbcDB.unsafeQuery(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .collect(Collectors.toList()); } diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java index cf25f5211d54..d897950a23ed 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/ClickhouseDestinationAcceptanceTest.java @@ -101,7 +101,7 @@ protected List retrieveRecords(TestDestinationEnv testEnv, private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException { final JdbcDatabase jdbcDB = getDatabase(getConfig()); - return jdbcDB.query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, + return jdbcDB.unsafeQuery(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .collect(Collectors.toList()); } diff --git a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java index ed50b11027f1..dd6b640fdfb8 100644 --- a/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-clickhouse/src/test-integration/java/io/airbyte/integrations/destination/clickhouse/SshClickhouseDestinationAcceptanceTest.java @@ -103,7 +103,7 @@ private List retrieveRecordsFromTable(final String tableName, final St ClickhouseDestination.HOST_KEY, ClickhouseDestination.PORT_KEY, (CheckedFunction, Exception>) mangledConfig -> getDatabase(mangledConfig) - .query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, + .unsafeQuery(String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .collect(Collectors.toList())); } diff --git 
a/airbyte-integrations/connectors/destination-elasticsearch/src/main/java/io/airbyte/integrations/destination/elasticsearch/ElasticsearchAirbyteMessageConsumerFactory.java b/airbyte-integrations/connectors/destination-elasticsearch/src/main/java/io/airbyte/integrations/destination/elasticsearch/ElasticsearchAirbyteMessageConsumerFactory.java index 391210bc3fab..b254d14a0691 100644 --- a/airbyte-integrations/connectors/destination-elasticsearch/src/main/java/io/airbyte/integrations/destination/elasticsearch/ElasticsearchAirbyteMessageConsumerFactory.java +++ b/airbyte-integrations/connectors/destination-elasticsearch/src/main/java/io/airbyte/integrations/destination/elasticsearch/ElasticsearchAirbyteMessageConsumerFactory.java @@ -13,7 +13,9 @@ import io.airbyte.integrations.base.AirbyteMessageConsumer; import io.airbyte.integrations.destination.buffered_stream_consumer.BufferedStreamConsumer; import io.airbyte.integrations.destination.buffered_stream_consumer.RecordWriter; +import io.airbyte.integrations.destination.record_buffer.InMemoryRecordBufferingStrategy; import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import java.util.HashMap; import java.util.List; @@ -30,7 +32,7 @@ public class ElasticsearchAirbyteMessageConsumerFactory { private static final int MAX_BATCH_SIZE_BYTES = 1024 * 1024 * 1024 / 4; // 256mib private static final ObjectMapper mapper = new ObjectMapper(); - private static AtomicLong recordsWritten = new AtomicLong(0); + private static final AtomicLong recordsWritten = new AtomicLong(0); /** * Holds a mapping of temp to target indices. After closing a sync job, the target index is removed @@ -38,27 +40,26 @@ public class ElasticsearchAirbyteMessageConsumerFactory { */ private static final Map tempIndices = new HashMap<>(); - public static AirbyteMessageConsumer create(Consumer outputRecordCollector, - ElasticsearchConnection connection, - List writeConfigs, - ConfiguredAirbyteCatalog catalog) { + public static AirbyteMessageConsumer create(final Consumer outputRecordCollector, + final ElasticsearchConnection connection, + final List writeConfigs, + final ConfiguredAirbyteCatalog catalog) { return new BufferedStreamConsumer( outputRecordCollector, onStartFunction(connection, writeConfigs), - recordWriterFunction(connection, writeConfigs), + new InMemoryRecordBufferingStrategy(recordWriterFunction(connection, writeConfigs), MAX_BATCH_SIZE_BYTES), onCloseFunction(connection), catalog, - isValidFunction(connection), - MAX_BATCH_SIZE_BYTES); + isValidFunction(connection)); } // is there any json node that wont fit in the index? 
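// The constructor call above now hands the BufferedStreamConsumer an InMemoryRecordBufferingStrategy
// instead of a raw record writer plus a byte threshold. A rough sketch of the new shape, assuming the
// Airbyte classes referenced in this diff; the factory class and method below are illustrative, not an
// actual class in the codebase, and the start/close functions are passed in as opaque parameters.
import io.airbyte.integrations.base.AirbyteMessageConsumer;
import io.airbyte.integrations.destination.buffered_stream_consumer.BufferedStreamConsumer;
import io.airbyte.integrations.destination.buffered_stream_consumer.OnCloseFunction;
import io.airbyte.integrations.destination.buffered_stream_consumer.OnStartFunction;
import io.airbyte.integrations.destination.buffered_stream_consumer.RecordWriter;
import io.airbyte.integrations.destination.record_buffer.InMemoryRecordBufferingStrategy;
import io.airbyte.protocol.models.AirbyteMessage;
import io.airbyte.protocol.models.AirbyteRecordMessage;
import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
import java.util.function.Consumer;

class InMemoryBufferingWiringSketch {

  // Same threshold as this factory: flush whenever the in-memory buffer reaches ~256 MiB.
  private static final int MAX_BATCH_SIZE_BYTES = 1024 * 1024 * 1024 / 4;

  static AirbyteMessageConsumer wire(final Consumer<AirbyteMessage> outputRecordCollector,
                                     final ConfiguredAirbyteCatalog catalog,
                                     final OnStartFunction onStart,
                                     final RecordWriter<AirbyteRecordMessage> recordWriter,
                                     final OnCloseFunction onClose) {
    // Batch sizing and flushing now live in the buffering strategy; the consumer only orchestrates
    // start, buffering, and close.
    return new BufferedStreamConsumer(
        outputRecordCollector,
        onStart,
        new InMemoryRecordBufferingStrategy(recordWriter, MAX_BATCH_SIZE_BYTES),
        onClose,
        catalog,
        jsonNode -> true); // accept every record, as this factory's isValidFunction does
  }
}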
- private static CheckedFunction isValidFunction(ElasticsearchConnection connection) { + private static CheckedFunction isValidFunction(final ElasticsearchConnection connection) { return jsonNode -> true; } - private static CheckedConsumer onCloseFunction(ElasticsearchConnection connection) { + private static CheckedConsumer onCloseFunction(final ElasticsearchConnection connection) { return (hasFailed) -> { if (!tempIndices.isEmpty() && !hasFailed) { @@ -68,13 +69,13 @@ private static CheckedConsumer onCloseFunction(Elasticsearch }; } - private static RecordWriter recordWriterFunction( - ElasticsearchConnection connection, - List writeConfigs) { + private static RecordWriter recordWriterFunction( + final ElasticsearchConnection connection, + final List writeConfigs) { return (pair, records) -> { log.info("writing {} records in bulk operation", records.size()); - var optConfig = writeConfigs.stream() + final var optConfig = writeConfigs.stream() .filter(c -> Objects.equals(c.getStreamName(), pair.getName()) && Objects.equals(c.getNamespace(), pair.getNamespace())) .findFirst(); @@ -82,14 +83,14 @@ private static RecordWriter recordWriterFunction( throw new Exception(String.format("missing write config: %s", pair)); } final var config = optConfig.get(); - BulkResponse response; + final BulkResponse response; if (config.useTempIndex()) { response = connection.indexDocuments(config.getTempIndexName(), records, config); } else { response = connection.indexDocuments(config.getIndexName(), records, config); } if (Objects.nonNull(response) && response.errors()) { - String msg = String.format("failed to write bulk records: %s", mapper.valueToTree(response)); + final String msg = String.format("failed to write bulk records: %s", mapper.valueToTree(response)); throw new Exception(msg); } else { log.info("bulk write took: {}ms", response.took()); @@ -97,9 +98,9 @@ private static RecordWriter recordWriterFunction( }; } - private static VoidCallable onStartFunction(ElasticsearchConnection connection, List writeConfigs) { + private static VoidCallable onStartFunction(final ElasticsearchConnection connection, final List writeConfigs) { return () -> { - for (var config : writeConfigs) { + for (final var config : writeConfigs) { if (config.useTempIndex()) { tempIndices.put(config.getTempIndexName(), config.getIndexName()); connection.deleteIndexIfPresent(config.getTempIndexName()); diff --git a/airbyte-integrations/connectors/destination-elasticsearch/src/test/java/io/airbyte/integrations/destination/elasticsearch/ElasticsearchConnectionTest.java b/airbyte-integrations/connectors/destination-elasticsearch/src/test/java/io/airbyte/integrations/destination/elasticsearch/ElasticsearchConnectionTest.java index 994f73bccb9a..75661fb786aa 100644 --- a/airbyte-integrations/connectors/destination-elasticsearch/src/test/java/io/airbyte/integrations/destination/elasticsearch/ElasticsearchConnectionTest.java +++ b/airbyte-integrations/connectors/destination-elasticsearch/src/test/java/io/airbyte/integrations/destination/elasticsearch/ElasticsearchConnectionTest.java @@ -4,6 +4,7 @@ package io.airbyte.integrations.destination.elasticsearch; +import java.nio.charset.Charset; import java.util.Base64; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -14,46 +15,46 @@ public class ElasticsearchConnectionTest { @Test public void testDefaultHeadersAuthNone() { - var config = new ConnectorConfiguration(); + final var config = new ConnectorConfiguration(); config.setEndpoint(endpoint); 
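// The header tests in this class assert on values of the form "Basic " + base64("user:password")
// and now decode them with an explicit charset. A small standalone sketch, using only the JDK,
// of how such a Basic Authorization header value is built and read back; it is not connector code,
// just an illustration of the strings these assertions compare against.
import java.nio.charset.StandardCharsets;
import java.util.Base64;

class BasicAuthHeaderSketch {

  public static void main(final String[] args) {
    final String user = "user";
    final String password = "password";

    // Encode: the header value is the scheme followed by base64(user:password).
    final String encoded = Base64.getEncoder()
        .encodeToString((user + ":" + password).getBytes(StandardCharsets.UTF_8));
    final String headerValue = "Basic " + encoded;

    // Decode: split off the scheme, then decode the credentials with an explicit charset.
    final String[] parts = headerValue.split(" ");
    final String credentials = new String(Base64.getDecoder().decode(parts[1]), StandardCharsets.UTF_8);

    System.out.println(headerValue);  // Basic dXNlcjpwYXNzd29yZA==
    System.out.println(credentials);  // user:password
  }
}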
config.getAuthenticationMethod().setMethod(ElasticsearchAuthenticationMethod.none); - var connection = new ElasticsearchConnection(config); - var headers = connection.configureHeaders(config); + final var connection = new ElasticsearchConnection(config); + final var headers = connection.configureHeaders(config); Assertions.assertEquals(0, headers.length); } @Test public void testDefaultHeadersAuthBasic() { - var config = new ConnectorConfiguration(); + final var config = new ConnectorConfiguration(); config.setEndpoint(endpoint); config.getAuthenticationMethod().setUsername("user"); config.getAuthenticationMethod().setPassword("password"); config.getAuthenticationMethod().setMethod(ElasticsearchAuthenticationMethod.basic); - var connection = new ElasticsearchConnection(config); - var headers = connection.configureHeaders(config); + final var connection = new ElasticsearchConnection(config); + final var headers = connection.configureHeaders(config); Assertions.assertEquals(1, headers.length); - var headerValues = headers[0].getValue().split(" "); + final var headerValues = headers[0].getValue().split(" "); Assertions.assertEquals("Basic", headerValues[0]); - var decoded = Base64.getDecoder().decode(headerValues[1]); - Assertions.assertTrue("user:password".contentEquals(new String(decoded))); + final var decoded = Base64.getDecoder().decode(headerValues[1]); + Assertions.assertTrue("user:password".contentEquals(new String(decoded, Charset.defaultCharset()))); } @Test public void testDefaultHeadersAuthSecret() { - var config = new ConnectorConfiguration(); + final var config = new ConnectorConfiguration(); config.setEndpoint(endpoint); config.getAuthenticationMethod().setApiKeyId("id"); config.getAuthenticationMethod().setApiKeySecret("secret"); config.getAuthenticationMethod().setMethod(ElasticsearchAuthenticationMethod.secret); - var connection = new ElasticsearchConnection(config); - var headers = connection.configureHeaders(config); + final var connection = new ElasticsearchConnection(config); + final var headers = connection.configureHeaders(config); Assertions.assertEquals(1, headers.length); - var headerValues = headers[0].getValue().split(" "); + final var headerValues = headers[0].getValue().split(" "); Assertions.assertEquals("ApiKey", headerValues[0]); - var decoded = Base64.getDecoder().decode(headerValues[1]); - Assertions.assertTrue("id:secret".contentEquals(new String(decoded))); + final var decoded = Base64.getDecoder().decode(headerValues[1]); + Assertions.assertTrue("id:secret".contentEquals(new String(decoded, Charset.defaultCharset()))); } } diff --git a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/util/GcsUtils.java b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/util/GcsUtils.java index 1e78e263ee42..7ef402f1e65f 100644 --- a/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/util/GcsUtils.java +++ b/airbyte-integrations/connectors/destination-gcs/src/main/java/io/airbyte/integrations/destination/gcs/util/GcsUtils.java @@ -22,10 +22,11 @@ public static Schema getDefaultAvroSchema(final String name, final boolean appendAirbyteFields) { LOGGER.info("Default schema."); final String stdName = AvroConstants.NAME_TRANSFORMER.getIdentifier(name); + final String stdNamespace = AvroConstants.NAME_TRANSFORMER.getNamespace(namespace); SchemaBuilder.RecordBuilder builder = SchemaBuilder.record(stdName); - if (namespace != 
null) { - builder = builder.namespace(namespace); + if (stdNamespace != null) { + builder = builder.namespace(stdNamespace); } SchemaBuilder.FieldAssembler assembler = builder.fields(); diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcBufferedConsumerFactory.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcBufferedConsumerFactory.java index bf411e8a6c9d..937750b9f301 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcBufferedConsumerFactory.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcBufferedConsumerFactory.java @@ -17,7 +17,9 @@ import io.airbyte.integrations.destination.buffered_stream_consumer.OnCloseFunction; import io.airbyte.integrations.destination.buffered_stream_consumer.OnStartFunction; import io.airbyte.integrations.destination.buffered_stream_consumer.RecordWriter; +import io.airbyte.integrations.destination.record_buffer.InMemoryRecordBufferingStrategy; import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; import io.airbyte.protocol.models.ConfiguredAirbyteStream; @@ -56,11 +58,10 @@ public static AirbyteMessageConsumer create(final Consumer outpu return new BufferedStreamConsumer( outputRecordCollector, onStartFunction(database, sqlOperations, writeConfigs), - recordWriterFunction(database, sqlOperations, writeConfigs, catalog), + new InMemoryRecordBufferingStrategy(recordWriterFunction(database, sqlOperations, writeConfigs, catalog), DEFAULT_MAX_BATCH_SIZE_BYTES), onCloseFunction(database, sqlOperations, writeConfigs), catalog, - sqlOperations::isValidData, - DEFAULT_MAX_BATCH_SIZE_BYTES); + sqlOperations::isValidData); } private static List createWriteConfigs(final NamingConventionTransformer namingResolver, @@ -132,10 +133,10 @@ private static OnStartFunction onStartFunction(final JdbcDatabase database, }; } - private static RecordWriter recordWriterFunction(final JdbcDatabase database, - final SqlOperations sqlOperations, - final List writeConfigs, - final ConfiguredAirbyteCatalog catalog) { + private static RecordWriter recordWriterFunction(final JdbcDatabase database, + final SqlOperations sqlOperations, + final List writeConfigs, + final ConfiguredAirbyteCatalog catalog) { final Map pairToWriteConfig = writeConfigs.stream() .collect(Collectors.toUnmodifiableMap(JdbcBufferedConsumerFactory::toNameNamespacePair, Function.identity())); diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcSqlOperations.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcSqlOperations.java index 7adab6bb7429..0b5c1877984a 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcSqlOperations.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/JdbcSqlOperations.java @@ -16,10 +16,12 @@ import java.sql.SQLException; import java.sql.Timestamp; import java.time.Instant; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import 
java.util.Set; import java.util.UUID; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVPrinter; @@ -32,6 +34,7 @@ public abstract class JdbcSqlOperations implements SqlOperations { // this adapter modifies record message before inserting them to the destination protected final Optional dataAdapter; + private final Set schemaSet = new HashSet<>(); protected JdbcSqlOperations() { this.dataAdapter = Optional.empty(); @@ -43,10 +46,11 @@ protected JdbcSqlOperations(final DataAdapter dataAdapter) { @Override public void createSchemaIfNotExists(final JdbcDatabase database, final String schemaName) throws Exception { - if (!isSchemaExists(database, schemaName)) { + if (!schemaSet.contains(schemaName) && !isSchemaExists(database, schemaName)) { AirbyteSentry.executeWithTracing("CreateSchema", () -> database.execute(String.format("CREATE SCHEMA IF NOT EXISTS %s;", schemaName)), Map.of("schema", schemaName)); + schemaSet.add(schemaName); } } diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyConsumerFactory.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyConsumerFactory.java index 5a7446f87a65..58eeb6be3d81 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyConsumerFactory.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/CopyConsumerFactory.java @@ -16,6 +16,7 @@ import io.airbyte.integrations.destination.buffered_stream_consumer.OnStartFunction; import io.airbyte.integrations.destination.buffered_stream_consumer.RecordWriter; import io.airbyte.integrations.destination.jdbc.SqlOperations; +import io.airbyte.integrations.destination.record_buffer.InMemoryRecordBufferingStrategy; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteRecordMessage; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; @@ -53,12 +54,13 @@ public static AirbyteMessageConsumer create(final Consumer o return new BufferedStreamConsumer( outputRecordCollector, onStartFunction(pairToIgnoredRecordCount), - recordWriterFunction(pairToCopier, sqlOperations, pairToIgnoredRecordCount), - removeStagingFilePrinter(pairToCopier), + new InMemoryRecordBufferingStrategy( + recordWriterFunction(pairToCopier, sqlOperations, pairToIgnoredRecordCount), + removeStagingFilePrinter(pairToCopier), + DEFAULT_MAX_BATCH_SIZE_BYTES), onCloseFunction(pairToCopier, database, sqlOperations, pairToIgnoredRecordCount), catalog, - sqlOperations::isValidData, - DEFAULT_MAX_BATCH_SIZE_BYTES); + sqlOperations::isValidData); } private static Map createWriteConfigs(final ExtendedNameTransformer namingResolver, @@ -85,9 +87,9 @@ private static OnStartFunction onStartFunction(final Map pairToCopier, - final SqlOperations sqlOperations, - final Map pairToIgnoredRecordCount) { + private static RecordWriter recordWriterFunction(final Map pairToCopier, + final SqlOperations sqlOperations, + final Map pairToIgnoredRecordCount) { return (AirbyteStreamNameNamespacePair pair, List records) -> { final var fileName = pairToCopier.get(pair).prepareStagingFile(); for (final AirbyteRecordMessage recordMessage : records) { @@ -105,7 +107,7 @@ private static RecordWriter recordWriterFunction(final Map pairToCopier) { return (AirbyteStreamNameNamespacePair pair, String stagingFileName) -> { - String currentFileName = 
pairToCopier.get(pair).getCurrentFile(); + final String currentFileName = pairToCopier.get(pair).getCurrentFile(); if (stagingFileName != null && currentFileName != null && !stagingFileName.equals(currentFileName)) { pairToCopier.get(pair).closeNonCurrentStagingFileWriters(); } diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java index cde82c6ec2a4..ded91cd04930 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/jdbc/copy/s3/S3StreamCopier.java @@ -223,6 +223,11 @@ public Map getStagingWritersByFile() { return stagingWritersByFile; } + @VisibleForTesting + public Set getStagingFiles() { + return stagingFileNames; + } + public abstract void copyS3CsvFileIntoTable(JdbcDatabase database, String s3FileLocation, String schema, diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/staging/StagingConsumerFactory.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/staging/StagingConsumerFactory.java index ebdc0c0d2356..d319ae68229d 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/staging/StagingConsumerFactory.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/staging/StagingConsumerFactory.java @@ -6,6 +6,8 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.base.Preconditions; +import io.airbyte.commons.functional.CheckedBiConsumer; +import io.airbyte.commons.functional.CheckedBiFunction; import io.airbyte.commons.json.Jsons; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.base.AirbyteMessageConsumer; @@ -15,8 +17,9 @@ import io.airbyte.integrations.destination.buffered_stream_consumer.BufferedStreamConsumer; import io.airbyte.integrations.destination.buffered_stream_consumer.OnCloseFunction; import io.airbyte.integrations.destination.buffered_stream_consumer.OnStartFunction; -import io.airbyte.integrations.destination.buffered_stream_consumer.RecordWriter; import io.airbyte.integrations.destination.jdbc.WriteConfig; +import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; +import io.airbyte.integrations.destination.record_buffer.SerializedBufferingStrategy; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; @@ -29,6 +32,7 @@ import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; +import org.apache.commons.io.FileUtils; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.slf4j.Logger; @@ -38,8 +42,6 @@ public class StagingConsumerFactory { private static final Logger LOGGER = LoggerFactory.getLogger(StagingConsumerFactory.class); - private static final long MAX_BATCH_SIZE_BYTES = 128 * 1024 * 1024; // 128mb - private final DateTime CURRENT_SYNC_PATH = DateTime.now(DateTimeZone.UTC); // using a random string here as a placeholder for the moment. 
// This would avoid mixing data in the staging area between different syncs (especially if they // manipulate streams with similar names) @@ -48,24 +50,25 @@ public class StagingConsumerFactory { // in a previous attempt but failed to load to the warehouse for some reason (interrupted?) instead. // This would also allow other programs/scripts // to load (or reload backups?) in the connection's staging area to be loaded at the next sync. - private final String RANDOM_CONNECTION_ID = UUID.randomUUID().toString(); + private static final DateTime SYNC_DATETIME = DateTime.now(DateTimeZone.UTC); + private final UUID RANDOM_CONNECTION_ID = UUID.randomUUID(); public AirbyteMessageConsumer create(final Consumer outputRecordCollector, final JdbcDatabase database, final StagingOperations sqlOperations, final NamingConventionTransformer namingResolver, + final CheckedBiFunction onCreateBuffer, final JsonNode config, final ConfiguredAirbyteCatalog catalog) { final List writeConfigs = createWriteConfigs(namingResolver, config, catalog); - return new BufferedStreamConsumer( outputRecordCollector, onStartFunction(database, sqlOperations, writeConfigs), - recordWriterFunction(database, sqlOperations, writeConfigs, catalog), + new SerializedBufferingStrategy(onCreateBuffer, catalog, + flushBufferFunction(database, sqlOperations, writeConfigs, catalog)), onCloseFunction(database, sqlOperations, writeConfigs), catalog, - sqlOperations::isValidData, - MAX_BATCH_SIZE_BYTES); + sqlOperations::isValidData); } private static List createWriteConfigs(final NamingConventionTransformer namingResolver, @@ -88,7 +91,8 @@ private static Function toWriteConfig(fina final String tmpTableName = namingResolver.getTmpTableName(streamName); final DestinationSyncMode syncMode = stream.getDestinationSyncMode(); - final WriteConfig writeConfig = new WriteConfig(streamName, abStream.getNamespace(), outputSchema, tmpTableName, tableName, syncMode); + final WriteConfig writeConfig = + new WriteConfig(streamName, abStream.getNamespace(), outputSchema, tmpTableName, tableName, syncMode, SYNC_DATETIME); LOGGER.info("Write config: {}", writeConfig); return writeConfig; @@ -99,37 +103,37 @@ private static String getOutputSchema(final AirbyteStream stream, final String defaultDestSchema, final NamingConventionTransformer namingResolver) { return stream.getNamespace() != null - ? namingResolver.getIdentifier(stream.getNamespace()) - : namingResolver.getIdentifier(defaultDestSchema); + ? 
namingResolver.getNamespace(stream.getNamespace()) + : namingResolver.getNamespace(defaultDestSchema); } - private static OnStartFunction onStartFunction(final JdbcDatabase database, - final StagingOperations stagingOperations, - final List writeConfigs) { + private OnStartFunction onStartFunction(final JdbcDatabase database, + final StagingOperations stagingOperations, + final List writeConfigs) { return () -> { LOGGER.info("Preparing tmp tables in destination started for {} streams", writeConfigs.size()); - for (final WriteConfig writeConfig : writeConfigs) { final String schema = writeConfig.getOutputSchemaName(); final String stream = writeConfig.getStreamName(); final String tmpTable = writeConfig.getTmpTableName(); - final String stage = stagingOperations.getStageName(schema, writeConfig.getOutputTableName()); + final String stageName = stagingOperations.getStageName(schema, stream); + final String stagingPath = stagingOperations.getStagingPath(RANDOM_CONNECTION_ID, schema, stream, writeConfig.getWriteDatetime()); - LOGGER.info("Preparing stage in destination started for schema {} stream {}: tmp table: {}, stage: {}", - schema, stream, tmpTable, stage); + LOGGER.info("Preparing staging area in destination started for schema {} stream {}: tmp table: {}, stage: {}", + schema, stream, tmpTable, stagingPath); AirbyteSentry.executeWithTracing("PrepareStreamStage", () -> { stagingOperations.createSchemaIfNotExists(database, schema); stagingOperations.createTableIfNotExists(database, schema, tmpTable); - stagingOperations.createStageIfNotExists(database, stage); + stagingOperations.createStageIfNotExists(database, stageName); }, - Map.of("schema", schema, "stream", stream, "tmpTable", tmpTable, "stage", stage)); + Map.of("schema", schema, "stream", stream, "tmpTable", tmpTable, "stage", stagingPath)); - LOGGER.info("Preparing stage in destination completed for schema {} stream {}", schema, stream); + LOGGER.info("Preparing staging area in destination completed for schema {} stream {}", schema, stream); } - LOGGER.info("Preparing tables in destination completed."); + LOGGER.info("Preparing tmp tables in destination completed."); }; } @@ -137,16 +141,18 @@ private static AirbyteStreamNameNamespacePair toNameNamespacePair(final WriteCon return new AirbyteStreamNameNamespacePair(config.getStreamName(), config.getNamespace()); } - private RecordWriter recordWriterFunction(final JdbcDatabase database, - final StagingOperations stagingOperations, - final List writeConfigs, - final ConfiguredAirbyteCatalog catalog) { + private CheckedBiConsumer flushBufferFunction( + final JdbcDatabase database, + final StagingOperations stagingOperations, + final List writeConfigs, + final ConfiguredAirbyteCatalog catalog) { final Map pairToWriteConfig = writeConfigs.stream() .collect(Collectors.toUnmodifiableMap( StagingConsumerFactory::toNameNamespacePair, Function.identity())); - return (pair, records) -> { + return (pair, writer) -> { + LOGGER.info("Flushing buffer for stream {} ({}) to staging", pair.getName(), FileUtils.byteCountToDisplaySize(writer.getByteCount())); if (!pairToWriteConfig.containsKey(pair)) { throw new IllegalArgumentException( String.format("Message contained record from a stream that was not in the catalog. 
\ncatalog: %s", Jsons.serialize(catalog))); @@ -154,9 +160,16 @@ private RecordWriter recordWriterFunction(final JdbcDatabase database, final WriteConfig writeConfig = pairToWriteConfig.get(pair); final String schemaName = writeConfig.getOutputSchemaName(); - final String tableName = writeConfig.getOutputTableName(); - final String path = stagingOperations.getStagingPath(RANDOM_CONNECTION_ID, schemaName, tableName, CURRENT_SYNC_PATH); - stagingOperations.insertRecords(database, records, schemaName, path); + final String stageName = stagingOperations.getStageName(schemaName, writeConfig.getStreamName()); + final String stagingPath = + stagingOperations.getStagingPath(RANDOM_CONNECTION_ID, schemaName, writeConfig.getStreamName(), writeConfig.getWriteDatetime()); + try (writer) { + writer.flush(); + writeConfig.addStagedFile(stagingOperations.uploadRecordsToStage(database, writer, schemaName, stageName, stagingPath)); + } catch (final Exception e) { + LOGGER.error("Failed to flush and upload buffer to stage:", e); + throw new RuntimeException("Failed to upload buffer to stage", e); + } }; } @@ -166,24 +179,27 @@ private OnCloseFunction onCloseFunction(final JdbcDatabase database, return (hasFailed) -> { if (!hasFailed) { final List queryList = new ArrayList<>(); - LOGGER.info("Finalizing tables in destination started for {} streams", writeConfigs.size()); + LOGGER.info("Copying into tables in destination started for {} streams", writeConfigs.size()); for (final WriteConfig writeConfig : writeConfigs) { final String schemaName = writeConfig.getOutputSchemaName(); final String streamName = writeConfig.getStreamName(); final String srcTableName = writeConfig.getTmpTableName(); final String dstTableName = writeConfig.getOutputTableName(); - final String path = stagingOperations.getStagingPath(RANDOM_CONNECTION_ID, schemaName, dstTableName, CURRENT_SYNC_PATH); - LOGGER.info("Finalizing stream {}. 
schema {}, tmp table {}, final table {}, stage path {}", - streamName, schemaName, srcTableName, dstTableName, path); + final String stageName = stagingOperations.getStageName(schemaName, streamName); + final String stagingPath = stagingOperations.getStagingPath(RANDOM_CONNECTION_ID, schemaName, streamName, writeConfig.getWriteDatetime()); + LOGGER.info("Copying stream {} of schema {} into tmp table {} to final table {} from stage path {} with {} file(s) [{}]", + streamName, schemaName, srcTableName, dstTableName, stagingPath, writeConfig.getStagedFiles().size(), + String.join(",", writeConfig.getStagedFiles())); try { - stagingOperations.copyIntoTmpTableFromStage(database, path, srcTableName, schemaName); + stagingOperations.copyIntoTmpTableFromStage(database, stageName, stagingPath, writeConfig.getStagedFiles(), srcTableName, schemaName); } catch (final Exception e) { - stagingOperations.cleanUpStage(database, path); - LOGGER.info("Cleaning stage path {}", path); - throw new RuntimeException("Failed to upload data from stage " + path, e); + stagingOperations.cleanUpStage(database, stageName, writeConfig.getStagedFiles()); + LOGGER.info("Cleaning stage path {}", stagingPath); + throw new RuntimeException("Failed to upload data from stage " + stagingPath, e); } + writeConfig.clearStagedFiles(); stagingOperations.createTableIfNotExists(database, schemaName, dstTableName); switch (writeConfig.getSyncMode()) { @@ -198,7 +214,7 @@ private OnCloseFunction onCloseFunction(final JdbcDatabase database, stagingOperations.executeTransaction(database, queryList); LOGGER.info("Finalizing tables in destination completed."); } - LOGGER.info("Cleaning tmp tables in destination started for {} streams", writeConfigs.size()); + LOGGER.info("Cleaning up destination started for {} streams", writeConfigs.size()); for (final WriteConfig writeConfig : writeConfigs) { final String schemaName = writeConfig.getOutputSchemaName(); final String tmpTableName = writeConfig.getTmpTableName(); @@ -206,13 +222,12 @@ private OnCloseFunction onCloseFunction(final JdbcDatabase database, tmpTableName); stagingOperations.dropTableIfExists(database, schemaName, tmpTableName); - final String outputTableName = writeConfig.getOutputTableName(); - final String stageName = stagingOperations.getStageName(schemaName, outputTableName); + final String stageName = stagingOperations.getStageName(schemaName, writeConfig.getStreamName()); LOGGER.info("Cleaning stage in destination started for stream {}. 
schema {}, stage: {}", writeConfig.getStreamName(), schemaName, stageName); stagingOperations.dropStageIfExists(database, stageName); } - LOGGER.info("Cleaning tmp tables and stages in destination completed."); + LOGGER.info("Cleaning up destination completed."); }; } diff --git a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/staging/StagingOperations.java b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/staging/StagingOperations.java index f0ee658a7a76..e2a1b799e48c 100644 --- a/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/staging/StagingOperations.java +++ b/airbyte-integrations/connectors/destination-jdbc/src/main/java/io/airbyte/integrations/destination/staging/StagingOperations.java @@ -6,34 +6,45 @@ import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.destination.jdbc.SqlOperations; -import java.io.File; +import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; +import java.util.List; +import java.util.UUID; import org.joda.time.DateTime; public interface StagingOperations extends SqlOperations { - String getStageName(String schemaName, String tableName); + String getStageName(String namespace, String streamName); - String getStagingPath(String connectionId, String schemaName, String tableName, DateTime writeDatetime); + String getStagingPath(UUID connectionId, String namespace, String streamName, DateTime writeDatetime); /** * Create a staging folder where to upload temporary files before loading into the final destination */ - void createStageIfNotExists(JdbcDatabase database, String stage) throws Exception; + void createStageIfNotExists(JdbcDatabase database, String stageName) throws Exception; /** - * Upload the data file into the stage area.* + * Upload the data file into the stage area. + * + * @return the name of the file that was uploaded. 
*/ - void uploadRecordsToStage(JdbcDatabase database, File dataFile, String schemaName, String path) throws Exception; + String uploadRecordsToStage(JdbcDatabase database, SerializableBuffer recordsData, String schemaName, String stageName, String stagingPath) + throws Exception; /** * Load the data stored in the stage area into a temporary table in the destination */ - void copyIntoTmpTableFromStage(JdbcDatabase database, String path, String srcTableName, String schemaName) throws Exception; + void copyIntoTmpTableFromStage(JdbcDatabase database, + String stageName, + String stagingPath, + List stagedFiles, + String srcTableName, + String schemaName) + throws Exception; /** * Remove files that were just staged */ - void cleanUpStage(JdbcDatabase database, String path) throws Exception; + void cleanUpStage(JdbcDatabase database, String stageName, List stagedFiles) throws Exception; /** * Delete the stage area and all staged files that was in it diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreSqlOperations.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreSqlOperations.java index 0e181b2d118a..4460ca17e085 100644 --- a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreSqlOperations.java +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreSqlOperations.java @@ -103,7 +103,7 @@ VersionCompatibility isCompatibleVersion(final JdbcDatabase database) throws SQL } private Semver getVersion(final JdbcDatabase database) throws SQLException { - final List value = database.resultSetQuery(connection -> connection.createStatement().executeQuery("SELECT version()"), + final List value = database.unsafeResultSetQuery(connection -> connection.createStatement().executeQuery("SELECT version()"), resultSet -> resultSet.getString("version()")).collect(Collectors.toList()); Matcher matcher = VERSION_PATTERN.matcher(value.get(0)); if (matcher.find()) { @@ -123,7 +123,7 @@ void verifyLocalFileEnabled(final JdbcDatabase database) throws SQLException { private boolean checkIfLocalFileIsEnabled(final JdbcDatabase database) throws SQLException { final List value = - database.resultSetQuery(connection -> connection.createStatement().executeQuery("SHOW GLOBAL VARIABLES LIKE 'local_infile'"), + database.unsafeResultSetQuery(connection -> connection.createStatement().executeQuery("SHOW GLOBAL VARIABLES LIKE 'local_infile'"), resultSet -> resultSet.getString("Value")).collect(Collectors.toList()); return value.get(0).equalsIgnoreCase("on"); diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestinationAcceptanceTest.java index cdf4a94498fb..6b37b1e5e93a 100644 --- a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestinationAcceptanceTest.java +++ 
b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestinationAcceptanceTest.java @@ -82,7 +82,7 @@ protected List retrieveRecords(TestDestinationEnv testEnv, private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException { JdbcDatabase database = getDatabase(getConfig()); - return database.query(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, + return database.unsafeQuery(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) .collect(Collectors.toList()); } diff --git a/airbyte-integrations/connectors/destination-meilisearch/src/main/java/io/airbyte/integrations/destination/meilisearch/MeiliSearchDestination.java b/airbyte-integrations/connectors/destination-meilisearch/src/main/java/io/airbyte/integrations/destination/meilisearch/MeiliSearchDestination.java index fe996c8e0a51..c8cf60999201 100644 --- a/airbyte-integrations/connectors/destination-meilisearch/src/main/java/io/airbyte/integrations/destination/meilisearch/MeiliSearchDestination.java +++ b/airbyte-integrations/connectors/destination-meilisearch/src/main/java/io/airbyte/integrations/destination/meilisearch/MeiliSearchDestination.java @@ -17,6 +17,7 @@ import io.airbyte.integrations.base.IntegrationRunner; import io.airbyte.integrations.destination.buffered_stream_consumer.BufferedStreamConsumer; import io.airbyte.integrations.destination.buffered_stream_consumer.RecordWriter; +import io.airbyte.integrations.destination.record_buffer.InMemoryRecordBufferingStrategy; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteConnectionStatus.Status; import io.airbyte.protocol.models.AirbyteMessage; @@ -98,11 +99,10 @@ public AirbyteMessageConsumer getConsumer(final JsonNode config, return new BufferedStreamConsumer( outputRecordCollector, () -> LOGGER.info("Starting write to MeiliSearch."), - recordWriterFunction(indexNameToIndex), + new InMemoryRecordBufferingStrategy(recordWriterFunction(indexNameToIndex), MAX_BATCH_SIZE_BYTES), (hasFailed) -> LOGGER.info("Completed writing to MeiliSearch. Status: {}", hasFailed ? 
"FAILED" : "SUCCEEDED"), catalog, - (data) -> true, - MAX_BATCH_SIZE_BYTES); + (data) -> true); } private static Map createIndices(final ConfiguredAirbyteCatalog catalog, final Client client) throws Exception { @@ -129,7 +129,7 @@ private static boolean indexExists(final Client client, final String indexName) .anyMatch(actualIndexName -> actualIndexName.equals(indexName)); } - private static RecordWriter recordWriterFunction(final Map indexNameToWriteConfig) { + private static RecordWriter recordWriterFunction(final Map indexNameToWriteConfig) { return (namePair, records) -> { final String resolvedIndexName = getIndexName(namePair.getName()); if (!indexNameToWriteConfig.containsKey(resolvedIndexName)) { diff --git a/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/MySQLSqlOperations.java b/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/MySQLSqlOperations.java index 250ad94c33f0..181fb8d8bf94 100644 --- a/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/MySQLSqlOperations.java +++ b/airbyte-integrations/connectors/destination-mysql/src/main/java/io/airbyte/integrations/destination/mysql/MySQLSqlOperations.java @@ -100,7 +100,7 @@ private void tryEnableLocalFile(final JdbcDatabase database) throws SQLException } private double getVersion(final JdbcDatabase database) throws SQLException { - final List value = database.resultSetQuery(connection -> connection.createStatement().executeQuery("select version()"), + final List value = database.unsafeResultSetQuery(connection -> connection.createStatement().executeQuery("select version()"), resultSet -> resultSet.getString("version()")).collect(Collectors.toList()); return Double.parseDouble(value.get(0).substring(0, 3)); } @@ -117,7 +117,7 @@ public boolean isSchemaRequired() { private boolean checkIfLocalFileIsEnabled(final JdbcDatabase database) throws SQLException { final List value = - database.resultSetQuery(connection -> connection.createStatement().executeQuery("SHOW GLOBAL VARIABLES LIKE 'local_infile'"), + database.unsafeResultSetQuery(connection -> connection.createStatement().executeQuery("SHOW GLOBAL VARIABLES LIKE 'local_infile'"), resultSet -> resultSet.getString("Value")).collect(Collectors.toList()); return value.get(0).equalsIgnoreCase("on"); diff --git a/airbyte-integrations/connectors/destination-oracle-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/oracle_strict_encrypt/OracleStrictEncryptDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-oracle-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/oracle_strict_encrypt/OracleStrictEncryptDestinationAcceptanceTest.java index d8cdc1a31b71..b360964abcc4 100644 --- a/airbyte-integrations/connectors/destination-oracle-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/oracle_strict_encrypt/OracleStrictEncryptDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-oracle-strict-encrypt/src/test-integration/java/io/airbyte/integrations/destination/oracle_strict_encrypt/OracleStrictEncryptDestinationAcceptanceTest.java @@ -183,7 +183,7 @@ public void testEncryption() throws SQLException { final String network_service_banner = "select network_service_banner from v$session_connect_info where sid in (select distinct sid from v$mystat)"; - final List collect = 
database.query(network_service_banner).collect(Collectors.toList()); + final List collect = database.unsafeQuery(network_service_banner).collect(Collectors.toList()); assertThat(collect.get(2).get("NETWORK_SERVICE_BANNER").asText(), equals("Oracle Advanced Security: " + algorithm + " encryption")); @@ -208,7 +208,7 @@ public void testCheckProtocol() throws SQLException { + algorithm + " )")); final String network_service_banner = "SELECT sys_context('USERENV', 'NETWORK_PROTOCOL') as network_protocol FROM dual"; - final List collect = database.query(network_service_banner).collect(Collectors.toList()); + final List collect = database.unsafeQuery(network_service_banner).collect(Collectors.toList()); assertEquals("tcp", collect.get(0).get("NETWORK_PROTOCOL").asText()); } diff --git a/airbyte-integrations/connectors/destination-oracle/certificate.pem b/airbyte-integrations/connectors/destination-oracle/certificate.pem new file mode 100644 index 000000000000..c225f2d78944 --- /dev/null +++ b/airbyte-integrations/connectors/destination-oracle/certificate.pem @@ -0,0 +1 @@ +certificate \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/NneOracleDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/NneOracleDestinationAcceptanceTest.java index 4b8f108868b8..8a65723e2a0c 100644 --- a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/NneOracleDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/NneOracleDestinationAcceptanceTest.java @@ -42,7 +42,7 @@ public void testEncryption() throws SQLException { final String network_service_banner = "select network_service_banner from v$session_connect_info where sid in (select distinct sid from v$mystat)"; - final List collect = database.query(network_service_banner).toList(); + final List collect = database.unsafeQuery(network_service_banner).toList(); assertThat(collect.get(2).get("NETWORK_SERVICE_BANNER").asText(), equals("Oracle Advanced Security: " + algorithm + " encryption")); @@ -74,7 +74,7 @@ public void testCheckProtocol() throws SQLException { getAdditionalProperties(algorithm)); final String network_service_banner = "SELECT sys_context('USERENV', 'NETWORK_PROTOCOL') as network_protocol FROM dual"; - final List collect = database.query(network_service_banner).collect(Collectors.toList()); + final List collect = database.unsafeQuery(network_service_banner).collect(Collectors.toList()); assertEquals("tcp", collect.get(0).get("NETWORK_PROTOCOL").asText()); } diff --git a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java index f072506204ab..f97bcaaec706 100644 --- a/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java +++ 
b/airbyte-integrations/connectors/destination-oracle/src/test-integration/java/io/airbyte/integrations/destination/oracle/UnencryptedOracleDestinationAcceptanceTest.java @@ -175,7 +175,7 @@ public void testNoneEncryption() throws SQLException { final String network_service_banner = "select network_service_banner from v$session_connect_info where sid in (select distinct sid from v$mystat)"; - final List collect = database.query(network_service_banner).collect(Collectors.toList()); + final List collect = database.unsafeQuery(network_service_banner).collect(Collectors.toList()); assertTrue(collect.get(1).get("NETWORK_SERVICE_BANNER").asText() .contains("Oracle Advanced Security: encryption")); diff --git a/airbyte-integrations/connectors/destination-redshift/Dockerfile b/airbyte-integrations/connectors/destination-redshift/Dockerfile index d6d8d7ad3659..f99699e788fa 100644 --- a/airbyte-integrations/connectors/destination-redshift/Dockerfile +++ b/airbyte-integrations/connectors/destination-redshift/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-redshift COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.3.27 +LABEL io.airbyte.version=0.3.28 LABEL io.airbyte.name=airbyte/destination-redshift diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStreamCopier.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStreamCopier.java index bd6e878d3683..8e65ceda6449 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStreamCopier.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStreamCopier.java @@ -117,11 +117,11 @@ public void removeFileAndDropTmpTable() throws Exception { * @return null if no stagingFiles exist otherwise the manifest body String */ private String createManifest() { - if (stagingWritersByFile.isEmpty()) { + if (getStagingFiles().isEmpty()) { return null; } - final var s3FileEntries = stagingWritersByFile.keySet().stream() + final var s3FileEntries = getStagingFiles().stream() .map(filePath -> new Entry(getFullS3Path(s3Config.getBucketName(), filePath))) .collect(Collectors.toList()); final var manifest = new Manifest(s3FileEntries); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/BlobStorageOperations.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/BlobStorageOperations.java new file mode 100644 index 000000000000..fb2ad5d5be74 --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/BlobStorageOperations.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.s3; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; +import java.util.List; +import org.joda.time.DateTime; + +public interface BlobStorageOperations { + + String getBucketObjectName(String namespace, String streamName); + + String getBucketObjectPath(final String prefix, String namespace, String streamName, DateTime writeDatetime); + + /** + * Create a storage object where to store data in the destination for a @param streamName using + * location of @param objectPath + */ + void createBucketObjectIfNotExists(String streamName) throws Exception; + + /** + * Upload the data files into the storage area. + * + * @return the name of the file that was uploaded. + */ + String uploadRecordsToBucket(SerializableBuffer recordsData, String namespace, String streamName, String objectPath) throws Exception; + + /** + * Remove files that were just stored in the bucket + */ + void cleanUpBucketObject(String streamName, List stagedFiles) throws Exception; + + void dropBucketObject(String streamName); + + boolean isValidData(JsonNode jsonNode); + +} diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java new file mode 100644 index 000000000000..271673884ad1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.s3; + +import alex.mojaki.s3upload.MultiPartOutputStream; +import alex.mojaki.s3upload.StreamTransferManager; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.DeleteObjectsRequest; +import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; +import com.amazonaws.services.s3.model.ObjectListing; +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.string.Strings; +import io.airbyte.integrations.destination.NamingConventionTransformer; +import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; +import io.airbyte.integrations.destination.s3.util.S3StreamTransferManagerHelper; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import org.joda.time.DateTime; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class S3StorageOperations implements BlobStorageOperations { + + private static final Logger LOGGER = LoggerFactory.getLogger(S3StorageOperations.class); + + private static final int DEFAULT_UPLOAD_THREADS = 10; // The S3 cli uses 10 threads by default. 
+ private static final int DEFAULT_QUEUE_CAPACITY = DEFAULT_UPLOAD_THREADS; + private static final int DEFAULT_PART_SIZE = 10; + private static final int UPLOAD_RETRY_LIMIT = 3; + + private final NamingConventionTransformer nameTransformer; + private final S3DestinationConfig s3Config; + private AmazonS3 s3Client; + + public S3StorageOperations(final NamingConventionTransformer nameTransformer, final AmazonS3 s3Client, final S3DestinationConfig s3Config) { + this.nameTransformer = nameTransformer; + this.s3Client = s3Client; + this.s3Config = s3Config; + } + + @Override + public String getBucketObjectName(final String namespace, final String streamName) { + return nameTransformer.applyDefaultCase(String.join("_", + nameTransformer.convertStreamName(namespace), + nameTransformer.convertStreamName(streamName))); + } + + @Override + public String getBucketObjectPath(final String prefix, final String namespace, final String streamName, final DateTime writeDatetime) { + return nameTransformer.applyDefaultCase(String.format("%s/%s/%s/%02d/%02d/%02d/", + prefix, + getBucketObjectName(namespace, streamName), + writeDatetime.year().get(), + writeDatetime.monthOfYear().get(), + writeDatetime.dayOfMonth().get(), + writeDatetime.hourOfDay().get())); + } + + @Override + public void createBucketObjectIfNotExists(final String objectPath) { + final String bucket = s3Config.getBucketName(); + if (!s3Client.doesBucketExistV2(bucket)) { + LOGGER.info("Bucket {} does not exist; creating...", bucket); + s3Client.createBucket(bucket); + LOGGER.info("Bucket {} has been created.", bucket); + } + if (!s3Client.doesObjectExist(bucket, objectPath)) { + LOGGER.info("Storage Object {}/{} does not exist in bucket; creating...", bucket, objectPath); + s3Client.putObject(bucket, objectPath.endsWith("/") ? objectPath : objectPath + "/", ""); + LOGGER.info("Storage Object {}/{} has been created in bucket.", bucket, objectPath); + } + } + + @Override + public String uploadRecordsToBucket(final SerializableBuffer recordsData, final String namespace, final String streamName, final String objectPath) + throws Exception { + final List exceptionsThrown = new ArrayList<>(); + boolean succeeded = false; + while (exceptionsThrown.size() < UPLOAD_RETRY_LIMIT && !succeeded) { + try { + loadDataIntoBucket(objectPath, recordsData); + succeeded = true; + } catch (final Exception e) { + LOGGER.error("Failed to upload records into storage {}", objectPath, e); + exceptionsThrown.add(e); + } + if (!succeeded) { + LOGGER.info("Retrying to upload records into storage {} ({}/{}})", objectPath, exceptionsThrown.size(), UPLOAD_RETRY_LIMIT); + // Force a reconnection before retrying in case error was due to network issues... + s3Client = s3Config.resetS3Client(); + } + } + if (!succeeded) { + throw new RuntimeException(String.format("Exceptions thrown while uploading records into storage: %s", Strings.join(exceptionsThrown, "\n"))); + } + return recordsData.getFilename(); + } + + private void loadDataIntoBucket(final String objectPath, final SerializableBuffer recordsData) throws IOException { + final long partSize = s3Config.getFormatConfig() != null ? 
s3Config.getFormatConfig().getPartSize() : DEFAULT_PART_SIZE; + final String bucket = s3Config.getBucketName(); + final String objectKey = String.format("%s%s", objectPath, recordsData.getFilename()); + final StreamTransferManager uploadManager = S3StreamTransferManagerHelper + .getDefault(bucket, objectKey, s3Client, partSize) + .checkIntegrity(true) + .numUploadThreads(DEFAULT_UPLOAD_THREADS) + .queueCapacity(DEFAULT_QUEUE_CAPACITY); + boolean hasFailed = false; + try (final MultiPartOutputStream outputStream = uploadManager.getMultiPartOutputStreams().get(0); + final InputStream dataStream = recordsData.getInputStream()) { + dataStream.transferTo(outputStream); + } catch (final Exception e) { + LOGGER.error("Failed to load data into storage {}", objectPath, e); + hasFailed = true; + throw new RuntimeException(e); + } finally { + if (hasFailed) { + uploadManager.abort(); + } else { + uploadManager.complete(); + } + } + if (!s3Client.doesObjectExist(bucket, objectKey)) { + LOGGER.error("Failed to upload data into storage, object {} not found", objectKey); + throw new RuntimeException("Upload failed"); + } + } + + @Override + public void dropBucketObject(final String streamName) { + LOGGER.info("Dropping bucket object {}...", streamName); + final String bucket = s3Config.getBucketName(); + if (s3Client.doesObjectExist(bucket, streamName)) { + s3Client.deleteObject(bucket, streamName); + } + LOGGER.info("Bucket object {} has been deleted...", streamName); + } + + @Override + public void cleanUpBucketObject(final String objectPath, final List stagedFiles) { + final String bucket = s3Config.getBucketName(); + ObjectListing objects = s3Client.listObjects(bucket, objectPath); + while (objects.getObjectSummaries().size() > 0) { + final List toDelete = objects.getObjectSummaries() + .stream() + .map(obj -> new KeyVersion(obj.getKey())) + .filter(obj -> stagedFiles.isEmpty() || stagedFiles.contains(obj.getKey())) + .toList(); + s3Client.deleteObjects(new DeleteObjectsRequest(bucket).withKeys(toDelete)); + LOGGER.info("Storage bucket {} has been cleaned-up ({} objects were deleted)...", objectPath, toDelete.size()); + if (objects.isTruncated()) { + objects = s3Client.listNextBatchOfObjects(objects); + } else { + break; + } + } + } + + @Override + public boolean isValidData(final JsonNode jsonNode) { + return true; + } + +} diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/WriteConfig.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/WriteConfig.java new file mode 100644 index 000000000000..4083b686a445 --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/WriteConfig.java @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.s3; + +import io.airbyte.protocol.models.DestinationSyncMode; +import java.util.ArrayList; +import java.util.List; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; + +/** + * Write configuration POJO for blob storage destinations + */ +public class WriteConfig { + + private final String streamName; + + private final String namespace; + + private final String outputNamespace; + private final String outputBucket; + private final DestinationSyncMode syncMode; + private final DateTime writeDatetime; + private final List storedFiles; + + public WriteConfig(final String streamName, + final String namespace, + final String outputNamespace, + final String outputBucket, + final DestinationSyncMode syncMode) { + this(streamName, namespace, outputNamespace, outputBucket, syncMode, DateTime.now(DateTimeZone.UTC)); + } + + public WriteConfig(final String streamName, + final String namespace, + final String outputNamespace, + final String outputBucket, + final DestinationSyncMode syncMode, + final DateTime writeDatetime) { + this.streamName = streamName; + this.namespace = namespace; + this.outputNamespace = outputNamespace; + this.outputBucket = outputBucket; + this.syncMode = syncMode; + this.storedFiles = new ArrayList<>(); + this.writeDatetime = writeDatetime; + } + + public String getStreamName() { + return streamName; + } + + public String getNamespace() { + return namespace; + } + + public String getOutputNamespace() { + return outputNamespace; + } + + public String getOutputBucket() { + return outputBucket; + } + + public DestinationSyncMode getSyncMode() { + return syncMode; + } + + public DateTime getWriteDatetime() { + return writeDatetime; + } + + public List getStoredFiles() { + return storedFiles; + } + + public void addStoredFile(final String file) { + storedFiles.add(file); + } + + public void clearStoredFiles() { + storedFiles.clear(); + } + + @Override + public String toString() { + return "WriteConfig{" + + "streamName=" + streamName + + ", namespace=" + namespace + + ", outputNamespace=" + outputNamespace + + ", outputBucket=" + outputBucket + + ", syncMode=" + syncMode + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java index d356e177eaa3..689be3e6746f 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformer.java @@ -5,7 +5,16 @@ package io.airbyte.integrations.destination.s3.avro; import io.airbyte.integrations.destination.ExtendedNameTransformer; - +import java.util.Arrays; +import java.util.List; + +/** + *
+ * <ul>
+ *   <li>An Avro name starts with [A-Za-z_], followed by [A-Za-z0-9_].</li>
+ *   <li>An Avro namespace is a dot-separated sequence of such names.</li>
+ *   <li>Reference: https://avro.apache.org/docs/current/spec.html#names</li>
+ * </ul>
+ */ public class AvroNameTransformer extends ExtendedNameTransformer { @Override @@ -14,20 +23,29 @@ public String applyDefaultCase(final String input) { } @Override - public String getIdentifier(final String name) { - return replaceForbiddenCharacters(checkFirsCharInStreamName(convertStreamName(name))); - } + public String convertStreamName(final String input) { + if (input == null) { + return null; + } else if (input.isBlank()) { + return input; + } - private String checkFirsCharInStreamName(final String name) { - if (name.substring(0, 1).matches("[A-Za-z_]")) { - return name; + final String normalizedName = super.convertStreamName(input); + if (normalizedName.substring(0, 1).matches("[A-Za-z_]")) { + return normalizedName; } else { - return "_" + name; + return "_" + normalizedName; } } - private String replaceForbiddenCharacters(final String name) { - return name.replace("-", "_"); + @Override + public String getNamespace(final String input) { + if (input == null) { + return null; + } + + final String[] tokens = input.split("\\."); + return String.join(".", Arrays.stream(tokens).map(this::getIdentifier).toList()); } } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java index 7289d15a8d05..379674ebecb2 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/JsonToAvroSchemaConverter.java @@ -122,6 +122,7 @@ public Schema getAvroSchema(final JsonNode jsonSchema, final boolean addStringToLogicalTypes, final boolean isRootNode) { final String stdName = AvroConstants.NAME_TRANSFORMER.getIdentifier(fieldName); + final String stdNamespace = AvroConstants.NAME_TRANSFORMER.getNamespace(fieldNamespace); final SchemaBuilder.RecordBuilder builder = SchemaBuilder.record(stdName); if (!stdName.equals(fieldName)) { standardizedNames.put(fieldName, stdName); @@ -133,8 +134,8 @@ public Schema getAvroSchema(final JsonNode jsonSchema, AvroConstants.DOC_KEY_VALUE_DELIMITER, fieldName)); } - if (fieldNamespace != null) { - builder.namespace(fieldNamespace); + if (stdNamespace != null) { + builder.namespace(stdNamespace); } final JsonNode properties = jsonSchema.get("properties"); @@ -175,7 +176,7 @@ public Schema getAvroSchema(final JsonNode jsonSchema, // Omit the namespace for root level fields, because it is directly assigned in the builder above. // This may not be the correct choice. ? null - : (fieldNamespace == null ? stdName : (fieldNamespace + "." + stdName)); + : (stdNamespace == null ? stdName : (stdNamespace + "." 
+ stdName)); fieldBuilder.type(parseJsonField(subfieldName, subfieldNamespace, subfieldDefinition, appendExtraProps, addStringToLogicalTypes)) .withDefault(null); } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/CsvSerializedBuffer.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/CsvSerializedBuffer.java new file mode 100644 index 000000000000..0ec15d1575c8 --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/CsvSerializedBuffer.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.s3.csv; + +import io.airbyte.commons.functional.CheckedBiFunction; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.destination.record_buffer.BaseSerializedBuffer; +import io.airbyte.integrations.destination.record_buffer.BufferStorage; +import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.io.IOException; +import java.io.OutputStream; +import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; +import java.util.UUID; +import java.util.concurrent.Callable; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.lang3.StringUtils; + +public class CsvSerializedBuffer extends BaseSerializedBuffer { + + private final CsvSheetGenerator csvSheetGenerator; + private CSVPrinter csvPrinter; + private CSVFormat csvFormat; + + protected CsvSerializedBuffer(final BufferStorage bufferStorage, final CsvSheetGenerator csvSheetGenerator) throws Exception { + super(bufferStorage); + this.csvSheetGenerator = csvSheetGenerator; + this.csvPrinter = null; + this.csvFormat = CSVFormat.DEFAULT; + // we always want to compress csv files + withCompression(true); + } + + public CsvSerializedBuffer withCsvFormat(final CSVFormat csvFormat) { + if (csvPrinter == null) { + this.csvFormat = csvFormat; + return this; + } + throw new RuntimeException("Options should be configured before starting to write"); + } + + @Override + protected void createWriter(final OutputStream outputStream) throws IOException { + csvPrinter = new CSVPrinter(new PrintWriter(outputStream, true, StandardCharsets.UTF_8), csvFormat); + } + + @Override + protected void writeRecord(final AirbyteRecordMessage recordMessage) throws IOException { + csvPrinter.printRecord(csvSheetGenerator.getDataRow(UUID.randomUUID(), recordMessage)); + } + + @Override + protected void closeWriter() throws IOException { + csvPrinter.flush(); + csvPrinter.close(); + } + + public static CheckedBiFunction createFunction( + final S3CsvFormatConfig config, + final Callable createStorageFunction) { + return (final AirbyteStreamNameNamespacePair stream, final ConfiguredAirbyteCatalog catalog) -> { + final CsvSheetGenerator csvSheetGenerator; + if (config != null) { + csvSheetGenerator = CsvSheetGenerator.Factory.create(catalog.getStreams() + .stream() + .filter(s -> s.getStream().getName().equals(stream.getName()) && StringUtils.equals(s.getStream().getNamespace(), stream.getNamespace())) + .findFirst() + .orElseThrow(() -> new RuntimeException(String.format("No such stream %s.%s", stream.getNamespace(), stream.getName()))) + .getStream() + 
.getJsonSchema(), + config); + } else { + csvSheetGenerator = new StagingDatabaseCsvSheetGenerator(); + } + return new CsvSerializedBuffer(createStorageFunction.call(), csvSheetGenerator); + }; + } + +} diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvFormatConfig.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvFormatConfig.java index eaa3d1091723..d3647f045f67 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvFormatConfig.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvFormatConfig.java @@ -47,7 +47,7 @@ public String getValue() { public S3CsvFormatConfig(final JsonNode formatConfig) { this( - Flattening.fromValue(formatConfig.get("flattening").asText()), + Flattening.fromValue(formatConfig.has("flattening") ? formatConfig.get("flattening").asText() : Flattening.NO.value), formatConfig.get(PART_SIZE_MB_ARG_NAME) != null ? formatConfig.get(PART_SIZE_MB_ARG_NAME).asLong() : null); } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/StagingDatabaseCsvSheetGenerator.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/StagingDatabaseCsvSheetGenerator.java index 4f3e137f6186..e194ebf7c7c0 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/StagingDatabaseCsvSheetGenerator.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/StagingDatabaseCsvSheetGenerator.java @@ -45,7 +45,7 @@ public List getDataRow(final UUID id, final AirbyteRecordMessage recordM } @Override - public List getDataRow(JsonNode formattedData) { + public List getDataRow(final JsonNode formattedData) { return new LinkedList<>(Collections.singletonList(Jsons.serialize(formattedData))); } diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformerTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformerTest.java new file mode 100644 index 000000000000..780502d769fb --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/avro/AvroNameTransformerTest.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.s3.avro; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.Map; +import org.junit.jupiter.api.Test; + +class AvroNameTransformerTest { + + private static final AvroNameTransformer INSTANCE = new AvroNameTransformer(); + private static final Map RAW_TO_NORMALIZED_IDENTIFIERS = Map.of( + "name-space", "name_space", + "spécial_character", "special_character", + "99namespace", "_99namespace"); + + private static final Map RAW_TO_NORMALIZED_NAMESPACES = Map.of( + "", "", + "name-space1.name-space2.namespace3", "name_space1.name_space2.namespace3", + "namespace1.spécial_character", "namespace1.special_character", + "99namespace.namespace2", "_99namespace.namespace2"); + + @Test + public void testGetIdentifier() { + assertNull(INSTANCE.getIdentifier(null)); + assertNull(INSTANCE.convertStreamName(null)); + RAW_TO_NORMALIZED_IDENTIFIERS.forEach((raw, normalized) -> { + assertEquals(normalized, INSTANCE.getIdentifier(raw)); + assertEquals(normalized, INSTANCE.convertStreamName(raw)); + }); + } + + @Test + public void testGetNamespace() { + assertNull(INSTANCE.getNamespace(null)); + RAW_TO_NORMALIZED_NAMESPACES.forEach((raw, normalized) -> { + assertEquals(normalized, INSTANCE.getNamespace(raw)); + }); + } + +} diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/csv/CsvSerializedBufferTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/csv/CsvSerializedBufferTest.java new file mode 100644 index 000000000000..6ae0baed0856 --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/csv/CsvSerializedBufferTest.java @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.s3.csv; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.base.AirbyteStreamNameNamespacePair; +import io.airbyte.integrations.destination.record_buffer.BufferStorage; +import io.airbyte.integrations.destination.record_buffer.FileBuffer; +import io.airbyte.integrations.destination.record_buffer.InMemoryBuffer; +import io.airbyte.protocol.models.AirbyteRecordMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.io.File; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.zip.GZIPInputStream; +import org.apache.commons.csv.CSVFormat; +import org.junit.jupiter.api.Test; + +public class CsvSerializedBufferTest { + + private static final JsonNode MESSAGE_DATA = Jsons.jsonNode(Map.of( + "field1", 10000, + "column2", "string value", + "another field", true, + "nested_column", Map.of( + "column", "value", + "array_column", List.of(1, 2, 3)))); + private static final String STREAM = "stream1"; + private static final AirbyteStreamNameNamespacePair streamPair = new AirbyteStreamNameNamespacePair(STREAM, null); + private static final AirbyteRecordMessage message = new AirbyteRecordMessage() + .withStream(STREAM) + .withData(MESSAGE_DATA) + .withEmittedAt(System.currentTimeMillis()); + private static final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class); + + @Test + public void testUncompressedDefaultCsvFormatWriter() throws Exception { + runTest(new InMemoryBuffer(), CSVFormat.DEFAULT, false, 395L, 405L); + } + + @Test + public void testUncompressedCsvWriter() throws Exception { + runTest(new InMemoryBuffer(), CSVFormat.newFormat(','), false, 355L, 365L); + } + + @Test + public void testCompressedCsvWriter() throws Exception { + runTest(new InMemoryBuffer(), CSVFormat.newFormat(','), true, 175L, 190L); + } + + @Test + public void testCompressedCsvFileWriter() throws Exception { + runTest(new FileBuffer(), CSVFormat.newFormat(','), true, 175L, 190L); + } + + private static void runTest(final BufferStorage buffer, + final CSVFormat csvFormat, + final boolean withCompression, + final Long minExpectedByte, + final Long maxExpectedByte) + throws Exception { + final File outputFile = buffer.getFile(); + try (final CsvSerializedBuffer writer = (CsvSerializedBuffer) CsvSerializedBuffer + .createFunction(null, () -> buffer) + .apply(streamPair, catalog)) { + writer.withCsvFormat(csvFormat); + writer.withCompression(withCompression); + writer.accept(message); + writer.accept(message); + writer.flush(); + // some data are randomized (uuid, timestamp, compression?) 
so the expected byte count is not always + // deterministic + assertTrue(minExpectedByte <= writer.getByteCount() && writer.getByteCount() <= maxExpectedByte, + String.format("Expected size between %d and %d, but actual size was %d", + minExpectedByte, maxExpectedByte, writer.getByteCount())); + String expectedData = Jsons.serialize(MESSAGE_DATA); + if (csvFormat.equals(CSVFormat.DEFAULT)) { + expectedData = "\"" + expectedData.replace("\"", "\"\"") + "\""; + } + final InputStream inputStream; + if (withCompression) { + inputStream = new GZIPInputStream(writer.getInputStream()); + } else { + inputStream = writer.getInputStream(); + } + final String actualData = new String(inputStream.readAllBytes(), StandardCharsets.UTF_8) + // remove the UUID string at the beginning + .substring(UUID.randomUUID().toString().length() + 1) + // remove the last part of the string with random timestamp + .substring(0, expectedData.length()); + assertEquals(expectedData, actualData); + } + assertFalse(outputFile.exists()); + } + +} diff --git a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json index 1a490a2835c2..5bd54d969dc2 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json +++ b/airbyte-integrations/connectors/destination-s3/src/test/resources/parquet/json_schema_converter/json_conversion_test_cases.json @@ -1562,5 +1562,48 @@ "array_field": ["1234", "true", "false", "0.001"], "_airbyte_additional_properties": null } + }, + { + "schemaName": "namespace_with_special_characters", + "namespace": "namespace_with:spécial:characters", + "appendAirbyteFields": false, + "jsonSchema": { + "type": "object", + "properties": { + "node_id": { + "type": ["null", "string"] + } + } + }, + "jsonObject": { + "node_id": "abc123" + }, + "avroSchema": { + "type": "record", + "name": "namespace_with_special_characters", + "namespace": "namespace_with_special_characters", + "fields": [ + { + "name": "node_id", + "type": ["null", "string"], + "default": null + }, + { + "name": "_airbyte_additional_properties", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ], + "default": null + } + ] + }, + "avroObject": { + "node_id": "abc123", + "_airbyte_additional_properties": null + } } ] diff --git a/airbyte-integrations/connectors/destination-snowflake/Dockerfile b/airbyte-integrations/connectors/destination-snowflake/Dockerfile index f2354f40aaa0..8a7cae7a2d9f 100644 --- a/airbyte-integrations/connectors/destination-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/destination-snowflake/Dockerfile @@ -20,5 +20,5 @@ RUN tar xf ${APPLICATION}.tar --strip-components=1 ENV ENABLE_SENTRY true -LABEL io.airbyte.version=0.4.20 +LABEL io.airbyte.version=0.4.22 LABEL io.airbyte.name=airbyte/destination-snowflake diff --git a/airbyte-integrations/connectors/destination-snowflake/build.gradle b/airbyte-integrations/connectors/destination-snowflake/build.gradle index 4cfb02d451f9..d31f433dfc5b 100644 --- a/airbyte-integrations/connectors/destination-snowflake/build.gradle +++ b/airbyte-integrations/connectors/destination-snowflake/build.gradle @@ -10,6 +10,7 @@ application { applicationDefaultJvmArgs = [ '-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0', +// '-Xmx2000m', // '-XX:NativeMemoryTracking=detail', // 
"-Djava.rmi.server.hostname=localhost", // '-Dcom.sun.management.jmxremote=true', diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationResolver.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationResolver.java index ca1f684b565a..2145eee51628 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationResolver.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationResolver.java @@ -38,13 +38,13 @@ public static boolean isAzureBlobCopy(final JsonNode config) { } public static Map getTypeToDestination() { - final SnowflakeCopyS3Destination copyS3Destination = new SnowflakeCopyS3Destination(); + final SnowflakeS3StagingDestination s3StagingDestination = new SnowflakeS3StagingDestination(); final SnowflakeCopyGcsDestination copyGcsDestination = new SnowflakeCopyGcsDestination(); final SnowflakeInternalStagingDestination internalStagingDestination = new SnowflakeInternalStagingDestination(); final SnowflakeCopyAzureBlobStorageDestination azureBlobStorageDestination = new SnowflakeCopyAzureBlobStorageDestination(); return ImmutableMap.of( - DestinationType.COPY_S3, copyS3Destination, + DestinationType.COPY_S3, s3StagingDestination, DestinationType.COPY_GCS, copyGcsDestination, DestinationType.COPY_AZURE_BLOB, azureBlobStorageDestination, DestinationType.INTERNAL_STAGING, internalStagingDestination); diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java index f0ee2f98978c..bdcad13abc59 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestination.java @@ -12,6 +12,8 @@ import io.airbyte.integrations.base.sentry.AirbyteSentry; import io.airbyte.integrations.destination.NamingConventionTransformer; import io.airbyte.integrations.destination.jdbc.AbstractJdbcDestination; +import io.airbyte.integrations.destination.record_buffer.FileBuffer; +import io.airbyte.integrations.destination.s3.csv.CsvSerializedBuffer; import io.airbyte.integrations.destination.staging.StagingConsumerFactory; import io.airbyte.protocol.models.AirbyteConnectionStatus; import io.airbyte.protocol.models.AirbyteMessage; @@ -87,8 +89,14 @@ public JsonNode toJdbcConfig(final JsonNode config) { public AirbyteMessageConsumer getConsumer(final JsonNode config, final ConfiguredAirbyteCatalog catalog, final Consumer outputRecordCollector) { - return new StagingConsumerFactory().create(outputRecordCollector, getDatabase(config), - new SnowflakeInternalStagingSqlOperations(getNamingResolver()), getNamingResolver(), config, catalog); + return new StagingConsumerFactory().create( + outputRecordCollector, + getDatabase(config), + new SnowflakeInternalStagingSqlOperations(getNamingResolver()), + getNamingResolver(), + CsvSerializedBuffer.createFunction(null, 
FileBuffer::new), + config, + catalog); } } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingSqlOperations.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingSqlOperations.java index 2fd44c800f42..fbb548025bdc 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingSqlOperations.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingSqlOperations.java @@ -4,30 +4,39 @@ package io.airbyte.integrations.destination.snowflake; +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.string.Strings; import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.base.sentry.AirbyteSentry; import io.airbyte.integrations.destination.NamingConventionTransformer; +import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; import io.airbyte.integrations.destination.staging.StagingOperations; -import io.airbyte.protocol.models.AirbyteRecordMessage; -import java.io.File; -import java.nio.file.Files; +import java.io.IOException; import java.sql.SQLException; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.UUID; -import org.apache.commons.lang3.NotImplementedException; +import java.util.stream.Stream; import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class SnowflakeInternalStagingSqlOperations extends SnowflakeSqlOperations implements StagingOperations { - public static final String CREATE_STAGE_QUERY = + private static final int UPLOAD_RETRY_LIMIT = 3; + + private static final String CREATE_STAGE_QUERY = "CREATE STAGE IF NOT EXISTS %s encryption = (type = 'SNOWFLAKE_SSE') copy_options = (on_error='skip_file');"; - public static final String COPY_QUERY = "COPY INTO %s.%s FROM @%s file_format = " + - "(type = csv field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"')"; - public static final String DROP_STAGE_QUERY = "DROP STAGE IF EXISTS %s;"; + private static final String PUT_FILE_QUERY = "PUT file://%s @%s/%s PARALLEL = %d;"; + private static final String LIST_STAGE_QUERY = "LIST @%s/%s%s;"; + private static final String COPY_QUERY = "COPY INTO %s.%s FROM '@%s/%s' " + + "file_format = (type = csv compression = auto field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"')"; + private static final String DROP_STAGE_QUERY = "DROP STAGE IF EXISTS %s;"; + private static final String REMOVE_QUERY = "REMOVE @%s;"; + private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeSqlOperations.class); + private final NamingConventionTransformer nameTransformer; public SnowflakeInternalStagingSqlOperations(final NamingConventionTransformer nameTransformer) { @@ -42,10 +51,9 @@ public String getStageName(final String namespace, final String streamName) { } @Override - public String getStagingPath(final String connectionId, final String namespace, final String streamName, final DateTime writeDatetime) { + public String getStagingPath(final UUID connectionId, final String namespace, final String streamName, final DateTime writeDatetime) { // see https://docs.snowflake.com/en/user-guide/data-load-considerations-stage.html - return 
nameTransformer.applyDefaultCase(String.format("%s/%s/%02d/%02d/%02d/%s/", - getStageName(namespace, streamName), + return nameTransformer.applyDefaultCase(String.format("%s/%02d/%02d/%02d/%s/", writeDatetime.year().get(), writeDatetime.monthOfYear().get(), writeDatetime.dayOfMonth().get(), @@ -54,74 +62,131 @@ public String getStagingPath(final String connectionId, final String namespace, } @Override - public void uploadRecordsToStage(final JdbcDatabase database, final File dataFile, final String schemaName, final String path) throws Exception { - throw new NotImplementedException("placeholder function is not implemented yet"); + public String uploadRecordsToStage(final JdbcDatabase database, + final SerializableBuffer recordsData, + final String namespace, + final String stageName, + final String stagingPath) + throws IOException { + AirbyteSentry.executeWithTracing("UploadRecordsToStage", + () -> { + final List exceptionsThrown = new ArrayList<>(); + boolean succeeded = false; + while (exceptionsThrown.size() < UPLOAD_RETRY_LIMIT && !succeeded) { + try { + loadDataIntoStage(database, stageName, stagingPath, recordsData); + succeeded = true; + } catch (final Exception e) { + LOGGER.error("Failed to upload records into stage {}", stagingPath, e); + exceptionsThrown.add(e); + } + if (!succeeded) { + LOGGER.info("Retrying to upload records into stage {} ({}/{}})", stagingPath, exceptionsThrown.size(), UPLOAD_RETRY_LIMIT); + } + } + if (!succeeded) { + throw new RuntimeException( + String.format("Exceptions thrown while uploading records into stage: %s", Strings.join(exceptionsThrown, "\n"))); + } + }, Map.of("stage", stageName, "path", stagingPath)); + return recordsData.getFilename(); } - @Override - public void insertRecordsInternal(final JdbcDatabase database, - final List records, - final String schemaName, - final String stage) { - LOGGER.info("Writing {} records to {}", records.size(), stage); - - if (records.isEmpty()) { - return; + private void loadDataIntoStage(final JdbcDatabase database, final String stageName, final String stagingPath, final SerializableBuffer recordsData) + throws Exception { + final String query = getPutQuery(stageName, stagingPath, recordsData.getFile().getAbsolutePath()); + LOGGER.debug("Executing query: {}", query); + database.execute(query); + if (!checkStageObjectExists(database, stageName, stagingPath, recordsData.getFilename())) { + LOGGER.error(String.format("Failed to upload data into stage, object @%s/%s not found", stagingPath, recordsData.getFilename())); + throw new RuntimeException("Upload failed"); } - try { - loadDataIntoStage(database, stage, records); - } catch (final Exception e) { - LOGGER.error("Failed to upload records into stage {}", stage, e); - throw new RuntimeException(e); + } + + protected String getPutQuery(final String stageName, final String stagingPath, final String filePath) { + return String.format(PUT_FILE_QUERY, filePath, stageName, stagingPath, Runtime.getRuntime().availableProcessors()); + } + + private boolean checkStageObjectExists(final JdbcDatabase database, final String stageName, final String stagingPath, final String filename) + throws SQLException { + final String query = getListQuery(stageName, stagingPath, filename); + LOGGER.debug("Executing query: {}", query); + final boolean result; + try (final Stream stream = database.unsafeQuery(query)) { + result = stream.findAny().isPresent(); } + return result; } - private void loadDataIntoStage(final JdbcDatabase database, final String stage, final List partition) 
throws Exception { - final File tempFile = Files.createTempFile(UUID.randomUUID().toString(), ".csv").toFile(); - writeBatchToFile(tempFile, partition); - database.execute(String.format("PUT file://%s @%s PARALLEL = %d", tempFile.getAbsolutePath(), stage, Runtime.getRuntime().availableProcessors())); - Files.delete(tempFile.toPath()); + protected String getListQuery(final String stageName, final String stagingPath, final String filename) { + return String.format(LIST_STAGE_QUERY, stageName, stagingPath, filename); } @Override - public void createStageIfNotExists(final JdbcDatabase database, final String stageName) throws SQLException { + public void createStageIfNotExists(final JdbcDatabase database, final String stageName) throws Exception { + final String query = getCreateStageQuery(stageName); + LOGGER.debug("Executing query: {}", query); AirbyteSentry.executeWithTracing("CreateStageIfNotExists", - () -> database.execute(getCreateStageQuery(stageName)), + () -> database.execute(query), Map.of("stage", stageName)); } + protected String getCreateStageQuery(final String stageName) { + return String.format(CREATE_STAGE_QUERY, stageName); + } + @Override - public void copyIntoTmpTableFromStage(final JdbcDatabase database, final String stageName, final String dstTableName, final String schemaName) + public void copyIntoTmpTableFromStage(final JdbcDatabase database, + final String stageName, + final String stagingPath, + final List stagedFiles, + final String dstTableName, + final String schemaName) throws SQLException { + final String query = getCopyQuery(stageName, stagingPath, stagedFiles, dstTableName, schemaName); + LOGGER.debug("Executing query: {}", query); AirbyteSentry.executeWithTracing("CopyIntoTableFromStage", - () -> database.execute(getCopyQuery(stageName, dstTableName, schemaName)), - Map.of("schema", schemaName, "stage", stageName, "table", dstTableName)); + () -> database.execute(query), + Map.of( + "schema", schemaName, + "stage", stageName, + "path", stagingPath, + "files", String.join(",", stagedFiles), + "table", dstTableName)); } - String getCreateStageQuery(String stageName) { - return String.format(CREATE_STAGE_QUERY, stageName); - } - - String getCopyQuery(String stageName, String dstTableName, String schemaName) { - return String.format(COPY_QUERY, schemaName, dstTableName, stageName); + protected String getCopyQuery(final String stageName, + final String stagingPath, + final List stagedFiles, + final String dstTableName, + final String schemaName) { + return String.format(COPY_QUERY + generateFilesList(stagedFiles) + ";", schemaName, dstTableName, stageName, stagingPath); } @Override - public void dropStageIfExists(final JdbcDatabase database, final String stageName) throws SQLException { + public void dropStageIfExists(final JdbcDatabase database, final String stageName) throws Exception { + final String query = getDropQuery(stageName); + LOGGER.debug("Executing query: {}", query); AirbyteSentry.executeWithTracing("DropStageIfExists", - () -> database.execute(getDropQuery(stageName)), + () -> database.execute(query), Map.of("stage", stageName)); } - String getDropQuery(String stageName) { + protected String getDropQuery(final String stageName) { return String.format(DROP_STAGE_QUERY, stageName); } @Override - public void cleanUpStage(final JdbcDatabase database, final String path) throws SQLException { + public void cleanUpStage(final JdbcDatabase database, final String stageName, final List stagedFiles) throws Exception { + final String query = 
getRemoveQuery(stageName); + LOGGER.debug("Executing query: {}", query); AirbyteSentry.executeWithTracing("CleanStage", - () -> database.execute(String.format("REMOVE @%s;", path)), - Map.of("path", path)); + () -> database.execute(query), + Map.of("stage", stageName)); + } + + protected String getRemoveQuery(final String stageName) { + return String.format(REMOVE_QUERY, stageName); } } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StagingDestination.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StagingDestination.java new file mode 100644 index 000000000000..f1a01be5f638 --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StagingDestination.java @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.snowflake; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.commons.json.Jsons; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.base.AirbyteMessageConsumer; +import io.airbyte.integrations.base.Destination; +import io.airbyte.integrations.base.sentry.AirbyteSentry; +import io.airbyte.integrations.destination.NamingConventionTransformer; +import io.airbyte.integrations.destination.jdbc.AbstractJdbcDestination; +import io.airbyte.integrations.destination.record_buffer.FileBuffer; +import io.airbyte.integrations.destination.s3.S3DestinationConfig; +import io.airbyte.integrations.destination.s3.csv.CsvSerializedBuffer; +import io.airbyte.integrations.destination.staging.StagingConsumerFactory; +import io.airbyte.protocol.models.AirbyteConnectionStatus; +import io.airbyte.protocol.models.AirbyteMessage; +import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; +import java.util.Collections; +import java.util.Map; +import java.util.UUID; +import java.util.function.Consumer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SnowflakeS3StagingDestination extends AbstractJdbcDestination implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeS3StagingDestination.class); + + public SnowflakeS3StagingDestination() { + this(new SnowflakeSQLNameTransformer()); + } + + public SnowflakeS3StagingDestination(final SnowflakeSQLNameTransformer nameTransformer) { + super("", nameTransformer, new SnowflakeSqlOperations()); + } + + @Override + public AirbyteConnectionStatus check(final JsonNode config) { + final S3DestinationConfig s3Config = getS3DestinationConfig(config); + final NamingConventionTransformer nameTransformer = getNamingResolver(); + final SnowflakeS3StagingSqlOperations SnowflakeS3StagingSqlOperations = + new SnowflakeS3StagingSqlOperations(nameTransformer, s3Config.getS3Client(), s3Config); + try (final JdbcDatabase database = getDatabase(config)) { + final String outputSchema = super.getNamingResolver().getIdentifier(config.get("schema").asText()); + AirbyteSentry.executeWithTracing("CreateAndDropTable", + () -> attemptSQLCreateAndDropTableOperations(outputSchema, database, nameTransformer, SnowflakeS3StagingSqlOperations)); + AirbyteSentry.executeWithTracing("CreateAndDropStage", + () -> attemptSQLCreateAndDropStages(outputSchema, database, nameTransformer, SnowflakeS3StagingSqlOperations)); + return new 
AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED); + } catch (final Exception e) { + LOGGER.error("Exception while checking connection: ", e); + return new AirbyteConnectionStatus() + .withStatus(AirbyteConnectionStatus.Status.FAILED) + .withMessage("Could not connect with provided configuration. \n" + e.getMessage()); + } + } + + private static void attemptSQLCreateAndDropStages(final String outputSchema, + final JdbcDatabase database, + final NamingConventionTransformer namingResolver, + final SnowflakeS3StagingSqlOperations sqlOperations) + throws Exception { + + // verify we have permissions to create/drop stage + final String outputTableName = namingResolver.getIdentifier("_airbyte_connection_test_" + UUID.randomUUID()); + final String stageName = sqlOperations.getStageName(outputSchema, outputTableName); + sqlOperations.createStageIfNotExists(database, stageName); + sqlOperations.dropStageIfExists(database, stageName); + } + + @Override + protected JdbcDatabase getDatabase(final JsonNode config) { + return SnowflakeDatabase.getDatabase(config); + } + + @Override + protected Map getDefaultConnectionProperties(final JsonNode config) { + return Collections.emptyMap(); + } + + // this is a no op since we override getDatabase. + @Override + public JsonNode toJdbcConfig(final JsonNode config) { + return Jsons.emptyObject(); + } + + @Override + public AirbyteMessageConsumer getConsumer(final JsonNode config, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector) { + final S3DestinationConfig s3Config = getS3DestinationConfig(config); + return new StagingConsumerFactory().create( + outputRecordCollector, + getDatabase(config), + new SnowflakeS3StagingSqlOperations(getNamingResolver(), s3Config.getS3Client(), s3Config), + getNamingResolver(), + CsvSerializedBuffer.createFunction(null, FileBuffer::new), + config, + catalog); + } + + private S3DestinationConfig getS3DestinationConfig(final JsonNode config) { + final JsonNode loadingMethod = config.get("loading_method"); + return S3DestinationConfig.getS3DestinationConfig(loadingMethod); + } + +} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StagingSqlOperations.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StagingSqlOperations.java new file mode 100644 index 000000000000..f5602d357c8b --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StagingSqlOperations.java @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
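(Editorial aside, not part of the diff.) The internal-staging operations a few hunks above reduce to a handful of templated SQL statements: CREATE STAGE, PUT, LIST, COPY INTO, and DROP/REMOVE. As a minimal sketch of what those templates render to, the hypothetical same-package test below calls the protected query builders directly; the stage, path, file, and table names are invented for illustration, and the exact PARALLEL value depends on the host's processor count, so only prefixes are asserted for the PUT and COPY statements.

```java
package io.airbyte.integrations.destination.snowflake;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.List;
import org.junit.jupiter.api.Test;

class SnowflakeInternalStagingQuerySketchTest {

  private final SnowflakeInternalStagingSqlOperations operations =
      new SnowflakeInternalStagingSqlOperations(new SnowflakeSQLNameTransformer());

  @Test
  void rendersTheStagingStatements() {
    // Server-side-encrypted stage that skips files which fail to load.
    assertEquals(
        "CREATE STAGE IF NOT EXISTS MY_STAGE encryption = (type = 'SNOWFLAKE_SSE') copy_options = (on_error='skip_file');",
        operations.getCreateStageQuery("MY_STAGE"));

    // PUT uploads the local buffer file into the date-partitioned staging path.
    assertTrue(operations.getPutQuery("MY_STAGE", "2021/12/01/09/connection-id/", "/tmp/records.csv.gz")
        .startsWith("PUT file:///tmp/records.csv.gz @MY_STAGE/2021/12/01/09/connection-id/ PARALLEL ="));

    // COPY INTO loads only the files this sync staged into the tmp table.
    final String copyQuery = operations.getCopyQuery(
        "MY_STAGE", "2021/12/01/09/connection-id/", List.of("records.csv.gz"), "_AIRBYTE_TMP_USERS", "MY_SCHEMA");
    assertTrue(copyQuery.startsWith("COPY INTO MY_SCHEMA._AIRBYTE_TMP_USERS FROM '@MY_STAGE/2021/12/01/09/connection-id/'"));

    assertEquals("DROP STAGE IF EXISTS MY_STAGE;", operations.getDropQuery("MY_STAGE"));
    assertEquals("REMOVE @MY_STAGE;", operations.getRemoveQuery("MY_STAGE"));
  }
}
```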
+ */ + +package io.airbyte.integrations.destination.snowflake; + +import com.amazonaws.services.s3.AmazonS3; +import io.airbyte.commons.lang.Exceptions; +import io.airbyte.db.jdbc.JdbcDatabase; +import io.airbyte.integrations.base.sentry.AirbyteSentry; +import io.airbyte.integrations.destination.NamingConventionTransformer; +import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; +import io.airbyte.integrations.destination.s3.S3DestinationConfig; +import io.airbyte.integrations.destination.s3.S3StorageOperations; +import io.airbyte.integrations.destination.staging.StagingOperations; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.joda.time.DateTime; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SnowflakeS3StagingSqlOperations extends SnowflakeSqlOperations implements StagingOperations { + + private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeSqlOperations.class); + private static final String COPY_QUERY = "COPY INTO %s.%s FROM '%s' " + + "CREDENTIALS=(aws_key_id='%s' aws_secret_key='%s') " + + "file_format = (type = csv compression = auto field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"')"; + + private final NamingConventionTransformer nameTransformer; + private final S3StorageOperations s3StorageOperations; + private final S3DestinationConfig s3Config; + + public SnowflakeS3StagingSqlOperations(final NamingConventionTransformer nameTransformer, + final AmazonS3 s3Client, + final S3DestinationConfig s3Config) { + this.nameTransformer = nameTransformer; + this.s3StorageOperations = new S3StorageOperations(nameTransformer, s3Client, s3Config); + this.s3Config = s3Config; + } + + @Override + public String getStageName(final String namespace, final String streamName) { + return nameTransformer.applyDefaultCase(String.join("_", + nameTransformer.convertStreamName(namespace), + nameTransformer.convertStreamName(streamName))); + } + + @Override + public String getStagingPath(final UUID connectionId, final String namespace, final String streamName, final DateTime writeDatetime) { + // see https://docs.snowflake.com/en/user-guide/data-load-considerations-stage.html + return nameTransformer.applyDefaultCase(String.format("%s/%s/%02d/%02d/%02d/%s/", + getStageName(namespace, streamName), + writeDatetime.year().get(), + writeDatetime.monthOfYear().get(), + writeDatetime.dayOfMonth().get(), + writeDatetime.hourOfDay().get(), + connectionId)); + } + + @Override + public String uploadRecordsToStage(final JdbcDatabase database, + final SerializableBuffer recordsData, + final String schemaName, + final String stageName, + final String stagingPath) + throws Exception { + AirbyteSentry.executeWithTracing("UploadRecordsToStage", + () -> s3StorageOperations.uploadRecordsToBucket(recordsData, schemaName, stageName, stagingPath), + Map.of("stage", stageName, "path", stagingPath)); + return recordsData.getFilename(); + } + + @Override + public void createStageIfNotExists(final JdbcDatabase database, final String stageName) { + AirbyteSentry.executeWithTracing("CreateStageIfNotExists", + () -> s3StorageOperations.createBucketObjectIfNotExists(stageName), + Map.of("stage", stageName)); + } + + @Override + public void copyIntoTmpTableFromStage(final JdbcDatabase database, + final String stageName, + final String stagingPath, + final List stagedFiles, + final String dstTableName, + final String schemaName) { + LOGGER.info("Starting copy to tmp table from stage: {} in destination from stage: 
{}, schema: {}, .", dstTableName, stagingPath, schemaName); + // Print actual SQL query if user needs to manually force reload from staging + AirbyteSentry.executeWithTracing("CopyIntoTableFromStage", + () -> Exceptions.toRuntime(() -> database.execute(getCopyQuery(stageName, stagingPath, stagedFiles, dstTableName, schemaName))), + Map.of("schema", schemaName, "path", stagingPath, "table", dstTableName)); + LOGGER.info("Copy to tmp table {}.{} in destination complete.", schemaName, dstTableName); + } + + protected String getCopyQuery(final String stageName, + final String stagingPath, + final List stagedFiles, + final String dstTableName, + final String schemaName) { + return String.format(COPY_QUERY + generateFilesList(stagedFiles) + ";", + schemaName, + dstTableName, + generateBucketPath(stageName, stagingPath), + s3Config.getAccessKeyId(), + s3Config.getSecretAccessKey()); + } + + private String generateBucketPath(final String stageName, final String stagingPath) { + return "s3://" + s3Config.getBucketName() + "/" + stagingPath; + } + + @Override + public void dropStageIfExists(final JdbcDatabase database, final String stageName) { + AirbyteSentry.executeWithTracing("DropStageIfExists", + () -> s3StorageOperations.dropBucketObject(stageName), + Map.of("stage", stageName)); + } + + @Override + public void cleanUpStage(final JdbcDatabase database, final String stageName, final List stagedFiles) { + AirbyteSentry.executeWithTracing("CleanStage", + () -> s3StorageOperations.cleanUpBucketObject(stageName, stagedFiles), + Map.of("stage", stageName)); + } + +} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java index 5a7d414aa63a..302969d0707f 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopier.java @@ -79,7 +79,7 @@ public SnowflakeS3StreamCopier(final String stagingFolder, @Override public void copyStagingFileToTemporaryTable() throws Exception { - List> partitions = Lists.partition(new ArrayList<>(stagingWritersByFile.keySet()), MAX_FILES_PER_COPY); + List> partitions = Lists.partition(new ArrayList<>(getStagingFiles()), MAX_FILES_PER_COPY); LOGGER.info("Starting parallel copy to tmp table: {} in destination for stream: {}, schema: {}. 
Chunks count {}", tmpTableName, streamName, schemaName, partitions.size()); diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java index cea4bf0b88ea..86a2c0f14269 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSQLNameTransformer.java @@ -13,4 +13,21 @@ public String applyDefaultCase(final String input) { return input.toUpperCase(); } + /** + * The first character can only be alphanumeric or an underscore. + */ + @Override + public String convertStreamName(final String input) { + if (input == null) { + return null; + } + + final String normalizedName = super.convertStreamName(input); + if (normalizedName.substring(0, 1).matches("[A-Za-z_]")) { + return normalizedName; + } else { + return "_" + normalizedName; + } + } + } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperations.java b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperations.java index 3e624d41940a..ef4be90e93df 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperations.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperations.java @@ -13,6 +13,7 @@ import io.airbyte.protocol.models.AirbyteRecordMessage; import java.sql.SQLException; import java.util.List; +import java.util.StringJoiner; import java.util.stream.Stream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -20,6 +21,7 @@ class SnowflakeSqlOperations extends JdbcSqlOperations implements SqlOperations { private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeSqlOperations.class); + private static final int MAX_FILES_IN_LOADING_QUERY_LIMIT = 1000; @Override public String createTableQuery(final JdbcDatabase database, final String schemaName, final String tableName) { @@ -33,8 +35,8 @@ public String createTableQuery(final JdbcDatabase database, final String schemaN } @Override - public boolean isSchemaExists(JdbcDatabase database, String outputSchema) throws Exception { - try (final Stream results = database.query(SHOW_SCHEMAS)) { + public boolean isSchemaExists(final JdbcDatabase database, final String outputSchema) throws Exception { + try (final Stream results = database.unsafeQuery(SHOW_SCHEMAS)) { return results.map(schemas -> schemas.get(NAME).asText()).anyMatch(outputSchema::equalsIgnoreCase); } } @@ -60,4 +62,15 @@ public void insertRecordsInternal(final JdbcDatabase database, SqlOperationsUtils.insertRawRecordsInSingleQuery(insertQuery, recordQuery, database, records); } + protected String generateFilesList(final List files) { + if (0 < files.size() && files.size() < MAX_FILES_IN_LOADING_QUERY_LIMIT) { + // see https://docs.snowflake.com/en/user-guide/data-load-considerations-load.html#lists-of-files + final StringJoiner joiner = new StringJoiner(","); + files.forEach(filename -> joiner.add("'" + 
filename.substring(filename.lastIndexOf("/") + 1) + "'")); + return " files = (" + joiner + ")"; + } else { + return ""; + } + } + } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java index 928f281befa0..28cb59fc4d44 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInsertDestinationAcceptanceTest.java @@ -18,20 +18,20 @@ import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.db.jdbc.JdbcUtils; import io.airbyte.integrations.base.JavaBaseConstants; -import io.airbyte.integrations.destination.ExtendedNameTransformer; +import io.airbyte.integrations.destination.NamingConventionTransformer; import io.airbyte.integrations.standardtest.destination.DataArgumentsProvider; import io.airbyte.integrations.standardtest.destination.DestinationAcceptanceTest; import io.airbyte.protocol.models.AirbyteCatalog; import io.airbyte.protocol.models.AirbyteMessage; import io.airbyte.protocol.models.CatalogHelpers; import io.airbyte.protocol.models.ConfiguredAirbyteCatalog; -import java.io.IOException; import java.nio.file.Path; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -40,11 +40,12 @@ public class SnowflakeInsertDestinationAcceptanceTest extends DestinationAcceptanceTest { + private static final NamingConventionTransformer NAME_TRANSFORMER = new SnowflakeSQLNameTransformer(); + // this config is based on the static config, and it contains a random // schema name that is different for each test run private JsonNode config; private JdbcDatabase database; - private final ExtendedNameTransformer namingResolver = new ExtendedNameTransformer(); @Override protected String getImageName() { @@ -76,7 +77,7 @@ protected List retrieveRecords(final TestDestinationEnv env, final String namespace, final JsonNode streamSchema) throws Exception { - return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namingResolver.getIdentifier(namespace)) + return retrieveRecordsFromTable(NAME_TRANSFORMER.getRawTableName(streamName), NAME_TRANSFORMER.getNamespace(namespace)) .stream() .map(j -> Jsons.deserialize(j.get(JavaBaseConstants.COLUMN_NAME_DATA.toUpperCase()).asText())) .collect(Collectors.toList()); @@ -97,11 +98,21 @@ protected boolean implementsNamespaces() { return true; } + @Override + protected boolean supportNamespaceTest() { + return true; + } + + @Override + protected Optional getNameTransformer() { + return Optional.of(NAME_TRANSFORMER); + } + @Override protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, final String streamName, final String namespace) throws Exception { - final String tableName = namingResolver.getIdentifier(streamName); - final String schema = namingResolver.getIdentifier(namespace); + final String tableName = 
NAME_TRANSFORMER.getIdentifier(streamName); + final String schema = NAME_TRANSFORMER.getNamespace(namespace); // Temporarily disabling the behavior of the ExtendedNameTransformer, see (issue #1785) so we don't // use quoted names // if (!tableName.startsWith("\"")) { @@ -114,7 +125,7 @@ protected List retrieveNormalizedRecords(final TestDestinationEnv test @Override protected List resolveIdentifier(final String identifier) { final List result = new ArrayList<>(); - final String resolved = namingResolver.getIdentifier(identifier); + final String resolved = NAME_TRANSFORMER.getIdentifier(identifier); result.add(identifier); result.add(resolved); if (!resolved.startsWith("\"")) { @@ -127,14 +138,15 @@ protected List resolveIdentifier(final String identifier) { private List retrieveRecordsFromTable(final String tableName, final String schema) throws SQLException { return database.bufferedResultSetQuery( connection -> { - final ResultSet tableInfo = connection.createStatement() - .executeQuery(String.format("SHOW TABLES LIKE '%s' IN SCHEMA %s;", tableName, schema)); - assertTrue(tableInfo.next()); - // check that we're creating permanent tables. DBT defaults to transient tables, which have - // `TRANSIENT` as the value for the `kind` column. - assertEquals("TABLE", tableInfo.getString("kind")); - return connection.createStatement() - .executeQuery(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schema, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)); + try (final ResultSet tableInfo = connection.createStatement() + .executeQuery(String.format("SHOW TABLES LIKE '%s' IN SCHEMA %s;", tableName, schema));) { + assertTrue(tableInfo.next()); + // check that we're creating permanent tables. DBT defaults to transient tables, which have + // `TRANSIENT` as the value for the `kind` column. 
+ assertEquals("TABLE", tableInfo.getString("kind")); + return connection.createStatement() + .executeQuery(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schema, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT)); + } }, JdbcUtils.getDefaultSourceOperations()::rowToJson); } @@ -189,12 +201,4 @@ public void testSyncWithBillionRecords(final String messagesFilename, final Stri runSyncAndVerifyStateOutput(config, largeNumberRecords, configuredCatalog, false); } - private T parseConfig(final String path, final Class clazz) throws IOException { - return Jsons.deserialize(MoreResources.readResource(path), clazz); - } - - private JsonNode parseConfig(final String path) throws IOException { - return Jsons.deserialize(MoreResources.readResource(path)); - } - } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestinatiomAcceptanceTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestinationAcceptanceTest.java similarity index 82% rename from airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestinatiomAcceptanceTest.java rename to airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestinationAcceptanceTest.java index 53adb9c4eac4..a3b1295fef56 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestinatiomAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test-integration/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingDestinationAcceptanceTest.java @@ -8,9 +8,11 @@ import com.google.common.base.Preconditions; import io.airbyte.commons.io.IOs; import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.NamingConventionTransformer; import java.nio.file.Path; +import java.util.Optional; -public class SnowflakeInternalStagingDestinatiomAcceptanceTest extends SnowflakeInsertDestinationAcceptanceTest { +public class SnowflakeInternalStagingDestinationAcceptanceTest extends SnowflakeInsertDestinationAcceptanceTest { public JsonNode getStaticConfig() { final JsonNode internalStagingConfig = Jsons.deserialize(IOs.readFile(Path.of("secrets/internal_staging_config.json"))); diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java index 52ab30c6be48..33c670b57863 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeDestinationTest.java @@ -9,6 +9,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.params.provider.Arguments.arguments; +import static org.mockito.ArgumentMatchers.anyList; import static 
org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.any; import static org.mockito.Mockito.doThrow; @@ -26,6 +27,8 @@ import io.airbyte.db.jdbc.JdbcDatabase; import io.airbyte.integrations.base.AirbyteMessageConsumer; import io.airbyte.integrations.base.Destination; +import io.airbyte.integrations.destination.record_buffer.FileBuffer; +import io.airbyte.integrations.destination.s3.csv.CsvSerializedBuffer; import io.airbyte.integrations.destination.snowflake.SnowflakeDestination.DestinationType; import io.airbyte.integrations.destination.staging.StagingConsumerFactory; import io.airbyte.protocol.models.AirbyteMessage; @@ -38,6 +41,7 @@ import java.sql.SQLException; import java.time.Instant; import java.util.List; +import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.Stream; @@ -94,13 +98,14 @@ public void testCleanupStageOnFailure() throws Exception { final JdbcDatabase mockDb = mock(JdbcDatabase.class); final SnowflakeInternalStagingSqlOperations sqlOperations = mock(SnowflakeInternalStagingSqlOperations.class); when(sqlOperations.getStageName(anyString(), anyString())).thenReturn("stage_name"); - when(sqlOperations.getStagingPath(anyString(), anyString(), anyString(), any())).thenReturn("staging_path"); + when(sqlOperations.getStagingPath(any(UUID.class), anyString(), anyString(), any())).thenReturn("staging_path"); final var testMessages = generateTestMessages(); final JsonNode config = Jsons.deserialize(MoreResources.readResource("insert_config.json"), JsonNode.class); final AirbyteMessageConsumer airbyteMessageConsumer = new StagingConsumerFactory() .create(Destination::defaultOutputRecordCollector, mockDb, - sqlOperations, new SnowflakeSQLNameTransformer(), config, getCatalog()); - doThrow(SQLException.class).when(sqlOperations).copyIntoTmpTableFromStage(any(), anyString(), anyString(), anyString()); + sqlOperations, new SnowflakeSQLNameTransformer(), CsvSerializedBuffer.createFunction(null, FileBuffer::new), + config, getCatalog()); + doThrow(SQLException.class).when(sqlOperations).copyIntoTmpTableFromStage(any(), anyString(), anyString(), anyList(), anyString(), anyString()); airbyteMessageConsumer.start(); for (final AirbyteMessage m : testMessages) { @@ -108,14 +113,14 @@ public void testCleanupStageOnFailure() throws Exception { } assertThrows(RuntimeException.class, airbyteMessageConsumer::close); - verify(sqlOperations, times(1)).cleanUpStage(any(), anyString()); + verify(sqlOperations, times(1)).cleanUpStage(any(), anyString(), anyList()); } @ParameterizedTest @MethodSource("destinationTypeToConfig") - public void testS3ConfigType(String configFileName, DestinationType expectedDestinationType) throws Exception { + public void testS3ConfigType(final String configFileName, final DestinationType expectedDestinationType) throws Exception { final JsonNode config = Jsons.deserialize(MoreResources.readResource(configFileName), JsonNode.class); - DestinationType typeFromConfig = SnowflakeDestinationResolver.getTypeFromConfig(config); + final DestinationType typeFromConfig = SnowflakeDestinationResolver.getTypeFromConfig(config); assertEquals(expectedDestinationType, typeFromConfig); } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingSqlOperationsTest.java 
b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingSqlOperationsTest.java index a9d3cc3d7a75..df40e263721b 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingSqlOperationsTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeInternalStagingSqlOperationsTest.java @@ -5,37 +5,65 @@ package io.airbyte.integrations.destination.snowflake; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.List; import org.junit.jupiter.api.Test; class SnowflakeInternalStagingSqlOperationsTest { - public static final String SCHEMA_NAME = "schemaName"; - public static final String STAGE_NAME = "stageName"; + private static final String SCHEMA_NAME = "schemaName"; + private static final String STAGE_NAME = "stageName"; + private static final String STAGE_PATH = "stagePath/2022/"; + private static final String FILE_PATH = "filepath/filename"; + private final SnowflakeInternalStagingSqlOperations snowflakeStagingSqlOperations = new SnowflakeInternalStagingSqlOperations(new SnowflakeSQLNameTransformer()); @Test void createStageIfNotExists() { - String actualCreateStageQuery = snowflakeStagingSqlOperations.getCreateStageQuery(STAGE_NAME); - String expectedCreateStageQuery = + final String actualCreateStageQuery = snowflakeStagingSqlOperations.getCreateStageQuery(STAGE_NAME); + final String expectedCreateStageQuery = "CREATE STAGE IF NOT EXISTS " + STAGE_NAME + " encryption = (type = 'SNOWFLAKE_SSE') copy_options = (on_error='skip_file');"; assertEquals(expectedCreateStageQuery, actualCreateStageQuery); } + @Test + void putFileToStage() { + final String expectedQuery = "PUT file://" + FILE_PATH + " @" + STAGE_NAME + "/" + STAGE_PATH + " PARALLEL ="; + final String actualPutQuery = snowflakeStagingSqlOperations.getPutQuery(STAGE_NAME, STAGE_PATH, FILE_PATH); + assertTrue(actualPutQuery.startsWith(expectedQuery)); + } + + @Test + void listStage() { + final String expectedQuery = "LIST @" + STAGE_NAME + "/" + STAGE_PATH + FILE_PATH + ";"; + final String actualListQuery = snowflakeStagingSqlOperations.getListQuery(STAGE_NAME, STAGE_PATH, FILE_PATH); + assertEquals(expectedQuery, actualListQuery); + } + @Test void copyIntoTmpTableFromStage() { - String expectedQuery = "COPY INTO schemaName.tableName FROM @stageName file_format = " + - "(type = csv field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"')"; - String actualCopyQuery = snowflakeStagingSqlOperations.getCopyQuery(STAGE_NAME, "tableName", SCHEMA_NAME); + final String expectedQuery = "COPY INTO schemaName.tableName FROM '@" + STAGE_NAME + "/" + STAGE_PATH + "' " + + "file_format = (type = csv compression = auto field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"') " + + "files = ('filename1','filename2');"; + final String actualCopyQuery = + snowflakeStagingSqlOperations.getCopyQuery(STAGE_NAME, STAGE_PATH, List.of("filename1", "filename2"), "tableName", SCHEMA_NAME); assertEquals(expectedQuery, actualCopyQuery); } @Test void dropStageIfExists() { - String expectedQuery = "DROP STAGE IF EXISTS " + STAGE_NAME + ";"; - String actualDropQuery = snowflakeStagingSqlOperations.getDropQuery(STAGE_NAME); + final String expectedQuery = "DROP STAGE IF EXISTS " + STAGE_NAME + ";"; + 
final String actualDropQuery = snowflakeStagingSqlOperations.getDropQuery(STAGE_NAME); assertEquals(expectedQuery, actualDropQuery); } + @Test + void removeStage() { + final String expectedQuery = "REMOVE @" + STAGE_NAME + ";"; + final String actualRemoveQuery = snowflakeStagingSqlOperations.getRemoveQuery(STAGE_NAME); + assertEquals(expectedQuery, actualRemoveQuery); + } + } diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StagingSqlOperationsTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StagingSqlOperationsTest.java new file mode 100644 index 000000000000..f086021c424d --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StagingSqlOperationsTest.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.snowflake; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.amazonaws.services.s3.AmazonS3; +import io.airbyte.integrations.destination.s3.S3DestinationConfig; +import java.util.List; +import org.junit.jupiter.api.Test; + +class SnowflakeS3StagingSqlOperationsTest { + + private static final String SCHEMA_NAME = "schemaName"; + private static final String STAGE_NAME = "stageName"; + private static final String STAGE_PATH = "stagePath/2022/"; + private static final String TABLE_NAME = "tableName"; + private static final String BUCKET_NAME = "bucket_name"; + + private final AmazonS3 s3Client = mock(AmazonS3.class); + private final S3DestinationConfig s3Config = mock(S3DestinationConfig.class); + + private final SnowflakeS3StagingSqlOperations snowflakeStagingSqlOperations = + new SnowflakeS3StagingSqlOperations(new SnowflakeSQLNameTransformer(), s3Client, s3Config); + + @Test + void copyIntoTmpTableFromStage() { + final String expectedQuery = "COPY INTO " + SCHEMA_NAME + "." 
+ TABLE_NAME + " FROM 's3://" + BUCKET_NAME + "/" + STAGE_PATH + "' " + + "CREDENTIALS=(aws_key_id='aws_access_key_id' aws_secret_key='aws_secret_access_key') file_format = (type = csv compression = auto " + + "field_delimiter = ',' skip_header = 0 FIELD_OPTIONALLY_ENCLOSED_BY = '\"') files = ('filename1','filename2');"; + when(s3Config.getBucketName()).thenReturn(BUCKET_NAME); + when(s3Config.getAccessKeyId()).thenReturn("aws_access_key_id"); + when(s3Config.getSecretAccessKey()).thenReturn("aws_secret_access_key"); + final String actualCopyQuery = + snowflakeStagingSqlOperations.getCopyQuery(STAGE_NAME, STAGE_PATH, List.of("filename1", "filename2"), TABLE_NAME, SCHEMA_NAME); + assertEquals(expectedQuery, actualCopyQuery); + } + +} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java index 8171f65b5202..d8043bf8ab83 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeS3StreamCopierTest.java @@ -23,6 +23,8 @@ import java.time.Instant; import java.util.ArrayList; import java.util.List; +import java.util.Set; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -78,7 +80,11 @@ public void copiesCorrectFilesToTable() throws Exception { } copier.copyStagingFileToTemporaryTable(); - final List> partition = Lists.partition(new ArrayList<>(copier.getStagingWritersByFile().keySet()), 1000); + Set stagingFiles = copier.getStagingFiles(); + // check the use of all files for staging + Assertions.assertTrue(stagingFiles.size() > 1); + + final List> partition = Lists.partition(new ArrayList<>(stagingFiles), 1000); for (final List files : partition) { verify(db).execute(String.format( "COPY INTO fake-schema.%s FROM '%s' " diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlNameTransformerTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlNameTransformerTest.java new file mode 100644 index 000000000000..133d5c050cac --- /dev/null +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlNameTransformerTest.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.snowflake; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +import java.util.Map; +import org.junit.jupiter.api.Test; + +class SnowflakeSqlNameTransformerTest { + + private static final SnowflakeSQLNameTransformer INSTANCE = new SnowflakeSQLNameTransformer(); + private static final Map RAW_TO_NORMALIZED_IDENTIFIERS = Map.of( + "name-space", "name_space", + "spécial_character", "special_character", + "99namespace", "_99namespace"); + + @Test + public void testGetIdentifier() { + assertNull(INSTANCE.getIdentifier(null)); + assertNull(INSTANCE.convertStreamName(null)); + RAW_TO_NORMALIZED_IDENTIFIERS.forEach((raw, normalized) -> { + assertEquals(normalized, INSTANCE.convertStreamName(raw)); + assertEquals(normalized, INSTANCE.getIdentifier(raw)); + assertEquals(normalized, INSTANCE.getNamespace(raw)); + }); + } + +} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperationsTest.java b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperationsTest.java index b4de44c0aa0e..bd0674bdbddf 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperationsTest.java +++ b/airbyte-integrations/connectors/destination-snowflake/src/test/java/io/airbyte/integrations/destination/snowflake/SnowflakeSqlOperationsTest.java @@ -44,7 +44,7 @@ void createTableQuery() { @Test void isSchemaExists() throws Exception { snowflakeSqlOperations.isSchemaExists(db, SCHEMA_NAME); - verify(db, times(1)).query(anyString()); + verify(db, times(1)).unsafeQuery(anyString()); } @Test diff --git a/airbyte-integrations/connectors/source-chargify/.dockerignore b/airbyte-integrations/connectors/source-chargify/.dockerignore new file mode 100644 index 000000000000..1a69703313fd --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!main.py +!source_chargify +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-chargify/Dockerfile b/airbyte-integrations/connectors/source-chargify/Dockerfile new file mode 100644 index 000000000000..aea82a991173 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.7.11-alpine3.14 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. 
+RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY source_chargify ./source_chargify + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-chargify diff --git a/airbyte-integrations/connectors/source-chargify/README.md b/airbyte-integrations/connectors/source-chargify/README.md new file mode 100644 index 000000000000..afd69a52da00 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/README.md @@ -0,0 +1,132 @@ +# Chargify Source + +This is the repository for the Chargify source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/chargify). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +pip install '.[tests]' +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-chargify:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/chargify) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_chargify/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source chargify test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . 
-t airbyte/source-chargify:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-chargify:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-chargify:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-chargify:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-chargify:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/source-chargify:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing +Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all source connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside the `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Customize the `acceptance-test-config.yml` file to configure the tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires creating or destroying resources during acceptance tests, create fixtures for them and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +python -m pytest integration_tests -p integration_tests.acceptance +``` +To run your integration tests with Docker, use the `acceptance-test-docker.sh` script in this connector directory. + +### Using Gradle to run tests +All commands should be run from the Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:source-chargify:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:source-chargify:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work, which go in the `MAIN_REQUIREMENTS` list. +* required for testing, which go in the `TEST_REQUIREMENTS` list. + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1.
Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/source-chargify/acceptance-test-config.yml b/airbyte-integrations/connectors/source-chargify/acceptance-test-config.yml new file mode 100644 index 000000000000..4d98e83c9e31 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/acceptance-test-config.yml @@ -0,0 +1,20 @@ +# See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) +# for more information about how to configure these tests +connector_image: airbyte/source-chargify:dev +tests: + spec: + - spec_path: "source_chargify/spec.json" + connection: + - config_path: "secrets/config.json" + status: "succeed" + - config_path: "integration_tests/invalid_config.json" + status: "failed" + discovery: + - config_path: "secrets/config.json" + basic_read: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" + empty_streams: [] + full_refresh: + - config_path: "secrets/config.json" + configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-chargify/acceptance-test-docker.sh b/airbyte-integrations/connectors/source-chargify/acceptance-test-docker.sh new file mode 100644 index 000000000000..c51577d10690 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/acceptance-test-docker.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +# Build latest connector image +docker build . -t $(cat acceptance-test-config.yml | grep "connector_image" | head -n 1 | cut -d: -f2-) + +# Pull latest acctest image +docker pull airbyte/source-acceptance-test:latest + +# Run +docker run --rm -it \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -v /tmp:/tmp \ + -v $(pwd):/test_input \ + airbyte/source-acceptance-test \ + --acceptance-test-config /test_input + diff --git a/airbyte-integrations/connectors/source-chargify/build.gradle b/airbyte-integrations/connectors/source-chargify/build.gradle new file mode 100644 index 000000000000..2dcfc0aad3b6 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/build.gradle @@ -0,0 +1,9 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-source-acceptance-test' +} + +airbytePython { + moduleDirectory 'source_chargify' +} diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-chargify/integration_tests/__init__.py similarity index 57% rename from airbyte-integrations/connectors/source-instagram/unit_tests/unit_test.py rename to airbyte-integrations/connectors/source-chargify/integration_tests/__init__.py index e1814314fc3b..46b7376756ec 100644 --- a/airbyte-integrations/connectors/source-instagram/unit_tests/unit_test.py +++ b/airbyte-integrations/connectors/source-chargify/integration_tests/__init__.py @@ -1,7 +1,3 @@ # # Copyright (c) 2021 Airbyte, Inc., all rights reserved. # - - -def test_example_method(): - assert True diff --git a/airbyte-integrations/connectors/source-chargify/integration_tests/acceptance.py b/airbyte-integrations/connectors/source-chargify/integration_tests/acceptance.py new file mode 100644 index 000000000000..0347f2a0b143 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/integration_tests/acceptance.py @@ -0,0 +1,14 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +import pytest + +pytest_plugins = ("source_acceptance_test.plugin",) + + +@pytest.fixture(scope="session", autouse=True) +def connector_setup(): + """This fixture is a placeholder for external resources that acceptance test might require.""" + yield diff --git a/airbyte-integrations/connectors/source-chargify/integration_tests/catalog.json b/airbyte-integrations/connectors/source-chargify/integration_tests/catalog.json new file mode 100644 index 000000000000..9a6c0e1fc5d5 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/integration_tests/catalog.json @@ -0,0 +1,272 @@ +{ + "streams": [ + { + "name": "customers", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "default_subscription_group_uid": { + "type": ["null", "string"] + }, + "portal_invite_last_sent_at": { + "type": "string", + "format": "date-time" + }, + "vat_number": { + "type": ["null", "string"] + }, + "email": { + "type": "string" + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "zip": { + "type": "string" + }, + "first_name": { + "type": "string" + }, + "country_name": { + "type": ["null", "string"] + }, + "state": { + "type": "string" + }, + "city": { + "type": "string" + }, + "parent_id": { + "type": ["null", "integer"] + }, + "locale": { + "type": ["null", "string"] + }, + "portal_customer_created_at": { + "type": "string" + }, + "updated_at": { + "type": "string", + "format": "date-time" + }, + "country": { + "type": "string" + }, + "portal_invite_last_accepted_at": { + "type": ["null", "string"] + }, + "tax_exempt": { + "type": "boolean" + }, + "id": { + "type": "integer" + }, + "reference": { + "type": ["null", "string"] + }, + "last_name": { + "type": "string" + }, + "address_2": { + "type": "string" + }, + "phone": { + "type": "string" + }, + "organization": { + "type": "string" + }, + "address": { + "type": "string" + }, + "verified": { + "type": ["null", "string"] + }, + "cc_emails": { + "type": "string" + }, + "state_name": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + }, + { + "name": "subscriptions", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["integer"] + }, + "state": { + "type": ["string"] + }, + "balance_in_cents": { + "type": ["integer"] + }, + "total_revenue_in_cents": { + "type": ["integer"] + }, + "product_price_in_cents": { + "type": ["integer"] + }, + "product_version_number": { + "type": ["integer"] + }, + "current_period_ends_at": { + "type": ["string"] + }, + "next_assessment_at": { + "type": ["string"], + "format": "date-time" + }, + "trial_started_at": { + "type": ["string"], + "format": "date-time" + }, + "trial_ended_at": { + "type": ["string"], + "format": "date-time" + }, + "activated_at": { + "type": ["string"], + "format": "date-time" + }, + "expires_at": { + "type": ["string"], + "format": "date-time" + }, + "created_at": { + "type": ["string"], + "format": "date-time" + }, + "updated_at": { + "type": ["string"], + "format": "date-time" + }, + "cancellation_message": { + "type": ["string"] + }, + "cancellation_method": { + "type": ["null", "string"] + }, + "cancel_at_end_of_period": { + "type": ["boolean"] + }, + "canceled_at": { + "type": ["string"], + "format": "date-time" + }, + "current_period_started_at": { + "type": ["string"], + "format": "date-time" + }, + "previous_state": { + "type": ["string"] 
+ }, + "signup_payment_id": { + "type": ["integer"] + }, + "signup_revenue": { + "type": ["string"] + }, + "delayed_cancel_at": { + "type": ["string"], + "format": "date-time" + }, + "coupon_code": { + "type": ["string"] + }, + "snap_day": { + "type": ["string"] + }, + "payment_collection_method": { + "type": ["string"] + }, + "customer": { + "type": ["object"] + }, + "product": { + "type": ["object"] + }, + "credit_card": { + "type": ["object"] + }, + "group": { + "type": ["null", "object"] + }, + "bank_account": { + "type": ["object"] + }, + "payment_type": { + "type": ["string"] + }, + "referral_code": { + "type": ["string"] + }, + "next_product_id": { + "type": ["string"] + }, + "next_product_handle": { + "type": ["string"] + }, + "coupon_use_count": { + "type": ["integer"] + }, + "coupon_uses_allowed": { + "type": ["integer"] + }, + "reason_code": { + "type": ["string"] + }, + "automatically_resume_at": { + "type": ["string"], + "format": "date-time" + }, + "coupon_codes": { + "type": ["array"] + }, + "offer_id": { + "type": ["string"] + }, + "payer_id": { + "type": ["integer"] + }, + "current_billing_amount_in_cents": { + "type": ["integer"] + }, + "product_price_point_id": { + "type": ["integer"] + }, + "next_product_price_point_id": { + "type": ["integer"] + }, + "net_terms": { + "type": ["integer"] + }, + "stored_credential_transaction_id": { + "type": ["integer"] + }, + "reference": { + "type": ["string"] + }, + "on_hold_at": { + "type": ["string"], + "format": "date-time" + }, + "prepaid_dunning": { + "type": ["boolean"] + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_primary_key": [["id"]] + } + ] +} diff --git a/airbyte-integrations/connectors/source-chargify/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-chargify/integration_tests/configured_catalog.json new file mode 100644 index 000000000000..a97bddfc209e --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/integration_tests/configured_catalog.json @@ -0,0 +1,22 @@ +{ + "streams": [ + { + "stream": { + "name": "customers", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "subscriptions", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-chargify/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-chargify/integration_tests/invalid_config.json new file mode 100644 index 000000000000..b883b8ac15f6 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/integration_tests/invalid_config.json @@ -0,0 +1,4 @@ +{ + "api_key": "1235", + "domain": "subdomain.chargify.com" +} diff --git a/airbyte-integrations/connectors/source-chargify/integration_tests/sample_config.json b/airbyte-integrations/connectors/source-chargify/integration_tests/sample_config.json new file mode 100644 index 000000000000..8af764477f29 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/integration_tests/sample_config.json @@ -0,0 +1,4 @@ +{ + "api_key": "1235", + "domain": "stoplight.io/mocks/chargify/api-docs/14108261/" +} diff --git a/airbyte-integrations/connectors/source-chargify/main.py b/airbyte-integrations/connectors/source-chargify/main.py new file mode 100644 index 000000000000..54d1c92f1463 --- /dev/null +++ 
b/airbyte-integrations/connectors/source-chargify/main.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from source_chargify import SourceChargify + +if __name__ == "__main__": + source = SourceChargify() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-chargify/requirements.txt b/airbyte-integrations/connectors/source-chargify/requirements.txt new file mode 100644 index 000000000000..0411042aa091 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/requirements.txt @@ -0,0 +1,2 @@ +-e ../../bases/source-acceptance-test +-e . diff --git a/airbyte-integrations/connectors/source-chargify/setup.py b/airbyte-integrations/connectors/source-chargify/setup.py new file mode 100644 index 000000000000..ac59d3fa25b3 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/setup.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.1", +] + +TEST_REQUIREMENTS = [ + "pytest~=6.1", + "pytest-mock~=3.6.1", + "source-acceptance-test", +] + +setup( + name="source_chargify", + description="Source implementation for Chargify.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/source-chargify/source_chargify/__init__.py b/airbyte-integrations/connectors/source-chargify/source_chargify/__init__.py new file mode 100644 index 000000000000..fb413107366d --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/source_chargify/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from .source import SourceChargify + +__all__ = ["SourceChargify"] diff --git a/airbyte-integrations/connectors/source-chargify/source_chargify/schemas/customers.json b/airbyte-integrations/connectors/source-chargify/source_chargify/schemas/customers.json new file mode 100644 index 000000000000..6b80d4074023 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/source_chargify/schemas/customers.json @@ -0,0 +1,110 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": [ + "portal_invite_last_sent_at", + "email", + "created_at", + "zip", + "first_name", + "state", + "city", + "portal_customer_created_at", + "updated_at", + "country", + "tax_exempt", + "id", + "last_name", + "address_2", + "phone", + "organization", + "address", + "cc_emails" + ], + "properties": { + "default_subscription_group_uid": { + "type": ["null", "string"] + }, + "portal_invite_last_sent_at": { + "type": "string", + "format": "date-time" + }, + "vat_number": { + "type": ["null", "string"] + }, + "email": { + "type": "string" + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "zip": { + "type": "string" + }, + "first_name": { + "type": "string" + }, + "country_name": { + "type": ["null", "string"] + }, + "state": { + "type": "string" + }, + "city": { + "type": "string" + }, + "parent_id": { + "type": ["null", "integer"] + }, + "locale": { + "type": ["null", "string"] + }, + "portal_customer_created_at": { + "type": "string" + }, + "updated_at": { + "type": "string", + "format": "date-time" + }, + "country": { + "type": "string" + }, + "portal_invite_last_accepted_at": { + "type": ["null", "string"] + }, + "tax_exempt": { + "type": "boolean" + }, + "id": { + "type": "integer" + }, + "reference": { + "type": ["null", "string"] + }, + "last_name": { + "type": "string" + }, + "address_2": { + "type": "string" + }, + "phone": { + "type": "string" + }, + "organization": { + "type": "string" + }, + "address": { + "type": "string" + }, + "verified": { + "type": ["null", "string"] + }, + "cc_emails": { + "type": "string" + }, + "state_name": { + "type": ["null", "string"] + } + } +} diff --git a/airbyte-integrations/connectors/source-chargify/source_chargify/schemas/subscriptions.json b/airbyte-integrations/connectors/source-chargify/source_chargify/schemas/subscriptions.json new file mode 100644 index 000000000000..b8a9eed74af3 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/source_chargify/schemas/subscriptions.json @@ -0,0 +1,538 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "state": { + "type": "string" + }, + "balance_in_cents": { + "type": "integer" + }, + "total_revenue_in_cents": { + "type": "integer" + }, + "product_price_in_cents": { + "type": "integer" + }, + "product_version_number": { + "type": "integer" + }, + "current_period_ends_at": { + "type": "string" + }, + "next_assessment_at": { + "type": "string" + }, + "trial_started_at": { + "type": "string" + }, + "trial_ended_at": { + "type": "string" + }, + "activated_at": { + "type": "string" + }, + "expires_at": { + "type": "string" + }, + "created_at": { + "type": "string" + }, + "updated_at": { + "type": "string" + }, + "cancellation_message": { + "type": "string" + }, + "cancellation_method": { + "type": "string" + }, + "cancel_at_end_of_period": { + "type": "boolean" + }, + "canceled_at": { + "type": "string" + }, + "current_period_started_at": { + "type": "string" + 
}, + "previous_state": { + "type": "string" + }, + "signup_payment_id": { + "type": "integer" + }, + "signup_revenue": { + "type": "string" + }, + "delayed_cancel_at": { + "type": "string" + }, + "coupon_code": { + "type": "string" + }, + "snap_day": { + "type": "string" + }, + "payment_collection_method": { + "type": "string" + }, + "customer": { + "type": "object", + "properties": { + "first_name": { + "type": "string" + }, + "last_name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "cc_emails": { + "type": "string" + }, + "organization": { + "type": "string" + }, + "reference": { + "type": "string" + }, + "id": { + "type": "integer" + }, + "created_at": { + "type": "string" + }, + "updated_at": { + "type": "string" + }, + "address": { + "type": "string" + }, + "address_2": { + "type": "string" + }, + "city": { + "type": "string" + }, + "state": { + "type": "string" + }, + "state_name": { + "type": "string" + }, + "zip": { + "type": "string" + }, + "country": { + "type": "string" + }, + "country_name": { + "type": "string" + }, + "phone": { + "type": "string" + }, + "verified": { + "type": "boolean" + }, + "portal_customer_created_at": { + "type": "string" + }, + "portal_invite_last_sent_at": { + "type": "string" + }, + "portal_invite_last_accepted_at": { + "type": ["null", "string"] + }, + "tax_exempt": { + "type": "boolean" + }, + "vat_number": { + "type": "string" + }, + "parent_id": { + "type": "integer" + }, + "locale": { + "type": "string" + }, + "default_subscription_group_uid": { + "type": "string" + } + } + }, + "product": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "name": { + "type": "string" + }, + "handle": { + "type": "string" + }, + "description": { + "type": "string" + }, + "accounting_code": { + "type": "string" + }, + "request_credit_card": { + "type": "boolean" + }, + "expiration_interval": { + "type": "integer" + }, + "expiration_interval_unit": { + "type": "string" + }, + "created_at": { + "type": "string" + }, + "updated_at": { + "type": "string" + }, + "price_in_cents": { + "type": "integer" + }, + "interval": { + "type": "integer" + }, + "interval_unit": { + "type": "string" + }, + "initial_charge_in_cents": { + "type": "integer" + }, + "trial_price_in_cents": { + "type": "integer" + }, + "trial_interval": { + "type": "integer" + }, + "trial_interval_unit": { + "type": "string" + }, + "archived_at": { + "type": "string" + }, + "require_credit_card": { + "type": "boolean" + }, + "return_params": { + "type": "string" + }, + "taxable": { + "type": "boolean" + }, + "update_return_url": { + "type": "string" + }, + "initial_charge_after_trial": { + "type": "boolean" + }, + "version_number": { + "type": "integer" + }, + "update_return_params": { + "type": "string" + }, + "product_family": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "name": { + "type": "string" + }, + "handle": { + "type": "string" + }, + "accounting_code": { + "type": "null" + }, + "description": { + "type": "string" + }, + "created_at": { + "type": "string" + }, + "updated_at": { + "type": "string" + } + } + }, + "public_signup_pages": { + "type": "array", + "items": { + "anyOf": [ + { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "return_url": { + "type": "string" + }, + "return_params": { + "type": "string" + }, + "url": { + "type": "string" + } + } + } + ] + } + }, + "product_price_point_name": { + "type": "string" + }, + "request_billing_address": { + "type": "boolean" + }, + 
"require_billing_address": { + "type": "boolean" + }, + "require_shipping_address": { + "type": "boolean" + }, + "tax_code": { + "type": "string" + }, + "default_product_price_point_id": { + "type": "integer" + } + } + }, + "credit_card": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "first_name": { + "type": "string" + }, + "last_name": { + "type": "string" + }, + "masked_card_number": { + "type": "string" + }, + "card_type": { + "type": "string" + }, + "expiration_month": { + "type": "integer" + }, + "expiration_year": { + "type": "integer" + }, + "customer_id": { + "type": "integer" + }, + "current_vault": { + "type": "string" + }, + "vault_token": { + "type": "string" + }, + "billing_address": { + "type": "string" + }, + "billing_city": { + "type": "string" + }, + "billing_state": { + "type": "string" + }, + "billing_zip": { + "type": "string" + }, + "billing_country": { + "type": "string" + }, + "customer_vault_token": { + "type": "string" + }, + "billing_address_2": { + "type": "string" + }, + "payment_type": { + "type": "string" + }, + "disabled": { + "type": "boolean" + }, + "chargify_token": { + "type": "string" + }, + "site_gateway_setting_id": { + "type": "integer" + }, + "gateway_handle": { + "type": "string" + } + } + }, + "group": { + "type": "object", + "properties": { + "uid": { + "type": "string" + }, + "scheme": { + "type": "string" + }, + "primary_subscription_id": { + "type": "string" + }, + "primary": { + "type": "string" + } + } + }, + "bank_account": { + "type": "object", + "properties": { + "bank_account_holder_type": { + "type": "string" + }, + "bank_account_type": { + "type": "string" + }, + "bank_name": { + "type": "string" + }, + "billing_address": { + "type": "string" + }, + "billing_address_2": { + "type": "string" + }, + "billing_city": { + "type": "string" + }, + "billing_state": { + "type": "string" + }, + "billing_zip": { + "type": "string" + }, + "billing_country": { + "type": "string" + }, + "current_vault": { + "type": "string" + }, + "customer_id": { + "type": "integer" + }, + "customer_vault_token": { + "type": "string" + }, + "first_name": { + "type": "string" + }, + "last_name": { + "type": "string" + }, + "id": { + "type": "integer" + }, + "masked_bank_account_number": { + "type": "string" + }, + "masked_bank_routing_number": { + "type": "string" + }, + "vault_token": { + "type": "string" + }, + "chargify_token": { + "type": "string" + }, + "site_gateway_setting_id": { + "type": "integer" + }, + "gateway_handle": { + "type": "string" + } + } + }, + "payment_type": { + "type": "string" + }, + "referral_code": { + "type": "string" + }, + "next_product_id": { + "type": "integer" + }, + "next_product_handle": { + "type": "string" + }, + "coupon_use_count": { + "type": "integer" + }, + "coupon_uses_allowed": { + "type": "integer" + }, + "reason_code": { + "type": "string" + }, + "automatically_resume_at": { + "type": "string" + }, + "coupon_codes": { + "type": "array", + "items": { + "anyOf": [ + { + "type": "string" + } + ] + } + }, + "offer_id": { + "type": "string" + }, + "payer_id": { + "type": "integer" + }, + "current_billing_amount_in_cents": { + "type": "integer" + }, + "product_price_point_id": { + "type": "integer" + }, + "next_product_price_point_id": { + "type": "integer" + }, + "net_terms": { + "type": "integer" + }, + "stored_credential_transaction_id": { + "type": "integer" + }, + "reference": { + "type": "string" + }, + "on_hold_at": { + "type": "string" + }, + "prepaid_dunning": { + "type": "boolean" + } + 
}
+}
diff --git a/airbyte-integrations/connectors/source-chargify/source_chargify/source.py b/airbyte-integrations/connectors/source-chargify/source_chargify/source.py
new file mode 100644
index 000000000000..380837e59d16
--- /dev/null
+++ b/airbyte-integrations/connectors/source-chargify/source_chargify/source.py
@@ -0,0 +1,116 @@
+#
+# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+#
+
+
+from abc import ABC
+from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
+from urllib.parse import parse_qs, urlparse
+
+import requests
+from airbyte_cdk import AirbyteLogger
+from airbyte_cdk.models import SyncMode
+from airbyte_cdk.sources import AbstractSource
+from airbyte_cdk.sources.streams import Stream
+from airbyte_cdk.sources.streams.http import HttpStream
+
+
+# Basic full refresh stream
+class ChargifyStream(HttpStream, ABC):
+
+    PER_PAGE = 200
+    FIRST_PAGE = 1
+
+    def __init__(self, *args, domain: str, **kwargs):
+        super().__init__(*args, **kwargs)
+        self._domain = domain
+
+    @property
+    def url_base(self):
+        return f"https://{self._domain}"
+
+    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
+
+        results = response.json()
+
+        if results:
+            if len(results) == self.PER_PAGE:
+                url_query = urlparse(response.url).query
+                query_params = parse_qs(url_query)
+
+                new_params = {param_name: param_value[0] for param_name, param_value in query_params.items()}
+                if "page" in new_params:
+                    new_params["page"] = int(new_params["page"]) + 1
+                return new_params
+
+    def request_params(
+        self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
+    ) -> MutableMapping[str, Any]:
+
+        if next_page_token is None:
+            return {"page": self.FIRST_PAGE, "per_page": self.PER_PAGE}
+
+        return next_page_token
+
+    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
+
+        yield response.json()
+
+
+class Customers(ChargifyStream):
+
+    primary_key = "id"
+
+    def path(
+        self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
+    ) -> str:
+
+        return "customers.json"
+
+    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
+        # Chargify API: https://developers.chargify.com/docs/api-docs/b3A6MTQxMDgyNzY-list-or-find-customers
+        # The endpoint returns a list of objects, each wrapping a Customer record under the "customer" key.
+        customers = response.json()
+        for customer in customers:
+            yield customer["customer"]
+
+
+class Subscriptions(ChargifyStream):
+
+    primary_key = "id"
+
+    def path(self, **kwargs) -> str:
+
+        return "subscriptions.json"
+
+    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
+        # Chargify API: https://developers.chargify.com/docs/api-docs/b3A6MTQxMDgzODk-list-subscriptions
+        # The endpoint returns a list of Subscription objects.
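+        # Illustrative (assumed) item shape: {"subscription": {"id": 1, "state": "active", ...}};
+        # the wrapper key is stripped below so that only the inner subscription record is yielded.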
+        subscriptions = response.json()
+        for subscription in subscriptions:
+            yield subscription["subscription"]
+
+
+# Source
+class SourceChargify(AbstractSource):
+    BASIC_AUTH_PASSWORD = "x"
+
+    def get_basic_auth(self, config: Mapping[str, Any]) -> requests.auth.HTTPBasicAuth:
+        return requests.auth.HTTPBasicAuth(
+            config["api_key"], SourceChargify.BASIC_AUTH_PASSWORD
+        )  # https://developers.chargify.com/docs/api-docs/YXBpOjE0MTA4MjYx-chargify-api-documentation
+
+    def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
+        try:
+            authenticator = self.get_basic_auth(config)
+            customers_gen = Customers(authenticator, domain=config["domain"]).read_records(SyncMode.full_refresh)
+            next(customers_gen)
+            subscriptions_gen = Subscriptions(authenticator, domain=config["domain"]).read_records(SyncMode.full_refresh)
+            next(subscriptions_gen)
+            return True, None
+        except Exception as error:
+            return False, f"Unable to connect to the Chargify API with the provided credentials - {repr(error)}"
+
+    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
+        authenticator = self.get_basic_auth(config)
+        return [Customers(authenticator, domain=config["domain"]), Subscriptions(authenticator, domain=config["domain"])]
diff --git a/airbyte-integrations/connectors/source-chargify/source_chargify/spec.json b/airbyte-integrations/connectors/source-chargify/source_chargify/spec.json
new file mode 100644
index 000000000000..4d77ed7f983d
--- /dev/null
+++ b/airbyte-integrations/connectors/source-chargify/source_chargify/spec.json
@@ -0,0 +1,21 @@
+{
+  "documentationUrl": "https://docs.airbyte.io/integrations/sources/chargify",
+  "connectionSpecification": {
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "title": "Chargify Spec",
+    "type": "object",
+    "required": ["api_key", "domain"],
+    "additionalProperties": false,
+    "properties": {
+      "api_key": {
+        "type": "string",
+        "description": "Chargify API Key.",
+        "airbyte_secret": true
+      },
+      "domain": {
+        "type": "string",
+        "description": "Chargify domain. Normally this domain follows the format: companyname.chargify.com"
+      }
+    }
+  }
+}
diff --git a/airbyte-integrations/connectors/source-chargify/unit_tests/__init__.py b/airbyte-integrations/connectors/source-chargify/unit_tests/__init__.py
new file mode 100644
index 000000000000..46b7376756ec
--- /dev/null
+++ b/airbyte-integrations/connectors/source-chargify/unit_tests/__init__.py
@@ -0,0 +1,3 @@
+#
+# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+#
diff --git a/airbyte-integrations/connectors/source-chargify/unit_tests/test_source.py b/airbyte-integrations/connectors/source-chargify/unit_tests/test_source.py
new file mode 100644
index 000000000000..e1071572aa11
--- /dev/null
+++ b/airbyte-integrations/connectors/source-chargify/unit_tests/test_source.py
@@ -0,0 +1,15 @@
+#
+# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+# + +from unittest.mock import MagicMock + +from source_chargify.source import SourceChargify + + +def test_streams(mocker): + source = SourceChargify() + config_mock = MagicMock() + streams = source.streams(config_mock) + expected_streams_number = 2 + assert len(streams) == expected_streams_number diff --git a/airbyte-integrations/connectors/source-chargify/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-chargify/unit_tests/test_streams.py new file mode 100644 index 000000000000..3eedddcfb963 --- /dev/null +++ b/airbyte-integrations/connectors/source-chargify/unit_tests/test_streams.py @@ -0,0 +1,135 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import MagicMock + +import pytest +import requests +from source_chargify.source import ChargifyStream, Customers, Subscriptions + + +@pytest.fixture() +def ChargifyStreamInstance(mocker) -> ChargifyStream: + + mocker.patch.object(ChargifyStream, "path", "v0/example_endpoint") + mocker.patch.object(ChargifyStream, "primary_key", "test_primary_key") + mocker.patch.object(ChargifyStream, "__abstractmethods__", set()) + + return ChargifyStream( + authenticator=MagicMock(), + domain="test", + ) + + +@pytest.fixture() +def CustomerStreamInstance(mocker) -> Customers: + + mocker.patch.object(Customers, "path", "v0/example_endpoint") + mocker.patch.object(Customers, "primary_key", "test_primary_key") + mocker.patch.object(Customers, "__abstractmethods__", set()) + + return Customers(authenticator=MagicMock(), domain="test") + + +@pytest.fixture() +def SubscriptionsStreamInstance(mocker) -> Subscriptions: + + mocker.patch.object(Subscriptions, "path", "v0/example_endpoint") + mocker.patch.object(Subscriptions, "primary_key", "test_primary_key") + mocker.patch.object(Subscriptions, "__abstractmethods__", set()) + + return Subscriptions( + authenticator=MagicMock(), + domain="test", + ) + + +@pytest.mark.parametrize("domain", [("test"), ("test1"), ("test2")]) +def test_stream_config(domain, mocker): + + mocker.patch.object(ChargifyStream, "path", "v0/example_endpoint") + mocker.patch.object(ChargifyStream, "primary_key", "test_primary_key") + mocker.patch.object(ChargifyStream, "__abstractmethods__", set()) + + stream: ChargifyStream = ChargifyStream( + domain=domain, + ) + assert stream._domain == domain + + customers_stream: Customers = Customers(domain=domain) + assert customers_stream.path() == "customers.json" + assert customers_stream.primary_key == "id" + + subscriptions_stream: Subscriptions = Subscriptions(domain=domain) + assert subscriptions_stream.path() == "subscriptions.json" + assert subscriptions_stream.primary_key == "id" + + +def test_next_page_token(ChargifyStreamInstance: ChargifyStream): + response = requests.Response() + response.url = "https://test.chargify.com/subscriptions.json?page=1&per_page=2" + response.json = MagicMock() + response.json.return_value = [{"id": 1}, {"id": 2}] + + ChargifyStream.PER_PAGE = 2 + + token_params = ChargifyStreamInstance.next_page_token(response=response) + + assert token_params == {"page": 2, "per_page": "2"} + + response = requests.Response() + response.url = "https://test.chargify.com/subscriptions.json?page=1&per_page=2" + response.json = MagicMock() + response.json.return_value = {} + + token_params = ChargifyStreamInstance.next_page_token(response=response) + + assert token_params is None + + +def test_requests_params(ChargifyStreamInstance: ChargifyStream): + + ChargifyStream.PER_PAGE = 200 + + params = 
ChargifyStreamInstance.request_params(stream_state={}, next_page_token=None) + + assert params == {"page": 1, "per_page": 200} + + params = ChargifyStreamInstance.request_params(stream_state={}, next_page_token={"page": 2, "per_page": 200}) + + assert params == {"page": 2, "per_page": 200} + + +def test_parse_subscriptions_response(SubscriptionsStreamInstance: Subscriptions): + + response = MagicMock() + response.json.return_value = [ + {"subscription": {"id": 0, "state": "string", "balance_in_cents": 0}}, + {"subscription": {"id": 2, "state": "string", "balance_in_cents": 1000}}, + {"subscription": {"id": 3, "state": "string", "balance_in_cents": 100}}, + ] + + response = list(SubscriptionsStreamInstance.parse_response(response=response)) + + assert len(response) == 3 + assert response[0] == {"id": 0, "state": "string", "balance_in_cents": 0} + assert response[1] == {"id": 2, "state": "string", "balance_in_cents": 1000} + assert response[2] == {"id": 3, "state": "string", "balance_in_cents": 100} + + +def test_parse_customers_response(CustomerStreamInstance: Customers): + + response = MagicMock() + response.json.return_value = [ + {"customer": {"id": 0, "state": "string", "balance_in_cents": 0}}, + {"customer": {"id": 2, "state": "string", "balance_in_cents": 1000}}, + {"customer": {"id": 3, "state": "string", "balance_in_cents": 100}}, + ] + + response = list(CustomerStreamInstance.parse_response(response=response)) + + assert len(response) == 3 + assert response[0] == {"id": 0, "state": "string", "balance_in_cents": 0} + assert response[1] == {"id": 2, "state": "string", "balance_in_cents": 1000} + assert response[2] == {"id": 3, "state": "string", "balance_in_cents": 100} diff --git a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java index 5cd626a81ec9..aa255ccace20 100644 --- a/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java +++ b/airbyte-integrations/connectors/source-clickhouse/src/main/java/io/airbyte/integrations/source/clickhouse/ClickHouseSource.java @@ -50,7 +50,7 @@ protected Map> discoverPrimaryKeys(final JdbcDatabase datab .getFullyQualifiedTableName(tableInfo.getNameSpace(), tableInfo.getName()), tableInfo -> { try { - return database.resultSetQuery(connection -> { + return database.unsafeResultSetQuery(connection -> { final String sql = "SELECT name FROM system.columns WHERE database = ? AND table = ? 
AND is_in_primary_key = 1"; final PreparedStatement preparedStatement = connection.prepareStatement(sql); preparedStatement.setString(1, tableInfo.getNameSpace()); diff --git a/airbyte-integrations/connectors/source-cockroachdb/src/main/java/io/airbyte/integrations/source/cockroachdb/CockroachDbSource.java b/airbyte-integrations/connectors/source-cockroachdb/src/main/java/io/airbyte/integrations/source/cockroachdb/CockroachDbSource.java index a7e8c9b33247..7e6d63de5878 100644 --- a/airbyte-integrations/connectors/source-cockroachdb/src/main/java/io/airbyte/integrations/source/cockroachdb/CockroachDbSource.java +++ b/airbyte-integrations/connectors/source-cockroachdb/src/main/java/io/airbyte/integrations/source/cockroachdb/CockroachDbSource.java @@ -101,7 +101,7 @@ public AutoCloseableIterator read(final JsonNode config, @Override public Set getPrivilegesTableForCurrentUser(final JdbcDatabase database, final String schema) throws SQLException { return database - .query(getPrivileges(database), sourceOperations::rowToJson) + .unsafeQuery(getPrivileges(database), sourceOperations::rowToJson) .map(this::getPrivilegeDto) .collect(Collectors.toSet()); } diff --git a/airbyte-integrations/connectors/source-cockroachdb/src/main/java/io/airbyte/integrations/source/cockroachdb/CockroachJdbcDatabase.java b/airbyte-integrations/connectors/source-cockroachdb/src/main/java/io/airbyte/integrations/source/cockroachdb/CockroachJdbcDatabase.java index 0aa9572cb5a2..e3037d0b4897 100644 --- a/airbyte-integrations/connectors/source-cockroachdb/src/main/java/io/airbyte/integrations/source/cockroachdb/CockroachJdbcDatabase.java +++ b/airbyte-integrations/connectors/source-cockroachdb/src/main/java/io/airbyte/integrations/source/cockroachdb/CockroachJdbcDatabase.java @@ -54,21 +54,21 @@ public List bufferedResultSetQuery(final CheckedFunction Stream resultSetQuery(final CheckedFunction query, - final CheckedFunction recordTransform) + public Stream unsafeResultSetQuery(final CheckedFunction query, + final CheckedFunction recordTransform) throws SQLException { - return database.resultSetQuery(query, recordTransform); + return database.unsafeResultSetQuery(query, recordTransform); } @Override - public Stream query(final CheckedFunction statementCreator, - final CheckedFunction recordTransform) + public Stream unsafeQuery(final CheckedFunction statementCreator, + final CheckedFunction recordTransform) throws SQLException { - return database.query(statementCreator, recordTransform); + return database.unsafeQuery(statementCreator, recordTransform); } @Override - public Stream query(final String sql, final String... params) throws SQLException { + public Stream unsafeQuery(final String sql, final String... 
params) throws SQLException { return bufferedResultSetQuery(connection -> { final PreparedStatement statement = connection.prepareStatement(sql); int i = 1; diff --git a/airbyte-integrations/connectors/source-db2/src/main/java/io.airbyte.integrations.source.db2/Db2Source.java b/airbyte-integrations/connectors/source-db2/src/main/java/io.airbyte.integrations.source.db2/Db2Source.java index c20433a3d1c9..8ec8eab83d0f 100644 --- a/airbyte-integrations/connectors/source-db2/src/main/java/io.airbyte.integrations.source.db2/Db2Source.java +++ b/airbyte-integrations/connectors/source-db2/src/main/java/io.airbyte.integrations.source.db2/Db2Source.java @@ -90,7 +90,7 @@ public Set getExcludedInternalNameSpaces() { @Override public Set getPrivilegesTableForCurrentUser(final JdbcDatabase database, final String schema) throws SQLException { return database - .query(getPrivileges(), sourceOperations::rowToJson) + .unsafeQuery(getPrivileges(), sourceOperations::rowToJson) .map(this::getPrivilegeDto) .collect(Collectors.toSet()); } diff --git a/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json index acc8aaee9ea0..21f48a879028 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-github/integration_tests/configured_catalog.json @@ -15,7 +15,7 @@ "name": "branches", "json_schema": {}, "supported_sync_modes": ["full_refresh"], - "source_defined_primary_key": [["id"]] + "source_defined_primary_key": [["repository"], ["name"]] }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" @@ -336,7 +336,7 @@ "name": "tags", "json_schema": {}, "supported_sync_modes": ["full_refresh"], - "source_defined_primary_key": [["id"]] + "source_defined_primary_key": [["repository"], ["name"]] }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" diff --git a/airbyte-integrations/connectors/source-github/setup.py b/airbyte-integrations/connectors/source-github/setup.py index 208034d88693..d1edbf0ebf36 100644 --- a/airbyte-integrations/connectors/source-github/setup.py +++ b/airbyte-integrations/connectors/source-github/setup.py @@ -10,7 +10,7 @@ "vcrpy==4.1.1", ] -TEST_REQUIREMENTS = ["pytest~=6.1", "source-acceptance-test", "responses==0.13.3"] +TEST_REQUIREMENTS = ["pytest~=6.1", "source-acceptance-test", "responses~=0.19.0"] setup( name="source_github", diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 26e40e7d9fd9..a5410aaed660 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -5,7 +5,6 @@ import time from abc import ABC, abstractmethod from copy import deepcopy -from time import sleep from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union from urllib import parse @@ -72,7 +71,7 @@ def should_retry(self, response: requests.Response) -> bool: elif response.headers.get("Retry-After"): time_delay = int(response.headers["Retry-After"]) self.logger.info(f"Handling Secondary Rate limits, setting sync delay for {time_delay} second(s)") - sleep(time_delay) + time.sleep(time_delay) return retry_flag def backoff_time(self, response: requests.Response) -> Union[int, float]: @@ -103,7 +102,9 @@ def read_records(self, stream_slice: Mapping[str, 
any] = None, **kwargs) -> Iter if e.response.status_code == requests.codes.NOT_FOUND: # A lot of streams are not available for repositories owned by a user instead of an organization. if isinstance(self, Organizations): - error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for organization `{stream_slice['organization']}`." + error_msg = ( + f"Syncing `{self.__class__.__name__}` stream isn't available for organization `{stream_slice['organization']}`." + ) else: error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{stream_slice['repository']}`." elif e.response.status_code == requests.codes.FORBIDDEN: @@ -213,13 +214,13 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late current_stream_state[current_repository] = {self.cursor_field: state_value} return current_stream_state - def get_starting_point(self, stream_state: Mapping[str, Any], repository: str) -> str: - start_point = self._start_date - - if stream_state and stream_state.get(repository, {}).get(self.cursor_field): - start_point = max(start_point, stream_state[repository][self.cursor_field]) - - return start_point + def get_starting_point(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> str: + if stream_state: + repository = stream_slice["repository"] + stream_state_value = stream_state.get(repository, {}).get(self.cursor_field) + if stream_state_value: + return max(self._start_date, stream_state_value) + return self._start_date def read_records( self, @@ -228,7 +229,7 @@ def read_records( stream_slice: Mapping[str, Any] = None, stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: - start_point = self.get_starting_point(stream_state=stream_state, repository=stream_slice["repository"]) + start_point = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice) for record in super().read_records( sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state ): @@ -241,7 +242,7 @@ def read_records( class IncrementalGithubStream(SemiIncrementalGithubStream): def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]: params = super().request_params(stream_state=stream_state, **kwargs) - since_params = self.get_starting_point(stream_state=stream_state, repository=stream_slice["repository"]) + since_params = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice) if since_params: params["since"] = since_params return params @@ -274,7 +275,7 @@ class Branches(GithubStream): API docs: https://docs.github.com/en/rest/reference/repos#list-branches """ - primary_key = None + primary_key = ["repository", "name"] def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: return f"repos/{stream_slice['repository']}/branches" @@ -340,7 +341,7 @@ class Tags(GithubStream): API docs: https://docs.github.com/en/rest/reference/repos#list-repository-tags """ - primary_key = None + primary_key = ["repository", "name"] def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: return f"repos/{stream_slice['repository']}/tags" @@ -559,9 +560,7 @@ def __init__(self, branches_to_pull: Mapping[str, List[str]], default_branches: def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]: params = super(IncrementalGithubStream, 
self).request_params(stream_state=stream_state, stream_slice=stream_slice, **kwargs) - params["since"] = self.get_starting_point( - stream_state=stream_state, repository=stream_slice["repository"], branch=stream_slice["branch"] - ) + params["since"] = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice) params["sha"] = stream_slice["branch"] return params @@ -605,31 +604,16 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late current_stream_state[current_repository][current_branch] = {self.cursor_field: state_value} return current_stream_state - def get_starting_point(self, stream_state: Mapping[str, Any], repository: str, branch: str) -> str: - start_point = self._start_date - if stream_state and stream_state.get(repository, {}).get(branch, {}).get(self.cursor_field): - return max(start_point, stream_state[repository][branch][self.cursor_field]) + def get_starting_point(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> str: + repository = stream_slice["repository"] + branch = stream_slice["branch"] + if stream_state: + stream_state_value = stream_state.get(repository, {}).get(branch, {}).get(self.cursor_field) + if stream_state_value: + return max(self._start_date, stream_state_value) if branch == self.default_branches[repository]: - return super().get_starting_point(stream_state=stream_state, repository=repository) - return start_point - - def read_records( - self, - sync_mode: SyncMode, - cursor_field: List[str] = None, - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, - ) -> Iterable[Mapping[str, Any]]: - start_point = self.get_starting_point( - stream_state=stream_state, repository=stream_slice["repository"], branch=stream_slice["branch"] - ) - for record in super(SemiIncrementalGithubStream, self).read_records( - sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state - ): - if record[self.cursor_field] > start_point: - yield record - elif self.is_sorted_descending and record[self.cursor_field] < start_point: - break + return super().get_starting_point(stream_state=stream_state, stream_slice=stream_slice) + return self._start_date class Issues(IncrementalGithubStream): diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_source.py b/airbyte-integrations/connectors/source-github/unit_tests/test_source.py index 2ba675796fd6..db1e99f74196 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_source.py @@ -4,6 +4,7 @@ from unittest.mock import MagicMock +import pytest import responses from airbyte_cdk.models import AirbyteConnectionStatus, Status from source_github.source import SourceGithub @@ -52,3 +53,69 @@ def test_check_connection_org_only(): assert status.status == Status.SUCCEEDED # One request to check organization assert len(responses.calls) == 1 + + +@responses.activate +def test_get_branches_data(): + + repository_args = {"repositories": ["airbytehq/integration-test"], "page_size_for_large_streams": 10} + + source = SourceGithub() + + responses.add( + "GET", + "https://api.github.com/repos/airbytehq/integration-test", + json={"full_name": "airbytehq/integration-test", "default_branch": "master"}, + ) + + responses.add( + "GET", + "https://api.github.com/repos/airbytehq/integration-test/branches", + json=[ + {"repository": "airbytehq/integration-test", "name": "feature/branch_0"}, + {"repository": 
"airbytehq/integration-test", "name": "feature/branch_1"}, + {"repository": "airbytehq/integration-test", "name": "feature/branch_2"}, + {"repository": "airbytehq/integration-test", "name": "master"}, + ], + ) + + default_branches, branches_to_pull = source._get_branches_data("", repository_args) + assert default_branches == {"airbytehq/integration-test": "master"} + assert branches_to_pull == {"airbytehq/integration-test": ["master"]} + + default_branches, branches_to_pull = source._get_branches_data( + "airbytehq/integration-test/feature/branch_0 airbytehq/integration-test/feature/branch_1 airbytehq/integration-test/feature/branch_3", + repository_args, + ) + + assert default_branches == {"airbytehq/integration-test": "master"} + assert len(branches_to_pull["airbytehq/integration-test"]) == 2 + assert "feature/branch_0" in branches_to_pull["airbytehq/integration-test"] + assert "feature/branch_1" in branches_to_pull["airbytehq/integration-test"] + + +@responses.activate +def test_generate_repositories(): + + source = SourceGithub() + + with pytest.raises(Exception): + config = {"repository": ""} + source._generate_repositories(config, authenticator=None) + + responses.add( + "GET", + "https://api.github.com/orgs/docker/repos", + json=[ + {"full_name": "docker/docker-py"}, + {"full_name": "docker/compose"}, + ], + ) + + config = {"repository": "airbytehq/integration-test docker/*"} + repositories_list, organisation_repos = source._generate_repositories(config, authenticator=None) + + assert repositories_list == ["airbytehq/integration-test"] + assert len(organisation_repos) == 2 + assert "docker/compose" in organisation_repos + assert "docker/docker-py" in organisation_repos diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index d31731e8e5ed..29d21f31e2ce 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -9,9 +9,33 @@ import requests import responses from airbyte_cdk.sources.streams.http.exceptions import BaseBackoffException -from source_github.streams import Projects, PullRequestCommentReactions, Repositories, Teams +from responses import matchers +from source_github.streams import ( + Branches, + Collaborators, + Comments, + CommitComments, + Commits, + Deployments, + IssueEvents, + IssueLabels, + IssueMilestones, + Organizations, + ProjectCards, + ProjectColumns, + Projects, + PullRequestCommentReactions, + PullRequestCommits, + PullRequests, + Releases, + Repositories, + Stargazers, + Tags, + Teams, + Users, +) -from .utils import read_full_refresh +from .utils import ProjectsResponsesAPI, read_full_refresh, read_incremental DEFAULT_BACKOFF_DELAYS = [5, 10, 20, 40, 80] @@ -52,10 +76,21 @@ def test_backoff_time(http_status, response_text, expected_backoff_time): assert stream.backoff_time(response_mock) == expected_backoff_time +@responses.activate +@patch("time.sleep") +def test_retry_after(time_mock): + stream = Organizations(organizations=["airbytehq"]) + responses.add("GET", "https://api.github.com/orgs/airbytehq", json={"login": "airbytehq"}, headers={"Retry-After": "10"}) + read_full_refresh(stream) + assert time_mock.call_args[0][0] == 10 + assert len(responses.calls) == 1 + assert responses.calls[0].request.url == "https://api.github.com/orgs/airbytehq?per_page=100" + + @responses.activate def test_stream_teams_404(): - kwargs = {"organizations": 
["org_name"]} - stream = Teams(**kwargs) + organization_args = {"organizations": ["org_name"]} + stream = Teams(**organization_args) responses.add( "GET", @@ -69,10 +104,46 @@ def test_stream_teams_404(): assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/teams?per_page=100" +@responses.activate +def test_stream_organizations_read(): + organization_args = {"organizations": ["org1", "org2"]} + stream = Organizations(**organization_args) + responses.add("GET", "https://api.github.com/orgs/org1", json={"id": 1}) + responses.add("GET", "https://api.github.com/orgs/org2", json={"id": 2}) + records = read_full_refresh(stream) + assert records == [{"id": 1}, {"id": 2}] + + +@responses.activate +def test_stream_teams_read(): + organization_args = {"organizations": ["org1", "org2"]} + stream = Teams(**organization_args) + responses.add("GET", "https://api.github.com/orgs/org1/teams", json=[{"id": 1}, {"id": 2}]) + responses.add("GET", "https://api.github.com/orgs/org2/teams", json=[{"id": 3}]) + records = read_full_refresh(stream) + assert records == [{"id": 1, "organization": "org1"}, {"id": 2, "organization": "org1"}, {"id": 3, "organization": "org2"}] + assert len(responses.calls) == 2 + assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/teams?per_page=100" + assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/teams?per_page=100" + + +@responses.activate +def test_stream_users_read(): + organization_args = {"organizations": ["org1", "org2"]} + stream = Users(**organization_args) + responses.add("GET", "https://api.github.com/orgs/org1/members", json=[{"id": 1}, {"id": 2}]) + responses.add("GET", "https://api.github.com/orgs/org2/members", json=[{"id": 3}]) + records = read_full_refresh(stream) + assert records == [{"id": 1, "organization": "org1"}, {"id": 2, "organization": "org1"}, {"id": 3, "organization": "org2"}] + assert len(responses.calls) == 2 + assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/members?per_page=100" + assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/members?per_page=100" + + @responses.activate def test_stream_repositories_404(): - kwargs = {"organizations": ["org_name"]} - stream = Repositories(**kwargs) + organization_args = {"organizations": ["org_name"]} + stream = Repositories(**organization_args) responses.add( "GET", @@ -86,11 +157,25 @@ def test_stream_repositories_404(): assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100" +@responses.activate +def test_stream_repositories_read(): + organization_args = {"organizations": ["org1", "org2"]} + stream = Repositories(**organization_args) + responses.add("GET", "https://api.github.com/orgs/org1/repos", json=[{"id": 1}, {"id": 2}]) + responses.add("GET", "https://api.github.com/orgs/org2/repos", json=[{"id": 3}]) + records = read_full_refresh(stream) + assert records == [{"id": 1, "organization": "org1"}, {"id": 2, "organization": "org1"}, {"id": 3, "organization": "org2"}] + assert len(responses.calls) == 2 + assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/repos?per_page=100" + assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/repos?per_page=100" + + @responses.activate def test_stream_projects_disabled(): - kwargs = {"start_date": "start_date", "page_size_for_large_streams": 30, "repositories": ["test_repo"]} - stream = Projects(**kwargs) + repository_args_with_start_date = {"start_date": 
"start_date", "page_size_for_large_streams": 30, "repositories": ["test_repo"]} + + stream = Projects(**repository_args_with_start_date) responses.add( "GET", "https://api.github.com/repos/test_repo/projects", @@ -101,3 +186,476 @@ def test_stream_projects_disabled(): assert read_full_refresh(stream) == [] assert len(responses.calls) == 1 assert responses.calls[0].request.url == "https://api.github.com/repos/test_repo/projects?per_page=100&state=all" + + +@responses.activate +def test_stream_pull_requests_incremental_read(): + + page_size = 2 + repository_args_with_start_date = { + "repositories": ["organization/repository"], + "page_size_for_large_streams": page_size, + "start_date": "2022-02-02T10:10:03Z", + } + + stream = PullRequests(**repository_args_with_start_date) + + data = [ + {"id": 1, "updated_at": "2022-02-02T10:10:02Z"}, + {"id": 2, "updated_at": "2022-02-02T10:10:04Z"}, + {"id": 3, "updated_at": "2022-02-02T10:10:06Z"}, + {"id": 4, "updated_at": "2022-02-02T10:10:08Z"}, + {"id": 5, "updated_at": "2022-02-02T10:10:10Z"}, + {"id": 6, "updated_at": "2022-02-02T10:10:12Z"}, + ] + + api_url = "https://api.github.com/repos/organization/repository/pulls" + + responses.add( + "GET", + api_url, + json=data[0:2], + headers={"Link": '; rel="next"'}, + match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "asc"}, strict_match=False)], + ) + + responses.add( + "GET", + api_url, + json=data[2:4], + match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "asc", "page": "2"}, strict_match=False)], + ) + + responses.add( + "GET", + api_url, + json=data[5:3:-1], + headers={"Link": '; rel="next"'}, + match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "desc"}, strict_match=False)], + ) + + responses.add( + "GET", + api_url, + json=data[3:1:-1], + headers={"Link": '; rel="next"'}, + match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "desc", "page": "2"}, strict_match=False)], + ) + + stream_state = {} + records = read_incremental(stream, stream_state) + assert [r["id"] for r in records] == [2, 3, 4] + assert stream_state == {"organization/repository": {"updated_at": "2022-02-02T10:10:08Z"}} + + records = read_incremental(stream, stream_state) + assert [r["id"] for r in records] == [6, 5] + assert stream_state == {"organization/repository": {"updated_at": "2022-02-02T10:10:12Z"}} + + +@responses.activate +def test_stream_commits_incremental_read(): + + repository_args_with_start_date = { + "repositories": ["organization/repository"], + "page_size_for_large_streams": 100, + "start_date": "2022-02-02T10:10:03Z", + } + + default_branches = {"organization/repository": "master"} + branches_to_pull = {"organization/repository": ["branch"]} + + stream = Commits(**repository_args_with_start_date, branches_to_pull=branches_to_pull, default_branches=default_branches) + + data = [ + {"sha": 1, "commit": {"author": {"date": "2022-02-02T10:10:02Z"}}}, + {"sha": 2, "commit": {"author": {"date": "2022-02-02T10:10:04Z"}}}, + {"sha": 3, "commit": {"author": {"date": "2022-02-02T10:10:06Z"}}}, + {"sha": 4, "commit": {"author": {"date": "2022-02-02T10:10:08Z"}}}, + {"sha": 5, "commit": {"author": {"date": "2022-02-02T10:10:10Z"}}}, + ] + + api_url = "https://api.github.com/repos/organization/repository/commits" + + responses.add( + "GET", + api_url, + json=data[0:3], + match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:03Z", "sha": "branch"}, strict_match=False)], + ) + + responses.add( + "GET", + 
api_url, + json=data[3:5], + match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:06Z", "sha": "branch"}, strict_match=False)], + ) + + stream_state = {} + records = read_incremental(stream, stream_state) + assert [r["sha"] for r in records] == [2, 3] + assert stream_state == {"organization/repository": {"branch": {"created_at": "2022-02-02T10:10:06Z"}}} + records = read_incremental(stream, stream_state) + assert [r["sha"] for r in records] == [4, 5] + assert stream_state == {"organization/repository": {"branch": {"created_at": "2022-02-02T10:10:10Z"}}} + + +@responses.activate +def test_stream_commits_state_upgrade(): + + repository_args_with_start_date = { + "repositories": ["organization/repository"], + "page_size_for_large_streams": 100, + "start_date": "2022-02-02T10:10:02Z", + } + + default_branches = {"organization/repository": "master"} + branches_to_pull = {"organization/repository": ["master"]} + + stream = Commits(**repository_args_with_start_date, branches_to_pull=branches_to_pull, default_branches=default_branches) + + responses.add( + "GET", + "https://api.github.com/repos/organization/repository/commits", + json=[ + {"sha": 1, "commit": {"author": {"date": "2022-02-02T10:10:02Z"}}}, + {"sha": 2, "commit": {"author": {"date": "2022-02-02T10:10:04Z"}}}, + ], + match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:02Z", "sha": "master"}, strict_match=False)], + ) + + stream_state = {"organization/repository": {"created_at": "2022-02-02T10:10:02Z"}} + records = read_incremental(stream, stream_state) + assert [r["sha"] for r in records] == [2] + assert stream_state == {"organization/repository": {"master": {"created_at": "2022-02-02T10:10:04Z"}}} + + +@responses.activate +def test_stream_pull_request_commits(): + + repository_args = { + "repositories": ["organization/repository"], + "page_size_for_large_streams": 100, + } + repository_args_with_start_date = {**repository_args, "start_date": "2022-02-02T10:10:02Z"} + + stream = PullRequestCommits(PullRequests(**repository_args_with_start_date), **repository_args) + + responses.add( + "GET", + "https://api.github.com/repos/organization/repository/pulls", + json=[ + {"id": 1, "updated_at": "2022-02-02T10:10:02Z", "number": 1}, + {"id": 2, "updated_at": "2022-02-02T10:10:04Z", "number": 2}, + {"id": 3, "updated_at": "2022-02-02T10:10:06Z", "number": 3}, + ], + ) + + responses.add( + "GET", + "https://api.github.com/repos/organization/repository/pulls/2/commits", + json=[{"sha": 1}, {"sha": 2}], + ) + + responses.add( + "GET", + "https://api.github.com/repos/organization/repository/pulls/3/commits", + json=[{"sha": 3}, {"sha": 4}], + ) + + records = read_full_refresh(stream) + assert records == [ + {"sha": 1, "repository": "organization/repository", "pull_number": 2}, + {"sha": 2, "repository": "organization/repository", "pull_number": 2}, + {"sha": 3, "repository": "organization/repository", "pull_number": 3}, + {"sha": 4, "repository": "organization/repository", "pull_number": 3}, + ] + + +@responses.activate +def test_stream_project_columns(): + + repository_args_with_start_date = { + "repositories": ["organization/repository"], + "page_size_for_large_streams": 100, + "start_date": "2022-02-01T00:00:00Z", + } + + data = [ + { + "updated_at": "2022-01-01T10:00:00Z", + }, + { + "updated_at": "2022-03-01T10:00:00Z", + "columns": [ + {"updated_at": "2022-01-01T10:00:00Z"}, + {"updated_at": "2022-03-01T09:00:00Z"}, + {"updated_at": "2022-03-01T10:00:00Z"}, + ], + }, + { + "updated_at": "2022-05-01T10:00:00Z", + 
"columns": [ + {"updated_at": "2022-01-01T10:00:00Z"}, + {"updated_at": "2022-05-01T10:00:00Z"}, + ], + }, + ] + + ProjectsResponsesAPI.register(data) + + stream = ProjectColumns(Projects(**repository_args_with_start_date), **repository_args_with_start_date) + + stream_state = {} + + records = read_incremental(stream, stream_state=stream_state) + + assert records == [ + {"id": 22, "name": "column_22", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-03-01T09:00:00Z"}, + {"id": 23, "name": "column_23", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-03-01T10:00:00Z"}, + {"id": 32, "name": "column_32", "project_id": 3, "repository": "organization/repository", "updated_at": "2022-05-01T10:00:00Z"}, + ] + + assert stream_state == { + "organization/repository": {"2": {"updated_at": "2022-03-01T10:00:00Z"}, "3": {"updated_at": "2022-05-01T10:00:00Z"}} + } + + data = [ + {"updated_at": "2022-01-01T10:00:00Z"}, + { + "updated_at": "2022-04-01T10:00:00Z", + "columns": [ + {"updated_at": "2022-01-01T10:00:00Z"}, + {"updated_at": "2022-03-01T09:00:00Z"}, + {"updated_at": "2022-03-01T10:00:00Z"}, + {"updated_at": "2022-04-01T10:00:00Z"}, + ], + }, + { + "updated_at": "2022-05-01T10:00:00Z", + "columns": [ + {"updated_at": "2022-01-01T10:00:00Z"}, + {"updated_at": "2022-05-01T10:00:00Z"}, + ], + }, + { + "updated_at": "2022-06-01T10:00:00Z", + "columns": [{"updated_at": "2022-06-01T10:00:00Z"}], + }, + ] + + ProjectsResponsesAPI.register(data) + + records = read_incremental(stream, stream_state=stream_state) + assert records == [ + {"id": 24, "name": "column_24", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-04-01T10:00:00Z"}, + {"id": 41, "name": "column_41", "project_id": 4, "repository": "organization/repository", "updated_at": "2022-06-01T10:00:00Z"}, + ] + + assert stream_state == { + "organization/repository": { + "2": {"updated_at": "2022-04-01T10:00:00Z"}, + "3": {"updated_at": "2022-05-01T10:00:00Z"}, + "4": {"updated_at": "2022-06-01T10:00:00Z"}, + } + } + + +@responses.activate +def test_stream_project_cards(): + + repository_args_with_start_date = { + "repositories": ["organization/repository"], + "page_size_for_large_streams": 100, + "start_date": "2022-03-01T00:00:00Z", + } + + projects_stream = Projects(**repository_args_with_start_date) + project_columns_stream = ProjectColumns(projects_stream, **repository_args_with_start_date) + stream = ProjectCards(project_columns_stream, **repository_args_with_start_date) + + data = [ + { + "updated_at": "2022-01-01T00:00:00Z", + }, + { + "updated_at": "2022-06-01T00:00:00Z", + "columns": [ + { + "updated_at": "2022-04-01T00:00:00Z", + "cards": [ + {"updated_at": "2022-03-01T00:00:00Z"}, + {"updated_at": "2022-04-01T00:00:00Z"}, + ], + }, + {"updated_at": "2022-05-01T09:00:00Z"}, + { + "updated_at": "2022-06-01T00:00:00Z", + "cards": [ + {"updated_at": "2022-05-01T00:00:00Z"}, + {"updated_at": "2022-06-01T00:00:00Z"}, + ], + }, + ], + }, + { + "updated_at": "2022-05-01T00:00:00Z", + "columns": [ + {"updated_at": "2022-01-01T00:00:00Z"}, + { + "updated_at": "2022-05-01T00:00:00Z", + "cards": [ + {"updated_at": "2022-02-01T00:00:00Z"}, + {"updated_at": "2022-05-01T00:00:00Z"}, + ], + }, + ], + }, + ] + + ProjectsResponsesAPI.register(data) + + stream_state = {} + records = read_incremental(stream, stream_state=stream_state) + + assert records == [ + { + "column_id": 21, + "id": 212, + "name": "card_212", + "project_id": 2, + "repository": 
"organization/repository", + "updated_at": "2022-04-01T00:00:00Z", + }, + { + "column_id": 23, + "id": 231, + "name": "card_231", + "project_id": 2, + "repository": "organization/repository", + "updated_at": "2022-05-01T00:00:00Z", + }, + { + "column_id": 23, + "id": 232, + "name": "card_232", + "project_id": 2, + "repository": "organization/repository", + "updated_at": "2022-06-01T00:00:00Z", + }, + { + "column_id": 32, + "id": 322, + "name": "card_322", + "project_id": 3, + "repository": "organization/repository", + "updated_at": "2022-05-01T00:00:00Z", + }, + ] + + +@responses.activate +def test_stream_comments(): + + repository_args_with_start_date = { + "repositories": ["organization/repository"], + "page_size_for_large_streams": 100, + "start_date": "2022-02-02T10:10:03Z", + } + + stream = Comments(**repository_args_with_start_date) + + data = [ + {"id": 1, "updated_at": "2022-02-02T10:10:02Z"}, + {"id": 2, "updated_at": "2022-02-02T10:10:04Z"}, + {"id": 3, "updated_at": "2022-02-02T10:10:06Z"}, + {"id": 4, "updated_at": "2022-02-02T10:10:08Z"}, + ] + + api_url = "https://api.github.com/repos/organization/repository/issues/comments" + + responses.add( + "GET", + api_url, + json=data[0:2], + match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:03Z"}, strict_match=False)], + ) + + responses.add( + "GET", + api_url, + json=data[2:4], + match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:04Z"}, strict_match=False)], + ) + + stream_state = {} + records = read_incremental(stream, stream_state) + assert records == [{"id": 2, "repository": "organization/repository", "updated_at": "2022-02-02T10:10:04Z"}] + assert stream_state == {"organization/repository": {"updated_at": "2022-02-02T10:10:04Z"}} + + records = read_incremental(stream, stream_state) + assert records == [ + {"id": 3, "repository": "organization/repository", "updated_at": "2022-02-02T10:10:06Z"}, + {"id": 4, "repository": "organization/repository", "updated_at": "2022-02-02T10:10:08Z"}, + ] + assert stream_state == {"organization/repository": {"updated_at": "2022-02-02T10:10:08Z"}} + + +@responses.activate +def test_streams_read_full_refresh(): + + repository_args = { + "repositories": ["organization/repository"], + "page_size_for_large_streams": 100, + } + + repository_args_with_start_date = {**repository_args, "start_date": "2022-02-01T00:00:00Z"} + + def get_json_response(cursor_field): + cursor_field = cursor_field or "updated_at" + return [ + {"id": 1, cursor_field: "2022-02-01T00:00:00Z"}, + {"id": 2, cursor_field: "2022-02-02T00:00:00Z"}, + ] + + def get_records(cursor_field): + cursor_field = cursor_field or "updated_at" + return [ + {"id": 1, cursor_field: "2022-02-01T00:00:00Z", "repository": "organization/repository"}, + {"id": 2, cursor_field: "2022-02-02T00:00:00Z", "repository": "organization/repository"}, + ] + + for cls, url in [ + (Releases, "https://api.github.com/repos/organization/repository/releases"), + (IssueEvents, "https://api.github.com/repos/organization/repository/issues/events"), + (IssueMilestones, "https://api.github.com/repos/organization/repository/milestones"), + (CommitComments, "https://api.github.com/repos/organization/repository/comments"), + (Deployments, "https://api.github.com/repos/organization/repository/deployments"), + ]: + stream = cls(**repository_args_with_start_date) + responses.add("GET", url, json=get_json_response(stream.cursor_field)) + records = read_full_refresh(stream) + assert records == get_records(stream.cursor_field)[1:2] + + for cls, url in [ + 
(Tags, "https://api.github.com/repos/organization/repository/tags"), + (IssueLabels, "https://api.github.com/repos/organization/repository/labels"), + (Collaborators, "https://api.github.com/repos/organization/repository/collaborators"), + (Branches, "https://api.github.com/repos/organization/repository/branches"), + ]: + stream = cls(**repository_args) + responses.add("GET", url, json=get_json_response(stream.cursor_field)) + records = read_full_refresh(stream) + assert records == get_records(stream.cursor_field) + + responses.add( + "GET", + "https://api.github.com/repos/organization/repository/stargazers", + json=[ + {"starred_at": "2022-02-01T00:00:00Z", "user": {"id": 1}}, + {"starred_at": "2022-02-02T00:00:00Z", "user": {"id": 2}}, + ], + ) + + stream = Stargazers(**repository_args_with_start_date) + records = read_full_refresh(stream) + assert records == [{"repository": "organization/repository", "starred_at": "2022-02-02T00:00:00Z", "user": {"id": 2}, "user_id": 2}] diff --git a/airbyte-integrations/connectors/source-github/unit_tests/utils.py b/airbyte-integrations/connectors/source-github/unit_tests/utils.py index 73c566ab02be..b318429a30cb 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/utils.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/utils.py @@ -2,7 +2,9 @@ # Copyright (c) 2021 Airbyte, Inc., all rights reserved. # +from typing import Any, MutableMapping +import responses from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams import Stream @@ -13,3 +15,59 @@ def read_full_refresh(stream_instance: Stream): for slice in slices: records.extend(list(stream_instance.read_records(stream_slice=slice, sync_mode=SyncMode.full_refresh))) return records + + +def read_incremental(stream_instance: Stream, stream_state: MutableMapping[str, Any]): + res = [] + slices = stream_instance.stream_slices(sync_mode=SyncMode.incremental, stream_state=stream_state) + for slice in slices: + records = stream_instance.read_records(sync_mode=SyncMode.incremental, stream_slice=slice, stream_state=stream_state) + for record in records: + stream_state = stream_instance.get_updated_state(stream_state, record) + res.append(record) + return res + + +class ProjectsResponsesAPI: + """ + Fake Responses API for github projects, columns, cards + """ + + projects_url = "https://api.github.com/repos/organization/repository/projects" + columns_url = "https://api.github.com/projects/{project_id}/columns" + cards_url = "https://api.github.com/projects/columns/{column_id}/cards" + + @classmethod + def get_json_projects(cls, data): + res = [] + for n, project in enumerate(data, start=1): + name = f"project_{n}" + res.append({"id": n, "name": name, "updated_at": project["updated_at"]}) + return res + + @classmethod + def get_json_columns(cls, project, project_id): + res = [] + for n, column in enumerate(project.get("columns", []), start=1): + column_id = int(str(project_id) + str(n)) + name = f"column_{column_id}" + res.append({"id": column_id, "name": name, "updated_at": column["updated_at"]}) + return res + + @classmethod + def get_json_cards(cls, column, column_id): + res = [] + for n, card in enumerate(column.get("cards", []), start=1): + card_id = int(str(column_id) + str(n)) + name = f"card_{card_id}" + res.append({"id": card_id, "name": name, "updated_at": card["updated_at"]}) + return res + + @classmethod + def register(cls, data): + responses.upsert("GET", cls.projects_url, json=cls.get_json_projects(data)) + for project_id, project in enumerate(data, 
start=1): + responses.upsert("GET", cls.columns_url.format(project_id=project_id), json=cls.get_json_columns(project, project_id)) + for n, column in enumerate(project.get("columns", []), start=1): + column_id = int(str(project_id) + str(n)) + responses.upsert("GET", cls.cards_url.format(column_id=column_id), json=cls.get_json_cards(column, column_id)) diff --git a/airbyte-integrations/connectors/source-google-ads/Dockerfile b/airbyte-integrations/connectors/source-google-ads/Dockerfile index 591e045c6e22..780849ada303 100644 --- a/airbyte-integrations/connectors/source-google-ads/Dockerfile +++ b/airbyte-integrations/connectors/source-google-ads/Dockerfile @@ -13,5 +13,5 @@ RUN pip install . ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.28 +LABEL io.airbyte.version=0.1.29 LABEL io.airbyte.name=airbyte/source-google-ads diff --git a/airbyte-integrations/connectors/source-google-ads/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-google-ads/integration_tests/configured_catalog.json index cbffa662539e..e3e93e8fe3fa 100644 --- a/airbyte-integrations/connectors/source-google-ads/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-google-ads/integration_tests/configured_catalog.json @@ -152,6 +152,18 @@ "destination_sync_mode": "overwrite", "cursor_field": ["segments.date"] }, + { + "stream": { + "name": "user_location_report", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["segments.date"] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite", + "cursor_field": ["segments.date"] + }, { "stream": { "name": "happytable", diff --git a/airbyte-integrations/connectors/source-google-ads/source_google_ads/schemas/user_location_report.json b/airbyte-integrations/connectors/source-google-ads/source_google_ads/schemas/user_location_report.json index 4e92ede6d75c..c9ef9cb5ae0c 100644 --- a/airbyte-integrations/connectors/source-google-ads/source_google_ads/schemas/user_location_report.json +++ b/airbyte-integrations/connectors/source-google-ads/source_google_ads/schemas/user_location_report.json @@ -37,7 +37,7 @@ "type": ["null", "string"] }, "user_location_view.country_criterion_id": { - "type": ["null", "string"] + "type": ["null", "integer"] }, "user_location_view.resource_name": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-hubspot/Dockerfile b/airbyte-integrations/connectors/source-hubspot/Dockerfile index 81cfecf35036..01daac6a0874 100644 --- a/airbyte-integrations/connectors/source-hubspot/Dockerfile +++ b/airbyte-integrations/connectors/source-hubspot/Dockerfile @@ -34,5 +34,5 @@ COPY source_hubspot ./source_hubspot ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.47 +LABEL io.airbyte.version=0.1.50 LABEL io.airbyte.name=airbyte/source-hubspot diff --git a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json index 210a49a5af81..de022f543666 100644 --- a/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json +++ b/airbyte-integrations/connectors/source-hubspot/sample_files/configured_catalog.json @@ -4,9 +4,7 @@ "stream": { "name": "campaigns", "json_schema": {}, - "supported_sync_modes": [ - 
"full_refresh" - ] + "supported_sync_modes": ["full_refresh"] }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" @@ -15,66 +13,43 @@ "stream": { "name": "companies", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "contact_lists", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "contacts", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "deal_pipelines", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh" - ] + "supported_sync_modes": ["full_refresh"] }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" @@ -83,180 +58,115 @@ "stream": { "name": "deals", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "email_events", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "created" - ] + "default_cursor_field": ["created"] }, "sync_mode": "incremental", - "cursor_field": [ - "created" - ], + "cursor_field": ["created"], "destination_sync_mode": "append" }, { "stream": { "name": "engagements", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "lastUpdated" - ] + "default_cursor_field": ["lastUpdated"] }, "sync_mode": "incremental", - "cursor_field": [ - "lastUpdated" - ], + "cursor_field": ["lastUpdated"], "destination_sync_mode": "append" }, { "stream": { "name": "engagements_calls", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "engagements_emails", "json_schema": {}, - "supported_sync_modes": [ - 
"full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "engagements_meetings", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "engagements_notes", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "engagements_tasks", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "feedback_submissions", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "forms", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh" - ] + "supported_sync_modes": ["full_refresh"] }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" @@ -265,9 +175,7 @@ "stream": { "name": "form_submissions", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh" - ] + "supported_sync_modes": ["full_refresh"] }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" @@ -276,32 +184,21 @@ "stream": { "name": "line_items", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "marketing_emails", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh" - ], + "supported_sync_modes": ["full_refresh"], "source_defined_cursor": false, - "default_cursor_field": [ - "updated" - ] + "default_cursor_field": ["updated"] }, "sync_mode": "full_refresh", "cursor_field": null, @@ -311,9 +208,7 @@ "stream": { "name": "owners", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh" - ] + "supported_sync_modes": ["full_refresh"] }, "sync_mode": 
"full_refresh", "destination_sync_mode": "overwrite" @@ -322,103 +217,66 @@ "stream": { "name": "products", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "property_history", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], - "default_cursor_field": [ - "timestamp" - ] + "supported_sync_modes": ["full_refresh", "incremental"], + "default_cursor_field": ["timestamp"] }, "sync_mode": "full_refresh", - "cursor_field": [ - "timestamp" - ], + "cursor_field": ["timestamp"], "destination_sync_mode": "overwrite" }, { "stream": { "name": "quotes", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "subscription_changes", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "timestamp" - ] + "default_cursor_field": ["timestamp"] }, "sync_mode": "incremental", - "cursor_field": [ - "timestamp" - ], + "cursor_field": ["timestamp"], "destination_sync_mode": "append" }, { "stream": { "name": "tickets", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh", - "incremental" - ], + "supported_sync_modes": ["full_refresh", "incremental"], "source_defined_cursor": true, - "default_cursor_field": [ - "updatedAt" - ] + "default_cursor_field": ["updatedAt"] }, "sync_mode": "incremental", - "cursor_field": [ - "updatedAt" - ], + "cursor_field": ["updatedAt"], "destination_sync_mode": "append" }, { "stream": { "name": "ticket_pipelines", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh" - ] + "supported_sync_modes": ["full_refresh"] }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" @@ -427,9 +285,7 @@ "stream": { "name": "workflows", "json_schema": {}, - "supported_sync_modes": [ - "full_refresh" - ] + "supported_sync_modes": ["full_refresh"] }, "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json b/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json index 02a5e2b8f993..72d9884d84b7 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/spec.json @@ -4,7 +4,10 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "HubSpot Source Spec", "type": "object", - "required": ["start_date", "credentials"], + "required": [ + "start_date", + "credentials" + ], "additionalProperties": true, "properties": { "start_date": { @@ -12,7 +15,9 @@ "title": "Start Date", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$", "description": "UTC date and time in the format 2017-01-25T00:00:00Z. 
Any data before this date will not be replicated.", - "examples": ["2017-01-25T00:00:00Z"] + "examples": [ + "2017-01-25T00:00:00Z" + ] }, "credentials": { "title": "Authentication mechanism", @@ -34,28 +39,36 @@ "title": "Credentials Title", "description": "Name of the credentials set", "const": "OAuth Credentials", - "enum": ["OAuth Credentials"], + "enum": [ + "OAuth Credentials" + ], "default": "OAuth Credentials", "order": 0 }, "client_id": { "title": "Client ID", - "description": "The Client ID of your HubSpot developer application. See our docs if you need help finding this id.", + "description": "The Client ID of your HubSpot developer application. See our docs if you need help finding this id.", "type": "string", - "examples": ["123456789000"] + "examples": [ + "123456789000" + ] }, "client_secret": { "title": "Client Secret", - "description": "The Client Secret of your HubSpot developer application. See our docs if you need help finding this secret.", + "description": "The Client Secret of your HubSpot developer application. See our docs if you need help finding this secret.", "type": "string", - "examples": ["secret"], + "examples": [ + "secret" + ], "airbyte_secret": true }, "refresh_token": { "title": "Refresh Token", - "description": "Refresh Token to renew the expired Access Token. See our docs if you need help generating the token.", + "description": "Refresh Token to renew the expired Access Token. See our docs if you need help generating the token.", "type": "string", - "examples": ["refresh_token"], + "examples": [ + "refresh_token" + ], "airbyte_secret": true } } @@ -63,20 +76,25 @@ { "type": "object", "title": "API key", - "required": ["api_key", "credentials_title"], + "required": [ + "api_key", + "credentials_title" + ], "properties": { "credentials_title": { "type": "string", "title": "Credentials title", "description": "Name of the credentials set", "const": "API Key Credentials", - "enum": ["API Key Credentials"], + "enum": [ + "API Key Credentials" + ], "default": "API Key Credentials", "order": 0 }, "api_key": { "title": "API key", - "description": "HubSpot API Key. See our docs if you need help finding this key.", + "description": "HubSpot API Key. 
See our docs if you need help finding this key.", "type": "string", "airbyte_secret": true } @@ -89,9 +107,23 @@ "authSpecification": { "auth_type": "oauth2.0", "oauth2Specification": { - "rootObject": ["credentials", "0"], - "oauthFlowInitParameters": [["client_id"], ["client_secret"]], - "oauthFlowOutputParameters": [["refresh_token"]] + "rootObject": [ + "credentials", + "0" + ], + "oauthFlowInitParameters": [ + [ + "client_id" + ], + [ + "client_secret" + ] + ], + "oauthFlowOutputParameters": [ + [ + "refresh_token" + ] + ] } } } diff --git a/airbyte-integrations/connectors/source-hubspot/source_hubspot/streams.py b/airbyte-integrations/connectors/source-hubspot/source_hubspot/streams.py index 6c0e1c6c50ee..db6c88cc8a52 100644 --- a/airbyte-integrations/connectors/source-hubspot/source_hubspot/streams.py +++ b/airbyte-integrations/connectors/source-hubspot/source_hubspot/streams.py @@ -3,27 +3,24 @@ # -import backoff -import pendulum as pendulum -import requests import sys import time import urllib.parse from abc import ABC, abstractmethod +from functools import lru_cache, partial +from http import HTTPStatus +from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Tuple, Union + +import backoff +import pendulum as pendulum +import requests from airbyte_cdk.entrypoint import logger from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams.http import HttpStream -from airbyte_cdk.sources.streams.http.requests_native_auth import \ - Oauth2Authenticator +from airbyte_cdk.sources.streams.http.requests_native_auth import Oauth2Authenticator from airbyte_cdk.sources.utils.sentry import AirbyteSentry -from functools import lru_cache, partial -from http import HTTPStatus from requests import codes -from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, \ - Optional, Tuple, Union - -from source_hubspot.errors import HubspotAccessDenied, HubspotInvalidAuth, \ - HubspotRateLimited, HubspotTimeout +from source_hubspot.errors import HubspotAccessDenied, HubspotInvalidAuth, HubspotRateLimited, HubspotTimeout # The value is obtained experimentally, HubSpot allows the URL length up to ~16300 symbols, # so it was decided to limit the length of the `properties` parameter to 15000 characters. 
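The comment above caps the `properties` query parameter at 15,000 characters so that the full request URL stays under HubSpot's experimentally observed ~16,300-symbol limit. As a rough illustration of the chunking this implies (a sketch only, under assumed names, not the connector's actual `split_properties` implementation), one way to split a long property list under that budget is:

```python
# Illustrative sketch only: chunk a list of property names so that the
# comma-separated `properties` query parameter stays under a length budget.
# The real split_properties() in source_hubspot.streams may differ in detail.
from typing import Iterator, List

PROPERTIES_PARAM_MAX_LENGTH = 15000  # keeps the URL under HubSpot's ~16300-symbol limit


def split_properties_sketch(properties: List[str], budget: int = PROPERTIES_PARAM_MAX_LENGTH) -> Iterator[List[str]]:
    chunk: List[str] = []
    chunk_len = 0
    for prop in properties:
        # +1 accounts for the comma that joins the names in the query string.
        if chunk and chunk_len + len(prop) + 1 > budget:
            yield chunk
            chunk, chunk_len = [], 0
        chunk.append(prop)
        chunk_len += len(prop) + 1
    if chunk:
        yield chunk


# Example: 2000 synthetic property names get spread over several requests.
chunks = list(split_properties_sketch([f"property_number_{i}" for i in range(2000)]))
assert all(len(",".join(c)) <= PROPERTIES_PARAM_MAX_LENGTH for c in chunks)
```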
@@ -184,13 +181,13 @@ def _parse_and_handle_errors(response) -> Union[MutableMapping[str, Any], List[M @retry_connection_handler(max_tries=5, factor=5) @retry_after_handler(max_tries=3) def get( - self, url: str, params: MutableMapping[str, Any] = None + self, url: str, params: MutableMapping[str, Any] = None ) -> Tuple[Union[MutableMapping[str, Any], List[MutableMapping[str, Any]]], requests.Response]: response = self._session.get(self.BASE_URL + url, params=params) return self._parse_and_handle_errors(response), response def post( - self, url: str, data: Mapping[str, Any], params: MutableMapping[str, Any] = None + self, url: str, data: Mapping[str, Any], params: MutableMapping[str, Any] = None ) -> Tuple[Union[Mapping[str, Any], List[Mapping[str, Any]]], requests.Response]: response = self._session.post(self.BASE_URL + url, params=params, json=data) return self._parse_and_handle_errors(response), response @@ -224,11 +221,11 @@ def url(self): """Default URL to read from""" def path( - self, - *, - stream_state: Mapping[str, Any] = None, - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, + self, + *, + stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, ) -> str: return self.url @@ -245,7 +242,7 @@ def backoff_time(self, response: requests.Response) -> Optional[float]: return float(response.headers.get("Retry-After", 3)) def request_headers( - self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None ) -> Mapping[str, Any]: return { "Content-Type": "application/json", @@ -259,11 +256,11 @@ def get_json_schema(self) -> Mapping[str, Any]: return json_schema def handle_request( - self, - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, - params: Mapping[str, Any] = None, + self, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + params: Mapping[str, Any] = None, ) -> requests.Response: request_headers = self.request_headers(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) request_params = self.request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) @@ -293,11 +290,11 @@ def handle_request( return response def _read_stream_records( - self, - properties_list: List[str], - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, + self, + properties_list: List[str], + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, ) -> Tuple[dict, Any]: # TODO: Additional processing was added due to the fact that users receive 414 errors while syncing their streams (issues #3977 and #5835). 
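The TODO above refers to fetching an object's properties across several chunked requests to avoid 414 (URI too long) errors, which means the partial payloads must be merged back into one record per object id before they are emitted. A hedged sketch of that merge step, with assumed field names (`id`, `properties`), not the connector's exact `_read_stream_records` code:

```python
# Hedged illustration (not the connector's exact code): when one object's
# properties arrive across several chunked requests, the partial payloads
# are folded back into a single record keyed by object id.
from typing import Any, Dict, Iterable, List, Mapping


def merge_chunked_records(responses: Iterable[List[Mapping[str, Any]]]) -> Dict[str, Dict[str, Any]]:
    stream_records: Dict[str, Dict[str, Any]] = {}
    for chunk in responses:  # one response per properties chunk
        for record in chunk:
            merged = stream_records.setdefault(record["id"], {"id": record["id"], "properties": {}})
            merged["properties"].update(record.get("properties", {}))
    return stream_records


# Two chunked responses for the same company id collapse into one record.
merged = merge_chunked_records([
    [{"id": "42", "properties": {"name": "Acme"}}],
    [{"id": "42", "properties": {"city": "Berlin"}}],
])
assert merged["42"]["properties"] == {"name": "Acme", "city": "Berlin"}
```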
@@ -324,11 +321,11 @@ def _read_stream_records( return stream_records, response def read_records( - self, - sync_mode: SyncMode, - cursor_field: List[str] = None, - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: stream_state = stream_state or {} pagination_complete = False @@ -425,6 +422,7 @@ def _cast_value(cls, declared_field_types: List, field_name: str, field_value: A if target_type_name == "number": # do not cast numeric IDs into float, use integer instead target_type = int if field_name.endswith("_id") else target_type + field_value = field_value.replace(",", "") if target_type_name != "string" and field_value == "": # do not cast empty strings, return None instead to be properly casted. @@ -486,10 +484,10 @@ def _filter_old_records(self, records: Iterable) -> Iterable: yield record def request_params( - self, - stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, ) -> MutableMapping[str, Any]: default_params = {self.limit_field: self.limit} params = {**default_params} @@ -501,12 +499,12 @@ def _parse_response(self, response: requests.Response): return self._api._parse_and_handle_errors(response) def parse_response( - self, - response: requests.Response, - *, - stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, + self, + response: requests.Response, + *, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, ) -> Iterable[Mapping]: response = self._parse_response(response) @@ -633,11 +631,11 @@ def updated_at_field(self): """Name of the field associated with the state""" def read_records( - self, - sync_mode: SyncMode, - cursor_field: List[str] = None, - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: records = super().read_records(sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state) latest_cursor = None @@ -690,7 +688,7 @@ def _update_state(self, latest_cursor): self._start_date = self._state def stream_slices( - self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None + self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None ) -> Iterable[Optional[Mapping[str, Any]]]: chunk_size = pendulum.duration(days=30) slices = [] @@ -712,10 +710,10 @@ def stream_slices( return slices def request_params( - self, - stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, ) -> MutableMapping[str, Any]: params = super().request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) if stream_slice: @@ -735,9 +733,9 @@ def url(self): return f"/crm/v3/objects/{self.entity}/search" if 
self.state else f"/crm/v3/objects/{self.entity}" def __init__( - self, - include_archived_only: bool = False, - **kwargs, + self, + include_archived_only: bool = False, + **kwargs, ): super().__init__(**kwargs) self._state = None @@ -746,7 +744,7 @@ def __init__( @retry_connection_handler(max_tries=5, factor=5) @retry_after_handler(fixed_retry_after=1, max_tries=3) def search( - self, url: str, data: Mapping[str, Any], params: MutableMapping[str, Any] = None + self, url: str, data: Mapping[str, Any], params: MutableMapping[str, Any] = None ) -> Tuple[Union[Mapping[str, Any], List[Mapping[str, Any]]], requests.Response]: # We can safely retry this POST call, because it's a search operation. # Given Hubspot does not return any Retry-After header (https://developers.hubspot.com/docs/api/crm/search) @@ -755,11 +753,11 @@ def search( return self._api.post(url=url, data=data, params=params) def _process_search( - self, - properties_list: List[str], - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, + self, + properties_list: List[str], + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, ) -> Tuple[dict, requests.Response]: stream_records = {} payload = ( @@ -782,11 +780,11 @@ def _process_search( return stream_records, raw_response def read_records( - self, - sync_mode: SyncMode, - cursor_field: List[str] = None, - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: stream_state = stream_state or {} pagination_complete = False @@ -826,7 +824,7 @@ def read_records( if not next_page_token: pagination_complete = True elif self.state and next_page_token["payload"]["after"] >= 10000: - # Hubspot documentations states that the search endpoints are limited to 10,000 total results + # Hubspot documentation states that the search endpoints are limited to 10,000 total results # for any given query. Attempting to page beyond 10,000 will result in a 400 error. # https://developers.hubspot.com/docs/api/crm/search. We stop getting data at 10,000 and # start a new search query with the latest state that has been collected. 
@@ -838,10 +836,10 @@ def read_records( yield from [] def request_params( - self, - stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, ) -> MutableMapping[str, Any]: params = {"archived": str(self._include_archived_only).lower(), "associations": self.associations, "limit": self.limit} if next_page_token: @@ -860,7 +858,7 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, return {"params": params, "payload": payload} def stream_slices( - self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None + self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None ) -> Iterable[Optional[Mapping[str, Any]]]: return [None] @@ -901,10 +899,10 @@ def __init__(self, **kwargs): super().__init__(**kwargs) def request_params( - self, - stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, ) -> MutableMapping[str, Any]: params = IncrementalStream.request_params( self, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token @@ -918,11 +916,11 @@ def request_params( return params def read_records( - self, - sync_mode: SyncMode, - cursor_field: List[str] = None, - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: records = IncrementalStream.read_records( self, @@ -948,11 +946,11 @@ class Campaigns(Stream): primary_key = "id" def read_records( - self, - sync_mode: SyncMode, - cursor_field: List[str] = None, - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: for row in super().read_records(sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state): record, response = self._api.get(f"/email/public/v1/campaigns/{row['id']}") @@ -1003,10 +1001,10 @@ def _transform(self, records: Iterable) -> Iterable: yield {"canonical-vid": canonical_vid, **item} def request_params( - self, - stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, ) -> MutableMapping[str, Any]: params = super().request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) params.update({"showListMemberships": True}) @@ -1067,7 +1065,6 @@ class Engagements(IncrementalStream): url = "/engagements/v1/engagements/paged" more_key = "hasMore" - limit = 250 updated_at_field = "lastUpdated" created_at_field = "createdAt" primary_key = "id" @@ -1082,15 +1079,64 @@ def _transform(self, records: Iterable) -> Iterable: yield from super()._transform({**record.pop("engagement"), **record} for record in records) def request_params( - self, - 
stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, ) -> MutableMapping[str, Any]: - params = {self.limit_field: self.limit} + params = {"count": 250} + if next_page_token: + params["offset"] = next_page_token["offset"] if self.state: - params["since"] = int(self._state.timestamp() * 1000) + params.update({"since": int(self._state.timestamp() * 1000), "count": 100}) return params + + def stream_slices( + self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None + ) -> Iterable[Optional[Mapping[str, Any]]]: + return [None] + + def read_records( + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, + ) -> Iterable[Mapping[str, Any]]: + stream_state = stream_state or {} + pagination_complete = False + + next_page_token = None + latest_cursor = None + with AirbyteSentry.start_transaction("read_records", self.name), AirbyteSentry.start_transaction_span("read_records"): + while not pagination_complete: + response = self.handle_request(stream_slice=stream_slice, stream_state=stream_state, next_page_token=next_page_token) + records = self._transform(self.parse_response(response, stream_state=stream_state, stream_slice=stream_slice)) + + if self.filter_old_records: + records = self._filter_old_records(records) + + for record in records: + cursor = self._field_to_datetime(record[self.updated_at_field]) + latest_cursor = max(cursor, latest_cursor) if latest_cursor else cursor + yield record + + next_page_token = self.next_page_token(response) + if self.state and next_page_token and next_page_token["offset"] >= 10000: + # As per Hubspot documentation, the recent engagements endpoint will only return the 10K + # most recently updated engagements. Since they are returned sorted by `lastUpdated` in + # descending order, we stop getting records if we have already reached 10,000. Attempting + # to get more than 10K will result in a HTTP 400 error. 
+ # https://legacydocs.hubspot.com/docs/methods/engagements/get-recent-engagements + next_page_token = None + + if not next_page_token: + pagination_complete = True + + # Always return an empty generator just in case no records were ever yielded + yield from [] + + self._update_state(latest_cursor=latest_cursor) class Forms(Stream): @@ -1117,11 +1163,11 @@ class FormSubmissions(Stream): updated_at_field = "updatedAt" def path( - self, - *, - stream_state: Mapping[str, Any] = None, - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, + self, + *, + stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, ) -> str: return f"{self.url}/{stream_slice['form_id']}" @@ -1141,7 +1187,7 @@ def _transform(self, records: Iterable) -> Iterable: yield record def stream_slices( - self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None + self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None ) -> Iterable[Optional[Mapping[str, Any]]]: slices = [] seen = set() @@ -1154,11 +1200,11 @@ def stream_slices( return slices def read_records( - self, - sync_mode: SyncMode, - cursor_field: List[str] = None, - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, ) -> Iterable[Mapping[str, Any]]: for record in super().read_records(sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state): record["formId"] = stream_slice["form_id"] diff --git a/airbyte-integrations/connectors/source-hubspot/unit_tests/conftest.py b/airbyte-integrations/connectors/source-hubspot/unit_tests/conftest.py new file mode 100644 index 000000000000..311d2be22557 --- /dev/null +++ b/airbyte-integrations/connectors/source-hubspot/unit_tests/conftest.py @@ -0,0 +1,57 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +import pytest +from source_hubspot.source import SourceHubspot +from source_hubspot.streams import API + +NUMBER_OF_PROPERTIES = 2000 + + +@pytest.fixture(name="oauth_config") +def oauth_config_fixture(): + return { + "start_date": "2021-10-10T00:00:00Z", + "credentials": { + "credentials_title": "OAuth Credentials", + "redirect_uri": "https://airbyte.io", + "client_id": "test_client_id", + "client_secret": "test_client_secret", + "refresh_token": "test_refresh_token", + "access_token": "test_access_token", + "token_expires": "2021-05-30T06:00:00Z", + }, + } + + +@pytest.fixture(name="common_params") +def common_params_fixture(config): + source = SourceHubspot() + common_params = source.get_common_params(config=config) + return common_params + + +@pytest.fixture(name="config") +def config_fixture(): + return {"start_date": "2021-01-10T00:00:00Z", "credentials": {"credentials_title": "API Key Credentials", "api_key": "test_api_key"}} + + +@pytest.fixture(name="some_credentials") +def some_credentials_fixture(): + return {"credentials_title": "API Key Credentials", "api_key": "wrong_key"} + + +@pytest.fixture(name="creds_with_wrong_permissions") +def creds_with_wrong_permissions(): + return {"credentials_title": "API Key Credentials", "api_key": "THIS-IS-THE-API_KEY"} + + +@pytest.fixture(name="fake_properties_list") +def fake_properties_list(): + return [f"property_number_{i}" for i in range(NUMBER_OF_PROPERTIES)] + + +@pytest.fixture(name="api") +def api(some_credentials): + return API(some_credentials) diff --git a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py index c74c92ca0aa2..798902cdde62 100644 --- a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py +++ b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_field_type_converting.py @@ -59,6 +59,7 @@ def test_bad_field_type_converting(field_type, expected, caplog, capsys): # specific cases ("string", "some_field", "test", None, "test"), (["null", "number"], "some_field", "123.456", None, 123.456), + (["null", "number"], "some_field", "123,123.456", None, 123123.456), (["null", "number"], "user_id", "123", None, 123), (["null", "string"], "some_field", "123", None, "123"), # when string has empty field_value (empty string) diff --git a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_source.py b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_source.py index fbcdbeb3ca33..adf3947c47ef 100644 --- a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_source.py @@ -4,58 +4,96 @@ import logging +from http import HTTPStatus +from unittest.mock import MagicMock +import pendulum import pytest from airbyte_cdk.models import ConfiguredAirbyteCatalog, SyncMode, Type +from source_hubspot.errors import HubspotRateLimited from source_hubspot.source import SourceHubspot -from source_hubspot.streams import API, PROPERTIES_PARAM_MAX_LENGTH, Companies, Deals, Products, Workflows, split_properties +from source_hubspot.streams import API, PROPERTIES_PARAM_MAX_LENGTH, Companies, Deals, Engagements, Products, Stream, Workflows, split_properties NUMBER_OF_PROPERTIES = 2000 logger = logging.getLogger("test_client") -@pytest.fixture(name="oauth_config") -def oauth_config_fixture(): - return { - "start_date": "2021-10-10T00:00:00Z", - "credentials": { - 
"credentials_title": "OAuth Credentials", - "redirect_uri": "https://airbyte.io", - "client_id": "test_client_id", - "client_secret": "test_client_secret", - "refresh_token": "test_refresh_token", - "access_token": "test_access_token", - "token_expires": "2021-05-30T06:00:00Z", - }, - } +def test_check_connection_ok(requests_mock, config): + responses = [ + {"json": [], "status_code": 200}, + ] + + requests_mock.register_uri("GET", "/properties/v2/contact/properties", responses) + ok, error_msg = SourceHubspot().check_connection(logger, config=config) + assert ok + assert not error_msg + + +def test_check_connection_empty_config(config): + config = {} + + with pytest.raises(KeyError): + SourceHubspot().check_connection(logger, config=config) + + +def test_check_connection_invalid_config(config): + config.pop("start_date") + + with pytest.raises(TypeError): + SourceHubspot().check_connection(logger, config=config) + + +def test_check_connection_exception(config): + ok, error_msg = SourceHubspot().check_connection(logger, config=config) + + assert not ok + assert error_msg + + +def test_streams(config): + streams = SourceHubspot().streams(config) + + assert len(streams) == 27 + + +def test_check_credential_title_exception(config): + config["credentials"].pop("credentials_title") + + with pytest.raises(Exception): + SourceHubspot().check_connection(logger, config=config) -@pytest.fixture(name="common_params") -def common_params_fixture(config): - source = SourceHubspot() - common_params = source.get_common_params(config=config) - return common_params +def test_parse_and_handle_errors(some_credentials): + response = MagicMock() + response.status_code = HTTPStatus.TOO_MANY_REQUESTS -@pytest.fixture(name="config") -def config_fixture(): - return {"start_date": "2021-01-10T00:00:00Z", "credentials": {"credentials_title": "API Key Credentials", "api_key": "test_api_key"}} + with pytest.raises(HubspotRateLimited): + API(some_credentials)._parse_and_handle_errors(response) -@pytest.fixture(name="some_credentials") -def some_credentials_fixture(): - return {"credentials_title": "API Key Credentials", "api_key": "wrong_key"} +def test_convert_datetime_to_string(): + pendulum_time = pendulum.now() + assert Stream._convert_datetime_to_string(pendulum_time, declared_format="date") + assert Stream._convert_datetime_to_string(pendulum_time, declared_format="date-time") -@pytest.fixture(name="creds_with_wrong_permissions") -def creds_with_wrong_permissions(): - return {"credentials_title": "API Key Credentials", "api_key": "THIS-IS-THE-API_KEY"} +def test_cast_datetime(common_params, caplog): + field_value = pendulum.now() + field_name = "curent_time" -@pytest.fixture(name="fake_properties_list") -def fake_properties_list(): - return [f"property_number_{i}" for i in range(NUMBER_OF_PROPERTIES)] + Companies(**common_params)._cast_datetime(field_name, field_value) + + expected_warining_message = { + "type": "LOG", + "log": { + "level": "WARN", + "message": f"Couldn't parse date/datetime string in {field_name}, trying to parse timestamp... Field value: {field_value}. 
Ex: argument of type 'DateTime' is not iterable", + }, + } + assert expected_warining_message["log"]["message"] in caplog.text def test_check_connection_backoff_on_limit_reached(requests_mock, config): @@ -132,10 +170,6 @@ class TestSplittingPropertiesFunctionality: "archived": False, } - @pytest.fixture - def api(self, some_credentials): - return API(some_credentials) - @staticmethod def set_mock_properties(requests_mock, url, fake_properties_list): properties_response = [ @@ -365,3 +399,106 @@ def test_search_based_stream_should_not_attempt_to_get_more_than_10k_records(req # Instead, it should use the new state to start a new search query. assert len(records) == 11000 assert test_stream.state["updatedAt"] == "2022-03-01T00:00:00+00:00" + + +def test_engagements_stream_pagination_works(requests_mock, common_params): + """ + Tests the engagements stream handles pagination correctly, for both + full_refresh and incremental sync modes. + """ + + # Mocking Request + requests_mock.register_uri("GET", "/engagements/v1/engagements/paged?hapikey=test_api_key&count=250", [ + { + "json": { + "results": [{"engagement": {"id": f"{y}", "lastUpdated": 1641234593251}} for y in range(250)], + "hasMore": True, + "offset": 250 + }, + "status_code": 200, + }, + { + "json": { + "results": [{"engagement": {"id": f"{y}", "lastUpdated": 1641234593251}} for y in range(250, 500)], + "hasMore": True, + "offset": 500 + }, + "status_code": 200, + }, + { + "json": { + "results": [{"engagement": {"id": f"{y}", "lastUpdated": 1641234595251}} for y in range(500, 600)], + "hasMore": False + }, + "status_code": 200, + } + ]) + + requests_mock.register_uri("GET", "/engagements/v1/engagements/recent/modified?hapikey=test_api_key&count=100", [ + { + "json": { + "results": [{"engagement": {"id": f"{y}", "lastUpdated": 1641234595252}} for y in range(100)], + "hasMore": True, + "offset": 100 + }, + "status_code": 200, + }, + { + "json": { + "results": [{"engagement": {"id": f"{y}", "lastUpdated": 1641234595252}} for y in range(100, 200)], + "hasMore": True, + "offset": 200 + }, + "status_code": 200, + }, + { + "json": { + "results": [{"engagement": {"id": f"{y}", "lastUpdated": 1641234595252}} for y in range(200, 250)], + "hasMore": False + }, + "status_code": 200, + } + ]) + + # Create test_stream instance for full refresh. + test_stream = Engagements(**common_params) + + records = list(test_stream.read_records(sync_mode=SyncMode.full_refresh)) + # The stream should handle pagination correctly and output 600 records. + assert len(records) == 600 + assert test_stream.state["lastUpdated"] == 1641234595251 + + records = list(test_stream.read_records(sync_mode=SyncMode.incremental)) + # The stream should handle pagination correctly and output 600 records. + assert len(records) == 250 + assert test_stream.state["lastUpdated"] == 1641234595252 + + +def test_incremental_engagements_stream_stops_at_10K_records(requests_mock, common_params, fake_properties_list): + """ + If there are more than 10,000 engagements that would be returned by the Hubspot recent engagements endpoint, + the Engagements instance should stop at the 10Kth record. 
+ """ + + responses = [ + { + "json": { + "results": [{"engagement": {"id": f"{y}", "lastUpdated": 1641234595252}} for y in range(100)], + "hasMore": True, + "offset": x*100 + }, + "status_code": 200, + } + for x in range(1, 102) + ] + + # Create test_stream instance with some state + test_stream = Engagements(**common_params) + test_stream.state = {"lastUpdated": 1641234595251} + + # Mocking Request + requests_mock.register_uri("GET", "/engagements/v1/engagements/recent/modified?hapikey=test_api_key&count=100", responses) + records = list(test_stream.read_records(sync_mode=SyncMode.incremental)) + # The stream should not attempt to get more than 10K records. + assert len(records) == 10000 + assert test_stream.state["lastUpdated"] == +1641234595252 diff --git a/airbyte-integrations/connectors/source-hubspot/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_streams.py new file mode 100644 index 000000000000..7c2088ee49e0 --- /dev/null +++ b/airbyte-integrations/connectors/source-hubspot/unit_tests/test_streams.py @@ -0,0 +1,140 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +import pytest +from source_hubspot.streams import ( + Campaigns, + Companies, + ContactLists, + Contacts, + DealPipelines, + Deals, + EmailEvents, + EngagementsCalls, + EngagementsEmails, + EngagementsMeetings, + EngagementsNotes, + EngagementsTasks, + FeedbackSubmissions, + Forms, + FormSubmissions, + LineItems, + MarketingEmails, + Owners, + Products, + Quotes, + TicketPipelines, + Tickets, + Workflows, +) + +from .utils import read_full_refresh, read_incremental + + +@pytest.mark.parametrize( + "stream, endpoint", + [ + (Campaigns, "campaigns"), + (Companies, "company"), + (ContactLists, "contact"), + (Contacts, "contact"), + (Deals, "deal"), + (DealPipelines, "deal"), + (Quotes, "quote"), + (EmailEvents, ""), + (EngagementsCalls, "calls"), + (EngagementsEmails, "emails"), + (EngagementsMeetings, "meetings"), + (EngagementsNotes, "notes"), + (EngagementsTasks, "tasks"), + (FeedbackSubmissions, "feedback_submissions"), + (Forms, "form"), + (FormSubmissions, "form"), + (LineItems, "line_item"), + (MarketingEmails, ""), + (Owners, ""), + (Products, "product"), + (Quotes, "quote"), + (TicketPipelines, ""), + (Tickets, "ticket"), + (Workflows, ""), + ], +) +def test_streams_read(stream, endpoint, requests_mock, common_params, fake_properties_list): + stream = stream(**common_params) + responses = [ + { + "json": { + stream.data_field: [ + { + "id": "test_id", + "created": "2022-02-25T16:43:11Z", + "updatedAt": "2022-02-25T16:43:11Z", + "lastUpdatedTime": "2022-02-25T16:43:11Z", + } + ], + } + } + ] + properties_response = [ + { + "json": [ + {"name": property_name, "type": "string", "updatedAt": 1571085954360, "createdAt": 1565059306048} + for property_name in fake_properties_list + ], + "status_code": 200, + } + ] + is_form_submission = isinstance(stream, FormSubmissions) + stream_url = stream.url + "/test_id" if is_form_submission else stream.url + + requests_mock.register_uri("GET", stream_url, responses) + requests_mock.register_uri("GET", "/marketing/v3/forms", responses) + requests_mock.register_uri("GET", "/email/public/v1/campaigns/test_id", responses) + requests_mock.register_uri("GET", f"/properties/v2/{endpoint}/properties", properties_response) + + records = read_incremental(stream, {}) + + assert records + + +@pytest.mark.parametrize( + "error_response", + [ + {"json": {}, "status_code": 429}, + {"json": {}, "status_code": 502}, + {"json": {}, 
"status_code": 504}, + ], +) +def test_common_error_retry(error_response, requests_mock, common_params, fake_properties_list): + """Error once, check that we retry and not fail""" + properties_response = [ + {"name": property_name, "type": "string", "updatedAt": 1571085954360, "createdAt": 1565059306048} + for property_name in fake_properties_list + ] + responses = [ + error_response, + { + "json": properties_response, + "status_code": 200, + }, + ] + + stream = Companies(**common_params) + + response = { + stream.data_field: [ + { + "id": "test_id", + "created": "2022-02-25T16:43:11Z", + "updatedAt": "2022-02-25T16:43:11Z", + "lastUpdatedTime": "2022-02-25T16:43:11Z", + } + ], + } + requests_mock.register_uri("GET", "/properties/v2/company/properties", responses) + requests_mock.register_uri("GET", stream.url, [{"json": response}]) + records = read_full_refresh(stream) + + assert [response[stream.data_field][0]] == records diff --git a/airbyte-integrations/connectors/source-hubspot/unit_tests/utils.py b/airbyte-integrations/connectors/source-hubspot/unit_tests/utils.py new file mode 100644 index 000000000000..c92ac97b0373 --- /dev/null +++ b/airbyte-integrations/connectors/source-hubspot/unit_tests/utils.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from typing import Any, MutableMapping + +from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources.streams import Stream + + +def read_incremental(stream_instance: Stream, stream_state: MutableMapping[str, Any]): + res = [] + slices = stream_instance.stream_slices(sync_mode=SyncMode.incremental, stream_state=stream_state) + for slice in slices: + records = stream_instance.read_records(sync_mode=SyncMode.incremental, stream_slice=slice, stream_state=stream_state) + for record in records: + stream_state = stream_instance.get_updated_state(stream_state, record) + res.append(record) + return res + + +def read_full_refresh(stream_instance: Stream): + records = [] + slices = stream_instance.stream_slices(sync_mode=SyncMode.full_refresh) + for slice in slices: + records.extend(list(stream_instance.read_records(stream_slice=slice, sync_mode=SyncMode.full_refresh))) + return records diff --git a/airbyte-integrations/connectors/source-instagram/README.md b/airbyte-integrations/connectors/source-instagram/README.md index 359aa3de2764..3de258e14ffb 100644 --- a/airbyte-integrations/connectors/source-instagram/README.md +++ b/airbyte-integrations/connectors/source-instagram/README.md @@ -59,6 +59,21 @@ To run unit tests locally, from the connector directory run: python -m pytest unit_tests ``` +#### Acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Source Acceptance Tests](https://docs.airbyte.io/connector-development/testing-connectors/source-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. +To run your integration tests with acceptance tests, from the connector root, run +``` +docker build . 
--no-cache -t airbyte/source-instagram:dev \ +&& python -m pytest -p source_acceptance_test.plugin +``` +or +``` +./acceptance-test-docker.sh +``` + +To run your integration tests with docker + ### Locally running the connector docker image #### Build diff --git a/airbyte-integrations/connectors/source-instagram/setup.py b/airbyte-integrations/connectors/source-instagram/setup.py index 39844e7e3f4f..98650485d960 100644 --- a/airbyte-integrations/connectors/source-instagram/setup.py +++ b/airbyte-integrations/connectors/source-instagram/setup.py @@ -15,7 +15,8 @@ TEST_REQUIREMENTS = [ "pytest~=6.1", - "requests_mock==1.8.0", + "pytest-mock~=3.6", + "requests_mock~=1.8", ] setup( diff --git a/airbyte-integrations/connectors/source-instagram/source_instagram/common.py b/airbyte-integrations/connectors/source-instagram/source_instagram/common.py index 256051ecba5b..4c48f574ffd4 100644 --- a/airbyte-integrations/connectors/source-instagram/source_instagram/common.py +++ b/airbyte-integrations/connectors/source-instagram/source_instagram/common.py @@ -7,10 +7,12 @@ import urllib.parse as urlparse import backoff -from airbyte_cdk.entrypoint import logger # FIXME (Eugene K): register logger as standard python logger +from airbyte_cdk.logger import AirbyteLogger from facebook_business.exceptions import FacebookRequestError from requests.status_codes import codes as status_codes +logger = AirbyteLogger() + class InstagramAPIException(Exception): """General class for all API errors""" @@ -38,9 +40,12 @@ def should_retry_api_error(exc: FacebookRequestError): if exc.http_status() == status_codes.TOO_MANY_REQUESTS: return True - # FIXME: add type and http_status - if exc.api_error_code() == 10 and exc.api_error_message() == "(#10) Not enough viewers for the media to show insights": - return False # expected error + if ( + exc.api_error_type() == "OAuthException" + and exc.api_error_code() == 10 + and exc.api_error_message() == "(#10) Not enough viewers for the media to show insights" + ): + return True # Issue 4028, Sometimes an error about the Rate Limit is returned with a 400 HTTP code if exc.http_status() == status_codes.BAD_REQUEST and exc.api_error_code() == 100 and exc.api_error_subcode() == 33: @@ -49,9 +54,10 @@ def should_retry_api_error(exc: FacebookRequestError): if exc.api_transient_error(): return True - # FIXME: add type, code and http_status - if exc.api_error_subcode() == 2108006: - return False + # The media was posted before the most recent time that the user's account + # was converted to a business account from a personal account. 
+ if exc.api_error_type() == "OAuthException" and exc.api_error_code() == 100 and exc.api_error_subcode() == 2108006: + return True return False @@ -66,12 +72,9 @@ def should_retry_api_error(exc: FacebookRequestError): def remove_params_from_url(url, params): - parsed_url = urlparse.urlparse(url) - res_query = [] - for q in parsed_url.query.split("&"): - key, value = q.split("=") - if key not in params: - res_query.append(f"{key}={value}") - - parse_result = parsed_url._replace(query="&".join(res_query)) - return urlparse.urlunparse(parse_result) + parsed = urlparse.urlparse(url) + query = urlparse.parse_qs(parsed.query, keep_blank_values=True) + filtered = dict((k, v) for k, v in query.items() if k not in params) + return urlparse.urlunparse( + [parsed.scheme, parsed.netloc, parsed.path, parsed.params, urlparse.urlencode(filtered, doseq=True), parsed.fragment] + ) diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py b/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py new file mode 100644 index 000000000000..f050341099cd --- /dev/null +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/conftest.py @@ -0,0 +1,104 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from facebook_business import FacebookAdsApi, FacebookSession +from pytest import fixture +from source_instagram.api import InstagramAPI as API + +FB_API_VERSION = FacebookAdsApi.API_VERSION + + +@fixture(scope="session", name="account_id") +def account_id_fixture(): + return "unknown_account" + + +@fixture(name="config") +def config_fixture(): + config = { + "access_token": "TOKEN", + "start_date": "2022-03-20T00:00:00", + } + + return config + + +@fixture(scope="session", name="some_config") +def some_config_fixture(account_id): + return {"start_date": "2021-01-23T00:00:00Z", "access_token": "unknown_token"} + + +@fixture(name="fb_account_response") +def fb_account_response_fixture(account_id, some_config, requests_mock): + account = {"id": "test_id", "instagram_business_account": {"id": "test_id"}} + requests_mock.register_uri( + "GET", + FacebookSession.GRAPH + f"/{FB_API_VERSION}/act_{account_id}/" + f"?access_token={some_config['access_token']}&fields=instagram_business_account", + json=account, + ) + return { + "json": { + "data": [ + { + "account_id": account_id, + "id": f"act_{account_id}", + } + ], + "paging": {"cursors": {"before": "MjM4NDYzMDYyMTcyNTAwNzEZD", "after": "MjM4NDYzMDYyMTcyNTAwNzEZD"}}, + }, + "status_code": 200, + } + + +@fixture(name="api") +def api_fixture(some_config, requests_mock, fb_account_response): + api = API(access_token=some_config["access_token"]) + + requests_mock.register_uri( + "GET", + FacebookSession.GRAPH + f"/{FB_API_VERSION}/me/accounts?" 
f"access_token={some_config['access_token']}&summary=true", + [fb_account_response], + ) + + return api + + +@fixture(name="user_data") +def user_data_fixture(): + return { + "biography": "Dino data crunching app", + "id": "17841405822304914", + "username": "metricsaurus", + "website": "http://www.metricsaurus.com/", + } + + +@fixture(name="user_insight_data") +def user_insight_data_fixture(): + return { + "name": "impressions", + "period": "day", + "values": [{"value": 4, "end_time": "2020-05-04T07:00:00+0000"}, {"value": 66, "end_time": "2020-05-05T07:00:00+0000"}], + "title": "Impressions", + "description": "Total number of times this profile has been seen", + "id": "17841400008460056/insights/impressions/day", + } + + +@fixture(name="user_stories_data") +def user_stories_data_fixture(): + return {"id": "test_id"} + + +@fixture(name="user_media_insights_data") +def user_media_insights_data_fixture(): + return { + "name": "impressions", + "period": "lifetime", + "values": [{"value": 264}], + "title": "Impressions", + "description": "Total number of times the media object has been seen", + "id": "17855590849148465/insights/impressions/lifetime", + } diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/test_common.py b/airbyte-integrations/connectors/source-instagram/unit_tests/test_common.py new file mode 100644 index 000000000000..786830b1574f --- /dev/null +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/test_common.py @@ -0,0 +1,53 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + +from source_instagram.common import remove_params_from_url + + +def test_empty_url(): + url = "" + parsed_url = remove_params_from_url(url=url, params=[]) + assert parsed_url == url + + +def test_does_not_raise_exception_for_invalid_url(): + url = "abcd" + parsed_url = remove_params_from_url(url=url, params=["test"]) + assert parsed_url == url + + +def test_escaped_characters(): + url = "https://google.com?test=123%23%24%25%2A&test2=456" + parsed_url = remove_params_from_url(url=url, params=["test3"]) + assert parsed_url == url + + +def test_no_params_url(): + url = "https://google.com" + parsed_url = remove_params_from_url(url=url, params=["test"]) + assert parsed_url == url + + +def test_no_params_arg(): + url = "https://google.com?" + parsed_url = remove_params_from_url(url=url, params=["test"]) + assert parsed_url == "https://google.com" + + +def test_partially_empty_params(): + url = "https://google.com?test=122&&" + parsed_url = remove_params_from_url(url=url, params=[]) + assert parsed_url == "https://google.com?test=122" + + +def test_no_matching_params(): + url = "https://google.com?test=123" + parsed_url = remove_params_from_url(url=url, params=["test2"]) + assert parsed_url == url + + +def test_removes_params(): + url = "https://google.com?test=123&test2=456" + parsed_url = remove_params_from_url(url=url, params=["test2"]) + assert parsed_url == "https://google.com?test=123" diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py b/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py new file mode 100644 index 000000000000..d817beff7ccb --- /dev/null +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/test_source.py @@ -0,0 +1,74 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + + +from airbyte_cdk.logger import AirbyteLogger +from airbyte_cdk.models import ( + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + ConnectorSpecification, + DestinationSyncMode, + SyncMode, +) +from source_instagram.source import SourceInstagram + +logger = AirbyteLogger() + + +def test_check_connection_ok(api, some_config): + ok, error_msg = SourceInstagram().check_connection(logger, config=some_config) + + assert ok + assert not error_msg + + +def test_check_connection_empty_config(api): + config = {} + ok, error_msg = SourceInstagram().check_connection(logger, config=config) + + assert not ok + assert error_msg + + +def test_check_connection_invalid_config(api, some_config): + some_config.pop("start_date") + ok, error_msg = SourceInstagram().check_connection(logger, config=some_config) + + assert not ok + assert error_msg + + +def test_check_connection_exception(api, config): + api.side_effect = RuntimeError("Something went wrong!") + ok, error_msg = SourceInstagram().check_connection(logger, config=config) + + assert not ok + assert error_msg + + +def test_streams(api, config): + streams = SourceInstagram().streams(config) + + assert len(streams) == 7 + + +def test_spec(): + spec = SourceInstagram().spec() + + assert isinstance(spec, ConnectorSpecification) + + +def test_read(config): + source = SourceInstagram() + catalog = ConfiguredAirbyteCatalog( + streams=[ + ConfiguredAirbyteStream( + stream=AirbyteStream(name="users", json_schema={}), + sync_mode=SyncMode.full_refresh, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + ] + ) + assert source.read(logger, config, catalog) diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py new file mode 100644 index 000000000000..00779faff01c --- /dev/null +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/test_streams.py @@ -0,0 +1,181 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +from datetime import datetime +from unittest.mock import MagicMock + +import pytest +from facebook_business import FacebookAdsApi, FacebookSession +from source_instagram.streams import ( + InstagramStream, + Media, + MediaInsights, + Stories, + StoryInsights, + UserInsights, + UserLifetimeInsights, + Users, +) +from utils import read_full_refresh, read_incremental + +FB_API_VERSION = FacebookAdsApi.API_VERSION + + +def test_clear_url(config): + media_url = "https://google.com?_nc_rid=123" + profile_picture_url = "https://google.com?ccb=123" + + expected = {"media_url": "https://google.com", "profile_picture_url": "https://google.com"} + assert InstagramStream._clear_url({"media_url": media_url, "profile_picture_url": profile_picture_url}) == expected + + +def test_state_outdated(api, config): + assert UserInsights(api=api, start_date=datetime.strptime(config["start_date"], "%Y-%m-%dT%H:%M:%S"))._state_has_legacy_format( + {"state": MagicMock()} + ) + + +def test_state_is_not_outdated(api, config): + assert not UserInsights(api=api, start_date=datetime.strptime(config["start_date"], "%Y-%m-%dT%H:%M:%S"))._state_has_legacy_format( + {"state": {}} + ) + + +def test_media_get_children(api, requests_mock, some_config): + test_id = "test_id" + expected = {"id": "test_id"} + + requests_mock.register_uri("GET", FacebookSession.GRAPH + f"/{FB_API_VERSION}/{test_id}/", [{}]) + + assert next(Media(api=api)._get_children([test_id])) == expected + + +def test_media_read(api, user_stories_data, requests_mock): + test_id = "test_id" + stream = Media(api=api) + + requests_mock.register_uri("GET", FacebookSession.GRAPH + f"/{FB_API_VERSION}/{test_id}/media", [{"json": user_stories_data}]) + + records = read_full_refresh(stream) + assert records == [{"business_account_id": "test_id", "id": "test_id", "page_id": "act_unknown_account"}] + + +def test_media_insights_read(api, user_stories_data, user_media_insights_data, requests_mock): + test_id = "test_id" + stream = MediaInsights(api=api) + + requests_mock.register_uri("GET", FacebookSession.GRAPH + f"/{FB_API_VERSION}/{test_id}/media", [{"json": user_stories_data}]) + requests_mock.register_uri("GET", FacebookSession.GRAPH + f"/{FB_API_VERSION}/{test_id}/insights", [{"json": user_media_insights_data}]) + + records = read_full_refresh(stream) + assert records == [{"business_account_id": "test_id", "id": "test_id", "impressions": 264, "page_id": "act_unknown_account"}] + + +def test_user_read(api, user_data, requests_mock): + test_id = "test_id" + stream = Users(api=api) + + requests_mock.register_uri("GET", FacebookSession.GRAPH + f"/{FB_API_VERSION}/{test_id}/", [{"json": user_data}]) + + records = read_full_refresh(stream) + assert records == [ + { + "biography": "Dino data crunching app", + "id": "17841405822304914", + "page_id": "act_unknown_account", + "username": "metricsaurus", + "website": "http://www.metricsaurus.com/", + } + ] + + +def test_user_insights_read(api, config, user_insight_data, requests_mock): + test_id = "test_id" + + stream = UserInsights(api=api, start_date=datetime.strptime(config["start_date"], "%Y-%m-%dT%H:%M:%S")) + + requests_mock.register_uri("GET", FacebookSession.GRAPH + f"/{FB_API_VERSION}/{test_id}/insights", [{"json": user_insight_data}]) + + records = read_incremental(stream, {}) + assert records + + +def test_user_lifetime_insights_read(api, config, user_insight_data, requests_mock): + test_id = "test_id" + + stream = UserLifetimeInsights(api=api) + + requests_mock.register_uri("GET", FacebookSession.GRAPH + 
f"/{FB_API_VERSION}/{test_id}/insights", [{"json": user_insight_data}]) + + records = read_full_refresh(stream) + assert records == [ + { + "page_id": "act_unknown_account", + "business_account_id": "test_id", + "metric": "impressions", + "date": "2020-05-04T07:00:00+0000", + "value": 4, + } + ] + + +def test_stories_read(api, requests_mock, user_stories_data): + test_id = "test_id" + stream = Stories(api=api) + + requests_mock.register_uri("GET", FacebookSession.GRAPH + f"/{FB_API_VERSION}/{test_id}/stories", [{"json": user_stories_data}]) + + records = read_full_refresh(stream) + assert records == [{"business_account_id": "test_id", "id": "test_id", "page_id": "act_unknown_account"}] + + +def test_stories_insights_read(api, requests_mock, user_stories_data, user_media_insights_data): + test_id = "test_id" + stream = StoryInsights(api=api) + + requests_mock.register_uri("GET", FacebookSession.GRAPH + f"/{FB_API_VERSION}/{test_id}/stories", [{"json": user_stories_data}]) + requests_mock.register_uri("GET", FacebookSession.GRAPH + f"/{FB_API_VERSION}/{test_id}/insights", [{"json": user_media_insights_data}]) + + records = read_full_refresh(stream) + assert records == [{"business_account_id": "test_id", "id": "test_id", "impressions": 264, "page_id": "act_unknown_account"}] + + +@pytest.mark.parametrize( + "error_response", + [ + {"json": {"error": {"type": "OAuthException", "code": 1}}}, + {"json": {"error": {"code": 4}}}, + {"json": {}, "status_code": 429}, + {"json": {"error": {"type": "OAuthException", "message": "(#10) Not enough viewers for the media to show insights", "code": 10}}}, + {"json": {"error": {"code": 100, "error_subcode": 33}}, "status_code": 400}, + {"json": {"error": {"is_transient": True}}}, + {"json": {"error": {"code": 4, "error_subcode": 2108006}}}, + ], + ids=[ + "oauth_error", + "rate_limit_error", + "too_many_request_error", + "viewers_insights_error", + "4028_issue_error", + "transient_error", + "user_media_creation_time_error", + ], +) +def test_common_error_retry(error_response, requests_mock, api, account_id): + """Error once, check that we retry and not fail""" + response = {"business_account_id": "test_id", "page_id": "act_unknown_account"} + responses = [ + error_response, + { + "json": response, + "status_code": 200, + }, + ] + test_id = "test_id" + requests_mock.register_uri("GET", FacebookSession.GRAPH + f"/{FB_API_VERSION}/{test_id}/media", responses) + + stream = Media(api=api) + records = read_full_refresh(stream) + + assert [response] == records diff --git a/airbyte-integrations/connectors/source-instagram/unit_tests/utils.py b/airbyte-integrations/connectors/source-instagram/unit_tests/utils.py new file mode 100644 index 000000000000..c92ac97b0373 --- /dev/null +++ b/airbyte-integrations/connectors/source-instagram/unit_tests/utils.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +from typing import Any, MutableMapping + +from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources.streams import Stream + + +def read_incremental(stream_instance: Stream, stream_state: MutableMapping[str, Any]): + res = [] + slices = stream_instance.stream_slices(sync_mode=SyncMode.incremental, stream_state=stream_state) + for slice in slices: + records = stream_instance.read_records(sync_mode=SyncMode.incremental, stream_slice=slice, stream_state=stream_state) + for record in records: + stream_state = stream_instance.get_updated_state(stream_state, record) + res.append(record) + return res + + +def read_full_refresh(stream_instance: Stream): + records = [] + slices = stream_instance.stream_slices(sync_mode=SyncMode.full_refresh) + for slice in slices: + records.extend(list(stream_instance.read_records(stream_slice=slice, sync_mode=SyncMode.full_refresh))) + return records diff --git a/airbyte-integrations/connectors/source-intercom/Dockerfile b/airbyte-integrations/connectors/source-intercom/Dockerfile index 2ae375afc8c4..75a3c7dbbe98 100644 --- a/airbyte-integrations/connectors/source-intercom/Dockerfile +++ b/airbyte-integrations/connectors/source-intercom/Dockerfile @@ -35,5 +35,5 @@ COPY source_intercom ./source_intercom ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.13 +LABEL io.airbyte.version=0.1.15 LABEL io.airbyte.name=airbyte/source-intercom diff --git a/airbyte-integrations/connectors/source-intercom/integration_tests/expected_records.txt b/airbyte-integrations/connectors/source-intercom/integration_tests/expected_records.txt index 5599db1de79e..d7fd268a9c89 100644 --- a/airbyte-integrations/connectors/source-intercom/integration_tests/expected_records.txt +++ b/airbyte-integrations/connectors/source-intercom/integration_tests/expected_records.txt @@ -1,2 +1,2 @@ -{"stream": "conversations", "data": {"type": "conversation", "id": "1", "created_at": 1607553243, "updated_at": 1626346673, "waiting_since": null, "snoozed_until": null, "source": {"type": "conversation", "id": "701718739", "delivered_as": "customer_initiated", "subject": "", "body": "

hey there

", "author": {"type": "lead", "id": "5fd150d50697b6d0bbc4a2c2", "name": null, "email": ""}, "attachments": [], "url": "http://localhost:63342/airbyte-python/airbyte-integrations/bases/base-java/build/tmp/expandedArchives/org.jacoco.agent-0.8.5.jar_6a2df60c47de373ea127d14406367999/about.html?_ijt=uosck1k6vmp2dnl4oqib2g3u9d", "redacted": false}, "contacts": {"type": "contact.list", "contacts": [{"type": "contact", "id": "5fd150d50697b6d0bbc4a2c2"}]}, "first_contact_reply": {"created_at": 1607553243, "type": "conversation", "url": "http://localhost:63342/airbyte-python/airbyte-integrations/bases/base-java/build/tmp/expandedArchives/org.jacoco.agent-0.8.5.jar_6a2df60c47de373ea127d14406367999/about.html?_ijt=uosck1k6vmp2dnl4oqib2g3u9d"}, "admin_assignee_id": null, "team_assignee_id": null, "open": true, "state": "open", "read": false, "tags": {"type": "tag.list", "tags": []}, "priority": "not_priority", "sla_applied": null, "statistics": {"type": "conversation_statistics", "time_to_assignment": null, "time_to_admin_reply": 4317957, "time_to_first_close": null, "time_to_last_close": null, "median_time_to_reply": 4317954, "first_contact_reply_at": 1607553243, "first_assignment_at": null, "first_admin_reply_at": 1625654131, "first_close_at": null, "last_assignment_at": null, "last_assignment_admin_reply_at": null, "last_contact_reply_at": 1607553246, "last_admin_reply_at": 1625656000, "last_close_at": null, "last_closed_by_id": null, "count_reopens": 0, "count_assignments": 0, "count_conversation_parts": 7}, "conversation_rating": null, "teammates": {"type": "admin.list", "admins": [{"type": "admin", "id": "4423433"}]}, "title": null}, "emitted_at": 1638877461000} {"stream": "conversations", "data": {"type": "conversation", "id": "2", "created_at": 1625749234, "updated_at": 1632835061, "waiting_since": null, "snoozed_until": null, "source": {"type": "conversation", "id": "906873821", "delivered_as": "admin_initiated", "subject": "", "body": "

Hi Jean,

", "author": {"type": "admin", "id": "4423433", "name": "John Lafleur", "email": "integration-test@airbyte.io"}, "attachments": [], "url": null, "redacted": false}, "contacts": {"type": "contact.list", "contacts": [{"type": "contact", "id": "60e6f6e020ae45ce1ac86f26"}]}, "first_contact_reply": null, "admin_assignee_id": 4423433, "team_assignee_id": null, "open": false, "state": "closed", "read": true, "tags": {"type": "tag.list", "tags": []}, "priority": "not_priority", "sla_applied": null, "statistics": {"type": "conversation_statistics", "time_to_assignment": null, "time_to_admin_reply": null, "time_to_first_close": null, "time_to_last_close": null, "median_time_to_reply": null, "first_contact_reply_at": null, "first_assignment_at": null, "first_admin_reply_at": null, "first_close_at": null, "last_assignment_at": null, "last_assignment_admin_reply_at": null, "last_contact_reply_at": null, "last_admin_reply_at": null, "last_close_at": null, "last_closed_by_id": null, "count_reopens": 0, "count_assignments": 0, "count_conversation_parts": 7}, "conversation_rating": null, "teammates": {"type": "admin.list", "admins": []}, "title": null}, "emitted_at": 1638877461000} +{"stream": "conversations", "data": {"type": "conversation", "id": "1", "created_at": 1607553243, "updated_at": 1626346673, "waiting_since": null, "snoozed_until": null, "source": {"type": "conversation", "id": "701718739", "delivered_as": "customer_initiated", "subject": "", "body": "

hey there

", "author": {"type": "lead", "id": "5fd150d50697b6d0bbc4a2c2", "name": null, "email": ""}, "attachments": [], "url": "http://localhost:63342/airbyte-python/airbyte-integrations/bases/base-java/build/tmp/expandedArchives/org.jacoco.agent-0.8.5.jar_6a2df60c47de373ea127d14406367999/about.html?_ijt=uosck1k6vmp2dnl4oqib2g3u9d", "redacted": false}, "contacts": {"type": "contact.list", "contacts": [{"type": "contact", "id": "5fd150d50697b6d0bbc4a2c2"}]}, "first_contact_reply": {"created_at": 1607553243, "type": "conversation", "url": "http://localhost:63342/airbyte-python/airbyte-integrations/bases/base-java/build/tmp/expandedArchives/org.jacoco.agent-0.8.5.jar_6a2df60c47de373ea127d14406367999/about.html?_ijt=uosck1k6vmp2dnl4oqib2g3u9d"}, "admin_assignee_id": null, "team_assignee_id": null, "open": true, "state": "open", "read": false, "tags": {"type": "tag.list", "tags": []}, "priority": "not_priority", "sla_applied": null, "statistics": {"type": "conversation_statistics", "time_to_assignment": null, "time_to_admin_reply": 4317957, "time_to_first_close": null, "time_to_last_close": null, "median_time_to_reply": 4317954, "first_contact_reply_at": 1607553243, "first_assignment_at": null, "first_admin_reply_at": 1625654131, "first_close_at": null, "last_assignment_at": null, "last_assignment_admin_reply_at": null, "last_contact_reply_at": 1607553246, "last_admin_reply_at": 1625656000, "last_close_at": null, "last_closed_by_id": null, "count_reopens": 0, "count_assignments": 0, "count_conversation_parts": 7}, "conversation_rating": null, "teammates": {"type": "admin.list", "admins": [{"type": "admin", "id": "4423433"}]}, "title": null}, "emitted_at": 1638877461000} diff --git a/airbyte-integrations/connectors/source-intercom/integration_tests/integration_test.py b/airbyte-integrations/connectors/source-intercom/integration_tests/integration_test.py index 1bf0f2c761fb..9c7ad78e62a7 100644 --- a/airbyte-integrations/connectors/source-intercom/integration_tests/integration_test.py +++ b/airbyte-integrations/connectors/source-intercom/integration_tests/integration_test.py @@ -93,7 +93,7 @@ def test_companies_scroll(stream_attributes): # read all records records = [] for slice in stream2.stream_slices(sync_mode=SyncMode.full_refresh): - records += list(stream2.read_records(sync_mode=SyncMode, stream_slice=slice)) + records += list(stream2.read_records(sync_mode=SyncMode.full_refresh, stream_slice=slice)) assert len(records) == 3 assert (time.time() - start_time) > 60.0 diff --git a/airbyte-integrations/connectors/source-intercom/source_intercom/source.py b/airbyte-integrations/connectors/source-intercom/source_intercom/source.py index 8f99e9f473a5..94c49285ae99 100755 --- a/airbyte-integrations/connectors/source-intercom/source_intercom/source.py +++ b/airbyte-integrations/connectors/source-intercom/source_intercom/source.py @@ -6,7 +6,7 @@ from datetime import datetime from enum import Enum from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple -from urllib.parse import parse_qsl, urlparse +from urllib.parse import parse_qsl, urlparse, urljoin import requests from airbyte_cdk.logger import AirbyteLogger @@ -92,6 +92,10 @@ def parse_response(self, response: requests.Response, stream_state: Mapping[str, class IncrementalIntercomStream(IntercomStream, ABC): cursor_field = "updated_at" + def __init__(self, authenticator: AuthBase, start_date: str = None, **kwargs): + super().__init__(authenticator, start_date, **kwargs) + self.has_old_records = False + def filter_by_state(self, 
stream_state: Mapping[str, Any] = None, record: Mapping[str, Any] = None) -> Iterable: """ Endpoint does not provide query filtering params, but they provide us @@ -101,6 +105,8 @@ def filter_by_state(self, stream_state: Mapping[str, Any] = None, record: Mappin if not stream_state or record[self.cursor_field] > stream_state.get(self.cursor_field): yield record + else: + self.has_old_records = True def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]: record = super().parse_response(response, stream_state, **kwargs) @@ -282,9 +288,16 @@ class Conversations(IncrementalIntercomStream): def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]: params = super().request_params(next_page_token, **kwargs) - params.update({"order": "asc", "sort": self.cursor_field}) + params.update({"order": "desc", "sort": self.cursor_field}) return params + # We're sorting by desc. Once we hit the first page with an out-of-date result we can stop. + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + if self.has_old_records: + return None + + return super().next_page_token(response) + def path(self, **kwargs) -> str: return "conversations" @@ -413,7 +426,7 @@ class SourceIntercom(AbstractSource): def check_connection(self, logger, config) -> Tuple[bool, any]: authenticator = VersionApiAuthenticator(token=config["access_token"]) try: - url = f"{IntercomStream.url_base}/tags" + url = urljoin(IntercomStream.url_base, "/tags") auth_headers = {"Accept": "application/json", **authenticator.get_auth_header()} session = requests.get(url, headers=auth_headers) session.raise_for_status() diff --git a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java index c5f515e55cd4..f85b4eebc0c5 100644 --- a/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java +++ b/airbyte-integrations/connectors/source-jdbc/src/main/java/io/airbyte/integrations/source/jdbc/AbstractJdbcSource.java @@ -262,7 +262,7 @@ public AutoCloseableIterator queryTableIncremental(final JdbcDatabase LOGGER.info("Queueing query for table: {}", tableName); return AutoCloseableIterators.lazyIterator(() -> { try { - final Stream stream = database.query( + final Stream stream = database.unsafeQuery( connection -> { LOGGER.info("Preparing query for table: {}", tableName); final String sql = String.format("SELECT %s FROM %s WHERE %s > ?", diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java index cbc62a3c2fe6..4fdd684a3489 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSource.java @@ -94,7 +94,7 @@ public AutoCloseableIterator queryTableIncremental(JdbcDatabase databa LOGGER.info("Queueing query for table: {}", tableName); return AutoCloseableIterators.lazyIterator(() -> { try { - final Stream stream = database.query( + final Stream stream = database.unsafeQuery( connection -> { LOGGER.info("Preparing query 
for table: {}", tableName); @@ -244,7 +244,7 @@ public List> getCheckOperations(final J protected void assertCdcEnabledInDb(final JsonNode config, final JdbcDatabase database) throws SQLException { - final List queryResponse = database.query(connection -> { + final List queryResponse = database.unsafeQuery(connection -> { final String sql = "SELECT name, is_cdc_enabled FROM sys.databases WHERE name = ?"; final PreparedStatement ps = connection.prepareStatement(sql); ps.setString(1, config.get("database").asText()); @@ -267,7 +267,7 @@ protected void assertCdcEnabledInDb(final JsonNode config, final JdbcDatabase da protected void assertCdcSchemaQueryable(final JsonNode config, final JdbcDatabase database) throws SQLException { - final List queryResponse = database.query(connection -> { + final List queryResponse = database.unsafeQuery(connection -> { final String sql = "USE " + config.get("database").asText() + "; SELECT * FROM cdc.change_tables"; final PreparedStatement ps = connection.prepareStatement(sql); @@ -286,7 +286,7 @@ protected void assertCdcSchemaQueryable(final JsonNode config, final JdbcDatabas // todo: ensure this works for Azure managed SQL (since it uses different sql server agent) protected void assertSqlServerAgentRunning(final JdbcDatabase database) throws SQLException { try { - final List queryResponse = database.query(connection -> { + final List queryResponse = database.unsafeQuery(connection -> { final String sql = "SELECT status_desc FROM sys.dm_server_services WHERE [servicename] LIKE 'SQL Server Agent%'"; final PreparedStatement ps = connection.prepareStatement(sql); LOGGER.info(String @@ -312,7 +312,7 @@ protected void assertSqlServerAgentRunning(final JdbcDatabase database) throws S protected void assertSnapshotIsolationAllowed(final JsonNode config, final JdbcDatabase database) throws SQLException { - final List queryResponse = database.query(connection -> { + final List queryResponse = database.unsafeQuery(connection -> { final String sql = "SELECT name, snapshot_isolation_state FROM sys.databases WHERE name = ?"; final PreparedStatement ps = connection.prepareStatement(sql); ps.setString(1, config.get("database").asText()); diff --git a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java index c114aaa8304e..98ff05c57879 100644 --- a/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java +++ b/airbyte-integrations/connectors/source-mssql/src/main/java/io/airbyte/integrations/source/mssql/MssqlSourceOperations.java @@ -16,6 +16,7 @@ import com.microsoft.sqlserver.jdbc.Geometry; import com.microsoft.sqlserver.jdbc.SQLServerResultSetMetaData; import io.airbyte.db.jdbc.JdbcSourceOperations; +import java.nio.charset.Charset; import java.sql.JDBCType; import java.sql.ResultSet; import java.sql.SQLException; @@ -53,7 +54,7 @@ public void setJsonField(final ResultSet resultSet, final int colIndex, final Ob } } - private void putValue(JDBCType columnType, + private void putValue(final JDBCType columnType, final ResultSet resultSet, final String columnName, final int colIndex, @@ -104,8 +105,8 @@ protected void putBinary(final ObjectNode node, final ResultSet resultSet, final int index) throws SQLException { - byte[] bytes = resultSet.getBytes(index); - String value = new String(bytes); + final byte[] bytes = 
resultSet.getBytes(index); + final String value = new String(bytes, Charset.defaultCharset()); node.put(columnName, value); } diff --git a/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java b/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java index ed5ba68a46ce..ccc2996916e3 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/FillMsSqlTestDbScriptTest.java @@ -12,7 +12,6 @@ import io.airbyte.integrations.standardtest.source.TestDestinationEnv; import io.airbyte.integrations.standardtest.source.performancetest.AbstractSourceFillDbWithTestData; import java.util.stream.Stream; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.provider.Arguments; public class FillMsSqlTestDbScriptTest extends AbstractSourceFillDbWithTestData { diff --git a/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MsSqlRdsSourcePerformanceSecretTest.java b/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MsSqlRdsSourcePerformanceSecretTest.java index 4b88c2c506c4..1ff961d83bfc 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MsSqlRdsSourcePerformanceSecretTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test-performance/java/io/airbyte/integrations/source/mssql/MsSqlRdsSourcePerformanceSecretTest.java @@ -11,7 +11,6 @@ import io.airbyte.integrations.standardtest.source.performancetest.AbstractSourcePerformanceTest; import java.nio.file.Path; import java.util.stream.Stream; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.provider.Arguments; public class MsSqlRdsSourcePerformanceSecretTest extends AbstractSourcePerformanceTest { diff --git a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java index 26efe159ff87..890420c029cb 100644 --- a/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java +++ b/airbyte-integrations/connectors/source-mssql/src/test/java/io/airbyte/integrations/source/mssql/CdcMssqlSourceTest.java @@ -316,7 +316,7 @@ protected CdcTargetPosition cdcLatestTargetPosition() { dbName), DRIVER_CLASS, new MssqlJdbcStreamingQueryConfiguration(), - null, + Map.of(), new MssqlSourceOperations()); return MssqlCdcTargetPosition.getTargetPosition(jdbcDatabase, dbName); } diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcTargetPosition.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcTargetPosition.java index e4986b575dcf..de02f827e0e6 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcTargetPosition.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlCdcTargetPosition.java @@ -47,7 
+47,7 @@ public String toString() { public static MySqlCdcTargetPosition targetPosition(final JdbcDatabase database) { try { - final List masterStatus = database.resultSetQuery( + final List masterStatus = database.unsafeResultSetQuery( connection -> connection.createStatement().executeQuery("SHOW MASTER STATUS"), resultSet -> { final String file = resultSet.getString("File"); diff --git a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/helpers/CdcConfigurationHelper.java b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/helpers/CdcConfigurationHelper.java index e20c2fc5f790..b2df3e912259 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/helpers/CdcConfigurationHelper.java +++ b/airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/helpers/CdcConfigurationHelper.java @@ -75,7 +75,7 @@ public static List> getCheckOperations( private static boolean isBinlogAvailable(String binlog, JdbcDatabase database) { try { - List binlogs = database.resultSetQuery(connection -> connection.createStatement().executeQuery("SHOW BINARY LOGS"), + List binlogs = database.unsafeResultSetQuery(connection -> connection.createStatement().executeQuery("SHOW BINARY LOGS"), resultSet -> resultSet.getString("Log_name")).collect(Collectors.toList()); return !binlog.isEmpty() && binlogs.stream().anyMatch(e -> e.equals(binlog)); @@ -97,7 +97,7 @@ private static Optional getBinlog(JsonNode offset) { private static CheckedConsumer getCheckOperation(String name, String value) { return database -> { - final List result = database.resultSetQuery(connection -> { + final List result = database.unsafeResultSetQuery(connection -> { final String sql = """ show variables where Variable_name = '%s'""".formatted(name); diff --git a/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/FillMySqlTestDbScriptTest.java b/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/FillMySqlTestDbScriptTest.java index 68f6de841793..f2a3ca890868 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/FillMySqlTestDbScriptTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/FillMySqlTestDbScriptTest.java @@ -15,7 +15,6 @@ import java.util.Map; import java.util.stream.Stream; import org.jooq.SQLDialect; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.provider.Arguments; public class FillMySqlTestDbScriptTest extends AbstractSourceFillDbWithTestData { @@ -79,4 +78,5 @@ protected Stream provideParameters() { // for MySQL DB name ans schema name would be the same return Stream.of(Arguments.of("your_db_name", "your_schema_name", 100, 2, 240, 1000)); } + } diff --git a/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/MySqlRdsSourcePerformanceSecretTest.java b/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/MySqlRdsSourcePerformanceSecretTest.java index 5c7322ed3bb6..2d95fe80bd55 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/MySqlRdsSourcePerformanceSecretTest.java +++ 
b/airbyte-integrations/connectors/source-mysql/src/test-performance/java/io/airbyte/integrations/source/mysql/MySqlRdsSourcePerformanceSecretTest.java @@ -15,7 +15,6 @@ import java.util.Map; import java.util.stream.Stream; import org.jooq.SQLDialect; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.provider.Arguments; public class MySqlRdsSourcePerformanceSecretTest extends AbstractSourcePerformanceTest { @@ -72,4 +71,5 @@ protected Stream provideParameters() { Arguments.of("t25_c8_r50k_s10kb", "t25_c8_r50k_s10kb", 50000, 8, 25), Arguments.of("t1000_c8_r10k_s500b", "t1000_c8_r10k_s500b", 10000, 8, 1000)); } + } diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java index ee6d1c3806fe..0b4525659f39 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlJdbcSourceAcceptanceTest.java @@ -19,7 +19,7 @@ import io.airbyte.protocol.models.ConnectorSpecification; import java.sql.Connection; import java.sql.DriverManager; -import java.sql.SQLException; +import java.util.concurrent.Callable; import org.jooq.SQLDialect; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; @@ -31,21 +31,20 @@ class MySqlJdbcSourceAcceptanceTest extends JdbcSourceAcceptanceTest { protected static final String TEST_USER = "test"; - protected static final String TEST_PASSWORD = "test"; + protected static final Callable TEST_PASSWORD = () -> "test"; protected static MySQLContainer container; - protected JsonNode config; protected Database database; @BeforeAll - static void init() throws SQLException { + static void init() throws Exception { container = new MySQLContainer<>("mysql:8.0") .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD) + .withPassword(TEST_PASSWORD.call()) .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD); + .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD.call()); container.start(); - final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), "root", TEST_PASSWORD); + final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), "root", TEST_PASSWORD.call()); connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); } @@ -56,7 +55,7 @@ public void setup() throws Exception { .put("port", container.getFirstMappedPort()) .put("database", Strings.addRandomSuffix("db", "_", 10)) .put("username", TEST_USER) - .put("password", TEST_PASSWORD) + .put("password", TEST_PASSWORD.call()) .build()); database = Databases.createDatabase( diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java index d92dc9c31d35..23cb1d387497 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSourceTests.java @@ -10,7 +10,6 @@ import com.google.common.collect.ImmutableMap; import 
io.airbyte.commons.json.Jsons; import io.airbyte.commons.string.Strings; -import io.airbyte.db.Database; import io.airbyte.protocol.models.AirbyteConnectionStatus; import java.sql.Connection; import java.sql.DriverManager; @@ -22,15 +21,11 @@ public class MySqlSourceTests { private static final String TEST_USER = "test"; private static final String TEST_PASSWORD = "test"; - private static MySQLContainer container; - - private JsonNode config; - private Database database; @Test public void testSettingTimezones() throws Exception { // start DB - container = new MySQLContainer<>("mysql:8.0") + final MySQLContainer container = new MySQLContainer<>("mysql:8.0") .withUsername(TEST_USER) .withPassword(TEST_PASSWORD) .withEnv("MYSQL_ROOT_HOST", "%") @@ -41,7 +36,7 @@ public void testSettingTimezones() throws Exception { properties.putAll(ImmutableMap.of("user", "root", "password", TEST_PASSWORD, "serverTimezone", "Europe/Moscow")); DriverManager.getConnection(container.getJdbcUrl(), properties); final String dbName = Strings.addRandomSuffix("db", "_", 10); - config = getConfig(container, dbName, "serverTimezone=Europe/Moscow"); + final JsonNode config = getConfig(container, dbName, "serverTimezone=Europe/Moscow"); try (final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), properties)) { connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java index 7288006c627d..d6780431f480 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlSslJdbcSourceAcceptanceTest.java @@ -22,7 +22,7 @@ public void setup() throws Exception { .put("port", container.getFirstMappedPort()) .put("database", Strings.addRandomSuffix("db", "_", 10)) .put("username", TEST_USER) - .put("password", TEST_PASSWORD) + .put("password", TEST_PASSWORD.call()) .put("ssl", true) .build()); diff --git a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlStressTest.java b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlStressTest.java index fc0c803c8c81..a33e9db6a725 100644 --- a/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlStressTest.java +++ b/airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/MySqlStressTest.java @@ -15,8 +15,8 @@ import io.airbyte.integrations.source.jdbc.test.JdbcStressTest; import java.sql.Connection; import java.sql.DriverManager; -import java.sql.SQLException; import java.util.Optional; +import java.util.concurrent.Callable; import org.jooq.SQLDialect; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; @@ -29,21 +29,21 @@ class MySqlStressTest extends JdbcStressTest { private static final String TEST_USER = "test"; - private static final String TEST_PASSWORD = "test"; + private static final Callable TEST_PASSWORD = () -> "test"; private static MySQLContainer container; private JsonNode config; private Database database; @BeforeAll - static void 
init() throws SQLException { + static void init() throws Exception { container = new MySQLContainer<>("mysql:8.0") .withUsername(TEST_USER) - .withPassword(TEST_PASSWORD) + .withPassword(TEST_PASSWORD.call()) .withEnv("MYSQL_ROOT_HOST", "%") - .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD); + .withEnv("MYSQL_ROOT_PASSWORD", TEST_PASSWORD.call()); container.start(); - final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), "root", TEST_PASSWORD); + final Connection connection = DriverManager.getConnection(container.getJdbcUrl(), "root", TEST_PASSWORD.call()); connection.createStatement().execute("GRANT ALL PRIVILEGES ON *.* TO '" + TEST_USER + "'@'%';\n"); } @@ -54,7 +54,7 @@ public void setup() throws Exception { .put("port", container.getFirstMappedPort()) .put("database", Strings.addRandomSuffix("db", "_", 10)) .put("username", TEST_USER) - .put("password", TEST_PASSWORD) + .put("password", TEST_PASSWORD.call()) .build()); database = Databases.createDatabase( diff --git a/airbyte-integrations/connectors/source-oracle-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/oracle_strict_encrypt/OracleSourceNneAcceptanceTest.java b/airbyte-integrations/connectors/source-oracle-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/oracle_strict_encrypt/OracleSourceNneAcceptanceTest.java index 0fb73078987d..070568267100 100644 --- a/airbyte-integrations/connectors/source-oracle-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/oracle_strict_encrypt/OracleSourceNneAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-oracle-strict-encrypt/src/test-integration/java/io/airbyte/integrations/source/oracle_strict_encrypt/OracleSourceNneAcceptanceTest.java @@ -45,7 +45,7 @@ public void testEncrytion() throws SQLException { final String network_service_banner = "select network_service_banner from v$session_connect_info where sid in (select distinct sid from v$mystat)"; - final List collect = database.query(network_service_banner).collect(Collectors.toList()); + final List collect = database.unsafeQuery(network_service_banner).collect(Collectors.toList()); assertTrue(collect.get(2).get("NETWORK_SERVICE_BANNER").asText() .contains(algorithm + " Encryption")); @@ -74,7 +74,7 @@ public void testCheckProtocol() throws SQLException { + algorithm + " )")); final String network_service_banner = "SELECT sys_context('USERENV', 'NETWORK_PROTOCOL') as network_protocol FROM dual"; - final List collect = database.query(network_service_banner).collect(Collectors.toList()); + final List collect = database.unsafeQuery(network_service_banner).collect(Collectors.toList()); assertEquals("tcp", collect.get(0).get("NETWORK_PROTOCOL").asText()); } diff --git a/airbyte-integrations/connectors/source-oracle/src/test-integration/java/io/airbyte/integrations/source/oracle/OracleSourceNneAcceptanceTest.java b/airbyte-integrations/connectors/source-oracle/src/test-integration/java/io/airbyte/integrations/source/oracle/OracleSourceNneAcceptanceTest.java index 851558d5cd49..e67716e45504 100644 --- a/airbyte-integrations/connectors/source-oracle/src/test-integration/java/io/airbyte/integrations/source/oracle/OracleSourceNneAcceptanceTest.java +++ b/airbyte-integrations/connectors/source-oracle/src/test-integration/java/io/airbyte/integrations/source/oracle/OracleSourceNneAcceptanceTest.java @@ -45,7 +45,7 @@ public void testEncrytion() throws SQLException { final String network_service_banner = "select network_service_banner from 
v$session_connect_info where sid in (select distinct sid from v$mystat)"; - final List collect = database.query(network_service_banner).collect(Collectors.toList()); + final List collect = database.unsafeQuery(network_service_banner).collect(Collectors.toList()); assertTrue(collect.get(2).get("NETWORK_SERVICE_BANNER").asText() .contains(algorithm + " Encryption")); @@ -64,7 +64,7 @@ public void testNoneEncrytion() throws SQLException { final String network_service_banner = "select network_service_banner from v$session_connect_info where sid in (select distinct sid from v$mystat)"; - final List collect = database.query(network_service_banner).collect(Collectors.toList()); + final List collect = database.unsafeQuery(network_service_banner).collect(Collectors.toList()); assertTrue(collect.get(1).get("NETWORK_SERVICE_BANNER").asText() .contains("Encryption service")); @@ -93,7 +93,7 @@ public void testCheckProtocol() throws SQLException { + algorithm + " )")); final String network_service_banner = "SELECT sys_context('USERENV', 'NETWORK_PROTOCOL') as network_protocol FROM dual"; - final List collect = database.query(network_service_banner).collect(Collectors.toList()); + final List collect = database.unsafeQuery(network_service_banner).collect(Collectors.toList()); assertEquals("tcp", collect.get(0).get("NETWORK_PROTOCOL").asText()); } diff --git a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java index 32e979794a5f..71826a0237e9 100644 --- a/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java +++ b/airbyte-integrations/connectors/source-postgres/src/main/java/io/airbyte/integrations/source/postgres/PostgresSource.java @@ -155,7 +155,7 @@ public List> getCheckOperations(final J if (isCdc(config)) { checkOperations.add(database -> { - final List matchingSlots = database.query(connection -> { + final List matchingSlots = database.unsafeQuery(connection -> { final String sql = "SELECT * FROM pg_replication_slots WHERE slot_name = ? AND plugin = ? 
AND database = ?"; final PreparedStatement ps = connection.prepareStatement(sql); ps.setString(1, config.get("replication_method").get("replication_slot").asText()); @@ -177,7 +177,7 @@ public List> getCheckOperations(final J }); checkOperations.add(database -> { - final List matchingPublications = database.query(connection -> { + final List matchingPublications = database.unsafeQuery(connection -> { final PreparedStatement ps = connection .prepareStatement("SELECT * FROM pg_publication WHERE pubname = ?"); ps.setString(1, config.get("replication_method").get("publication").asText()); @@ -274,7 +274,7 @@ private static AirbyteStream removeIncrementalWithoutPk(final AirbyteStream stre public Set getPrivilegesTableForCurrentUser(final JdbcDatabase database, final String schema) throws SQLException { - return database.query(connection -> { + return database.unsafeQuery(connection -> { final PreparedStatement ps = connection.prepareStatement( """ SELECT DISTINCT table_catalog, diff --git a/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/FillPostgresTestDbScriptTest.java b/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/FillPostgresTestDbScriptTest.java index 6113c16bb8cb..19caac05a622 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/FillPostgresTestDbScriptTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/FillPostgresTestDbScriptTest.java @@ -13,7 +13,6 @@ import io.airbyte.integrations.standardtest.source.performancetest.AbstractSourceFillDbWithTestData; import java.util.stream.Stream; import org.jooq.SQLDialect; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.provider.Arguments; public class FillPostgresTestDbScriptTest extends AbstractSourceFillDbWithTestData { @@ -75,4 +74,5 @@ protected Database setupDatabase(final String dbName) throws Exception { protected Stream provideParameters() { return Stream.of(Arguments.of("postgres", "\"your_schema_name\"", 100, 2, 240, 1000)); } + } diff --git a/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java b/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java index 912d8a988ce6..316efc76f530 100644 --- a/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java +++ b/airbyte-integrations/connectors/source-postgres/src/test-performance/java/io/airbyte/integrations/source/postgres/PostgresRdsSourcePerformanceTest.java @@ -12,7 +12,6 @@ import java.nio.file.Path; import java.util.List; import java.util.stream.Stream; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.provider.Arguments; public class PostgresRdsSourcePerformanceTest extends AbstractSourcePerformanceTest { diff --git a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java index 9edab761d41b..22c5d79f4de7 100644 --- 
a/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java +++ b/airbyte-integrations/connectors/source-relational-db/src/main/java/io/airbyte/integrations/source/relationaldb/AbstractRelationalDbSource.java @@ -58,7 +58,7 @@ protected String getFullTableName(final String nameSpace, final String tableName protected AutoCloseableIterator queryTable(final Database database, final String sqlQuery) { return AutoCloseableIterators.lazyIterator(() -> { try { - final Stream stream = database.query(sqlQuery); + final Stream stream = database.unsafeQuery(sqlQuery); return AutoCloseableIterators.fromStream(stream); } catch (final Exception e) { throw new RuntimeException(e); diff --git a/airbyte-integrations/connectors/source-shopify/Dockerfile b/airbyte-integrations/connectors/source-shopify/Dockerfile index 09424b318333..db1c017b8915 100644 --- a/airbyte-integrations/connectors/source-shopify/Dockerfile +++ b/airbyte-integrations/connectors/source-shopify/Dockerfile @@ -28,5 +28,5 @@ COPY source_shopify ./source_shopify ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.35 +LABEL io.airbyte.version=0.1.36 LABEL io.airbyte.name=airbyte/source-shopify diff --git a/airbyte-integrations/connectors/source-shopify/acceptance-test-config.yml b/airbyte-integrations/connectors/source-shopify/acceptance-test-config.yml index b138f5a3b61c..cda71978b962 100644 --- a/airbyte-integrations/connectors/source-shopify/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-shopify/acceptance-test-config.yml @@ -25,7 +25,7 @@ tests: timeout_seconds: 3600 # some streams hold data only for some time, therefore certain streams could be empty while sync. # 'abandoned_checkouts' stream holds data up to 1 month. 
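Circling back to the source-intercom change earlier in this diff: `Conversations` now requests records sorted by `updated_at` in descending order and stops paginating as soon as `filter_by_state` encounters a record older than the saved state (`has_old_records`). The sketch below is a schematic, connector-agnostic illustration of that early-stop pattern with simplified names; it is not the connector's actual class.

```python
# Schematic sketch of the "sort desc, stop at the first stale page" pattern the
# Intercom Conversations stream adopts in this diff. Names are simplified.
from typing import Any, Iterable, Mapping, Optional


class DescSortedIncremental:
    cursor_field = "updated_at"

    def __init__(self) -> None:
        self.has_old_records = False

    def filter_by_state(self, stream_state: Mapping[str, Any], records: Iterable[Mapping[str, Any]]):
        cutoff = (stream_state or {}).get(self.cursor_field)
        for record in records:
            if not cutoff or record[self.cursor_field] > cutoff:
                yield record
            else:
                # Records arrive newest-first, so everything from here on is
                # already synced; remember that so pagination can stop.
                self.has_old_records = True

    def next_page_token(self, next_url: Optional[str]) -> Optional[str]:
        # Once a stale record has been seen there is no need to fetch older pages.
        return None if self.has_old_records else next_url


state = {"updated_at": 150}
stream = DescSortedIncremental()
page = [{"updated_at": 200}, {"updated_at": 180}, {"updated_at": 100}]
assert [r["updated_at"] for r in stream.filter_by_state(state, page)] == [200, 180]
assert stream.next_page_token("https://api.example.com/conversations?page=2") is None
```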
- empty_streams: ["abandoned_checkouts"] + empty_streams: ["abandoned_checkouts", "balance_transactions"] incremental: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json index 76283fc4059c..e9e8facca510 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/abnormal_state.json @@ -77,5 +77,8 @@ "orders": { "updated_at": "2025-03-03T03:47:46-08:00" } + }, + "balance_transactions": { + "id": 9999999999999 } } diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json index 369bc39712bf..be0c8854c55c 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/configured_catalog.json @@ -249,6 +249,18 @@ "sync_mode": "incremental", "cursor_field": ["updated_at"], "destination_sync_mode": "append" + }, + { + "stream": { + "name": "balance_transactions", + "json_schema": {}, + "supported_sync_modes": ["incremental", "full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": ["id"] + }, + "sync_mode": "incremental", + "cursor_field": ["id"], + "destination_sync_mode": "append" } ] } diff --git a/airbyte-integrations/connectors/source-shopify/integration_tests/state.json b/airbyte-integrations/connectors/source-shopify/integration_tests/state.json index 263c4268de83..00d1d3e75daa 100644 --- a/airbyte-integrations/connectors/source-shopify/integration_tests/state.json +++ b/airbyte-integrations/connectors/source-shopify/integration_tests/state.json @@ -77,5 +77,8 @@ "orders": { "updated_at": "2022-03-03T03:47:46-08:00" } + }, + "balance_transactions": { + "id": 29427031703741 } } diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/balance_transactions.json b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/balance_transactions.json new file mode 100644 index 000000000000..1abec0f45df3 --- /dev/null +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/schemas/balance_transactions.json @@ -0,0 +1,52 @@ +{ + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "type": { + "type": ["null", "string"] + }, + "test": { + "type": ["null", "boolean"] + }, + "payout_id": { + "type": ["null", "integer"] + }, + "payout_status": { + "type": ["null", "string"] + }, + "payoucurrencyt_status": { + "type": ["null", "string"] + }, + "amount": { + "type": ["null", "number"] + }, + "fee": { + "type": ["null", "number"] + }, + "net": { + "type": ["null", "number"] + }, + "source_id": { + "type": ["null", "integer"] + }, + "source_type": { + "type": ["null", "string"] + }, + "source_order_transaction_id": { + "type": ["null", "integer"] + }, + "source_order_id": { + "type": ["null", "integer"] + }, + "processed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "shop_url": { + "type": ["null", "string"] + } + } + +} diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/source.py b/airbyte-integrations/connectors/source-shopify/source_shopify/source.py index 
1e804a1dc14d..e9cf2bd06693 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/source.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/source.py @@ -38,6 +38,13 @@ def __init__(self, config: Dict): @property def url_base(self) -> str: return f"https://{self.config['shop']}.myshopify.com/admin/api/{self.api_version}/" + + @property + def default_filter_field_value(self) -> Union[int, str]: + # certain streams are using `since_id` field as `filter_field`, which requires to use `int` type, + # but many other use `str` values for this, we determine what to use based on `filter_field` value + # by default, we use the user defined `Start Date` as initial value, or 0 for `id`-dependent streams. + return 0 if self.filter_field == "since_id" else self.config["start_date"] @staticmethod def next_page_token(response: requests.Response) -> Optional[Mapping[str, Any]]: @@ -53,7 +60,7 @@ def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> params.update(**next_page_token) else: params["order"] = f"{self.order_field} asc" - params[self.filter_field] = self.config["start_date"] + params[self.filter_field] = self.default_filter_field_value return params @limiter.balance_rate_limit() @@ -92,7 +99,7 @@ def state_checkpoint_interval(self) -> int: cursor_field = "updated_at" @property - def default_comparison_value(self) -> Union[int, str]: + def default_state_comparison_value(self) -> Union[int, str]: # certain streams are using `id` field as `cursor_field`, which requires to use `int` type, # but many other use `str` values for this, we determine what to use based on `cursor_field` value return 0 if self.cursor_field == "id" else "" @@ -100,8 +107,8 @@ def default_comparison_value(self) -> Union[int, str]: def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: return { self.cursor_field: max( - latest_record.get(self.cursor_field, self.default_comparison_value), - current_stream_state.get(self.cursor_field, self.default_comparison_value), + latest_record.get(self.cursor_field, self.default_state_comparison_value), + current_stream_state.get(self.cursor_field, self.default_state_comparison_value), ) } @@ -307,25 +314,31 @@ class Collects(IncrementalShopifyStream): The Collect stream is the link between Products and Collections, if the Collection is created for Products, the `collect` record is created, it's reasonable to Full Refresh all collects. As for Incremental refresh - we would use the since_id specificaly for this stream. - """ data_field = "collects" cursor_field = "id" order_field = "id" filter_field = "since_id" - + def path(self, **kwargs) -> str: return f"{self.data_field}.json" - def request_params( - self, stream_state: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None, **kwargs - ) -> MutableMapping[str, Any]: - params = super().request_params(stream_state=stream_state, next_page_token=next_page_token, **kwargs) - # If there is a next page token then we should only send pagination-related parameters. 
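The net effect of `default_filter_field_value` is that streams filtered by `since_id` (such as `Collects`, and the `BalanceTransactions` stream introduced just below) start from `0`, while date-filtered streams keep using the configured `start_date`, without each stream overriding `request_params`. A toy illustration of that dispatch follows; the classes and the `updated_at_min` field name are simplified stand-ins, not the connector's real base class.

```python
# Toy stand-ins showing how filter_field selects the initial filter value:
# since_id-based streams start from 0, date-based streams from start_date.
from typing import Union


class ToyShopifyStream:
    order_field = "updated_at"
    filter_field = "updated_at_min"  # stand-in name for a date-based filter

    def __init__(self, config: dict) -> None:
        self.config = config

    @property
    def default_filter_field_value(self) -> Union[int, str]:
        return 0 if self.filter_field == "since_id" else self.config["start_date"]

    def first_page_params(self) -> dict:
        return {"order": f"{self.order_field} asc", self.filter_field: self.default_filter_field_value}


class ToyCollects(ToyShopifyStream):
    order_field = "id"
    filter_field = "since_id"


config = {"start_date": "2021-01-01"}
assert ToyShopifyStream(config).first_page_params() == {"order": "updated_at asc", "updated_at_min": "2021-01-01"}
assert ToyCollects(config).first_page_params() == {"order": "id asc", "since_id": 0}
```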
- if not next_page_token and not stream_state: - params[self.filter_field] = 0 - return params + +class BalanceTransactions(IncrementalShopifyStream): + + """ + PaymentsTransactions stream does not support Incremental Refresh based on datetime fields, only `since_id` is supported: + https://shopify.dev/api/admin-rest/2021-07/resources/transactions + """ + + data_field = "transactions" + cursor_field = "id" + order_field = "id" + filter_field = "since_id" + + def path(self, **kwargs) -> str: + return f"shopify_payments/balance/{self.data_field}.json" class OrderRefunds(ShopifySubstream): @@ -514,6 +527,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: OrderRisks(config), TenderTransactions(config), Transactions(config), + BalanceTransactions(config), Pages(config), PriceRules(config), DiscountCodes(config), diff --git a/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py b/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py index a0d4c62a3d8c..2b834be51def 100644 --- a/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py +++ b/airbyte-integrations/connectors/source-shopify/source_shopify/utils.py @@ -20,6 +20,7 @@ "read_locations": ["Locations"], "read_inventory": ["InventoryItems", "InventoryLevels"], "read_merchant_managed_fulfillment_orders": ["FulfillmentOrders"], + "read_shopify_payments_payouts": ["BalanceTransactions"], } diff --git a/airbyte-integrations/connectors/source-stripe/Dockerfile b/airbyte-integrations/connectors/source-stripe/Dockerfile index 6e733fe6825c..a90d7c815126 100644 --- a/airbyte-integrations/connectors/source-stripe/Dockerfile +++ b/airbyte-integrations/connectors/source-stripe/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.29 +LABEL io.airbyte.version=0.1.30 LABEL io.airbyte.name=airbyte/source-stripe diff --git a/airbyte-integrations/connectors/source-stripe/source_stripe/spec.json b/airbyte-integrations/connectors/source-stripe/source_stripe/spec.json index 06cf65a5f800..3696d24b1273 100644 --- a/airbyte-integrations/connectors/source-stripe/source_stripe/spec.json +++ b/airbyte-integrations/connectors/source-stripe/source_stripe/spec.json @@ -15,7 +15,7 @@ }, "client_secret": { "type": "string", - "title": "Client Secret", + "title": "Secret Key", "description": "Stripe API key (usually starts with 'sk_live_'; find yours here).", "airbyte_secret": true, "order": 1 @@ -33,7 +33,7 @@ "title": "Lookback Window (in days)", "default": 0, "minimum": 0, - "description": "When set, the connector will always reload data from the past N days, where N is the value set here. This is useful if your data is updated after creation.", + "description": "When set, the connector will always reload data from the past N days, where N is the value set here. This is useful if your data is updated after creation. 
More info here", "order": 3 } } diff --git a/airbyte-integrations/connectors/source-zendesk-support/Dockerfile b/airbyte-integrations/connectors/source-zendesk-support/Dockerfile index 41daded4afea..03358e75d66f 100644 --- a/airbyte-integrations/connectors/source-zendesk-support/Dockerfile +++ b/airbyte-integrations/connectors/source-zendesk-support/Dockerfile @@ -25,5 +25,5 @@ COPY source_zendesk_support ./source_zendesk_support ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.0 +LABEL io.airbyte.version=0.2.2 LABEL io.airbyte.name=airbyte/source-zendesk-support diff --git a/airbyte-integrations/connectors/source-zendesk-support/README.md b/airbyte-integrations/connectors/source-zendesk-support/README.md index d4cfd188c916..96c6104b2861 100644 --- a/airbyte-integrations/connectors/source-zendesk-support/README.md +++ b/airbyte-integrations/connectors/source-zendesk-support/README.md @@ -101,7 +101,8 @@ Customize `acceptance-test-config.yml` file to configure tests. See [Source Acce If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. To run your integration tests with acceptance tests, from the connector root, run ``` -python -m pytest integration_tests -p integration_tests.acceptance +docker build . --no-cache -t airbyte/source-zendesk-support:dev \ +&& python -m pytest -p source_acceptance_test.plugin ``` To run your integration tests with docker diff --git a/airbyte-integrations/connectors/source-zendesk-support/acceptance-test-config.yml b/airbyte-integrations/connectors/source-zendesk-support/acceptance-test-config.yml index 565b1a47d614..6bdcc2ef1a10 100644 --- a/airbyte-integrations/connectors/source-zendesk-support/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-zendesk-support/acceptance-test-config.yml @@ -13,6 +13,7 @@ tests: status: "failed" discovery: - config_path: "secrets/config.json" + - config_path: "secrets/config_oauth.json" basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/configured_catalog.json" diff --git a/airbyte-integrations/connectors/source-zendesk-support/integration_tests/invalid_config.json b/airbyte-integrations/connectors/source-zendesk-support/integration_tests/invalid_config.json index c1562ac3660e..0eb9ad451f4f 100644 --- a/airbyte-integrations/connectors/source-zendesk-support/integration_tests/invalid_config.json +++ b/airbyte-integrations/connectors/source-zendesk-support/integration_tests/invalid_config.json @@ -1,5 +1,6 @@ { - "auth_method": { + "credentials": { + "credentials": "api_token", "api_token": "", "email": "broken.email@invalid.config" }, diff --git a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_comments.json b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_comments.json index df3aa01c3bb6..4fb30ea5cb38 100644 --- a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_comments.json +++ b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/schemas/ticket_comments.json @@ -4,6 +4,9 @@ "type": ["null", "string"], "format": "date-time" }, + "timestamp": { + "type": ["null", "integer"] + }, "body": { "type": ["null", "string"] }, @@ -16,6 +19,9 @@ "type": { "type": ["null", "string"] }, + 
"via_reference_id": { + "type": ["null", "integer"] + }, "html_body": { "type": ["null", "string"] }, diff --git a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/source.py b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/source.py index f45d9aa5eab8..7c9dfbb721b5 100644 --- a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/source.py +++ b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/source.py @@ -51,11 +51,21 @@ class SourceZendeskSupport(AbstractSource): @classmethod def get_authenticator(cls, config: Mapping[str, Any]) -> BasicApiTokenAuthenticator: - if config["auth_method"]["auth_method"] == "access_token": - return TokenAuthenticator(token=config["auth_method"]["access_token"]) - elif config["auth_method"]["auth_method"] == "api_token": - return BasicApiTokenAuthenticator(config["auth_method"]["email"], config["auth_method"]["api_token"]) - raise SourceZendeskException(f"Not implemented authorization method: {config['auth_method']}") + + # old authentication flow support + auth_old = config.get("auth_method") + if auth_old: + if auth_old.get("auth_method") == "api_token": + return BasicApiTokenAuthenticator(config["auth_method"]["email"], config["auth_method"]["api_token"]) + # new authentication flow + auth = config.get("credentials") + if auth: + if auth.get("credentials") == "oauth2.0": + return TokenAuthenticator(token=config["credentials"]["access_token"]) + elif auth.get("credentials") == "api_token": + return BasicApiTokenAuthenticator(config["credentials"]["email"], config["credentials"]["api_token"]) + else: + raise SourceZendeskException(f"Not implemented authorization method: {config['credentials']}") def check_connection(self, logger, config) -> Tuple[bool, any]: """Connection check to validate that the user-provided config can be used to connect to the underlying API diff --git a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/spec.json b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/spec.json index c4ea2fd36e88..5e01fc832fa4 100644 --- a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/spec.json +++ b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/spec.json @@ -4,8 +4,8 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Source Zendesk Support Spec", "type": "object", - "required": ["start_date", "subdomain", "auth_method"], - "additionalProperties": false, + "required": ["start_date", "subdomain"], + "additionalProperties": true, "properties": { "start_date": { "type": "string", @@ -16,51 +16,57 @@ }, "subdomain": { "type": "string", - "description": "The subdomain for your Zendesk Support" + "title": "Subdomain", + "description": "Identifier of your Zendesk Subdomain, like: https://{MY_SUBDOMAIN}.zendesk.com/, where MY_SUBDOMAIN is the value of your subdomain" }, - "auth_method": { + "credentials": { "title": "Authorization Method", "type": "object", - "default": "api_token", - "description": "Zendesk service provides 2 auth method: API token and OAuth2. Now only the first one is available. Another one will be added in the future.", + "description": "Zendesk service provides two authentication methods. 
Choose between: `OAuth2.0` or `API token`.", "oneOf": [ { - "title": "API Token", + "title": "OAuth2.0", "type": "object", - "required": ["email", "api_token"], - "additionalProperties": false, + "required": ["access_token"], + "additionalProperties": true, "properties": { - "auth_method": { + "credentials": { "type": "string", - "const": "api_token" + "const": "oauth2.0", + "enum": ["oauth2.0"], + "default": "oauth2.0", + "order": 0 }, - "email": { - "title": "Email", - "type": "string", - "description": "The user email for your Zendesk account." - }, - "api_token": { - "title": "API Token", + "access_token": { "type": "string", + "title": "Access Token", "description": "The value of the API token generated. See the docs for more information.", "airbyte_secret": true } } }, { - "title": "OAuth2.0", + "title": "API Token", "type": "object", - "required": ["access_token"], - "additionalProperties": false, + "required": ["email", "api_token"], + "additionalProperties": true, "properties": { - "auth_method": { + "credentials": { "type": "string", - "const": "access_token" + "const": "api_token", + "enum": ["api_token"], + "default": "api_token", + "order": 0 }, - "access_token": { - "title": "Access Token", + "email": { + "title": "Email", "type": "string", - "description": "The value of the Access token generated. See the docs for more information.", + "description": "The user email for your Zendesk account." + }, + "api_token": { + "title": "API Token", + "type": "string", + "description": "The value of the API token generated. See the docs for more information.", "airbyte_secret": true } } @@ -68,5 +74,58 @@ ] } } + }, + "advanced_auth": { + "auth_flow_type": "oauth2.0", + "predicate_key": ["credentials", "credentials"], + "predicate_value": "oauth2.0", + "oauth_config_specification": { + "complete_oauth_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "access_token": { + "type": "string", + "path_in_connector_config": ["credentials", "access_token"] + } + } + }, + "complete_oauth_server_input_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string" + }, + "client_secret": { + "type": "string" + } + } + }, + "complete_oauth_server_output_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "client_id": { + "type": "string", + "path_in_connector_config": ["credentials", "client_id"] + }, + "client_secret": { + "type": "string", + "path_in_connector_config": ["credentials", "client_secret"] + } + } + }, + "oauth_user_input_from_connector_config_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "subdomain": { + "type": "string", + "path_in_connector_config": ["subdomain"] + } + } + } + } } } diff --git a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/streams.py b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/streams.py index c98d5bd1cfbc..c35f75f72425 100644 --- a/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/streams.py +++ b/airbyte-integrations/connectors/source-zendesk-support/source_zendesk_support/streams.py @@ -25,8 +25,9 @@ from requests.auth import AuthBase from requests_futures.sessions import PICKLE_ERROR, FuturesSession -DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" -LAST_END_TIME_KEY = "_last_end_time" +DATETIME_FORMAT: str = "%Y-%m-%dT%H:%M:%SZ" +LAST_END_TIME_KEY: str = "_last_end_time" 
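# -----------------------------------------------------------------------------
# Editor's sketch (not part of this commit): the streams in this module paginate
# Zendesk's incremental export endpoints by sending a unix `start_time` parameter
# and stopping once a response reports `"end_of_stream": true`; the cursor for the
# next page is the `end_time` of the previous response. The helper below is a
# minimal, self-contained illustration of that loop; the function name and the
# bare `requests` call are assumptions for illustration only.
import requests


def read_incremental_ticket_events(subdomain: str, start_time: int, auth=None) -> list:
    """Follow incremental/ticket_events pages until Zendesk reports end_of_stream."""
    url = f"https://{subdomain}.zendesk.com/api/v2/incremental/ticket_events"
    records = []
    while True:
        payload = requests.get(url, params={"start_time": start_time}, auth=auth).json()
        records.extend(payload.get("ticket_events", []))
        if payload.get("end_of_stream", False):
            return records
        # Zendesk hands back the next cursor as `end_time` on every page.
        start_time = payload["end_time"]
# -----------------------------------------------------------------------------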
+END_OF_STREAM_KEY: str = "end_of_stream" class SourceZendeskException(Exception): @@ -114,6 +115,12 @@ def str2unixtime(str_dt: str) -> Optional[int]: dt = datetime.strptime(str_dt, DATETIME_FORMAT) return calendar.timegm(dt.utctimetuple()) + @staticmethod + def _parse_next_page_number(response: requests.Response) -> Optional[int]: + """Parses a response and tries to find next page number""" + next_page = response.json().get("next_page") + return dict(parse_qsl(urlparse(next_page).query)).get("page") if next_page else None + def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]: """try to select relevant data only""" @@ -149,6 +156,16 @@ def __init__(self, authenticator: Union[AuthBase, HttpAuthenticator] = None, **k self._session.auth = authenticator self.future_requests = deque() + @property + def url_base(self) -> str: + return f"https://{self._subdomain}.zendesk.com/api/v2/" + + def path(self, **kwargs): + return self.name + + def next_page_token(self, *args, **kwargs): + return None + def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: latest_benchmark = latest_record[self.cursor_field] if current_stream_state.get(self.cursor_field): @@ -270,26 +287,13 @@ def read_records( else: yield from self.parse_response(response, stream_state=stream_state, stream_slice=stream_slice) - @property - def url_base(self) -> str: - return f"https://{self._subdomain}.zendesk.com/api/v2/" - - @staticmethod - def _parse_next_page_number(response: requests.Response) -> Optional[int]: - """Parses a response and tries to find next page number""" - next_page = response.json().get("next_page") - if next_page: - return dict(parse_qsl(urlparse(next_page).query)).get("page") - return None - - def path(self, **kwargs): - return self.name - - def next_page_token(self, *args, **kwargs): - return None - class SourceZendeskSupportFullRefreshStream(BaseSourceZendeskSupportStream): + """ + # endpoints don't provide the updated_at/created_at fields + # thus we can't implement an incremental logic for them + """ + primary_key = "id" response_list_name: str = None @@ -300,14 +304,6 @@ def url_base(self) -> str: def path(self, **kwargs): return self.name - @staticmethod - def _parse_next_page_number(response: requests.Response) -> Optional[int]: - """Parses a response and tries to find next page number""" - next_page = response.json().get("next_page") - if next_page: - return dict(parse_qsl(urlparse(next_page).query)).get("page") - return None - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: next_page = self._parse_next_page_number(response) if not next_page: @@ -327,6 +323,10 @@ def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> class SourceZendeskSupportCursorPaginationStream(SourceZendeskSupportFullRefreshStream): + """ + # endpoints provide a cursor pagination and sorting mechanism + """ + next_page_field = "next_page" prev_start_time = None @@ -342,17 +342,74 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, self.prev_start_time = start_time return {self.cursor_field: start_time} - def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]: + def request_params( + self, stream_state: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None, **kwargs + ) -> MutableMapping[str, Any]: next_page_token = next_page_token or 
{} + if stream_state: + # use the state value if exists + parsed_state = calendar.timegm(pendulum.parse(stream_state.get(self.cursor_field)).utctimetuple()) + else: + # for full-refresh use start_date + parsed_state = calendar.timegm(pendulum.parse(self._start_date).utctimetuple()) if self.cursor_field: - params = { - "start_time": next_page_token.get(self.cursor_field, calendar.timegm(pendulum.parse(self._start_date).utctimetuple())) - } + params = {"start_time": next_page_token.get(self.cursor_field, parsed_state)} else: params = {"start_time": calendar.timegm(pendulum.parse(self._start_date).utctimetuple())} return params +class ZendeskSupportTicketEventsExportStream(SourceZendeskSupportCursorPaginationStream): + """Incremental Export from TicketEvents stream: + https://developer.zendesk.com/api-reference/ticketing/ticket-management/incremental_exports/#incremental-ticket-event-export + + @ param response_list_name: the main nested entity to look at inside of response, defualt = "ticket_events" + @ param response_target_entity: nested property inside of `response_list_name`, default = "child_events" + @ param list_entities_from_event : the list of nested child_events entities to include from parent record + @ param sideload_param : parameter variable to include various information to child_events property + more info: https://developer.zendesk.com/documentation/ticketing/using-the-zendesk-api/side_loading/#supported-endpoints + @ param event_type : specific event_type to check ["Audit", "Change", "Comment", etc] + """ + + response_list_name: str = "ticket_events" + response_target_entity: str = "child_events" + list_entities_from_event: List[str] = None + sideload_param: str = None + event_type: str = None + + @property + def update_event_from_record(self) -> bool: + """Returns True/False based on list_entities_from_event property""" + return True if len(self.list_entities_from_event) > 0 else False + + def path(self, **kwargs) -> str: + return "incremental/ticket_events" + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + """ + Returns next_page_token based on `end_of_stream` parameter inside of response + """ + next_page_token = super().next_page_token(response) + return None if response.json().get(END_OF_STREAM_KEY, False) else next_page_token + + def request_params( + self, stream_state: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None, **kwargs + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state, next_page_token, **kwargs) + if self.sideload_param: + params["include"] = self.sideload_param + return params + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + for record in response.json().get(self.response_list_name, []): + for event in record.get(self.response_target_entity, []): + if event.get("event_type") == self.event_type: + if self.update_event_from_record: + for prop in self.list_entities_from_event: + event[prop] = record.get(prop) + yield event + + class Users(SourceZendeskSupportStream): """Users stream: https://developer.zendesk.com/api-reference/ticketing/ticket-management/incremental_exports/""" @@ -376,32 +433,15 @@ def request_params(self, **kwargs) -> MutableMapping[str, Any]: return params -class TicketComments(SourceZendeskSupportStream): - """TicketComments stream: https://developer.zendesk.com/api-reference/ticketing/tickets/ticket_comments/ - ZenDesk doesn't provide API for loading of all comments by one direct endpoints. 
- Thus at first we loads all updated tickets and after this tries to load all created/updated - comments per every ticket""" - - # Tickets can be removed throughout synchronization. The ZendDesk API will return a response - # with 404 code if a ticket is not exists. But it shouldn't break loading of other comments. - # raise_on_http_errors = False +class TicketComments(ZendeskSupportTicketEventsExportStream): + """ + Fetch the TicketComments incrementaly from TicketEvents Export stream + """ - parent = Tickets cursor_field = "created_at" - - response_list_name = "comments" - - def path(self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: - ticket_id = stream_slice["id"] - return f"tickets/{ticket_id}/comments" - - def stream_slices( - self, sync_mode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None - ) -> Iterable[Optional[Mapping[str, Any]]]: - tickets_stream = self.parent(start_date=self._start_date, subdomain=self._subdomain, authenticator=self._session.auth) - for ticket in tickets_stream.read_records(sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_state=stream_state): - if ticket["comment_count"]: - yield {"id": ticket["id"], "child_count": ticket["comment_count"]} + list_entities_from_event = ["via_reference_id", "ticket_id", "timestamp"] + sideload_param = "comment_events" + event_type = "Comment" class Groups(SourceZendeskSupportStream): @@ -463,9 +503,6 @@ class Macros(SourceZendeskSupportStream): """Macros stream: https://developer.zendesk.com/api-reference/ticketing/business-rules/macros/""" -# endpoints provide a cursor pagination and sorting mechanism - - class TicketAudits(SourceZendeskSupportCursorPaginationStream): """TicketAudits stream: https://developer.zendesk.com/api-reference/ticketing/tickets/ticket_audits/""" @@ -490,10 +527,6 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, return response.json().get("before_cursor") -# endpoints don't provide the updated_at/created_at fields -# thus we can't implement an incremental logic for them - - class Tags(SourceZendeskSupportFullRefreshStream): """Tags stream: https://developer.zendesk.com/api-reference/ticketing/ticket-management/tags/""" diff --git a/airbyte-integrations/connectors/source-zendesk-support/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-zendesk-support/unit_tests/unit_test.py new file mode 100644 index 000000000000..902cd43ea885 --- /dev/null +++ b/airbyte-integrations/connectors/source-zendesk-support/unit_tests/unit_test.py @@ -0,0 +1,120 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+# + +import calendar +from datetime import datetime +from urllib.parse import parse_qsl, urlparse + +import pendulum +import pytz +import requests +from source_zendesk_support.source import BasicApiTokenAuthenticator +from source_zendesk_support.streams import DATETIME_FORMAT, END_OF_STREAM_KEY, BaseSourceZendeskSupportStream, TicketComments + +# config +STREAM_ARGS = { + "subdomain": "test", + "start_date": "2022-01-27T00:00:00Z", + "authenticator": BasicApiTokenAuthenticator("test@airbyte.io", "api_token"), +} + +DATETIME_STR = "2021-07-22T06:55:55Z" +DATETIME_FROM_STR = datetime.strptime(DATETIME_STR, DATETIME_FORMAT) +STREAM_URL = "https://subdomain.zendesk.com/api/v2/stream.json?&start_time=1647532987&page=1" +STREAM_RESPONSE: dict = { + "ticket_events": [ + { + "child_events": [ + { + "id": 99999, + "via": {}, + "via_reference_id": None, + "type": "Comment", + "author_id": 10, + "body": "test_comment", + "html_body": '
test_comment
', + "plain_body": "test_comment", + "public": True, + "attachments": [], + "audit_id": 123456, + "created_at": "2022-03-17T16:03:07Z", + "event_type": "Comment", + } + ], + "id": 999999, + "ticket_id": 3, + "timestamp": 1647532987, + "created_at": "2022-03-17T16:03:07Z", + "updater_id": 9999999, + "via": "Web form", + "system": {}, + "metadata": {}, + "event_type": "Audit", + } + ], + "next_page": "https://subdomain.zendesk.com/api/v2/stream.json?&start_time=1122334455&page=2", + "count": 215, + "end_of_stream": False, + "end_time": 1647532987, +} +TEST_STREAM = TicketComments(**STREAM_ARGS) + + +def test_str2datetime(): + expected = datetime.strptime(DATETIME_STR, DATETIME_FORMAT) + output = BaseSourceZendeskSupportStream.str2datetime(DATETIME_STR) + assert output == expected + + +def test_datetime2str(): + expected = datetime.strftime(DATETIME_FROM_STR.replace(tzinfo=pytz.UTC), DATETIME_FORMAT) + output = BaseSourceZendeskSupportStream.datetime2str(DATETIME_FROM_STR) + assert output == expected + + +def test_str2unixtime(): + expected = calendar.timegm(DATETIME_FROM_STR.utctimetuple()) + output = BaseSourceZendeskSupportStream.str2unixtime(DATETIME_STR) + assert output == expected + + +def test_parse_next_page_number(requests_mock): + expected = dict(parse_qsl(urlparse(STREAM_RESPONSE.get("next_page")).query)).get("page") + requests_mock.get(STREAM_URL, json=STREAM_RESPONSE) + test_response = requests.get(STREAM_URL) + output = BaseSourceZendeskSupportStream._parse_next_page_number(test_response) + assert output == expected + + +def test_next_page_token(requests_mock): + # mocking the logic of next_page_token + if STREAM_RESPONSE.get(END_OF_STREAM_KEY) is False: + expected = {"created_at": "1122334455"} + else: + expected = None + requests_mock.get(STREAM_URL, json=STREAM_RESPONSE) + test_response = requests.get(STREAM_URL) + output = TEST_STREAM.next_page_token(test_response) + assert expected == output + + +def test_request_params(requests_mock): + expected = {"start_time": calendar.timegm(pendulum.parse(STREAM_ARGS.get("start_date")).utctimetuple()), "include": "comment_events"} + stream_state = None + requests_mock.get(STREAM_URL, json=STREAM_RESPONSE) + test_response = requests.get(STREAM_URL) + next_page_token = TEST_STREAM.next_page_token(test_response) + output = TEST_STREAM.request_params(stream_state, next_page_token) + assert expected == output + + +def test_parse_response(requests_mock): + requests_mock.get(STREAM_URL, json=STREAM_RESPONSE) + test_response = requests.get(STREAM_URL) + output = TEST_STREAM.parse_response(test_response) + # get the first parsed element from generator + parsed_output = list(output)[0] + # check, if we have all transformations correctly + for entity in TicketComments.list_entities_from_event: + assert True if entity in parsed_output else False diff --git a/airbyte-metrics/reporter/Dockerfile b/airbyte-metrics/reporter/Dockerfile index f75b1e901cab..0915687f29ae 100644 --- a/airbyte-metrics/reporter/Dockerfile +++ b/airbyte-metrics/reporter/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-metrics-reporter WORKDIR /app -ADD bin/${APPLICATION}-0.35.55-alpha.tar /app +ADD bin/${APPLICATION}-0.35.59-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.55-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.59-alpha/bin/${APPLICATION}"] diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java 
b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java index 54e26b45dc6e..72500f200a27 100644 --- a/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/OAuthImplementationFactory.java @@ -55,6 +55,7 @@ public OAuthImplementationFactory(final ConfigRepository configRepository, final .put("airbyte/source-youtube-analytics", new YouTubeAnalyticsOAuthFlow(configRepository, httpClient)) .put("airbyte/source-drift", new DriftOAuthFlow(configRepository, httpClient)) .put("airbyte/source-zendesk-chat", new ZendeskChatOAuthFlow(configRepository, httpClient)) + .put("airbyte/source-zendesk-support", new ZendeskSupportOAuthFlow(configRepository, httpClient)) .put("airbyte/source-monday", new MondayOAuthFlow(configRepository, httpClient)) .put("airbyte/source-zendesk-sunshine", new ZendeskSunshineOAuthFlow(configRepository, httpClient)) .put("airbyte/source-mailchimp", new MailchimpOAuthFlow(configRepository, httpClient)) diff --git a/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/ZendeskSupportOAuthFlow.java b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/ZendeskSupportOAuthFlow.java new file mode 100644 index 000000000000..71481301bb3a --- /dev/null +++ b/airbyte-oauth/src/main/java/io/airbyte/oauth/flows/ZendeskSupportOAuthFlow.java @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.oauth.flows; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableMap; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.oauth.BaseOAuth2Flow; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.http.HttpClient; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; +import java.util.function.Supplier; +import org.apache.http.client.utils.URIBuilder; + +/** + * Following docs from + * https://support.zendesk.com/hc/en-us/articles/4408845965210-Using-OAuth-authentication-with-your-application + */ +public class ZendeskSupportOAuthFlow extends BaseOAuth2Flow { + + public ZendeskSupportOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient) { + super(configRepository, httpClient); + } + + @VisibleForTesting + public ZendeskSupportOAuthFlow(final ConfigRepository configRepository, final HttpClient httpClient, final Supplier stateSupplier) { + super(configRepository, httpClient, stateSupplier); + } + + @Override + protected String formatConsentUrl(final UUID definitionId, + final String clientId, + final String redirectUrl, + final JsonNode inputOAuthConfiguration) + throws IOException { + + // getting subdomain value from user's config + final String subdomain = getConfigValueUnsafe(inputOAuthConfiguration, "subdomain"); + + final URIBuilder builder = new URIBuilder() + .setScheme("https") + .setHost(subdomain + ".zendesk.com") + .setPath("oauth/authorizations/new") + // required + .addParameter("client_id", clientId) + .addParameter("redirect_uri", redirectUrl) + .addParameter("response_type", "code") + .addParameter("scope", "read") + .addParameter("state", getState()); + + try { + return builder.build().toString(); + } catch (URISyntaxException e) { + throw new IOException("Failed to format Consent URL for OAuth flow", e); + } + } + + @Override + protected Map getAccessTokenQueryParameters(String clientId, + String clientSecret, + String authCode, + String 
redirectUrl) { + return ImmutableMap.builder() + // required + .put("grant_type", "authorization_code") + .put("code", authCode) + .put("client_id", clientId) + .put("client_secret", clientSecret) + .put("redirect_uri", redirectUrl) + .put("scope", "read") + .build(); + } + + @Override + protected String getAccessTokenUrl(final JsonNode inputOAuthConfiguration) { + // getting subdomain value from user's config + final String subdomain = getConfigValueUnsafe(inputOAuthConfiguration, "subdomain"); + return "https://" + subdomain + ".zendesk.com/oauth/tokens"; + } + + @Override + protected Map extractOAuthOutput(final JsonNode data, final String accessTokenUrl) throws IOException { + final Map result = new HashMap<>(); + // getting out access_token + if (data.has("access_token")) { + result.put("access_token", data.get("access_token").asText()); + } else { + throw new IOException(String.format("Missing 'access_token' in query params from %s", accessTokenUrl)); + } + return result; + } + +} diff --git a/airbyte-scheduler/app/Dockerfile b/airbyte-scheduler/app/Dockerfile index 27b1313dda72..3e37313d597b 100644 --- a/airbyte-scheduler/app/Dockerfile +++ b/airbyte-scheduler/app/Dockerfile @@ -5,7 +5,7 @@ ENV APPLICATION airbyte-scheduler WORKDIR /app -ADD bin/${APPLICATION}-0.35.55-alpha.tar /app +ADD bin/${APPLICATION}-0.35.59-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.55-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.59-alpha/bin/${APPLICATION}"] diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java index 048ef4279b76..4118cda02534 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/DefaultJobPersistence.java @@ -385,6 +385,22 @@ public List listJobsWithStatus(final ConfigType configType, final JobStatus return listJobsWithStatus(Sets.newHashSet(configType), status); } + @Override + public List listJobStatusWithConnection(final UUID connectionId, final Set configTypes, final Instant jobCreatedAtTimestamp) + throws IOException { + final LocalDateTime timeConvertedIntoLocalDateTime = LocalDateTime.ofInstant(jobCreatedAtTimestamp, ZoneOffset.UTC); + + final String JobStatusSelect = "SELECT status FROM jobs "; + return jobDatabase.query(ctx -> ctx + .fetch(JobStatusSelect + "WHERE " + + "scope = ? AND " + + "CAST(config_type AS VARCHAR) in " + Sqls.toSqlInFragment(configTypes) + " AND " + + "created_at >= ? 
ORDER BY created_at DESC", connectionId.toString(), timeConvertedIntoLocalDateTime)) + .stream() + .map(r -> JobStatus.valueOf(r.get("status", String.class).toUpperCase())) + .toList(); + } + @Override public Optional getLastReplicationJob(final UUID connectionId) throws IOException { return jobDatabase.query(ctx -> ctx @@ -400,6 +416,21 @@ public Optional getLastReplicationJob(final UUID connectionId) throws IOExc .flatMap(r -> getJobOptional(ctx, r.get("job_id", Long.class)))); } + @Override + public Optional getFirstReplicationJob(final UUID connectionId) throws IOException { + return jobDatabase.query(ctx -> ctx + .fetch(BASE_JOB_SELECT_AND_JOIN + "WHERE " + + "CAST(jobs.config_type AS VARCHAR) in " + Sqls.toSqlInFragment(Job.REPLICATION_TYPES) + " AND " + + "scope = ? AND " + + "CAST(jobs.status AS VARCHAR) <> ? " + + "ORDER BY jobs.created_at ASC LIMIT 1", + connectionId.toString(), + Sqls.toSqlName(JobStatus.CANCELLED)) + .stream() + .findFirst() + .flatMap(r -> getJobOptional(ctx, r.get("job_id", Long.class)))); + } + @Override public Optional getNextJob() throws IOException { // rules: diff --git a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java index 821b123d6787..69ba6ab67a13 100644 --- a/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java +++ b/airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/JobPersistence.java @@ -145,7 +145,6 @@ public interface JobPersistence { List listJobs(Set configTypes, String configId, int limit, int offset) throws IOException; /** - * * @param configType The type of job * @param attemptEndedAtTimestamp The timestamp after which you want the jobs * @return List of jobs that have attempts after the provided timestamp @@ -161,8 +160,21 @@ public interface JobPersistence { List listJobsWithStatus(JobConfig.ConfigType configType, JobStatus status) throws IOException; + /** + * @param connectionId The ID of the connection + * @param configTypes The types of jobs + * @param jobCreatedAtTimestamp The timestamp after which you want the jobs + * @return List of job statuses from a specific connection that have attempts after the provided + * timestamp, sorted by jobs' createAt in descending order + * @throws IOException + */ + List listJobStatusWithConnection(UUID connectionId, Set configTypes, Instant jobCreatedAtTimestamp) + throws IOException; + Optional getLastReplicationJob(UUID connectionId) throws IOException; + Optional getFirstReplicationJob(UUID connectionId) throws IOException; + Optional getNextJob() throws IOException; /** diff --git a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/DefaultJobPersistenceTest.java b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/DefaultJobPersistenceTest.java index 903cdc5e7fc9..49395c1df973 100644 --- a/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/DefaultJobPersistenceTest.java +++ b/airbyte-scheduler/persistence/src/test/java/io/airbyte/scheduler/persistence/DefaultJobPersistenceTest.java @@ -89,8 +89,12 @@ class DefaultJobPersistenceTest { private static final JobConfig SYNC_JOB_CONFIG = new JobConfig() .withConfigType(ConfigType.SYNC) .withSync(new JobSyncConfig()); - private static PostgreSQLContainer container; + private static final int DEFAULT_MINIMUM_AGE_IN_DAYS = 30; + private static final int 
DEFAULT_EXCESSIVE_NUMBER_OF_JOBS = 500; + private static final int DEFAULT_MINIMUM_RECENCY_COUNT = 10; + + private static PostgreSQLContainer container; private Database jobDatabase; private Database configDatabase; private Supplier timeSupplier; @@ -169,7 +173,8 @@ public void setup() throws Exception { timeSupplier = mock(Supplier.class); when(timeSupplier.get()).thenReturn(NOW); - jobPersistence = new DefaultJobPersistence(jobDatabase, timeSupplier, 30, 500, 10); + jobPersistence = new DefaultJobPersistence(jobDatabase, timeSupplier, DEFAULT_MINIMUM_AGE_IN_DAYS, DEFAULT_EXCESSIVE_NUMBER_OF_JOBS, + DEFAULT_MINIMUM_RECENCY_COUNT); } @AfterEach @@ -337,7 +342,8 @@ void testListJobsWithTimestamp() throws IOException { final Instant now = Instant.parse("2021-01-01T00:00:00Z"); final Supplier timeSupplier = incrementingSecondSupplier(now); - jobPersistence = new DefaultJobPersistence(jobDatabase, timeSupplier, 30, 500, 10); + jobPersistence = new DefaultJobPersistence(jobDatabase, timeSupplier, DEFAULT_MINIMUM_AGE_IN_DAYS, DEFAULT_EXCESSIVE_NUMBER_OF_JOBS, + DEFAULT_MINIMUM_RECENCY_COUNT); final long syncJobId = jobPersistence.enqueueJob(SCOPE, SYNC_JOB_CONFIG).orElseThrow(); final int syncJobAttemptNumber0 = jobPersistence.createAttempt(syncJobId, LOG_PATH); jobPersistence.failAttempt(syncJobId, syncJobAttemptNumber0); @@ -402,7 +408,8 @@ void testListJobsWithTimestamp() throws IOException { void testListAttemptsWithJobInfo() throws IOException { final Instant now = Instant.parse("2021-01-01T00:00:00Z"); final Supplier timeSupplier = incrementingSecondSupplier(now); - jobPersistence = new DefaultJobPersistence(jobDatabase, timeSupplier, 30, 500, 10); + jobPersistence = new DefaultJobPersistence(jobDatabase, timeSupplier, DEFAULT_MINIMUM_AGE_IN_DAYS, DEFAULT_EXCESSIVE_NUMBER_OF_JOBS, + DEFAULT_MINIMUM_RECENCY_COUNT); final long job1 = jobPersistence.enqueueJob(SCOPE + "-1", SYNC_JOB_CONFIG).orElseThrow(); final long job2 = jobPersistence.enqueueJob(SCOPE + "-2", SYNC_JOB_CONFIG).orElseThrow(); @@ -793,6 +800,38 @@ public void testGetLastSyncJobForConnectionId() throws IOException { } + @Nested + @DisplayName("When getting first replication job") + class GetFirstReplicationJob { + + @Test + @DisplayName("Should return nothing if no job exists") + public void testGetFirstSyncJobForConnectionIdEmpty() throws IOException { + final Optional actual = jobPersistence.getFirstReplicationJob(CONNECTION_ID); + + assertTrue(actual.isEmpty()); + } + + @Test + @DisplayName("Should return the first job") + public void testGetFirstSyncJobForConnectionId() throws IOException { + final long jobId1 = jobPersistence.enqueueJob(SCOPE, SYNC_JOB_CONFIG).orElseThrow(); + jobPersistence.succeedAttempt(jobId1, jobPersistence.createAttempt(jobId1, LOG_PATH)); + final List attemptsWithJobInfo = jobPersistence.listAttemptsWithJobInfo(SYNC_JOB_CONFIG.getConfigType(), Instant.EPOCH); + final List attempts = Collections.singletonList(attemptsWithJobInfo.get(0).getAttempt()); + + final Instant afterNow = NOW.plusSeconds(1000); + when(timeSupplier.get()).thenReturn(afterNow); + final long jobId2 = jobPersistence.enqueueJob(SCOPE, SYNC_JOB_CONFIG).orElseThrow(); + + final Optional actual = jobPersistence.getFirstReplicationJob(CONNECTION_ID); + final Job expected = createJob(jobId1, SYNC_JOB_CONFIG, JobStatus.SUCCEEDED, attempts, NOW.getEpochSecond()); + + assertEquals(Optional.of(expected), actual); + } + + } + @Nested @DisplayName("When getting next job") class GetNextJob { @@ -1315,4 +1354,140 @@ private Job 
addStateToJob(final Job job) throws IOException, SQLException { } + @Nested + @DisplayName("When listing job statuses with specified connection id and timestamp") + class ListJobStatusWithConnection { + + @Test + @DisplayName("Should list only job statuses of specified connection id") + public void testConnectionIdFiltering() throws IOException { + jobPersistence = new DefaultJobPersistence(jobDatabase, timeSupplier, DEFAULT_MINIMUM_AGE_IN_DAYS, DEFAULT_EXCESSIVE_NUMBER_OF_JOBS, + DEFAULT_MINIMUM_RECENCY_COUNT); + + // create a connection with a non-relevant connection id that should be ignored for the duration of + // the test + final long wrongConnectionSyncJobId = jobPersistence.enqueueJob(UUID.randomUUID().toString(), SYNC_JOB_CONFIG).orElseThrow(); + final int wrongSyncJobAttemptNumber0 = jobPersistence.createAttempt(wrongConnectionSyncJobId, LOG_PATH); + jobPersistence.failAttempt(wrongConnectionSyncJobId, wrongSyncJobAttemptNumber0); + assertEquals(0, jobPersistence.listJobStatusWithConnection(CONNECTION_ID, Sets.newHashSet(ConfigType.SYNC), Instant.EPOCH).size()); + + // create a connection with relevant connection id + final long syncJobId = jobPersistence.enqueueJob(SCOPE, SYNC_JOB_CONFIG).orElseThrow(); + final int syncJobAttemptNumber0 = jobPersistence.createAttempt(syncJobId, LOG_PATH); + jobPersistence.failAttempt(syncJobId, syncJobAttemptNumber0); + + // check to see current status of only relevantly scoped job + final List jobStatuses = jobPersistence.listJobStatusWithConnection(CONNECTION_ID, Sets.newHashSet(ConfigType.SYNC), Instant.EPOCH); + assertEquals(jobStatuses.size(), 1); + assertEquals(JobStatus.INCOMPLETE, jobStatuses.get(0)); + } + + @Test + @DisplayName("Should list jobs statuses filtered by different timestamps") + public void testTimestampFiltering() throws IOException { + jobPersistence = new DefaultJobPersistence(jobDatabase, timeSupplier, DEFAULT_MINIMUM_AGE_IN_DAYS, DEFAULT_EXCESSIVE_NUMBER_OF_JOBS, + DEFAULT_MINIMUM_RECENCY_COUNT); + + // Create and fail initial job + final long syncJobId = jobPersistence.enqueueJob(SCOPE, SYNC_JOB_CONFIG).orElseThrow(); + final int syncJobAttemptNumber0 = jobPersistence.createAttempt(syncJobId, LOG_PATH); + jobPersistence.failAttempt(syncJobId, syncJobAttemptNumber0); + jobPersistence.failJob(syncJobId); + + // Check to see current status of all jobs from beginning of time, expecting only 1 job + final List jobStatuses = jobPersistence.listJobStatusWithConnection(CONNECTION_ID, Sets.newHashSet(ConfigType.SYNC), Instant.EPOCH); + assertEquals(jobStatuses.size(), 1); + assertEquals(JobStatus.FAILED, jobStatuses.get(0)); + + // Edit time supplier to return later time + final Instant timeAfterFirstJob = NOW.plusSeconds(60); + when(timeSupplier.get()).thenReturn(timeAfterFirstJob); + + // Create and succeed second job + final long newSyncJobId = jobPersistence.enqueueJob(SCOPE, SYNC_JOB_CONFIG).orElseThrow(); + final int newSyncJobAttemptNumber = jobPersistence.createAttempt(newSyncJobId, LOG_PATH); + jobPersistence.succeedAttempt(newSyncJobId, newSyncJobAttemptNumber); + + // Check to see current status of all jobs from beginning of time, expecting both jobs in createAt + // descending order (most recent first) + final List allQueryJobStatuses = + jobPersistence.listJobStatusWithConnection(CONNECTION_ID, Sets.newHashSet(ConfigType.SYNC), Instant.EPOCH); + assertEquals(2, allQueryJobStatuses.size()); + assertEquals(JobStatus.SUCCEEDED, allQueryJobStatuses.get(0)); + assertEquals(JobStatus.FAILED, allQueryJobStatuses.get(1)); 
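// Editor's note (not part of this commit): the contract exercised here is that
// listJobStatusWithConnection filters on created_at >= the supplied timestamp and
// returns statuses ordered by created_at descending, so the most recently created
// job's status always comes first in the returned list.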
+ + // Look up jobs with a timestamp after the first job. Expecting only the second job status + final List timestampFilteredJobStatuses = + jobPersistence.listJobStatusWithConnection(CONNECTION_ID, Sets.newHashSet(ConfigType.SYNC), timeAfterFirstJob); + assertEquals(1, timestampFilteredJobStatuses.size()); + assertEquals(JobStatus.SUCCEEDED, timestampFilteredJobStatuses.get(0)); + + // Check to see if timestamp filtering is working by only looking up jobs with timestamp after + // second job. Expecting no job status output + final Instant timeAfterSecondJob = timeAfterFirstJob.plusSeconds(60); + assertEquals(0, jobPersistence.listJobStatusWithConnection(CONNECTION_ID, Sets.newHashSet(ConfigType.SYNC), timeAfterSecondJob).size()); + } + + @Test + @DisplayName("Should list jobs statuses of differing status types") + public void testMultipleJobStatusTypes() throws IOException { + final Supplier timeSupplier = incrementingSecondSupplier(NOW); + jobPersistence = new DefaultJobPersistence(jobDatabase, timeSupplier, DEFAULT_MINIMUM_AGE_IN_DAYS, DEFAULT_EXCESSIVE_NUMBER_OF_JOBS, + DEFAULT_MINIMUM_RECENCY_COUNT); + + // Create and fail initial job + final long syncJobId1 = jobPersistence.enqueueJob(SCOPE, SYNC_JOB_CONFIG).orElseThrow(); + final int syncJobAttemptNumber1 = jobPersistence.createAttempt(syncJobId1, LOG_PATH); + jobPersistence.failAttempt(syncJobId1, syncJobAttemptNumber1); + jobPersistence.failJob(syncJobId1); + + // Create and succeed second job + final long syncJobId2 = jobPersistence.enqueueJob(SCOPE, SYNC_JOB_CONFIG).orElseThrow(); + final int syncJobAttemptNumber2 = jobPersistence.createAttempt(syncJobId2, LOG_PATH); + jobPersistence.succeedAttempt(syncJobId2, syncJobAttemptNumber2); + + // Create and cancel third job + final long syncJobId3 = jobPersistence.enqueueJob(SCOPE, SYNC_JOB_CONFIG).orElseThrow(); + jobPersistence.createAttempt(syncJobId3, LOG_PATH); + jobPersistence.cancelJob(syncJobId3); + + // Check to see current status of all jobs from beginning of time, expecting all jobs in createAt + // descending order (most recent first) + final List allJobStatuses = + jobPersistence.listJobStatusWithConnection(CONNECTION_ID, Sets.newHashSet(ConfigType.SYNC), Instant.EPOCH); + assertEquals(3, allJobStatuses.size()); + assertEquals(JobStatus.CANCELLED, allJobStatuses.get(0)); + assertEquals(JobStatus.SUCCEEDED, allJobStatuses.get(1)); + assertEquals(JobStatus.FAILED, allJobStatuses.get(2)); + } + + @Test + @DisplayName("Should list jobs statuses of differing job config types") + public void testMultipleConfigTypes() throws IOException { + final Set configTypes = Sets.newHashSet(ConfigType.GET_SPEC, ConfigType.CHECK_CONNECTION_DESTINATION); + final Supplier timeSupplier = incrementingSecondSupplier(NOW); + jobPersistence = new DefaultJobPersistence(jobDatabase, timeSupplier, DEFAULT_MINIMUM_AGE_IN_DAYS, DEFAULT_EXCESSIVE_NUMBER_OF_JOBS, + DEFAULT_MINIMUM_RECENCY_COUNT); + + // pending status + final long failedSpecJobId = jobPersistence.enqueueJob(SCOPE, CHECK_JOB_CONFIG).orElseThrow(); + jobPersistence.failJob(failedSpecJobId); + + // incomplete status + final long incompleteSpecJobId = jobPersistence.enqueueJob(SCOPE, SPEC_JOB_CONFIG).orElseThrow(); + final int attemptNumber = jobPersistence.createAttempt(incompleteSpecJobId, LOG_PATH); + jobPersistence.failAttempt(incompleteSpecJobId, attemptNumber); + + // this job should be ignored since it's not in the configTypes we're querying for + jobPersistence.enqueueJob(SCOPE, SYNC_JOB_CONFIG).orElseThrow(); + + // expect order to 
be from most recent to least recent + final List allJobStatuses = jobPersistence.listJobStatusWithConnection(CONNECTION_ID, configTypes, Instant.EPOCH); + assertEquals(2, allJobStatuses.size()); + assertEquals(JobStatus.INCOMPLETE, allJobStatuses.get(0)); + assertEquals(JobStatus.FAILED, allJobStatuses.get(1)); + } + + } + } diff --git a/airbyte-server/Dockerfile b/airbyte-server/Dockerfile index 54098b909cf1..6a1adec5da58 100644 --- a/airbyte-server/Dockerfile +++ b/airbyte-server/Dockerfile @@ -7,7 +7,7 @@ ENV APPLICATION airbyte-server WORKDIR /app -ADD bin/${APPLICATION}-0.35.55-alpha.tar /app +ADD bin/${APPLICATION}-0.35.59-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.55-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.59-alpha/bin/${APPLICATION}"] diff --git a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java index efaec668edd9..f96f51ad4d7d 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java +++ b/airbyte-server/src/main/java/io/airbyte/server/apis/ConfigurationApi.java @@ -16,6 +16,10 @@ import io.airbyte.api.model.ConnectionSearch; import io.airbyte.api.model.ConnectionState; import io.airbyte.api.model.ConnectionUpdate; +import io.airbyte.api.model.CustomDestinationDefinitionCreate; +import io.airbyte.api.model.CustomDestinationDefinitionUpdate; +import io.airbyte.api.model.CustomSourceDefinitionCreate; +import io.airbyte.api.model.CustomSourceDefinitionUpdate; import io.airbyte.api.model.DbMigrationExecutionRead; import io.airbyte.api.model.DbMigrationReadList; import io.airbyte.api.model.DbMigrationRequestBody; @@ -23,6 +27,7 @@ import io.airbyte.api.model.DestinationCreate; import io.airbyte.api.model.DestinationDefinitionCreate; import io.airbyte.api.model.DestinationDefinitionIdRequestBody; +import io.airbyte.api.model.DestinationDefinitionIdWithWorkspaceId; import io.airbyte.api.model.DestinationDefinitionRead; import io.airbyte.api.model.DestinationDefinitionReadList; import io.airbyte.api.model.DestinationDefinitionSpecificationRead; @@ -51,6 +56,10 @@ import io.airbyte.api.model.OperationReadList; import io.airbyte.api.model.OperationUpdate; import io.airbyte.api.model.OperatorConfiguration; +import io.airbyte.api.model.PrivateDestinationDefinitionRead; +import io.airbyte.api.model.PrivateDestinationDefinitionReadList; +import io.airbyte.api.model.PrivateSourceDefinitionRead; +import io.airbyte.api.model.PrivateSourceDefinitionReadList; import io.airbyte.api.model.SetInstancewideDestinationOauthParamsRequestBody; import io.airbyte.api.model.SetInstancewideSourceOauthParamsRequestBody; import io.airbyte.api.model.SlugRequestBody; @@ -58,11 +67,13 @@ import io.airbyte.api.model.SourceCreate; import io.airbyte.api.model.SourceDefinitionCreate; import io.airbyte.api.model.SourceDefinitionIdRequestBody; +import io.airbyte.api.model.SourceDefinitionIdWithWorkspaceId; import io.airbyte.api.model.SourceDefinitionRead; import io.airbyte.api.model.SourceDefinitionReadList; import io.airbyte.api.model.SourceDefinitionSpecificationRead; import io.airbyte.api.model.SourceDefinitionUpdate; import io.airbyte.api.model.SourceDiscoverSchemaRead; +import io.airbyte.api.model.SourceDiscoverSchemaRequestBody; import io.airbyte.api.model.SourceIdRequestBody; import io.airbyte.api.model.SourceOauthConsentRequest; import 
io.airbyte.api.model.SourceRead; @@ -308,26 +319,51 @@ public SourceDefinitionReadList listSourceDefinitions() { return execute(sourceDefinitionsHandler::listSourceDefinitions); } + @Override + public SourceDefinitionReadList listSourceDefinitionsForWorkspace(final WorkspaceIdRequestBody workspaceIdRequestBody) { + return null; + } + @Override public SourceDefinitionReadList listLatestSourceDefinitions() { return execute(sourceDefinitionsHandler::listLatestSourceDefinitions); } + @Override + public PrivateSourceDefinitionReadList listPrivateSourceDefinitions(final WorkspaceIdRequestBody workspaceIdRequestBody) { + return null; + } + @Override public SourceDefinitionRead getSourceDefinition(final SourceDefinitionIdRequestBody sourceDefinitionIdRequestBody) { return execute(() -> sourceDefinitionsHandler.getSourceDefinition(sourceDefinitionIdRequestBody)); } + @Override + public SourceDefinitionRead getSourceDefinitionForWorkspace(final SourceDefinitionIdWithWorkspaceId sourceDefinitionIdWithWorkspaceId) { + return null; + } + @Override public SourceDefinitionRead createSourceDefinition(final SourceDefinitionCreate sourceDefinitionCreate) { return execute(() -> sourceDefinitionsHandler.createCustomSourceDefinition(sourceDefinitionCreate)); } + @Override + public SourceDefinitionRead createCustomSourceDefinition(final CustomSourceDefinitionCreate customSourceDefinitionCreate) { + return null; + } + @Override public SourceDefinitionRead updateSourceDefinition(final SourceDefinitionUpdate sourceDefinitionUpdate) { return execute(() -> sourceDefinitionsHandler.updateSourceDefinition(sourceDefinitionUpdate)); } + @Override + public SourceDefinitionRead updateCustomSourceDefinition(final CustomSourceDefinitionUpdate customSourceDefinitionUpdate) { + return null; + } + @Override public void deleteSourceDefinition(final SourceDefinitionIdRequestBody sourceDefinitionIdRequestBody) { execute(() -> { @@ -336,6 +372,21 @@ public void deleteSourceDefinition(final SourceDefinitionIdRequestBody sourceDef }); } + @Override + public void deleteCustomSourceDefinition(final SourceDefinitionIdWithWorkspaceId sourceDefinitionIdWithWorkspaceId) { + + } + + @Override + public PrivateSourceDefinitionRead grantSourceDefinitionToWorkspace(final SourceDefinitionIdWithWorkspaceId sourceDefinitionIdWithWorkspaceId) { + return null; + } + + @Override + public void revokeSourceDefinitionFromWorkspace(final SourceDefinitionIdWithWorkspaceId sourceDefinitionIdWithWorkspaceId) { + + } + // SOURCE SPECIFICATION @Override @@ -432,8 +483,8 @@ public CheckConnectionRead checkConnectionToSourceForUpdate(final SourceUpdate s } @Override - public SourceDiscoverSchemaRead discoverSchemaForSource(final SourceIdRequestBody sourceIdRequestBody) { - return execute(() -> schedulerHandler.discoverSchemaForSourceFromSourceId(sourceIdRequestBody)); + public SourceDiscoverSchemaRead discoverSchemaForSource(final SourceDiscoverSchemaRequestBody discoverSchemaRequestBody) { + return execute(() -> schedulerHandler.discoverSchemaForSourceFromSourceId(discoverSchemaRequestBody)); } // DB MIGRATION @@ -455,26 +506,52 @@ public DestinationDefinitionReadList listDestinationDefinitions() { return execute(destinationDefinitionsHandler::listDestinationDefinitions); } + @Override + public DestinationDefinitionReadList listDestinationDefinitionsForWorkspace(final WorkspaceIdRequestBody workspaceIdRequestBody) { + return null; + } + @Override public DestinationDefinitionReadList listLatestDestinationDefinitions() { return 
execute(destinationDefinitionsHandler::listLatestDestinationDefinitions); } + @Override + public PrivateDestinationDefinitionReadList listPrivateDestinationDefinitions(final WorkspaceIdRequestBody workspaceIdRequestBody) { + return null; + } + @Override public DestinationDefinitionRead getDestinationDefinition(final DestinationDefinitionIdRequestBody destinationDefinitionIdRequestBody) { return execute(() -> destinationDefinitionsHandler.getDestinationDefinition(destinationDefinitionIdRequestBody)); } + @Override + public DestinationDefinitionRead getDestinationDefinitionForWorkspace( + final DestinationDefinitionIdWithWorkspaceId destinationDefinitionIdWithWorkspaceId) { + return null; + } + @Override public DestinationDefinitionRead createDestinationDefinition(final DestinationDefinitionCreate destinationDefinitionCreate) { return execute(() -> destinationDefinitionsHandler.createCustomDestinationDefinition(destinationDefinitionCreate)); } + @Override + public DestinationDefinitionRead createCustomDestinationDefinition(final CustomDestinationDefinitionCreate customDestinationDefinitionCreate) { + return null; + } + @Override public DestinationDefinitionRead updateDestinationDefinition(final DestinationDefinitionUpdate destinationDefinitionUpdate) { return execute(() -> destinationDefinitionsHandler.updateDestinationDefinition(destinationDefinitionUpdate)); } + @Override + public DestinationDefinitionRead updateCustomDestinationDefinition(final CustomDestinationDefinitionUpdate customDestinationDefinitionUpdate) { + return null; + } + @Override public void deleteDestinationDefinition(final DestinationDefinitionIdRequestBody destinationDefinitionIdRequestBody) { execute(() -> { @@ -483,10 +560,27 @@ public void deleteDestinationDefinition(final DestinationDefinitionIdRequestBody }); } + @Override + public void deleteCustomDestinationDefinition(final DestinationDefinitionIdWithWorkspaceId destinationDefinitionIdWithWorkspaceId) { + + } + + @Override + public PrivateDestinationDefinitionRead grantDestinationDefinitionToWorkspace( + final DestinationDefinitionIdWithWorkspaceId destinationDefinitionIdWithWorkspaceId) { + return null; + } + + @Override + public void revokeDestinationDefinitionFromWorkspace(final DestinationDefinitionIdWithWorkspaceId destinationDefinitionIdWithWorkspaceId) { + + } + // DESTINATION SPECIFICATION @Override - public DestinationDefinitionSpecificationRead getDestinationDefinitionSpecification(final DestinationDefinitionIdRequestBody destinationDefinitionIdRequestBody) { + public DestinationDefinitionSpecificationRead getDestinationDefinitionSpecification( + final DestinationDefinitionIdRequestBody destinationDefinitionIdRequestBody) { return execute(() -> schedulerHandler.getDestinationSpecification(destinationDefinitionIdRequestBody)); } diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/OperationsHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/OperationsHandler.java index f659816f20a4..338d0fde92a6 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/OperationsHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/OperationsHandler.java @@ -30,6 +30,7 @@ import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.UUID; import java.util.function.Supplier; @@ -183,8 +184,8 @@ public void deleteOperationsForConnection(final StandardSync standardSync, final 
removeOperation(operationId); } } - standardSync.withOperationIds(operationIds); - configRepository.writeStandardSync(standardSync); + + configRepository.updateConnectionOperationIds(standardSync.getConnectionId(), new HashSet<>(operationIds)); } public void deleteOperation(final OperationIdRequestBody operationIdRequestBody) diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java index 3053bdac3664..22cb44c9de22 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/SchedulerHandler.java @@ -6,7 +6,10 @@ import com.fasterxml.jackson.databind.JsonNode; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Charsets; import com.google.common.collect.Lists; +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hashing; import io.airbyte.api.model.AdvancedAuth; import io.airbyte.api.model.AuthSpecification; import io.airbyte.api.model.CheckConnectionRead; @@ -19,17 +22,23 @@ import io.airbyte.api.model.DestinationIdRequestBody; import io.airbyte.api.model.DestinationSyncMode; import io.airbyte.api.model.DestinationUpdate; +import io.airbyte.api.model.JobConfigType; import io.airbyte.api.model.JobIdRequestBody; import io.airbyte.api.model.JobInfoRead; +import io.airbyte.api.model.LogRead; import io.airbyte.api.model.SourceCoreConfig; import io.airbyte.api.model.SourceDefinitionIdRequestBody; import io.airbyte.api.model.SourceDefinitionSpecificationRead; import io.airbyte.api.model.SourceDiscoverSchemaRead; +import io.airbyte.api.model.SourceDiscoverSchemaRequestBody; import io.airbyte.api.model.SourceIdRequestBody; import io.airbyte.api.model.SourceUpdate; +import io.airbyte.api.model.SynchronousJobRead; import io.airbyte.commons.docker.DockerUtils; import io.airbyte.commons.enums.Enums; import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.commons.json.Jsons; +import io.airbyte.config.ActorCatalog; import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.DestinationConnection; import io.airbyte.config.JobConfig.ConfigType; @@ -68,6 +77,7 @@ import io.temporal.api.workflowservice.v1.RequestCancelWorkflowExecutionRequest; import io.temporal.serviceclient.WorkflowServiceStubs; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.UUID; @@ -77,6 +87,7 @@ public class SchedulerHandler { private static final Logger LOGGER = LoggerFactory.getLogger(SchedulerHandler.class); + private static final HashFunction HASH_FUNCTION = Hashing.md5(); private final ConfigRepository configRepository; private final SecretsRepositoryWriter secretsRepositoryWriter; @@ -241,13 +252,35 @@ public CheckConnectionRead checkDestinationConnectionFromDestinationIdForUpdate( return checkDestinationConnectionFromDestinationCreate(destinationCoreConfig); } - public SourceDiscoverSchemaRead discoverSchemaForSourceFromSourceId(final SourceIdRequestBody sourceIdRequestBody) + public SourceDiscoverSchemaRead discoverSchemaForSourceFromSourceId(final SourceDiscoverSchemaRequestBody discoverSchemaRequestBody) throws ConfigNotFoundException, IOException, JsonValidationException { - final SourceConnection source = configRepository.getSourceConnection(sourceIdRequestBody.getSourceId()); + final SourceConnection source = 
configRepository.getSourceConnection(discoverSchemaRequestBody.getSourceId()); final StandardSourceDefinition sourceDef = configRepository.getStandardSourceDefinition(source.getSourceDefinitionId()); final String imageName = DockerUtils.getTaggedImageName(sourceDef.getDockerRepository(), sourceDef.getDockerImageTag()); - final SynchronousResponse response = synchronousSchedulerClient.createDiscoverSchemaJob(source, imageName); - return discoverJobToOutput(response); + + final String configHash = HASH_FUNCTION.hashBytes(Jsons.serialize(source.getConfiguration()).getBytes( + Charsets.UTF_8)).toString(); + final String connectorVersion = sourceDef.getDockerImageTag(); + final Optional currentCatalog = + configRepository.getSourceCatalog(discoverSchemaRequestBody.getSourceId(), configHash, connectorVersion); + final boolean bustActorCatalogCache = discoverSchemaRequestBody.getDisableCache() != null && discoverSchemaRequestBody.getDisableCache(); + if (currentCatalog.isEmpty() || bustActorCatalogCache) { + final SynchronousResponse response = synchronousSchedulerClient.createDiscoverSchemaJob(source, imageName); + configRepository.writeActorCatalogFetchEvent(response.getOutput(), source.getSourceId(), configHash, connectorVersion); + return discoverJobToOutput(response); + } + final AirbyteCatalog airbyteCatalog = Jsons.object(currentCatalog.get().getCatalog(), AirbyteCatalog.class); + final SynchronousJobRead emptyJob = new SynchronousJobRead() + .configId("NoConfiguration") + .configType(JobConfigType.DISCOVER_SCHEMA) + .id(UUID.randomUUID()) + .createdAt(0L) + .endedAt(0L) + .logs(new LogRead().logLines(new ArrayList<>())) + .succeeded(true); + return new SourceDiscoverSchemaRead() + .catalog(CatalogConverter.toApi(airbyteCatalog)) + .jobInfo(emptyJob); } public SourceDiscoverSchemaRead discoverSchemaForSourceFromSourceCreate(final SourceCoreConfig sourceCreate) diff --git a/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java b/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java index 854968f77d15..64f2c5c8b277 100644 --- a/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java +++ b/airbyte-server/src/main/java/io/airbyte/server/handlers/WebBackendConnectionsHandler.java @@ -30,6 +30,7 @@ import io.airbyte.api.model.OperationReadList; import io.airbyte.api.model.OperationUpdate; import io.airbyte.api.model.SourceDiscoverSchemaRead; +import io.airbyte.api.model.SourceDiscoverSchemaRequestBody; import io.airbyte.api.model.SourceIdRequestBody; import io.airbyte.api.model.SourceRead; import io.airbyte.api.model.WebBackendConnectionCreate; @@ -181,8 +182,8 @@ public WebBackendConnectionRead webBackendGetConnection(final WebBackendConnecti final ConnectionRead connection = connectionsHandler.getConnection(connectionIdRequestBody.getConnectionId()); if (MoreBooleans.isTruthy(webBackendConnectionRequestBody.getWithRefreshedCatalog())) { - final SourceIdRequestBody sourceId = new SourceIdRequestBody().sourceId(connection.getSourceId()); - final SourceDiscoverSchemaRead discoverSchema = schedulerHandler.discoverSchemaForSourceFromSourceId(sourceId); + final SourceDiscoverSchemaRequestBody discoverSchemaReadReq = new SourceDiscoverSchemaRequestBody().sourceId(connection.getSourceId()); + final SourceDiscoverSchemaRead discoverSchema = schedulerHandler.discoverSchemaForSourceFromSourceId(discoverSchemaReadReq); final AirbyteCatalog original = connection.getSyncCatalog(); final AirbyteCatalog 
discovered = discoverSchema.getCatalog(); diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/OperationsHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/OperationsHandlerTest.java index c0464fa38677..d32bc34877ff 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/OperationsHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/OperationsHandlerTest.java @@ -30,9 +30,12 @@ import io.airbyte.config.persistence.ConfigRepository; import io.airbyte.validation.json.JsonValidationException; import java.io.IOException; +import java.util.Collections; import java.util.List; import java.util.UUID; import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -193,23 +196,34 @@ void testDeleteOperation() throws JsonValidationException, IOException, ConfigNo @Test void testDeleteOperationsForConnection() throws JsonValidationException, IOException, ConfigNotFoundException { + final UUID syncConnectionId = UUID.randomUUID(); + final UUID otherConnectionId = UUID.randomUUID(); final UUID operationId = UUID.randomUUID(); - final List toDelete = List.of(standardSyncOperation.getOperationId(), operationId); + final UUID remainingOperationId = UUID.randomUUID(); + final List toDelete = Stream.of(standardSyncOperation.getOperationId(), operationId).collect(Collectors.toList()); final StandardSync sync = new StandardSync() - .withConnectionId(UUID.randomUUID()) - .withOperationIds(List.of(standardSyncOperation.getOperationId(), operationId)); + .withConnectionId(syncConnectionId) + .withOperationIds(List.of(standardSyncOperation.getOperationId(), operationId, remainingOperationId)); when(configRepository.listStandardSyncs()).thenReturn(List.of( sync, new StandardSync() - .withConnectionId(UUID.randomUUID()) + .withConnectionId(otherConnectionId) .withOperationIds(List.of(standardSyncOperation.getOperationId())))); final StandardSyncOperation operation = new StandardSyncOperation().withOperationId(operationId); + final StandardSyncOperation remainingOperation = new StandardSyncOperation().withOperationId(remainingOperationId); when(configRepository.getStandardSyncOperation(operationId)).thenReturn(operation); + when(configRepository.getStandardSyncOperation(remainingOperationId)).thenReturn(remainingOperation); when(configRepository.getStandardSyncOperation(standardSyncOperation.getOperationId())).thenReturn(standardSyncOperation); + // first, test that a remaining operation results in proper call operationsHandler.deleteOperationsForConnection(sync, toDelete); - verify(configRepository).writeStandardSyncOperation(operation.withTombstone(true)); + verify(configRepository).updateConnectionOperationIds(syncConnectionId, Collections.singleton(remainingOperationId)); + + // next, test that removing all operations results in proper call + toDelete.add(remainingOperationId); + operationsHandler.deleteOperationsForConnection(sync, toDelete); + verify(configRepository).updateConnectionOperationIds(syncConnectionId, Collections.emptySet()); } @Test diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java index b039e5cf645b..c0c0c1af37db 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java +++ 
b/airbyte-server/src/test/java/io/airbyte/server/handlers/SchedulerHandlerTest.java @@ -14,6 +14,7 @@ import static org.mockito.Mockito.RETURNS_DEEP_STUBS; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -32,6 +33,7 @@ import io.airbyte.api.model.SourceDefinitionIdRequestBody; import io.airbyte.api.model.SourceDefinitionSpecificationRead; import io.airbyte.api.model.SourceDiscoverSchemaRead; +import io.airbyte.api.model.SourceDiscoverSchemaRequestBody; import io.airbyte.api.model.SourceIdRequestBody; import io.airbyte.api.model.SourceUpdate; import io.airbyte.commons.docker.DockerUtils; @@ -39,6 +41,7 @@ import io.airbyte.commons.features.FeatureFlags; import io.airbyte.commons.json.Jsons; import io.airbyte.commons.lang.Exceptions; +import io.airbyte.config.ActorCatalog; import io.airbyte.config.ActorDefinitionResourceRequirements; import io.airbyte.config.Configs.WorkerEnvironment; import io.airbyte.config.DestinationConnection; @@ -381,12 +384,14 @@ void testCheckDestinationConnectionFromUpdate() throws IOException, JsonValidati @Test void testDiscoverSchemaForSourceFromSourceId() throws IOException, JsonValidationException, ConfigNotFoundException { final SourceConnection source = SourceHelpers.generateSource(UUID.randomUUID()); - final SourceIdRequestBody request = new SourceIdRequestBody().sourceId(source.getSourceId()); + final SourceDiscoverSchemaRequestBody request = new SourceDiscoverSchemaRequestBody().sourceId(source.getSourceId()); final SynchronousResponse discoverResponse = (SynchronousResponse) jobResponse; final SynchronousJobMetadata metadata = mock(SynchronousJobMetadata.class); when(discoverResponse.isSuccess()).thenReturn(true); - when(discoverResponse.getOutput()).thenReturn(CatalogHelpers.createAirbyteCatalog("shoes", Field.of("sku", JsonSchemaType.STRING))); + final AirbyteCatalog airbyteCatalog = CatalogHelpers.createAirbyteCatalog("shoes", + Field.of("sku", JsonSchemaType.STRING)); + when(discoverResponse.getOutput()).thenReturn(airbyteCatalog); when(discoverResponse.getMetadata()).thenReturn(metadata); when(metadata.isSucceeded()).thenReturn(true); @@ -396,6 +401,7 @@ void testDiscoverSchemaForSourceFromSourceId() throws IOException, JsonValidatio .withDockerImageTag(SOURCE_DOCKER_TAG) .withSourceDefinitionId(source.getSourceDefinitionId())); when(configRepository.getSourceConnection(source.getSourceId())).thenReturn(source); + when(configRepository.getSourceCatalog(any(), any(), any())).thenReturn(Optional.empty()); when(synchronousSchedulerClient.createDiscoverSchemaJob(source, SOURCE_DOCKER_IMAGE)) .thenReturn(discoverResponse); @@ -405,13 +411,93 @@ void testDiscoverSchemaForSourceFromSourceId() throws IOException, JsonValidatio assertNotNull(actual.getJobInfo()); assertTrue(actual.getJobInfo().getSucceeded()); verify(configRepository).getSourceConnection(source.getSourceId()); + verify(configRepository).getSourceCatalog(eq(request.getSourceId()), any(), eq(SOURCE_DOCKER_TAG)); + verify(configRepository).writeActorCatalogFetchEvent(eq(airbyteCatalog), eq(source.getSourceId()), any(), eq(SOURCE_DOCKER_TAG)); + verify(synchronousSchedulerClient).createDiscoverSchemaJob(source, SOURCE_DOCKER_IMAGE); + } + + @Test + void testDiscoverSchemaForSourceFromSourceIdCachedCatalog() throws IOException, JsonValidationException, ConfigNotFoundException { + final 
SourceConnection source = SourceHelpers.generateSource(UUID.randomUUID()); + final SourceDiscoverSchemaRequestBody request = new SourceDiscoverSchemaRequestBody().sourceId(source.getSourceId()); + + final SynchronousResponse discoverResponse = (SynchronousResponse) jobResponse; + final SynchronousJobMetadata metadata = mock(SynchronousJobMetadata.class); + when(discoverResponse.isSuccess()).thenReturn(true); + final AirbyteCatalog airbyteCatalog = CatalogHelpers.createAirbyteCatalog("shoes", + Field.of("sku", JsonSchemaType.STRING)); + when(discoverResponse.getOutput()).thenReturn(airbyteCatalog); + when(discoverResponse.getMetadata()).thenReturn(metadata); + when(metadata.isSucceeded()).thenReturn(true); + + when(configRepository.getStandardSourceDefinition(source.getSourceDefinitionId())) + .thenReturn(new StandardSourceDefinition() + .withDockerRepository(SOURCE_DOCKER_REPO) + .withDockerImageTag(SOURCE_DOCKER_TAG) + .withSourceDefinitionId(source.getSourceDefinitionId())); + when(configRepository.getSourceConnection(source.getSourceId())).thenReturn(source); + final ActorCatalog actorCatalog = new ActorCatalog() + .withCatalog(Jsons.jsonNode(airbyteCatalog)) + .withCatalogHash("") + .withId(UUID.randomUUID()); + when(configRepository.getSourceCatalog(any(), any(), any())).thenReturn(Optional.of(actorCatalog)); + when(synchronousSchedulerClient.createDiscoverSchemaJob(source, SOURCE_DOCKER_IMAGE)) + .thenReturn(discoverResponse); + + final SourceDiscoverSchemaRead actual = schedulerHandler.discoverSchemaForSourceFromSourceId(request); + + assertNotNull(actual.getCatalog()); + assertNotNull(actual.getJobInfo()); + assertTrue(actual.getJobInfo().getSucceeded()); + verify(configRepository).getSourceConnection(source.getSourceId()); + verify(configRepository).getSourceCatalog(eq(request.getSourceId()), any(), any()); + verify(configRepository, never()).writeActorCatalogFetchEvent(any(), any(), any(), any()); + verify(synchronousSchedulerClient, never()).createDiscoverSchemaJob(source, SOURCE_DOCKER_IMAGE); + } + + @Test + void testDiscoverSchemaForSourceFromSourceIdDisableCache() throws IOException, JsonValidationException, ConfigNotFoundException { + final SourceConnection source = SourceHelpers.generateSource(UUID.randomUUID()); + final SourceDiscoverSchemaRequestBody request = new SourceDiscoverSchemaRequestBody().sourceId(source.getSourceId()).disableCache(true); + + final SynchronousResponse discoverResponse = (SynchronousResponse) jobResponse; + final SynchronousJobMetadata metadata = mock(SynchronousJobMetadata.class); + when(discoverResponse.isSuccess()).thenReturn(true); + final AirbyteCatalog airbyteCatalog = CatalogHelpers.createAirbyteCatalog("shoes", + Field.of("sku", JsonSchemaType.STRING)); + when(discoverResponse.getOutput()).thenReturn(airbyteCatalog); + when(discoverResponse.getMetadata()).thenReturn(metadata); + when(metadata.isSucceeded()).thenReturn(true); + + when(configRepository.getStandardSourceDefinition(source.getSourceDefinitionId())) + .thenReturn(new StandardSourceDefinition() + .withDockerRepository(SOURCE_DOCKER_REPO) + .withDockerImageTag(SOURCE_DOCKER_TAG) + .withSourceDefinitionId(source.getSourceDefinitionId())); + when(configRepository.getSourceConnection(source.getSourceId())).thenReturn(source); + final ActorCatalog actorCatalog = new ActorCatalog() + .withCatalog(Jsons.jsonNode(airbyteCatalog)) + .withCatalogHash("") + .withId(UUID.randomUUID()); + when(configRepository.getSourceCatalog(any(), any(), any())).thenReturn(Optional.of(actorCatalog)); + 
when(synchronousSchedulerClient.createDiscoverSchemaJob(source, SOURCE_DOCKER_IMAGE)) + .thenReturn(discoverResponse); + + final SourceDiscoverSchemaRead actual = schedulerHandler.discoverSchemaForSourceFromSourceId(request); + + assertNotNull(actual.getCatalog()); + assertNotNull(actual.getJobInfo()); + assertTrue(actual.getJobInfo().getSucceeded()); + verify(configRepository).getSourceConnection(source.getSourceId()); + verify(configRepository).getSourceCatalog(eq(request.getSourceId()), any(), any()); + verify(configRepository).writeActorCatalogFetchEvent(any(), any(), any(), any()); verify(synchronousSchedulerClient).createDiscoverSchemaJob(source, SOURCE_DOCKER_IMAGE); } @Test void testDiscoverSchemaForSourceFromSourceIdFailed() throws IOException, JsonValidationException, ConfigNotFoundException { final SourceConnection source = SourceHelpers.generateSource(UUID.randomUUID()); - final SourceIdRequestBody request = new SourceIdRequestBody().sourceId(source.getSourceId()); + final SourceDiscoverSchemaRequestBody request = new SourceDiscoverSchemaRequestBody().sourceId(source.getSourceId()); when(configRepository.getStandardSourceDefinition(source.getSourceDefinitionId())) .thenReturn(new StandardSourceDefinition() diff --git a/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java b/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java index 202f1329df90..89ecc2771459 100644 --- a/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java +++ b/airbyte-server/src/test/java/io/airbyte/server/handlers/WebBackendConnectionsHandlerTest.java @@ -42,6 +42,7 @@ import io.airbyte.api.model.OperationUpdate; import io.airbyte.api.model.ResourceRequirements; import io.airbyte.api.model.SourceDiscoverSchemaRead; +import io.airbyte.api.model.SourceDiscoverSchemaRequestBody; import io.airbyte.api.model.SourceIdRequestBody; import io.airbyte.api.model.SourceRead; import io.airbyte.api.model.SyncMode; @@ -196,7 +197,9 @@ public void setup() throws IOException, JsonValidationException, ConfigNotFoundE final AirbyteCatalog modifiedCatalog = ConnectionHelpers.generateBasicApiCatalog(); - when(schedulerHandler.discoverSchemaForSourceFromSourceId(sourceIdRequestBody)).thenReturn( + final SourceDiscoverSchemaRequestBody sourceDiscoverSchema = new SourceDiscoverSchemaRequestBody(); + sourceDiscoverSchema.setSourceId(connectionRead.getSourceId()); + when(schedulerHandler.discoverSchemaForSourceFromSourceId(sourceDiscoverSchema)).thenReturn( new SourceDiscoverSchemaRead() .jobInfo(mock(SynchronousJobRead.class)) .catalog(modifiedCatalog)); diff --git a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java index 908382c03ead..7fb338c6109c 100644 --- a/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java +++ b/airbyte-tests/src/acceptanceTests/java/io/airbyte/test/acceptance/AcceptanceTests.java @@ -65,6 +65,7 @@ import io.airbyte.api.client.model.SourceDefinitionIdRequestBody; import io.airbyte.api.client.model.SourceDefinitionRead; import io.airbyte.api.client.model.SourceDefinitionSpecificationRead; +import io.airbyte.api.client.model.SourceDiscoverSchemaRequestBody; import io.airbyte.api.client.model.SourceIdRequestBody; import io.airbyte.api.client.model.SourceRead; import io.airbyte.api.client.model.SyncMode; @@ -288,20 +289,22 @@ public 
void tearDown() throws ApiException, SQLException { destinationPsql.stop(); } - for (final UUID sourceId : sourceIds) { - deleteSource(sourceId); + for (final UUID operationId : operationIds) { + deleteOperation(operationId); } for (final UUID connectionId : connectionIds) { disableConnection(connectionId); } + for (final UUID sourceId : sourceIds) { + deleteSource(sourceId); + } + for (final UUID destinationId : destinationIds) { deleteDestination(destinationId); } - for (final UUID operationId : operationIds) { - deleteOperation(operationId); - } + } @Test @@ -1145,7 +1148,7 @@ public void testCancelSyncWhenCancelledWhenWorkerIsNotRunning() throws Exception } private AirbyteCatalog discoverSourceSchema(final UUID sourceId) throws ApiException { - return apiClient.getSourceApi().discoverSchemaForSource(new SourceIdRequestBody().sourceId(sourceId)).getCatalog(); + return apiClient.getSourceApi().discoverSchemaForSource(new SourceDiscoverSchemaRequestBody().sourceId(sourceId)).getCatalog(); } private void assertSourceAndDestinationDbInSync(final boolean withScdTable) throws Exception { diff --git a/airbyte-webapp/package-lock.json b/airbyte-webapp/package-lock.json index 8085b85e4e5b..3dedc45e7aec 100644 --- a/airbyte-webapp/package-lock.json +++ b/airbyte-webapp/package-lock.json @@ -1,12 +1,12 @@ { "name": "airbyte-webapp", - "version": "0.35.55-alpha", + "version": "0.35.59-alpha", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "airbyte-webapp", - "version": "0.35.55-alpha", + "version": "0.35.59-alpha", "dependencies": { "@fortawesome/fontawesome-svg-core": "^1.2.36", "@fortawesome/free-brands-svg-icons": "^5.15.4", @@ -14,7 +14,6 @@ "@fortawesome/free-solid-svg-icons": "^5.15.4", "@fortawesome/react-fontawesome": "^0.1.17", "@fullstory/browser": "^1.5.0", - "@rest-hooks/legacy": "^2.0.5", "@sentry/react": "^6.17.9", "@sentry/tracing": "^6.17.9", "dayjs": "^1.10.7", @@ -5494,21 +5493,6 @@ "@rest-hooks/normalizr": "^6.0.9" } }, - "node_modules/@rest-hooks/legacy": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@rest-hooks/legacy/-/legacy-2.0.5.tgz", - "integrity": "sha512-5hPqHkvQ5iC7QJSjUpufMNlg3fhMUJ2kVS24QczkJlj2usj0ffuHMN24TKqByLDlRdmaFAH0ebqnF0La170bSg==", - "peerDependencies": { - "@rest-hooks/core": "^1.0.7", - "@types/react": "^16.8.4 || ^17.0.0", - "react": "^16.8.4 || ^17.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, "node_modules/@rest-hooks/normalizr": { "version": "6.0.9", "resolved": "https://registry.npmjs.org/@rest-hooks/normalizr/-/normalizr-6.0.9.tgz", @@ -48662,12 +48646,6 @@ "@rest-hooks/normalizr": "^6.0.9" } }, - "@rest-hooks/legacy": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@rest-hooks/legacy/-/legacy-2.0.5.tgz", - "integrity": "sha512-5hPqHkvQ5iC7QJSjUpufMNlg3fhMUJ2kVS24QczkJlj2usj0ffuHMN24TKqByLDlRdmaFAH0ebqnF0La170bSg==", - "requires": {} - }, "@rest-hooks/normalizr": { "version": "6.0.9", "resolved": "https://registry.npmjs.org/@rest-hooks/normalizr/-/normalizr-6.0.9.tgz", diff --git a/airbyte-webapp/package.json b/airbyte-webapp/package.json index 97fd8a890c6b..388d145e666c 100644 --- a/airbyte-webapp/package.json +++ b/airbyte-webapp/package.json @@ -1,6 +1,6 @@ { "name": "airbyte-webapp", - "version": "0.35.55-alpha", + "version": "0.35.59-alpha", "private": true, "engines": { "node": ">=16.0.0" @@ -21,7 +21,6 @@ "@fortawesome/free-solid-svg-icons": "^5.15.4", "@fortawesome/react-fontawesome": "^0.1.17", "@fullstory/browser": "^1.5.0", - 
"@rest-hooks/legacy": "^2.0.5", "@sentry/react": "^6.17.9", "@sentry/tracing": "^6.17.9", "dayjs": "^1.10.7", diff --git a/airbyte-webapp/src/App.tsx b/airbyte-webapp/src/App.tsx index 793b39bdf722..bf370ce8dd4e 100644 --- a/airbyte-webapp/src/App.tsx +++ b/airbyte-webapp/src/App.tsx @@ -1,8 +1,6 @@ import React, { Suspense } from "react"; import { ThemeProvider } from "styled-components"; import { IntlProvider } from "react-intl"; -import { CacheProvider } from "rest-hooks"; -import { QueryClient, QueryClientProvider } from "react-query"; import { BrowserRouter as Router } from "react-router-dom"; import en from "./locales/en.json"; @@ -26,6 +24,7 @@ import { windowConfigProvider, } from "./config"; import { WorkspaceServiceProvider } from "./services/workspaces/WorkspacesService"; +import { StoreProvider } from "views/common/StoreProvider"; const StyleProvider: React.FC = ({ children }) => ( @@ -46,20 +45,6 @@ const I18NProvider: React.FC = ({ children }) => ( ); -const queryClient = new QueryClient({ - defaultOptions: { - queries: { - suspense: true, - }, - }, -}); - -const StoreProvider: React.FC = ({ children }) => ( - - {children} - -); - const configProviders: ValueProvider = [ envConfigProvider, windowConfigProvider, diff --git a/airbyte-webapp/src/config/configProviders.ts b/airbyte-webapp/src/config/configProviders.ts index 8922d7e32408..40738c6d4d08 100644 --- a/airbyte-webapp/src/config/configProviders.ts +++ b/airbyte-webapp/src/config/configProviders.ts @@ -8,6 +8,7 @@ const windowConfigProvider: ConfigProvider = async () => { enabled: isDefined(window.TRACKING_STRATEGY) ? window.TRACKING_STRATEGY === "segment" : undefined, + token: window.SEGMENT_TOKEN, }, apiUrl: window.API_URL, version: window.AIRBYTE_VERSION, diff --git a/airbyte-webapp/src/config/types.ts b/airbyte-webapp/src/config/types.ts index fed45ba423c1..d31ad3e28410 100644 --- a/airbyte-webapp/src/config/types.ts +++ b/airbyte-webapp/src/config/types.ts @@ -13,6 +13,7 @@ declare global { REACT_APP_WEBAPP_TAG?: string; REACT_APP_INTERCOM_APP_ID?: string; REACT_APP_INTEGRATION_DOCS_URLS?: string; + SEGMENT_TOKEN?: string; analytics: SegmentAnalytics; // API_URL to hack rest-hooks resources diff --git a/airbyte-webapp/src/config/uiConfig.ts b/airbyte-webapp/src/config/uiConfig.ts index 73aec893d473..16cb2c4ab264 100644 --- a/airbyte-webapp/src/config/uiConfig.ts +++ b/airbyte-webapp/src/config/uiConfig.ts @@ -7,6 +7,7 @@ const uiConfig = { helpLink: "https://airbyte.com/community", gitLink: "https://docs.airbyte.com/quickstart/deploy-airbyte", updateLink: `${BASE_DOCS_LINK}/upgrading-airbyte`, + productReleaseStages: `${BASE_DOCS_LINK}/project-overview/product-release-stages`, slackLink: "https://slack.airbyte.com", docsLink: BASE_DOCS_LINK, configurationArchiveLink: `${BASE_DOCS_LINK}/tutorials/upgrading-airbyte`, diff --git a/airbyte-webapp/src/core/domain/connector/DestinationDefinitionSpecificationService.ts b/airbyte-webapp/src/core/domain/connector/DestinationDefinitionSpecificationService.ts new file mode 100644 index 000000000000..9a1371bb1d84 --- /dev/null +++ b/airbyte-webapp/src/core/domain/connector/DestinationDefinitionSpecificationService.ts @@ -0,0 +1,18 @@ +import { AirbyteRequestService } from "core/request/AirbyteRequestService"; +import { DestinationDefinitionSpecification } from "./types"; + +class DestinationDefinitionSpecificationService extends AirbyteRequestService { + get url(): string { + return "destination_definition_specifications"; + } + + public get( + destinationDefinitionId: string + 
): Promise { + return this.fetch(`${this.url}/get`, { + destinationDefinitionId, + }); + } +} + +export { DestinationDefinitionSpecificationService }; diff --git a/airbyte-webapp/src/core/domain/connector/SourceDefinitionSpecificationService.ts b/airbyte-webapp/src/core/domain/connector/SourceDefinitionSpecificationService.ts new file mode 100644 index 000000000000..c05b803516e3 --- /dev/null +++ b/airbyte-webapp/src/core/domain/connector/SourceDefinitionSpecificationService.ts @@ -0,0 +1,18 @@ +import { AirbyteRequestService } from "core/request/AirbyteRequestService"; +import { SourceDefinitionSpecification } from "./types"; + +class SourceDefinitionSpecificationService extends AirbyteRequestService { + get url(): string { + return "source_definition_specifications"; + } + + public get( + sourceDefinitionId: string + ): Promise { + return this.fetch(`${this.url}/get`, { + sourceDefinitionId, + }); + } +} + +export { SourceDefinitionSpecificationService }; diff --git a/airbyte-webapp/src/core/resources/DestinationDefinitionSpecification.ts b/airbyte-webapp/src/core/resources/DestinationDefinitionSpecification.ts deleted file mode 100644 index 909aea5f67ec..000000000000 --- a/airbyte-webapp/src/core/resources/DestinationDefinitionSpecification.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { ReadShape, Resource, SchemaDetail } from "rest-hooks"; - -import { ConnectionSpecification } from "core/domain/connection"; -import { DestinationSyncMode } from "core/domain/catalog"; -import { DestinationDefinitionSpecification } from "core/domain/connector/types"; - -import BaseResource from "./BaseResource"; - -export default class DestinationDefinitionSpecificationResource - extends BaseResource - implements DestinationDefinitionSpecification { - readonly destinationDefinitionId: string = ""; - readonly documentationUrl: string = ""; - readonly connectionSpecification: ConnectionSpecification = { - properties: {}, - required: [""], - }; - readonly supportedDestinationSyncModes: DestinationSyncMode[] = []; - readonly supportsDbt: boolean = false; - readonly supportsNormalization: boolean = false; - - pk(): string { - return this.destinationDefinitionId?.toString(); - } - - static urlRoot = "destination_definition_specifications"; - - static detailShape( - this: T - ): ReadShape> { - return { - ...super.detailShape(), - schema: this, - }; - } -} diff --git a/airbyte-webapp/src/core/resources/Schema.ts b/airbyte-webapp/src/core/resources/Schema.ts index e38ac21ed778..a500aa781e6b 100644 --- a/airbyte-webapp/src/core/resources/Schema.ts +++ b/airbyte-webapp/src/core/resources/Schema.ts @@ -3,7 +3,7 @@ import { ReadShape, Resource, SchemaDetail } from "rest-hooks"; import BaseResource from "./BaseResource"; import { SourceDiscoverSchemaRead, SyncSchema } from "core/domain/catalog"; import { toInnerModel } from "core/domain/catalog/fieldUtil"; -import { JobInfo } from "core/domain/job/Job"; +import { JobInfo } from "core/domain/job"; export interface Schema extends SourceDiscoverSchemaRead { id: string; diff --git a/airbyte-webapp/src/core/resources/SourceDefinitionSpecification.ts b/airbyte-webapp/src/core/resources/SourceDefinitionSpecification.ts deleted file mode 100644 index b781b9a4eb9f..000000000000 --- a/airbyte-webapp/src/core/resources/SourceDefinitionSpecification.ts +++ /dev/null @@ -1,30 +0,0 @@ -import { ReadShape, Resource, SchemaDetail } from "rest-hooks"; -import BaseResource from "./BaseResource"; -import { ConnectionSpecification } from "core/domain/connection"; -import { 
SourceDefinitionSpecification } from "core/domain/connector/types"; - -export default class SourceDefinitionSpecificationResource - extends BaseResource - implements SourceDefinitionSpecification { - readonly sourceDefinitionId: string = ""; - readonly documentationUrl: string = ""; - readonly connectionSpecification: ConnectionSpecification = { - properties: {}, - required: [], - }; - - pk(): string { - return this.sourceDefinitionId?.toString(); - } - - static urlRoot = "source_definition_specifications"; - - static detailShape( - this: T - ): ReadShape> { - return { - ...super.detailShape(), - schema: this, - }; - } -} diff --git a/airbyte-webapp/src/hooks/services/useDestinationHook.tsx b/airbyte-webapp/src/hooks/services/useDestinationHook.tsx index 83b848545140..b2abc0cf2865 100644 --- a/airbyte-webapp/src/hooks/services/useDestinationHook.tsx +++ b/airbyte-webapp/src/hooks/services/useDestinationHook.tsx @@ -1,20 +1,15 @@ import { useCallback } from "react"; import { useFetcher, useResource } from "rest-hooks"; -import { useStatefulResource } from "@rest-hooks/legacy"; import DestinationResource from "core/resources/Destination"; import ConnectionResource, { Connection } from "core/resources/Connection"; import { RoutePaths } from "pages/routes"; import useRouter from "../useRouter"; -import DestinationDefinitionSpecificationResource from "core/resources/DestinationDefinitionSpecification"; import SchedulerResource, { Scheduler } from "core/resources/Scheduler"; import { ConnectionConfiguration } from "core/domain/connection"; import useWorkspace from "./useWorkspace"; import { useAnalyticsService } from "hooks/services/Analytics/useAnalyticsService"; -import { - Destination, - DestinationDefinitionSpecification, -} from "core/domain/connector"; +import { Destination } from "core/domain/connector"; type ValuesProps = { name: string; @@ -24,46 +19,6 @@ type ValuesProps = { type ConnectorProps = { name: string; destinationDefinitionId: string }; -export const useDestinationDefinitionSpecificationLoad = ( - destinationDefinitionId: string | null -): { - isLoading: boolean; - destinationDefinitionSpecification?: DestinationDefinitionSpecification; - sourceDefinitionError?: Error; -} => { - const { - loading: isLoading, - error, - data: destinationDefinitionSpecification, - } = useStatefulResource( - DestinationDefinitionSpecificationResource.detailShape(), - destinationDefinitionId - ? 
{ - destinationDefinitionId, - } - : null - ); - - return { - destinationDefinitionSpecification, - sourceDefinitionError: error, - isLoading, - }; -}; - -export const useDestinationDefinitionSpecificationLoadAsync = ( - destinationDefinitionId: string -): DestinationDefinitionSpecification => { - const definition = useResource( - DestinationDefinitionSpecificationResource.detailShape(), - { - destinationDefinitionId, - } - ); - - return definition; -}; - type DestinationService = { checkDestinationConnection: ({ destinationId, diff --git a/airbyte-webapp/src/hooks/services/useSourceHook.tsx b/airbyte-webapp/src/hooks/services/useSourceHook.tsx index 563d63b921f1..1f6ad2da9f4b 100644 --- a/airbyte-webapp/src/hooks/services/useSourceHook.tsx +++ b/airbyte-webapp/src/hooks/services/useSourceHook.tsx @@ -1,18 +1,16 @@ import { useCallback } from "react"; import { useFetcher, useResource } from "rest-hooks"; -import { useStatefulResource } from "@rest-hooks/legacy"; import SourceResource from "core/resources/Source"; import { RoutePaths } from "pages/routes"; import ConnectionResource, { Connection } from "core/resources/Connection"; -import SourceDefinitionSpecificationResource from "core/resources/SourceDefinitionSpecification"; import SchedulerResource, { Scheduler } from "core/resources/Scheduler"; import { ConnectionConfiguration } from "core/domain/connection"; import useWorkspace from "./useWorkspace"; import useRouter from "hooks/useRouter"; import { useAnalyticsService } from "hooks/services/Analytics/useAnalyticsService"; -import { Source, SourceDefinitionSpecification } from "core/domain/connector"; +import { Source } from "core/domain/connector"; type ValuesProps = { name: string; @@ -23,29 +21,6 @@ type ValuesProps = { type ConnectorProps = { name: string; sourceDefinitionId: string }; -export const useSourceDefinitionSpecificationLoad = ( - sourceDefinitionId: string -): { - isLoading: boolean; - sourceDefinitionError?: Error; - sourceDefinitionSpecification?: SourceDefinitionSpecification; -} => { - const { - loading: isLoading, - error: sourceDefinitionError, - data: sourceDefinitionSpecification, - } = useStatefulResource( - SourceDefinitionSpecificationResource.detailShape(), - sourceDefinitionId - ? { - sourceDefinitionId, - } - : null - ); - - return { sourceDefinitionSpecification, sourceDefinitionError, isLoading }; -}; - type SourceService = { recreateSource: (recreateSourcePayload: { values: ValuesProps; diff --git a/airbyte-webapp/src/locales/en.json b/airbyte-webapp/src/locales/en.json index bbef0c5d7383..dfe9b85e5c8c 100644 --- a/airbyte-webapp/src/locales/en.json +++ b/airbyte-webapp/src/locales/en.json @@ -17,7 +17,7 @@ "sidebar.joinSlack": "Join our Slack", "sidebar.status": "Airbyte status", "sidebar.chat": "Chat with us", - "sidebar.recipes": "Recipes - Use cases", + "sidebar.recipes": "Tutorials - Use cases", "form.continue": "Continue", "form.yourEmail": "Your email", @@ -464,8 +464,8 @@ "connector.releaseStage.beta": "beta", "connector.releaseStage.custom": "custom", "connector.releaseStage.generally_available": "generally available", - "connector.connectorsInDevelopment.alpha": "Alpha connectors are in development. We strongly discourage production use cases and do not offer support SLAs. 
The release may not be feature complete and breaking changes may be introduced.", - "connector.connectorsInDevelopment.beta": "Beta connectors are considered stable and reliable with no backwards incompatible changes but has yet to be used by a larger group of users so we expect there to be some small issues and bugs to iron out.", + "connector.connectorsInDevelopment.alpha": "Alpha connectors are in development and support is not provided. See our documentation for more details.", + "connector.connectorsInDevelopment.beta": "Beta connectors are in development but stable and reliable and support is provided. See our documentation for more details.", "credits.credits": "Credits", "credits.whatAreCredits": "What are credits?", "credits.buyCredits": "+ Buy credits", diff --git a/airbyte-webapp/src/packages/cloud/App.tsx b/airbyte-webapp/src/packages/cloud/App.tsx index 76e95d54ef3b..4666139500e7 100644 --- a/airbyte-webapp/src/packages/cloud/App.tsx +++ b/airbyte-webapp/src/packages/cloud/App.tsx @@ -1,9 +1,7 @@ import React, { Suspense } from "react"; import { ThemeProvider } from "styled-components"; import { IntlProvider } from "react-intl"; -import { CacheProvider } from "rest-hooks"; import { BrowserRouter as Router } from "react-router-dom"; -import { QueryClient, QueryClientProvider } from "react-query"; import en from "locales/en.json"; import cloudLocales from "packages/cloud/locales/en.json"; @@ -20,6 +18,7 @@ import { AuthenticationProvider } from "packages/cloud/services/auth/AuthService import { AppServicesProvider } from "./services/AppServicesProvider"; import { IntercomProvider } from "./services/thirdParty/intercom/IntercomProvider"; import { ConfigProvider } from "./services/ConfigProvider"; +import { StoreProvider } from "views/common/StoreProvider"; const messages = Object.assign({}, en, cloudLocales); @@ -42,36 +41,20 @@ const StyleProvider: React.FC = ({ children }) => ( ); -const queryClient = new QueryClient({ - defaultOptions: { - queries: { - suspense: true, - }, - }, -}); - -const StoreProvider: React.FC = ({ children }) => ( - - {children} - -); - const Services: React.FC = ({ children }) => ( - - - - - - - - {children} - - - - - - - + + + + + + + {children} + + + + + + ); const App: React.FC = () => { @@ -81,11 +64,13 @@ const App: React.FC = () => { }> - - - - - + + + + + + + diff --git a/airbyte-webapp/src/packages/cloud/cloudRoutes.tsx b/airbyte-webapp/src/packages/cloud/cloudRoutes.tsx index 19e8d59186f5..0f6a3515dd0f 100644 --- a/airbyte-webapp/src/packages/cloud/cloudRoutes.tsx +++ b/airbyte-webapp/src/packages/cloud/cloudRoutes.tsx @@ -34,6 +34,8 @@ import { VerifyEmailAction } from "./views/FirebaseActionRoute"; import { RoutePaths } from "pages/routes"; import useRouter from "hooks/useRouter"; import { storeUtmFromQuery } from "utils/utmStorage"; +import { DefaultView } from "./views/DefaultView"; +import { hasFromState } from "utils/stateUtils"; export const CloudRoutes = { Root: "/", @@ -117,11 +119,11 @@ const MainViewRoutes = () => { key={r} path={`${r}/*`} element={ - + hasFromState(location.state) ? 
( + + ) : ( + + ) } /> ) @@ -136,10 +138,7 @@ const MainViewRoutes = () => { } /> - } - /> + } /> ); }; diff --git a/airbyte-webapp/src/packages/cloud/lib/auth/GoogleAuthService.ts b/airbyte-webapp/src/packages/cloud/lib/auth/GoogleAuthService.ts index 031a7f150e2b..d20af327f32e 100644 --- a/airbyte-webapp/src/packages/cloud/lib/auth/GoogleAuthService.ts +++ b/airbyte-webapp/src/packages/cloud/lib/auth/GoogleAuthService.ts @@ -61,6 +61,7 @@ export class GoogleAuthService implements AuthService { case AuthErrorCodes.INVALID_EMAIL: throw new FieldError("email", ErrorCodes.Invalid); case AuthErrorCodes.USER_CANCELLED: + case AuthErrorCodes.USER_DISABLED: throw new FieldError("email", "disabled"); case AuthErrorCodes.USER_DELETED: throw new FieldError("email", "notfound"); @@ -125,11 +126,11 @@ export class GoogleAuthService implements AuthService { await updateEmail(user, email); } catch (e) { switch (e.code) { - case "auth/invalid-email": + case AuthErrorCodes.INVALID_EMAIL: throw new FieldError("email", ErrorCodes.Invalid); - case "auth/email-already-in-use": + case AuthErrorCodes.EMAIL_EXISTS: throw new FieldError("email", ErrorCodes.Duplicate); - case "auth/requires-recent-login": + case AuthErrorCodes.CREDENTIAL_TOO_OLD_LOGIN_AGAIN: throw new Error("auth/requires-recent-login"); } } diff --git a/airbyte-webapp/src/packages/cloud/locales/en.json b/airbyte-webapp/src/packages/cloud/locales/en.json index 9886a1be4df2..55f4d298f7ca 100644 --- a/airbyte-webapp/src/packages/cloud/locales/en.json +++ b/airbyte-webapp/src/packages/cloud/locales/en.json @@ -114,6 +114,8 @@ "signup.password.minLength": "Password should be at least 6 characters", "email.duplicate": "Email already exists", + "email.notfound": "Email not found", + "email.disabled": "Your account is disabled", "password.validation": "Your password is too weak", "password.invalid": "Invalid password" } diff --git a/airbyte-webapp/src/packages/cloud/services/ConfigProvider.tsx b/airbyte-webapp/src/packages/cloud/services/ConfigProvider.tsx index ffe9957cfdd7..1bf43374edab 100644 --- a/airbyte-webapp/src/packages/cloud/services/ConfigProvider.tsx +++ b/airbyte-webapp/src/packages/cloud/services/ConfigProvider.tsx @@ -19,8 +19,8 @@ const configProviders: ValueProvider = [ // fileConfigProvider, cloudEnvConfigProvider, cloudWindowConfigProvider, - windowConfigProvider, envConfigProvider, + windowConfigProvider, ]; /** diff --git a/airbyte-webapp/src/packages/cloud/views/DefaultView.tsx b/airbyte-webapp/src/packages/cloud/views/DefaultView.tsx new file mode 100644 index 000000000000..53fd7282e48a --- /dev/null +++ b/airbyte-webapp/src/packages/cloud/views/DefaultView.tsx @@ -0,0 +1,21 @@ +import { RoutePaths } from "pages/routes"; +import { Navigate } from "react-router-dom"; +import { CloudRoutes } from "../cloudRoutes"; +import { useListCloudWorkspaces } from "../services/workspaces/WorkspacesService"; + +export const DefaultView: React.FC = () => { + const workspaces = useListCloudWorkspaces(); + + // Only show the workspace creation list if there is more than one workspace + // otherwise redirect to the single workspace + return ( + 1 + ? 
`/${CloudRoutes.SelectWorkspace}` + : `/${RoutePaths.Workspaces}/${workspaces[0].workspaceId}` + } + replace + /> + ); +}; diff --git a/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/ReplicationView.tsx b/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/ReplicationView.tsx index cf84396fc629..e8679a7eab3a 100644 --- a/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/ReplicationView.tsx +++ b/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/ReplicationView.tsx @@ -5,20 +5,18 @@ import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; import { faSyncAlt } from "@fortawesome/free-solid-svg-icons"; import { useAsyncFn } from "react-use"; -import { Button } from "components"; +import { Button, Card } from "components"; import useConnection, { useConnectionLoad, ValuesProps, } from "hooks/services/useConnectionHook"; import ConnectionForm from "views/Connection/ConnectionForm"; -import TransferFormCard from "views/Connection/ConnectionForm/TransferFormCard"; import ResetDataModal from "components/ResetDataModal"; import { ModalTypes } from "components/ResetDataModal/types"; import LoadingSchema from "components/LoadingSchema"; import { equal } from "utils/objects"; import { ConnectionNamespaceDefinition } from "core/domain/connection"; -import { CollapsibleCard } from "views/Connection/CollapsibleCard"; type IProps = { onAfterSaveSchema: () => void; @@ -31,25 +29,11 @@ const Content = styled.div` padding-bottom: 10px; `; -const TitleContainer = styled.div<{ hasButton: boolean }>` - display: flex; - flex-direction: row; - justify-content: space-between; - align-items: center; - margin: ${({ hasButton }) => (hasButton ? "-5px 0" : 0)}; -`; - const TryArrow = styled(FontAwesomeIcon)` margin: 0 10px -1px 0; font-size: 14px; `; -const Title = styled.div` - display: flex; - justify-content: space-between; - align-items: center; -`; - const Message = styled.div` font-weight: 500; font-size: 12px; @@ -160,17 +144,7 @@ const ReplicationView: React.FC = ({ return ( - - - - - - - } - > + {!isRefreshingCatalog && connection ? ( = ({ ) : ( )} - + {isModalOpen ? 
( setIsUpdateModalOpen(false)} diff --git a/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/TransformationView.tsx b/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/TransformationView.tsx index 0d677b6c2458..90574e4b5fdf 100644 --- a/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/TransformationView.tsx +++ b/airbyte-webapp/src/pages/ConnectionPage/pages/ConnectionItemPage/components/TransformationView.tsx @@ -21,9 +21,9 @@ import { } from "core/domain/connection"; import useConnection from "hooks/services/useConnectionHook"; import { useCurrentWorkspace } from "hooks/services/useWorkspace"; -import { useDestinationDefinitionSpecificationLoadAsync } from "hooks/services/useDestinationHook"; import { ContentCard, H4 } from "components"; import { FeatureItem, useFeatureService } from "hooks/services/Feature"; +import { useGetDestinationDefinitionSpecification } from "services/connector/DestinationDefinitionSpecificationService"; type TransformationViewProps = { connection: Connection; @@ -107,7 +107,7 @@ const NormalizationCard: React.FC<{ const TransformationView: React.FC = ({ connection, }) => { - const definition = useDestinationDefinitionSpecificationLoadAsync( + const definition = useGetDestinationDefinitionSpecification( connection.destination.destinationDefinitionId ); const { updateConnection } = useConnection(); diff --git a/airbyte-webapp/src/pages/DestinationPage/pages/CreateDestinationPage/components/DestinationForm.tsx b/airbyte-webapp/src/pages/DestinationPage/pages/CreateDestinationPage/components/DestinationForm.tsx index b68f34e2610c..fc2e05096a6d 100644 --- a/airbyte-webapp/src/pages/DestinationPage/pages/CreateDestinationPage/components/DestinationForm.tsx +++ b/airbyte-webapp/src/pages/DestinationPage/pages/CreateDestinationPage/components/DestinationForm.tsx @@ -1,13 +1,14 @@ import React, { useState } from "react"; import { FormattedMessage } from "react-intl"; + import useRouter from "hooks/useRouter"; -import { useDestinationDefinitionSpecificationLoad } from "hooks/services/useDestinationHook"; import { createFormErrorMessage } from "utils/errorStatusMessage"; import { ConnectionConfiguration } from "core/domain/connection"; import { useAnalyticsService } from "hooks/services/Analytics/useAnalyticsService"; import { LogsRequestError } from "core/request/LogsRequestError"; import { ConnectorCard } from "views/Connector/ConnectorCard"; import { DestinationDefinition } from "core/domain/connector"; +import { useGetDestinationDefinitionSpecificationAsync } from "services/connector/DestinationDefinitionSpecificationService"; type IProps = { onSubmit: (values: { @@ -46,13 +47,13 @@ const DestinationForm: React.FC = ({ const [destinationDefinitionId, setDestinationDefinitionId] = useState( hasDestinationDefinitionId(location.state) ? 
location.state.destinationDefinitionId - : "" + : null ); const { - destinationDefinitionSpecification, + data: destinationDefinitionSpecification, isLoading, - sourceDefinitionError, - } = useDestinationDefinitionSpecificationLoad(destinationDefinitionId); + error: destinationDefinitionError, + } = useGetDestinationDefinitionSpecificationAsync(destinationDefinitionId); const onDropDownSelect = (destinationDefinitionId: string) => { setDestinationDefinitionId(destinationDefinitionId); @@ -87,7 +88,7 @@ const DestinationForm: React.FC = ({ return ( = ({ null ); - const destinationSpecification = useDestinationDefinitionSpecificationLoadAsync( + const destinationSpecification = useGetDestinationDefinitionSpecification( currentDestination.destinationDefinitionId ); diff --git a/airbyte-webapp/src/pages/OnboardingPage/components/DestinationStep.tsx b/airbyte-webapp/src/pages/OnboardingPage/components/DestinationStep.tsx index 16d9c0b47110..203e40a2552d 100644 --- a/airbyte-webapp/src/pages/OnboardingPage/components/DestinationStep.tsx +++ b/airbyte-webapp/src/pages/OnboardingPage/components/DestinationStep.tsx @@ -3,7 +3,6 @@ import { FormattedMessage } from "react-intl"; import { LogsRequestError } from "core/request/LogsRequestError"; -import { useDestinationDefinitionSpecificationLoad } from "hooks/services/useDestinationHook"; import { createFormErrorMessage } from "utils/errorStatusMessage"; import { ConnectionConfiguration } from "core/domain/connection"; import { DestinationDefinition } from "core/domain/connector"; @@ -12,6 +11,7 @@ import { ConnectorCard } from "views/Connector/ConnectorCard"; import TitlesBlock from "./TitlesBlock"; import HighlightedText from "./HighlightedText"; import { useAnalyticsService } from "hooks/services/Analytics/useAnalyticsService"; +import { useGetDestinationDefinitionSpecificationAsync } from "services/connector/DestinationDefinitionSpecificationService"; type IProps = { availableServices: DestinationDefinition[]; @@ -33,11 +33,13 @@ const DestinationStep: React.FC = ({ error, afterSelectConnector, }) => { - const [destinationDefinitionId, setDestinationDefinitionId] = useState(""); + const [destinationDefinitionId, setDestinationDefinitionId] = useState< + string | null + >(null); const { - destinationDefinitionSpecification, + data: destinationDefinitionSpecification, isLoading, - } = useDestinationDefinitionSpecificationLoad(destinationDefinitionId); + } = useGetDestinationDefinitionSpecificationAsync(destinationDefinitionId); const analyticsService = useAnalyticsService(); diff --git a/airbyte-webapp/src/pages/OnboardingPage/components/SourceStep.tsx b/airbyte-webapp/src/pages/OnboardingPage/components/SourceStep.tsx index 7ef870ac0362..0cdda1802267 100644 --- a/airbyte-webapp/src/pages/OnboardingPage/components/SourceStep.tsx +++ b/airbyte-webapp/src/pages/OnboardingPage/components/SourceStep.tsx @@ -4,13 +4,12 @@ import { FormattedMessage } from "react-intl"; import { ConnectionConfiguration } from "core/domain/connection"; import { LogsRequestError } from "core/request/LogsRequestError"; import { ConnectorCard } from "views/Connector/ConnectorCard"; - -import { useSourceDefinitionSpecificationLoad } from "hooks/services/useSourceHook"; import { createFormErrorMessage } from "utils/errorStatusMessage"; import { useAnalyticsService } from "hooks/services/Analytics/useAnalyticsService"; import HighlightedText from "./HighlightedText"; import TitlesBlock from "./TitlesBlock"; import { SourceDefinition } from "core/domain/connector"; +import { 
useGetSourceDefinitionSpecificationAsync } from "services/connector/SourceDefinitionSpecificationService"; type IProps = { onSubmit: (values: { @@ -32,13 +31,15 @@ const SourceStep: React.FC = ({ error, afterSelectConnector, }) => { - const [sourceDefinitionId, setSourceDefinitionId] = useState(""); + const [sourceDefinitionId, setSourceDefinitionId] = useState( + null + ); const analyticsService = useAnalyticsService(); const { - sourceDefinitionSpecification, + data: sourceDefinitionSpecification, isLoading, - } = useSourceDefinitionSpecificationLoad(sourceDefinitionId); + } = useGetSourceDefinitionSpecificationAsync(sourceDefinitionId); const onServiceSelect = (sourceId: string) => { const sourceDefinition = availableServices.find( diff --git a/airbyte-webapp/src/pages/SourcesPage/pages/CreateSourcePage/components/SourceForm.tsx b/airbyte-webapp/src/pages/SourcesPage/pages/CreateSourcePage/components/SourceForm.tsx index b4f95392f25f..725e02178d05 100644 --- a/airbyte-webapp/src/pages/SourcesPage/pages/CreateSourcePage/components/SourceForm.tsx +++ b/airbyte-webapp/src/pages/SourcesPage/pages/CreateSourcePage/components/SourceForm.tsx @@ -2,13 +2,13 @@ import React, { useState } from "react"; import { FormattedMessage } from "react-intl"; import useRouter from "hooks/useRouter"; -import { useSourceDefinitionSpecificationLoad } from "hooks/services/useSourceHook"; import { createFormErrorMessage } from "utils/errorStatusMessage"; import { ConnectionConfiguration } from "core/domain/connection"; import { useAnalyticsService } from "hooks/services/Analytics/useAnalyticsService"; import { LogsRequestError } from "core/request/LogsRequestError"; import { ConnectorCard } from "views/Connector/ConnectorCard"; import { SourceDefinition } from "core/domain/connector"; +import { useGetSourceDefinitionSpecificationAsync } from "services/connector/SourceDefinitionSpecificationService"; type IProps = { onSubmit: (values: { @@ -44,17 +44,17 @@ const SourceForm: React.FC = ({ const { location } = useRouter(); const analyticsService = useAnalyticsService(); - const [sourceDefinitionId, setSourceDefinitionId] = useState( + const [sourceDefinitionId, setSourceDefinitionId] = useState( hasSourceDefinitionId(location.state) ? 
location.state.sourceDefinitionId - : "" + : null ); const { - sourceDefinitionSpecification, - sourceDefinitionError, + data: sourceDefinitionSpecification, + error: sourceDefinitionError, isLoading, - } = useSourceDefinitionSpecificationLoad(sourceDefinitionId); + } = useGetSourceDefinitionSpecificationAsync(sourceDefinitionId); const onDropDownSelect = (sourceDefinitionId: string) => { setSourceDefinitionId(sourceDefinitionId); diff --git a/airbyte-webapp/src/pages/SourcesPage/pages/SourceItemPage/components/SourceSettings.tsx b/airbyte-webapp/src/pages/SourcesPage/pages/SourceItemPage/components/SourceSettings.tsx index ca61c0d7277f..13ca7d7f17ac 100644 --- a/airbyte-webapp/src/pages/SourcesPage/pages/SourceItemPage/components/SourceSettings.tsx +++ b/airbyte-webapp/src/pages/SourcesPage/pages/SourceItemPage/components/SourceSettings.tsx @@ -4,7 +4,6 @@ import { FormattedMessage } from "react-intl"; import { useResource } from "rest-hooks"; import useSource from "hooks/services/useSourceHook"; -import SourceDefinitionSpecificationResource from "core/resources/SourceDefinitionSpecification"; import DeleteBlock from "components/DeleteBlock"; import { Connection } from "core/resources/Connection"; import { createFormErrorMessage } from "utils/errorStatusMessage"; @@ -13,6 +12,7 @@ import SourceDefinitionResource from "core/resources/SourceDefinition"; import { LogsRequestError } from "core/request/LogsRequestError"; import { ConnectorCard } from "views/Connector/ConnectorCard"; import { Source } from "core/domain/connector"; +import { useGetSourceDefinitionSpecification } from "services/connector/SourceDefinitionSpecificationService"; const Content = styled.div` max-width: 813px; @@ -35,11 +35,8 @@ const SourceSettings: React.FC = ({ const { updateSource, deleteSource, checkSourceConnection } = useSource(); - const sourceDefinitionSpecification = useResource( - SourceDefinitionSpecificationResource.detailShape(), - { - sourceDefinitionId: currentSource.sourceDefinitionId, - } + const sourceDefinitionSpecification = useGetSourceDefinitionSpecification( + currentSource.sourceDefinitionId ); const sourceDefinition = useResource(SourceDefinitionResource.detailShape(), { sourceDefinitionId: currentSource.sourceDefinitionId, diff --git a/airbyte-webapp/src/services/connector/DestinationDefinitionSpecificationService.tsx b/airbyte-webapp/src/services/connector/DestinationDefinitionSpecificationService.tsx new file mode 100644 index 000000000000..25ed0e57231f --- /dev/null +++ b/airbyte-webapp/src/services/connector/DestinationDefinitionSpecificationService.tsx @@ -0,0 +1,59 @@ +import { + QueryObserverResult, + QueryObserverSuccessResult, + useQuery, +} from "react-query"; + +import { DestinationDefinitionSpecification } from "core/domain/connector"; +import { useConfig } from "config"; +import { useDefaultRequestMiddlewares } from "services/useDefaultRequestMiddlewares"; +import { useInitService } from "services/useInitService"; +import { DestinationDefinitionSpecificationService } from "core/domain/connector/DestinationDefinitionSpecificationService"; +import { isDefined } from "utils/common"; + +export const destinationDefinitionSpecificationKeys = { + all: ["destinationDefinitionSpecification"] as const, + detail: (id: string | number) => + [...destinationDefinitionSpecificationKeys.all, "details", id] as const, +}; + +function useGetService(): DestinationDefinitionSpecificationService { + const { apiUrl } = useConfig(); + + const requestAuthMiddleware = useDefaultRequestMiddlewares(); + + 
return useInitService( + () => + new DestinationDefinitionSpecificationService( + apiUrl, + requestAuthMiddleware + ), + [apiUrl, requestAuthMiddleware] + ); +} + +export const useGetDestinationDefinitionSpecification = ( + id: string +): DestinationDefinitionSpecification => { + const service = useGetService(); + + return (useQuery(destinationDefinitionSpecificationKeys.detail(id), () => + service.get(id) + ) as QueryObserverSuccessResult).data; +}; + +export const useGetDestinationDefinitionSpecificationAsync = ( + id: string | null +): QueryObserverResult => { + const service = useGetService(); + + const escapedId = id ?? ""; + return useQuery( + destinationDefinitionSpecificationKeys.detail(escapedId), + () => service.get(escapedId), + { + suspense: false, + enabled: isDefined(id), + } + ); +}; diff --git a/airbyte-webapp/src/services/connector/SourceDefinitionSpecificationService.tsx b/airbyte-webapp/src/services/connector/SourceDefinitionSpecificationService.tsx new file mode 100644 index 000000000000..f4033b0f6b21 --- /dev/null +++ b/airbyte-webapp/src/services/connector/SourceDefinitionSpecificationService.tsx @@ -0,0 +1,56 @@ +import { + QueryObserverResult, + QueryObserverSuccessResult, + useQuery, +} from "react-query"; + +import { SourceDefinitionSpecification } from "core/domain/connector"; +import { useConfig } from "config"; +import { useDefaultRequestMiddlewares } from "services/useDefaultRequestMiddlewares"; +import { useInitService } from "services/useInitService"; +import { SourceDefinitionSpecificationService } from "core/domain/connector/SourceDefinitionSpecificationService"; +import { isDefined } from "utils/common"; + +export const sourceDefinitionSpecificationKeys = { + all: ["sourceDefinitionSpecification"] as const, + detail: (id: string | number) => + [...sourceDefinitionSpecificationKeys.all, "details", id] as const, +}; + +function useGetService(): SourceDefinitionSpecificationService { + const { apiUrl } = useConfig(); + + const requestAuthMiddleware = useDefaultRequestMiddlewares(); + + return useInitService( + () => + new SourceDefinitionSpecificationService(apiUrl, requestAuthMiddleware), + [apiUrl, requestAuthMiddleware] + ); +} + +export const useGetSourceDefinitionSpecification = ( + id: string +): SourceDefinitionSpecification => { + const service = useGetService(); + + return (useQuery(sourceDefinitionSpecificationKeys.detail(id), () => + service.get(id) + ) as QueryObserverSuccessResult).data; +}; + +export const useGetSourceDefinitionSpecificationAsync = ( + id: string | null +): QueryObserverResult => { + const service = useGetService(); + + const escapedId = id ?? 
""; + return useQuery( + sourceDefinitionSpecificationKeys.detail(escapedId), + () => service.get(escapedId), + { + suspense: false, + enabled: isDefined(id), + } + ); +}; diff --git a/airbyte-webapp/src/theme.ts b/airbyte-webapp/src/theme.ts index 0d1a8fe46919..8844bbe9df70 100644 --- a/airbyte-webapp/src/theme.ts +++ b/airbyte-webapp/src/theme.ts @@ -13,6 +13,7 @@ export const theme = { dangerColor: "#FF5E7B", warningColor: "#FFBF00", + warningBackgroundColor: "rgba(255, 191, 0, 0.2)", lightDangerColor: "#FEEDEE", dangerTransparentColor: "rgba(247, 77, 88, 0.1)", attentionColor: "#FFBD2E", diff --git a/airbyte-webapp/src/utils/stateUtils.ts b/airbyte-webapp/src/utils/stateUtils.ts new file mode 100644 index 000000000000..108478915fc1 --- /dev/null +++ b/airbyte-webapp/src/utils/stateUtils.ts @@ -0,0 +1,5 @@ +import type { Location } from "react-router-dom"; + +export function hasFromState(state: unknown): state is { from: Location } { + return typeof state === "object" && state !== null && "from" in state; +} diff --git a/airbyte-webapp/src/views/Connection/ConnectionForm/ConnectionForm.tsx b/airbyte-webapp/src/views/Connection/ConnectionForm/ConnectionForm.tsx index 59c68eba26f9..44c65a6f67a3 100644 --- a/airbyte-webapp/src/views/Connection/ConnectionForm/ConnectionForm.tsx +++ b/airbyte-webapp/src/views/Connection/ConnectionForm/ConnectionForm.tsx @@ -7,9 +7,15 @@ import ResetDataModal from "components/ResetDataModal"; import { ModalTypes } from "components/ResetDataModal/types"; import { equal } from "utils/objects"; -import { ControlLabels, DropDown, DropDownRow, Input, Label } from "components"; +import { + ControlLabels, + DropDown, + DropDownRow, + H5, + Input, + Label, +} from "components"; -import { useDestinationDefinitionSpecificationLoadAsync } from "hooks/services/useDestinationHook"; import useWorkspace from "hooks/services/useWorkspace"; import { createFormErrorMessage } from "utils/errorStatusMessage"; @@ -28,6 +34,7 @@ import { useInitialValues, } from "./formConfig"; import { OperationsSection } from "./components/OperationsSection"; +import { useGetDestinationDefinitionSpecification } from "services/connector/DestinationDefinitionSpecificationService"; const EditLaterMessage = styled(Label)` margin: -20px 0 29px; @@ -39,14 +46,6 @@ const ConnectorLabel = styled(ControlLabels)` vertical-align: top; `; -const FormContainer = styled(Form)` - padding: 15px 20px; - - & > div:not(:last-child) { - margin-bottom: 20px; - } -`; - const NamespaceFormatLabel = styled(ControlLabels)` flex: 5 0 0; display: flex; @@ -63,6 +62,31 @@ export const FlexRow = styled.div` gap: 10px; `; +const StyledSection = styled.div` + padding: 15px 20px; + + & > div:not(:last-child) { + margin-bottom: 20px; + } +`; + +const Header = styled(H5)` + margin-bottom: 16px; +`; + +const Section: React.FC<{ title: React.ReactNode }> = (props) => ( + +
{props.title}
+ {props.children} +
+); + +const FormContainer = styled(Form)` + & > ${StyledSection}:not(:last-child) { + box-shadow: 0 1px 0 rgba(139, 139, 160, 0.25); + } +`; + type ConnectionFormProps = { onSubmit: (values: ConnectionFormValues) => void; className?: string; @@ -96,7 +120,7 @@ const ConnectionForm: React.FC = ({ additionalSchemaControl, connection, }) => { - const destDefinition = useDestinationDefinitionSpecificationLoadAsync( + const destDefinition = useGetDestinationDefinitionSpecification( connection.destination.destinationDefinitionId ); @@ -162,8 +186,7 @@ const ConnectionForm: React.FC = ({ > {({ isSubmitting, setFieldValue, isValid, dirty, resetForm, values }) => ( - {/* in create mode schedule is part of form */} - {!isEditMode && ( +
}> {({ field, meta }: FieldProps) => ( = ({ )} - )} - - - - {({ field }: FieldProps) => ( - - +
}> + + + + {({ field }: FieldProps) => ( + - - )} - - - {values.namespaceDefinition === - ConnectionNamespaceDefinition.CustomFormat && ( - - {({ field, meta }: FieldProps) => ( - - } - message={ - - } - > - - - )} - - )} - - {isEditMode ? ( - { - resetForm(); - if (onCancel) { - onCancel(); - } - }} - successMessage={successMessage} - errorMessage={ - errorMessage || !isValid - ? formatMessage({ id: "connectionForm.validation.error" }) - : null + > + + + )} + + + {values.namespaceDefinition === + ConnectionNamespaceDefinition.CustomFormat && ( + + {({ field, meta }: FieldProps) => ( + + } + message={ + + } + > + + + )} + + )} + - ) : ( - <> - - } - /> - { + resetForm(); + if (onCancel) { + onCancel(); + } + }} + successMessage={successMessage} errorMessage={ errorMessage || !isValid ? formatMessage({ id: "connectionForm.validation.error" }) : null } + editSchemeMode={editSchemeMode} /> - - )} + ) : ( + <> + + } + /> + + + )} +
{modalIsOpen && ( void; - connection?: Connection; -}; - -const transferFormValidationSchema = yup.object({ - schedule: yup - .object({ - units: yup.number().required("form.empty.error"), - timeUnit: yup.string().required("form.empty.error"), - }) - .nullable() - .defined("form.empty.error"), -}); - -const TransferFormCard: React.FC = ({ connection }) => { - const { updateConnection } = useConnection(); - const formatMessage = useIntl().formatMessage; - - const onSubmit = async (values: { schedule: ScheduleProperties }) => { - if (connection) { - return await updateConnection({ - schedule: values.schedule, - connectionId: connection.connectionId, - namespaceDefinition: connection.namespaceDefinition, - namespaceFormat: connection.namespaceFormat, - status: connection.status, - prefix: connection.prefix, - syncCatalog: connection.syncCatalog, - operations: connection.operations, - }); - } - - return null; - }; - - const frequencies = useFrequencyDropdownData(); - - return ( - } - > - - {({ field, meta, form }: FieldProps) => ( - - form.setFieldValue(field.name, item.value)} - /> - - )} - - - ); -}; - -export default TransferFormCard; diff --git a/airbyte-webapp/src/views/Connection/ConnectionForm/formConfig.tsx b/airbyte-webapp/src/views/Connection/ConnectionForm/formConfig.tsx index 5912ca6406c9..b31a2c25f6ac 100644 --- a/airbyte-webapp/src/views/Connection/ConnectionForm/formConfig.tsx +++ b/airbyte-webapp/src/views/Connection/ConnectionForm/formConfig.tsx @@ -49,6 +49,11 @@ const SUPPORTED_MODES: [SyncMode, DestinationSyncMode][] = [ [SyncMode.Incremental, DestinationSyncMode.Dedupted], ]; +const DEFAULT_SCHEDULE: ScheduleProperties = { + units: 24, + timeUnit: ConnectionSchedule.Hours, +}; + function useDefaultTransformation(): Transformation { const { workspace } = useWorkspace(); @@ -282,10 +287,10 @@ const useInitialValues = ( return useMemo(() => { const initialValues: FormikConnectionFormValues = { syncCatalog: initialSchema, - schedule: connection.schedule ?? { - units: 24, - timeUnit: ConnectionSchedule.Hours, - }, + schedule: + connection.schedule !== undefined + ? connection.schedule + : DEFAULT_SCHEDULE, prefix: connection.prefix || "", namespaceDefinition: connection.namespaceDefinition, namespaceFormat: connection.namespaceFormat ?? 
SOURCE_NAMESPACE_TAG, diff --git a/airbyte-webapp/src/views/Connector/ServiceForm/FormRoot.tsx b/airbyte-webapp/src/views/Connector/ServiceForm/FormRoot.tsx index 6847a19627ef..98877eab28ee 100644 --- a/airbyte-webapp/src/views/Connector/ServiceForm/FormRoot.tsx +++ b/airbyte-webapp/src/views/Connector/ServiceForm/FormRoot.tsx @@ -31,7 +31,7 @@ const FormRoot: React.FC<{ hasSuccess?: boolean; additionBottomControls?: React.ReactNode; errorMessage?: React.ReactNode; - fetchingConnectorError?: Error; + fetchingConnectorError?: Error | null; successMessage?: React.ReactNode; onRetest?: () => void; }> = ({ diff --git a/airbyte-webapp/src/views/Connector/ServiceForm/ServiceForm.test.tsx b/airbyte-webapp/src/views/Connector/ServiceForm/ServiceForm.test.tsx index 6e1d4fc6481a..20dd70cbc629 100644 --- a/airbyte-webapp/src/views/Connector/ServiceForm/ServiceForm.test.tsx +++ b/airbyte-webapp/src/views/Connector/ServiceForm/ServiceForm.test.tsx @@ -100,6 +100,14 @@ const schema: AirbyteJSONSchema = { jest.mock("hooks/services/Analytics"); +jest.mock("hooks/services/useWorkspace", () => ({ + useCurrentWorkspace: () => ({ + workspace: { + workspaceId: "workspaceId", + }, + }), +})); + describe("Service Form", () => { describe("should display json schema specs", () => { let container: HTMLElement; diff --git a/airbyte-webapp/src/views/Connector/ServiceForm/ServiceForm.tsx b/airbyte-webapp/src/views/Connector/ServiceForm/ServiceForm.tsx index fba933e4635e..1143d0fbe9ed 100644 --- a/airbyte-webapp/src/views/Connector/ServiceForm/ServiceForm.tsx +++ b/airbyte-webapp/src/views/Connector/ServiceForm/ServiceForm.tsx @@ -39,7 +39,7 @@ export type ServiceFormProps = { formValues?: Partial; hasSuccess?: boolean; additionBottomControls?: React.ReactNode; - fetchingConnectorError?: Error; + fetchingConnectorError?: Error | null; errorMessage?: React.ReactNode; successMessage?: React.ReactNode; }; diff --git a/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/ConnectorServiceTypeControl.tsx b/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/ConnectorServiceTypeControl.tsx index 6280e09eae1b..be353d58ce93 100644 --- a/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/ConnectorServiceTypeControl.tsx +++ b/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/ConnectorServiceTypeControl.tsx @@ -5,14 +5,9 @@ import { components } from "react-select"; import { MenuListComponentProps } from "react-select/src/components/Menu"; import styled from "styled-components"; import { WarningMessage } from "../WarningMessage"; +import { useCurrentWorkspace } from "hooks/services/useWorkspace"; -import { - ControlLabels, - defaultDataItemSort, - DropDown, - DropDownRow, - ImageBlock, -} from "components"; +import { ControlLabels, DropDown, DropDownRow, ImageBlock } from "components"; import { FormBaseItem } from "core/form/types"; import { @@ -33,6 +28,7 @@ import { ItemView as SingleValueView, } from "components/base/DropDown/components/SingleValue"; import { useAnalyticsService } from "hooks/services/Analytics"; +import { naturalComparator } from "utils/objects"; const BottomElement = styled.div` background: ${(props) => props.theme.greyColro0}; @@ -87,6 +83,16 @@ const SingleValueContent = styled(components.SingleValue)` type MenuWithRequestButtonProps = MenuListComponentProps; +/** + * Can be used to overwrite the alphabetical order of connectors in the select. 
+ * A higher positive number will put the given connector to the top of the list + * a low negative number to the end of it. + */ +const ORDER_OVERWRITE: Record = { + // Push Google Sheets connector to top + "71607ba1-c0ac-4799-8049-7f4b90dd50f7": 1, +}; + const ConnectorList: React.FC = ({ children, ...props @@ -178,13 +184,16 @@ const ConnectorServiceTypeControl: React.FC<{ // This way, they will not be available for usage in new connections, but they will be available for users // already leveraging them. // TODO End hack + const workspace = useCurrentWorkspace(); const disallowedOauthConnectors = // I would prefer to use windowConfigProvider.cloud but that function is async window.CLOUD === "true" ? [ "200330b2-ea62-4d11-ac6d-cfe3e3f8ab2b", // Snapchat "2470e835-feaf-4db6-96f3-70fd645acc77", // Salesforce Singer - "9da77001-af33-4bcd-be46-6252bf9342b9", // Shopify + ...(workspace.workspaceId !== "54135667-ce73-4820-a93c-29fe1510d348" // Shopify workspace for review + ? ["9da77001-af33-4bcd-be46-6252bf9342b9"] // Shopify + : []), ] : []; const sortedDropDownData = useMemo( @@ -199,7 +208,14 @@ const ConnectorServiceTypeControl: React.FC<{ img: , releaseStage: item.releaseStage, })) - .sort(defaultDataItemSort), + .sort((a, b) => { + const priorityA = ORDER_OVERWRITE[a.value] ?? 0; + const priorityB = ORDER_OVERWRITE[b.value] ?? 0; + // If they have different priority use the higher priority first, otherwise use the label + return priorityA !== priorityB + ? priorityB - priorityA + : naturalComparator(a.label, b.label); + }), // eslint-disable-next-line react-hooks/exhaustive-deps [availableServices] ); diff --git a/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/Instruction.tsx b/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/Instruction.tsx index 8cbc3f58338b..dd0864ca0793 100644 --- a/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/Instruction.tsx +++ b/airbyte-webapp/src/views/Connector/ServiceForm/components/Controls/Instruction.tsx @@ -71,8 +71,8 @@ const DocumentationPanel: React.FC<{ onClose: () => void } & IProps> = ({ const config = useConfig(); const { data: docs, isLoading } = useDocumentation(documentationUrl); - const removeBaseUrl = (url: { path: string }) => { - if (url.path.startsWith("../../")) { + const removeBaseUrl = (url: { path?: string }) => { + if (url.path?.startsWith("../../")) { return url.path.replace("../../", `${config.integrationUrl}/`); } return url.path; diff --git a/airbyte-webapp/src/views/Connector/ServiceForm/components/CreateControls.tsx b/airbyte-webapp/src/views/Connector/ServiceForm/components/CreateControls.tsx index c6f53252743c..d35d8e23cdb2 100644 --- a/airbyte-webapp/src/views/Connector/ServiceForm/components/CreateControls.tsx +++ b/airbyte-webapp/src/views/Connector/ServiceForm/components/CreateControls.tsx @@ -14,7 +14,7 @@ type IProps = { hasSuccess?: boolean; isLoadSchema?: boolean; errorMessage?: React.ReactNode; - fetchingConnectorError?: Error; + fetchingConnectorError?: Error | null; additionBottomControls?: React.ReactNode; }; diff --git a/airbyte-webapp/src/views/Connector/ServiceForm/components/WarningMessage.tsx b/airbyte-webapp/src/views/Connector/ServiceForm/components/WarningMessage.tsx index de5456b6b21e..8b42a9288c8d 100644 --- a/airbyte-webapp/src/views/Connector/ServiceForm/components/WarningMessage.tsx +++ b/airbyte-webapp/src/views/Connector/ServiceForm/components/WarningMessage.tsx @@ -1,27 +1,26 @@ import React from "react"; import { FormattedMessage } from 
"react-intl"; import styled from "styled-components"; -import { FontAwesomeIcon } from "@fortawesome/react-fontawesome"; -import { faExclamationCircle } from "@fortawesome/free-solid-svg-icons"; import { ReleaseStage } from "core/domain/connector"; +import { useConfig } from "config"; const Content = styled.div` - display: flex; - flex-direction: row; - padding: 13px 20px; - border: 1px solid ${({ theme }) => theme.redColor}; + padding: 13px 16px; + background: ${({ theme }) => theme.warningBackgroundColor}; border-radius: 8px; font-size: 12px; - line-height: 18px; white-space: break-spaces; margin-top: 16px; `; -const Exclamation = styled(FontAwesomeIcon)` - font-size: 20px; - margin-right: 12px; - color: ${({ theme }) => theme.redColor}; +const Link = styled.a` + color: ${({ theme }) => theme.darkPrimaryColor}; + + &:hover, + &:focus { + color: ${({ theme }) => theme.darkPrimaryColor60}; + } `; type WarningMessageProps = { @@ -29,12 +28,23 @@ type WarningMessageProps = { }; const WarningMessage: React.FC = ({ stage }) => { + const config = useConfig(); return ( - -
- -
+ ( + + {node} + + ), + }} + />
); }; diff --git a/airbyte-webapp/src/views/common/StoreProvider.tsx b/airbyte-webapp/src/views/common/StoreProvider.tsx new file mode 100644 index 000000000000..192443bf02d7 --- /dev/null +++ b/airbyte-webapp/src/views/common/StoreProvider.tsx @@ -0,0 +1,21 @@ +import { QueryClient, QueryClientProvider } from "react-query"; +import React from "react"; +import { CacheProvider } from "rest-hooks"; + +const queryClient = new QueryClient({ + defaultOptions: { + queries: { + suspense: true, + refetchOnWindowFocus: false, + refetchOnReconnect: false, + }, + }, +}); + +const StoreProvider: React.FC = ({ children }) => ( + + {children} + +); + +export { StoreProvider }; diff --git a/airbyte-workers/Dockerfile b/airbyte-workers/Dockerfile index c784b1c32b11..a1776acb22dd 100644 --- a/airbyte-workers/Dockerfile +++ b/airbyte-workers/Dockerfile @@ -30,7 +30,7 @@ ENV APPLICATION airbyte-workers WORKDIR /app # Move worker app -ADD bin/${APPLICATION}-0.35.55-alpha.tar /app +ADD bin/${APPLICATION}-0.35.59-alpha.tar /app # wait for upstream dependencies to become available before starting server -ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.55-alpha/bin/${APPLICATION}"] +ENTRYPOINT ["/bin/bash", "-c", "${APPLICATION}-0.35.59-alpha/bin/${APPLICATION}"] diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java index 4b523d2beae2..f47d5786f5b5 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java +++ b/airbyte-workers/src/main/java/io/airbyte/workers/WorkerApp.java @@ -52,6 +52,7 @@ import io.airbyte.workers.temporal.discover.catalog.DiscoverCatalogActivityImpl; import io.airbyte.workers.temporal.discover.catalog.DiscoverCatalogWorkflowImpl; import io.airbyte.workers.temporal.scheduling.ConnectionManagerWorkflowImpl; +import io.airbyte.workers.temporal.scheduling.activities.AutoDisableConnectionActivityImpl; import io.airbyte.workers.temporal.scheduling.activities.ConfigFetchActivityImpl; import io.airbyte.workers.temporal.scheduling.activities.ConnectionDeletionActivityImpl; import io.airbyte.workers.temporal.scheduling.activities.GenerateInputActivityImpl; @@ -159,6 +160,7 @@ public void start() { private void registerConnectionManager(final WorkerFactory factory) { final JobCreator jobCreator = new DefaultJobCreator(jobPersistence, configRepository, defaultWorkerConfigs.getResourceRequirements()); + final FeatureFlags featureFlags = new EnvVariableFeatureFlags(); final Worker connectionUpdaterWorker = factory.newWorker(TemporalJobType.CONNECTION_UPDATER.toString(), getWorkerOptions(maxWorkers.getMaxSyncWorkers())); @@ -177,7 +179,8 @@ private void registerConnectionManager(final WorkerFactory factory) { configRepository, jobCreator), new ConfigFetchActivityImpl(configRepository, jobPersistence, configs, () -> Instant.now().getEpochSecond()), - new ConnectionDeletionActivityImpl(connectionHelper)); + new ConnectionDeletionActivityImpl(connectionHelper), + new AutoDisableConnectionActivityImpl(configRepository, jobPersistence, featureFlags, configs)); } private void registerSync(final WorkerFactory factory) { diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java index 5ffcd155da52..c166f29f5b43 100644 --- a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java +++ 
b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowImpl.java @@ -11,6 +11,8 @@ import io.airbyte.workers.helper.FailureHelper; import io.airbyte.workers.temporal.TemporalJobType; import io.airbyte.workers.temporal.exception.RetryableException; +import io.airbyte.workers.temporal.scheduling.activities.AutoDisableConnectionActivity; +import io.airbyte.workers.temporal.scheduling.activities.AutoDisableConnectionActivity.AutoDisableConnectionActivityInput; import io.airbyte.workers.temporal.scheduling.activities.ConfigFetchActivity; import io.airbyte.workers.temporal.scheduling.activities.ConfigFetchActivity.ScheduleRetrieverInput; import io.airbyte.workers.temporal.scheduling.activities.ConfigFetchActivity.ScheduleRetrieverOutput; @@ -47,6 +49,7 @@ import io.temporal.workflow.ChildWorkflowOptions; import io.temporal.workflow.Workflow; import java.time.Duration; +import java.time.Instant; import java.util.Set; import java.util.UUID; import java.util.function.Consumer; @@ -60,6 +63,7 @@ public class ConnectionManagerWorkflowImpl implements ConnectionManagerWorkflow public static final int NON_RUNNING_ATTEMPT_ID = -1; private static final int TASK_QUEUE_CHANGE_CURRENT_VERSION = 1; + private static final int AUTO_DISABLE_FAILING_CONNECTION_CHANGE_CURRENT_VERSION = 1; private static final String RENAME_ATTEMPT_ID_TO_NUMBER_TAG = "rename_attempt_id_to_number"; private static final int RENAME_ATTEMPT_ID_TO_NUMBER_CURRENT_VERSION = 1; @@ -76,6 +80,8 @@ public class ConnectionManagerWorkflowImpl implements ConnectionManagerWorkflow Workflow.newActivityStub(ConfigFetchActivity.class, ActivityConfiguration.SHORT_ACTIVITY_OPTIONS); private final ConnectionDeletionActivity connectionDeletionActivity = Workflow.newActivityStub(ConnectionDeletionActivity.class, ActivityConfiguration.SHORT_ACTIVITY_OPTIONS); + private final AutoDisableConnectionActivity autoDisableConnectionActivity = + Workflow.newActivityStub(AutoDisableConnectionActivity.class, ActivityConfiguration.SHORT_ACTIVITY_OPTIONS); private CancellationScope cancellableSyncWorkflow; @@ -90,12 +96,13 @@ public void run(final ConnectionUpdaterInput connectionUpdaterInput) throws Retr cancellableSyncWorkflow = generateSyncWorkflowRunnable(connectionUpdaterInput); cancellableSyncWorkflow.run(); } catch (final CanceledFailure cf) { - // When a scope is cancelled temporal will thow a CanceledFailure as you can see here: + // When a scope is cancelled temporal will throw a CanceledFailure as you can see here: // https://github.com/temporalio/sdk-java/blob/master/temporal-sdk/src/main/java/io/temporal/workflow/CancellationScope.java#L72 // The naming is very misleading, it is not a failure but the expected behavior... } if (workflowState.isDeleted()) { + log.info("Workflow deletion was requested. 
Calling deleteConnection activity before terminating the workflow."); deleteConnectionBeforeTerminatingTheWorkflow(); return; } @@ -138,7 +145,7 @@ private CancellationScope generateSyncWorkflowRunnable(final ConnectionUpdaterIn () -> skipScheduling() || connectionUpdaterInput.isFromFailure()); if (workflowState.isDeleted()) { - deleteConnectionBeforeTerminatingTheWorkflow(); + log.info("Returning from workflow cancellation scope because workflow deletion was requested."); return; } @@ -253,6 +260,15 @@ private void reportFailure(final ConnectionUpdaterInput connectionUpdaterInput, connectionUpdaterInput.getJobId(), "Job failed after too many retries for connection " + connectionId)); + final int autoDisableConnectionVersion = + Workflow.getVersion("auto_disable_failing_connection", Workflow.DEFAULT_VERSION, AUTO_DISABLE_FAILING_CONNECTION_CHANGE_CURRENT_VERSION); + + if (autoDisableConnectionVersion != Workflow.DEFAULT_VERSION) { + final AutoDisableConnectionActivityInput autoDisableConnectionActivityInput = + new AutoDisableConnectionActivityInput(connectionId, Instant.ofEpochMilli(Workflow.currentTimeMillis())); + runMandatoryActivity(autoDisableConnectionActivity::autoDisableFailingConnection, autoDisableConnectionActivityInput); + } + resetNewConnectionInput(connectionUpdaterInput); if (workflowState.isResetConnection()) { connectionUpdaterInput.setFromJobResetFailure(true); @@ -528,7 +544,6 @@ private StandardSyncOutput runChildWorkflow(final GeneratedJobInput jobInputs) { if (taskQueueChangeVersion < TASK_QUEUE_CHANGE_CURRENT_VERSION) { taskQueue = TemporalJobType.CONNECTION_UPDATER.name(); } - final SyncWorkflow childSync = Workflow.newChildWorkflowStub(SyncWorkflow.class, ChildWorkflowOptions.newBuilder() .setWorkflowId("sync_" + workflowInternalState.getJobId()) diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/AutoDisableConnectionActivity.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/AutoDisableConnectionActivity.java new file mode 100644 index 000000000000..95ccf30b738f --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/AutoDisableConnectionActivity.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
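The `Workflow.getVersion` call added to `reportFailure` above is Temporal's mechanism for changing workflow code without breaking executions that are already in flight: the change id is recorded in the workflow's event history, so replays of executions started before this change keep seeing `Workflow.DEFAULT_VERSION` and skip the new activity, while newly started executions record version 1 and run it. Below is a lightly annotated restatement of that gate, using only names that appear in the diff above; it is a reading aid, not additional code.

```java
// Annotated restatement of the gate in reportFailure above; not new behavior.
final int autoDisableConnectionVersion = Workflow.getVersion(
    "auto_disable_failing_connection",                        // change id persisted in workflow history
    Workflow.DEFAULT_VERSION,                                  // returned when replaying executions started before this change
    AUTO_DISABLE_FAILING_CONNECTION_CHANGE_CURRENT_VERSION);   // current version (1) recorded for new executions

if (autoDisableConnectionVersion != Workflow.DEFAULT_VERSION) {
  // Only executions that recorded the new version reach the added activity,
  // which keeps replay of older histories deterministic.
  final AutoDisableConnectionActivityInput autoDisableConnectionActivityInput =
      new AutoDisableConnectionActivityInput(connectionId, Instant.ofEpochMilli(Workflow.currentTimeMillis()));
  runMandatoryActivity(autoDisableConnectionActivity::autoDisableFailingConnection, autoDisableConnectionActivityInput);
}
```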
+ */ + +package io.airbyte.workers.temporal.scheduling.activities; + +import io.temporal.activity.ActivityInterface; +import io.temporal.activity.ActivityMethod; +import java.time.Instant; +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@ActivityInterface +public interface AutoDisableConnectionActivity { + + @Data + @NoArgsConstructor + @AllArgsConstructor + class AutoDisableConnectionActivityInput { + + private UUID connectionId; + + private Instant currTimestamp; + + } + + @Data + @NoArgsConstructor + @AllArgsConstructor + class AutoDisableConnectionOutput { + + private boolean disabled; + + } + + /** + * Disable a connection if no successful sync jobs in the last MAX_FAILURE_JOBS_IN_A_ROW job + * attempts or the last MAX_DAYS_OF_STRAIGHT_FAILURE days (minimum 1 job attempt): disable + * connection to prevent wasting resources + */ + @ActivityMethod + AutoDisableConnectionOutput autoDisableFailingConnection(AutoDisableConnectionActivityInput input); + +} diff --git a/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/AutoDisableConnectionActivityImpl.java b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/AutoDisableConnectionActivityImpl.java new file mode 100644 index 000000000000..f78e04995c3a --- /dev/null +++ b/airbyte-workers/src/main/java/io/airbyte/workers/temporal/scheduling/activities/AutoDisableConnectionActivityImpl.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.workers.temporal.scheduling.activities; + +import static io.airbyte.scheduler.models.Job.REPLICATION_TYPES; + +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.config.Configs; +import io.airbyte.config.StandardSync; +import io.airbyte.config.StandardSync.Status; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.scheduler.models.Job; +import io.airbyte.scheduler.models.JobStatus; +import io.airbyte.scheduler.persistence.JobPersistence; +import io.airbyte.workers.temporal.exception.RetryableException; +import java.time.temporal.ChronoUnit; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import lombok.AllArgsConstructor; + +@AllArgsConstructor +public class AutoDisableConnectionActivityImpl implements AutoDisableConnectionActivity { + + private ConfigRepository configRepository; + private JobPersistence jobPersistence; + private FeatureFlags featureFlags; + private Configs configs; + + // Given a connection id and current timestamp, this activity will set a connection to INACTIVE if + // either: + // - fails jobs consecutively and hits the `configs.getMaxFailedJobsInARowBeforeConnectionDisable()` + // limit + // - all the jobs in the past `configs.getMaxDaysOfOnlyFailedJobsBeforeConnectionDisable()` days are + // failures, and that the connection's first job is at least that many days old + @Override + public AutoDisableConnectionOutput autoDisableFailingConnection(final AutoDisableConnectionActivityInput input) { + if (featureFlags.autoDisablesFailingConnections()) { + try { + final int maxDaysOfOnlyFailedJobs = configs.getMaxDaysOfOnlyFailedJobsBeforeConnectionDisable(); + final List jobStatuses = jobPersistence.listJobStatusWithConnection(input.getConnectionId(), REPLICATION_TYPES, + input.getCurrTimestamp().minus(maxDaysOfOnlyFailedJobs, ChronoUnit.DAYS)); + + int numFailures = 0; + + // jobs are sorted from most recent to least recent + 
for (final JobStatus jobStatus : jobStatuses) { + if (jobStatus == JobStatus.FAILED) { + numFailures++; + if (numFailures == configs.getMaxFailedJobsInARowBeforeConnectionDisable()) + break; + } else if (jobStatus == JobStatus.SUCCEEDED) { + return new AutoDisableConnectionOutput(false); + } + } + + // if the jobs in the last 14 days don't include any succeeded or failed jobs (e.g. only cancelled + // jobs), do not auto-disable + if (numFailures == 0) { + return new AutoDisableConnectionOutput(false); + } + + // if the very first job of a connection fails, it will hit the condition of "only failed jobs in + // the past `maxDaysOfOnlyFailedJobs` days", to avoid this behavior, we ensure that this condition + // is only taken into account if the connection has a job that's at least `maxDaysOfOnlyFailedJobs` + // days old + if (numFailures != configs.getMaxFailedJobsInARowBeforeConnectionDisable()) { + final Optional optionalFirstJob = jobPersistence.getFirstReplicationJob(input.getConnectionId()); + if (optionalFirstJob.isPresent()) { + final long timeBetweenCurrTimestampAndFirstJob = input.getCurrTimestamp().getEpochSecond() + - optionalFirstJob.get().getCreatedAtInSecond(); + if (timeBetweenCurrTimestampAndFirstJob <= TimeUnit.DAYS.toSeconds(maxDaysOfOnlyFailedJobs)) { + return new AutoDisableConnectionOutput(false); + } + } + } + + final StandardSync standardSync = configRepository.getStandardSync(input.getConnectionId()); + standardSync.setStatus(Status.INACTIVE); + configRepository.writeStandardSync(standardSync); + } catch (final Exception e) { + throw new RetryableException(e); + } + } + return new AutoDisableConnectionOutput(true); + } + +} diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java index 2935f779e830..4d74a84f4a5c 100644 --- a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/ConnectionManagerWorkflowTest.java @@ -4,12 +4,16 @@ package io.airbyte.workers.temporal.scheduling; +import static org.mockito.Mockito.atLeastOnce; + import io.airbyte.config.FailureReason.FailureOrigin; import io.airbyte.config.FailureReason.FailureType; import io.airbyte.config.StandardSyncInput; import io.airbyte.scheduler.models.IntegrationLauncherConfig; import io.airbyte.scheduler.models.JobRunConfig; import io.airbyte.workers.temporal.TemporalJobType; +import io.airbyte.workers.temporal.scheduling.activities.AutoDisableConnectionActivity; +import io.airbyte.workers.temporal.scheduling.activities.AutoDisableConnectionActivity.AutoDisableConnectionActivityInput; import io.airbyte.workers.temporal.scheduling.activities.ConfigFetchActivity; import io.airbyte.workers.temporal.scheduling.activities.ConfigFetchActivity.GetMaxAttemptOutput; import io.airbyte.workers.temporal.scheduling.activities.ConfigFetchActivity.ScheduleRetrieverOutput; @@ -88,6 +92,8 @@ public class ConnectionManagerWorkflowTest { Mockito.mock(GenerateInputActivityImpl.class, Mockito.withSettings().withoutAnnotations()); private static final JobCreationAndStatusUpdateActivity mJobCreationAndStatusUpdateActivity = Mockito.mock(JobCreationAndStatusUpdateActivity.class, Mockito.withSettings().withoutAnnotations()); + private static final AutoDisableConnectionActivity mAutoDisableConnectionActivity = + 
Mockito.mock(AutoDisableConnectionActivity.class, Mockito.withSettings().withoutAnnotations()); private TestWorkflowEnvironment testEnv; private WorkflowClient client; @@ -109,6 +115,7 @@ public void setUp() { Mockito.reset(mConnectionDeletionActivity); Mockito.reset(mGenerateInputActivityImpl); Mockito.reset(mJobCreationAndStatusUpdateActivity); + Mockito.reset(mAutoDisableConnectionActivity); // default is to wait "forever" Mockito.when(mConfigFetchActivity.getTimeToWait(Mockito.any())).thenReturn(new ScheduleRetrieverOutput( @@ -399,7 +406,7 @@ public void deleteSync() throws InterruptedException { && changedStateEvent.isValue()) .isEmpty(); - Mockito.verify(mConnectionDeletionActivity, Mockito.atLeast(1)).deleteConnection(Mockito.any()); + Mockito.verify(mConnectionDeletionActivity, Mockito.times(1)).deleteConnection(Mockito.any()); } } @@ -690,6 +697,105 @@ public void updatedSignalReceivedWhileRunning() throws InterruptedException { } + @Nested + @DisplayName("Test that connections are auto disabled if conditions are met") + class AutoDisableConnection { + + private static final long JOB_ID = 111L; + private static final int ATTEMPT_ID = 222; + + @BeforeEach + public void setup() { + testEnv = TestWorkflowEnvironment.newInstance(); + + final Worker managerWorker = testEnv.newWorker(TemporalJobType.CONNECTION_UPDATER.name()); + managerWorker.registerWorkflowImplementationTypes(ConnectionManagerWorkflowImpl.class); + managerWorker.registerActivitiesImplementations(mConfigFetchActivity, mConnectionDeletionActivity, + mGenerateInputActivityImpl, mJobCreationAndStatusUpdateActivity, mAutoDisableConnectionActivity); + + client = testEnv.getWorkflowClient(); + workflow = client.newWorkflowStub(ConnectionManagerWorkflow.class, + WorkflowOptions.newBuilder().setTaskQueue(TemporalJobType.CONNECTION_UPDATER.name()).build()); + + Mockito.when(mConfigFetchActivity.getMaxAttempt()).thenReturn(new GetMaxAttemptOutput(1)); + } + + @Test + @Timeout(value = 2, + unit = TimeUnit.SECONDS) + @DisplayName("Test that auto disable activity is touched during failure") + public void testAutoDisableOnFailure() throws InterruptedException { + final Worker syncWorker = testEnv.newWorker(TemporalJobType.SYNC.name()); + syncWorker.registerWorkflowImplementationTypes(SourceAndDestinationFailureSyncWorkflow.class); + + testEnv.start(); + + final UUID testId = UUID.randomUUID(); + final UUID connectionId = UUID.randomUUID(); + final TestStateListener testStateListener = new TestStateListener(); + final WorkflowState workflowState = new WorkflowState(testId, testStateListener); + final ConnectionUpdaterInput input = ConnectionUpdaterInput.builder() + .connectionId(connectionId) + .jobId(JOB_ID) + .attemptId(ATTEMPT_ID) + .fromFailure(false) + .attemptNumber(1) + .workflowState(workflowState) + .resetConnection(false) + .fromJobResetFailure(false) + .build(); + + startWorkflowAndWaitUntilReady(workflow, input); + + // wait for workflow to initialize + testEnv.sleep(Duration.ofMinutes(1)); + + workflow.submitManualSync(); + testEnv.sleep(Duration.ofMinutes(1L)); // any time after no-waiting manual run + + Mockito.verify(mJobCreationAndStatusUpdateActivity, atLeastOnce()).attemptFailureWithAttemptNumber(Mockito.any()); + Mockito.verify(mJobCreationAndStatusUpdateActivity, atLeastOnce()).jobFailure(Mockito.any()); + Mockito.verify(mAutoDisableConnectionActivity) + .autoDisableFailingConnection(new AutoDisableConnectionActivityInput(connectionId, Mockito.any())); + } + + @Test + @Timeout(value = 2, + unit = 
TimeUnit.SECONDS) + @DisplayName("Test that auto disable activity is not touched during job success") + public void testNoAutoDisableOnSuccess() throws InterruptedException { + final Worker syncWorker = testEnv.newWorker(TemporalJobType.SYNC.name()); + syncWorker.registerWorkflowImplementationTypes(EmptySyncWorkflow.class); + + testEnv.start(); + + final UUID testId = UUID.randomUUID(); + final UUID connectionId = UUID.randomUUID(); + final TestStateListener testStateListener = new TestStateListener(); + final WorkflowState workflowState = new WorkflowState(testId, testStateListener); + final ConnectionUpdaterInput input = ConnectionUpdaterInput.builder() + .connectionId(connectionId) + .jobId(JOB_ID) + .attemptId(ATTEMPT_ID) + .fromFailure(false) + .attemptNumber(0) + .workflowState(workflowState) + .resetConnection(false) + .fromJobResetFailure(false) + .build(); + + startWorkflowAndWaitUntilReady(workflow, input); + + // wait for workflow to initialize + testEnv.sleep(Duration.ofMinutes(1)); + + workflow.submitManualSync(); + testEnv.sleep(Duration.ofMinutes(1L)); // any time after no-waiting manual run + Mockito.verifyNoInteractions(mAutoDisableConnectionActivity); + } + + } + @Nested @DisplayName("Test that sync workflow failures are recorded") class SyncWorkflowReplicationFailuresRecorded { @@ -704,7 +810,7 @@ public void setup() { final Worker managerWorker = testEnv.newWorker(TemporalJobType.CONNECTION_UPDATER.name()); managerWorker.registerWorkflowImplementationTypes(ConnectionManagerWorkflowImpl.class); managerWorker.registerActivitiesImplementations(mConfigFetchActivity, mConnectionDeletionActivity, - mGenerateInputActivityImpl, mJobCreationAndStatusUpdateActivity); + mGenerateInputActivityImpl, mJobCreationAndStatusUpdateActivity, mAutoDisableConnectionActivity); client = testEnv.getWorkflowClient(); workflow = client.newWorkflowStub(ConnectionManagerWorkflow.class, @@ -1272,7 +1378,7 @@ private void setupSpecificChildWorkflow(final Class final Worker managerWorker = testEnv.newWorker(TemporalJobType.CONNECTION_UPDATER.name()); managerWorker.registerWorkflowImplementationTypes(ConnectionManagerWorkflowImpl.class); managerWorker.registerActivitiesImplementations(mConfigFetchActivity, mConnectionDeletionActivity, - mGenerateInputActivityImpl, mJobCreationAndStatusUpdateActivity); + mGenerateInputActivityImpl, mJobCreationAndStatusUpdateActivity, mAutoDisableConnectionActivity); client = testEnv.getWorkflowClient(); testEnv.start(); diff --git a/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/AutoDisableConnectionActivityTest.java b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/AutoDisableConnectionActivityTest.java new file mode 100644 index 000000000000..7513eb391683 --- /dev/null +++ b/airbyte-workers/src/test/java/io/airbyte/workers/temporal/scheduling/activities/AutoDisableConnectionActivityTest.java @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2021 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.workers.temporal.scheduling.activities; + +import static io.airbyte.config.EnvConfigs.DEFAULT_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE; +import static io.airbyte.config.EnvConfigs.DEFAULT_FAILED_JOBS_IN_A_ROW_BEFORE_CONNECTION_DISABLE; +import static io.airbyte.scheduler.models.Job.REPLICATION_TYPES; + +import io.airbyte.commons.features.FeatureFlags; +import io.airbyte.config.Configs; +import io.airbyte.config.StandardSync; +import io.airbyte.config.StandardSync.Status; +import io.airbyte.config.persistence.ConfigNotFoundException; +import io.airbyte.config.persistence.ConfigRepository; +import io.airbyte.scheduler.models.Job; +import io.airbyte.scheduler.models.JobStatus; +import io.airbyte.scheduler.persistence.JobPersistence; +import io.airbyte.validation.json.JsonValidationException; +import io.airbyte.workers.temporal.scheduling.activities.AutoDisableConnectionActivity.AutoDisableConnectionActivityInput; +import io.airbyte.workers.temporal.scheduling.activities.AutoDisableConnectionActivity.AutoDisableConnectionOutput; +import java.io.IOException; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; + +@ExtendWith(MockitoExtension.class) +class AutoDisableConnectionActivityTest { + + @Mock + private FeatureFlags mFeatureFlags; + + @Mock + private ConfigRepository mConfigRepository; + + @Mock + private JobPersistence mJobPersistence; + + @Mock + private Configs mConfigs; + + @Mock + private Job mJob; + + @InjectMocks + private AutoDisableConnectionActivityImpl autoDisableActivity; + + private static final UUID CONNECTION_ID = UUID.randomUUID(); + private static final Instant CURR_INSTANT = Instant.now(); + private static final AutoDisableConnectionActivityInput ACTIVITY_INPUT = new AutoDisableConnectionActivityInput(CONNECTION_ID, CURR_INSTANT); + private static final int MAX_FAILURE_JOBS_IN_A_ROW = DEFAULT_FAILED_JOBS_IN_A_ROW_BEFORE_CONNECTION_DISABLE; + + private final StandardSync standardSync = new StandardSync(); + + @BeforeEach + void setUp() { + standardSync.setStatus(Status.ACTIVE); + Mockito.when(mFeatureFlags.autoDisablesFailingConnections()).thenReturn(true); + Mockito.when(mConfigs.getMaxDaysOfOnlyFailedJobsBeforeConnectionDisable()).thenReturn(DEFAULT_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE); + } + + @Test + @DisplayName("Test that the connection is disabled after MAX_FAILURE_JOBS_IN_A_ROW straight failures") + public void testMaxFailuresInARow() throws IOException, JsonValidationException, ConfigNotFoundException { + // from most recent to least recent: MAX_FAILURE_JOBS_IN_A_ROW and 1 success + final List jobStatuses = new ArrayList<>(Collections.nCopies(MAX_FAILURE_JOBS_IN_A_ROW, JobStatus.FAILED)); + jobStatuses.add(JobStatus.SUCCEEDED); + + Mockito.when(mJobPersistence.listJobStatusWithConnection(CONNECTION_ID, REPLICATION_TYPES, + CURR_INSTANT.minus(DEFAULT_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE, ChronoUnit.DAYS))).thenReturn(jobStatuses); + 
Mockito.when(mConfigs.getMaxFailedJobsInARowBeforeConnectionDisable()).thenReturn(MAX_FAILURE_JOBS_IN_A_ROW); + Mockito.when(mConfigRepository.getStandardSync(CONNECTION_ID)).thenReturn(standardSync); + + final AutoDisableConnectionOutput output = autoDisableActivity.autoDisableFailingConnection(ACTIVITY_INPUT); + Assertions.assertThat(output.isDisabled()).isTrue(); + Assertions.assertThat(standardSync.getStatus()).isEqualTo(Status.INACTIVE); + } + + @Test + @DisplayName("Test that the connection is _not_ disabled after MAX_FAILURE_JOBS_IN_A_ROW - 1 straight failures") + public void testLessThanMaxFailuresInARow() throws IOException { + // from most recent to least recent: MAX_FAILURE_JOBS_IN_A_ROW-1 and 1 success + final List jobStatuses = new ArrayList<>(Collections.nCopies(MAX_FAILURE_JOBS_IN_A_ROW - 1, JobStatus.FAILED)); + jobStatuses.add(JobStatus.SUCCEEDED); + + Mockito.when(mJobPersistence.listJobStatusWithConnection(CONNECTION_ID, REPLICATION_TYPES, + CURR_INSTANT.minus(DEFAULT_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE, ChronoUnit.DAYS))).thenReturn(jobStatuses); + Mockito.when(mConfigs.getMaxFailedJobsInARowBeforeConnectionDisable()).thenReturn(MAX_FAILURE_JOBS_IN_A_ROW); + + final AutoDisableConnectionOutput output = autoDisableActivity.autoDisableFailingConnection(ACTIVITY_INPUT); + Assertions.assertThat(output.isDisabled()).isFalse(); + Assertions.assertThat(standardSync.getStatus()).isEqualTo(Status.ACTIVE); + } + + @Test + @DisplayName("Test that the connection is _not_ disabled after 0 jobs in last MAX_DAYS_OF_STRAIGHT_FAILURE days") + public void testNoRuns() throws IOException { + Mockito.when(mJobPersistence.listJobStatusWithConnection(CONNECTION_ID, REPLICATION_TYPES, + CURR_INSTANT.minus(DEFAULT_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE, ChronoUnit.DAYS))).thenReturn(Collections.emptyList()); + + final AutoDisableConnectionOutput output = autoDisableActivity.autoDisableFailingConnection(ACTIVITY_INPUT); + Assertions.assertThat(output.isDisabled()).isFalse(); + Assertions.assertThat(standardSync.getStatus()).isEqualTo(Status.ACTIVE); + } + + @Test + @DisplayName("Test that the connection is disabled after only failed jobs in last MAX_DAYS_OF_STRAIGHT_FAILURE days") + public void testOnlyFailuresInMaxDays() throws IOException, JsonValidationException, ConfigNotFoundException { + final int maxDaysOfOnlyFailedJobsBeforeConnectionDisable = 1; + + Mockito.when(mConfigs.getMaxDaysOfOnlyFailedJobsBeforeConnectionDisable()).thenReturn(maxDaysOfOnlyFailedJobsBeforeConnectionDisable); + Mockito.when(mJobPersistence.listJobStatusWithConnection(CONNECTION_ID, REPLICATION_TYPES, + CURR_INSTANT.minus(maxDaysOfOnlyFailedJobsBeforeConnectionDisable, ChronoUnit.DAYS))) + .thenReturn(Collections.singletonList(JobStatus.FAILED)); + + Mockito.when(mJobPersistence.getFirstReplicationJob(CONNECTION_ID)).thenReturn(Optional.of(mJob)); + // set first job created at to older than DEFAULT_DAYS_OF_ONLY_FAILED_JOBS_BEFORE_CONNECTION_DISABLE + // days + Mockito.when(mJob.getCreatedAtInSecond()).thenReturn(Instant.MIN.getEpochSecond()); + + Mockito.when(mConfigRepository.getStandardSync(CONNECTION_ID)).thenReturn(standardSync); + Mockito.when(mConfigs.getMaxFailedJobsInARowBeforeConnectionDisable()).thenReturn(MAX_FAILURE_JOBS_IN_A_ROW); + + final AutoDisableConnectionOutput output = autoDisableActivity.autoDisableFailingConnection(ACTIVITY_INPUT); + Assertions.assertThat(output.isDisabled()).isTrue(); + Assertions.assertThat(standardSync.getStatus()).isEqualTo(Status.INACTIVE); + 
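The tests in this class exercise the two disable conditions described in `AutoDisableConnectionActivityImpl` earlier in this diff. As a reading aid, here is a compact, self-contained sketch of that decision rule in plain Java, detached from Temporal, Lombok, and the persistence layer. The class, enum, and method names are stand-ins for illustration rather than Airbyte types, and day-level granularity is used where the implementation compares seconds.

```java
import java.time.Duration;
import java.time.Instant;
import java.util.List;

public class AutoDisableRuleSketch {

  enum JobStatus { SUCCEEDED, FAILED, CANCELLED }

  /**
   * Sketch of the auto-disable rule: disable on maxFailedJobsInARow failures with no
   * intervening success, or when the lookback window contains only failures and the
   * connection's first job is older than that window. Cancelled jobs are ignored.
   */
  static boolean shouldDisable(final List<JobStatus> statusesNewestFirst, // job statuses inside the lookback window
                               final Instant firstJobCreatedAt,           // creation time of the very first replication job
                               final Instant now,
                               final int maxFailedJobsInARow,
                               final int maxDaysOfOnlyFailedJobs) {
    int failuresInARow = 0;
    for (final JobStatus status : statusesNewestFirst) {
      if (status == JobStatus.FAILED) {
        failuresInARow++;
        if (failuresInARow == maxFailedJobsInARow) {
          return true; // condition 1: hit the consecutive-failure cap
        }
      } else if (status == JobStatus.SUCCEEDED) {
        return false; // any success inside the window keeps the connection enabled
      }
      // CANCELLED: neither a success nor a failure, so it is skipped
    }
    if (failuresInARow == 0) {
      return false; // only cancelled jobs (or none at all) in the window
    }
    // condition 2: only failures in the window, and the connection's first job
    // predates the window, so the window is meaningful.
    return Duration.between(firstJobCreatedAt, now).toDays() > maxDaysOfOnlyFailedJobs;
  }

  public static void main(final String[] args) {
    final Instant now = Instant.now();
    // Two failures in a row with a cap of two => disable.
    System.out.println(shouldDisable(List.of(JobStatus.FAILED, JobStatus.FAILED),
        now.minus(Duration.ofDays(30)), now, 2, 14)); // true
    // A success inside the window => keep enabled.
    System.out.println(shouldDisable(List.of(JobStatus.FAILED, JobStatus.SUCCEEDED),
        now.minus(Duration.ofDays(30)), now, 2, 14)); // false
  }
}
```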
} + + @Test + @DisplayName("Test that the connection is _not_ disabled after only failed jobs and oldest job is less than MAX_DAYS_OF_STRAIGHT_FAILURE days old") + public void testOnlyFailuresButFirstJobYoungerThanMaxDays() throws IOException, JsonValidationException, ConfigNotFoundException { + final int maxDaysOfOnlyFailedJobsBeforeConnectionDisable = 1; + + Mockito.when(mConfigs.getMaxDaysOfOnlyFailedJobsBeforeConnectionDisable()).thenReturn(maxDaysOfOnlyFailedJobsBeforeConnectionDisable); + Mockito.when(mJobPersistence.listJobStatusWithConnection(CONNECTION_ID, REPLICATION_TYPES, + CURR_INSTANT.minus(maxDaysOfOnlyFailedJobsBeforeConnectionDisable, ChronoUnit.DAYS))) + .thenReturn(Collections.singletonList(JobStatus.FAILED)); + + Mockito.when(mJobPersistence.getFirstReplicationJob(CONNECTION_ID)).thenReturn(Optional.of(mJob)); + Mockito.when(mJob.getCreatedAtInSecond()).thenReturn(CURR_INSTANT.getEpochSecond()); + Mockito.when(mConfigs.getMaxFailedJobsInARowBeforeConnectionDisable()).thenReturn(MAX_FAILURE_JOBS_IN_A_ROW); + + final AutoDisableConnectionOutput output = autoDisableActivity.autoDisableFailingConnection(ACTIVITY_INPUT); + Assertions.assertThat(output.isDisabled()).isFalse(); + Assertions.assertThat(standardSync.getStatus()).isEqualTo(Status.ACTIVE); + } + + @Test + @DisplayName("Test that the connection is _not_ disabled after only cancelled jobs") + public void testIgnoreOnlyCancelledRuns() throws IOException, JsonValidationException, ConfigNotFoundException { + final int maxDaysOfOnlyFailedJobsBeforeConnectionDisable = 1; + + Mockito.when(mConfigs.getMaxDaysOfOnlyFailedJobsBeforeConnectionDisable()).thenReturn(maxDaysOfOnlyFailedJobsBeforeConnectionDisable); + Mockito.when(mJobPersistence.listJobStatusWithConnection(CONNECTION_ID, REPLICATION_TYPES, + CURR_INSTANT.minus(maxDaysOfOnlyFailedJobsBeforeConnectionDisable, ChronoUnit.DAYS))) + .thenReturn(Collections.singletonList(JobStatus.CANCELLED)); + + final AutoDisableConnectionOutput output = autoDisableActivity.autoDisableFailingConnection(ACTIVITY_INPUT); + Assertions.assertThat(output.isDisabled()).isFalse(); + Assertions.assertThat(standardSync.getStatus()).isEqualTo(Status.ACTIVE); + } + +} diff --git a/charts/airbyte/Chart.yaml b/charts/airbyte/Chart.yaml index 1f1012a85b03..7a5f94059e85 100644 --- a/charts/airbyte/Chart.yaml +++ b/charts/airbyte/Chart.yaml @@ -21,7 +21,7 @@ version: 0.3.0 # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.35.55-alpha" +appVersion: "0.35.59-alpha" dependencies: - name: common diff --git a/charts/airbyte/README.md b/charts/airbyte/README.md index 5844798fa9b0..4f2d3c30d3de 100644 --- a/charts/airbyte/README.md +++ b/charts/airbyte/README.md @@ -31,7 +31,7 @@ Helm charts for Airbyte. | `webapp.replicaCount` | Number of webapp replicas | `1` | | `webapp.image.repository` | The repository to use for the airbyte webapp image. | `airbyte/webapp` | | `webapp.image.pullPolicy` | the pull policy to use for the airbyte webapp image | `IfNotPresent` | -| `webapp.image.tag` | The airbyte webapp image tag. Defaults to the chart's AppVersion | `0.35.55-alpha` | +| `webapp.image.tag` | The airbyte webapp image tag. 
Defaults to the chart's AppVersion | `0.35.59-alpha` | | `webapp.podAnnotations` | Add extra annotations to the webapp pod(s) | `{}` | | `webapp.containerSecurityContext` | Security context for the container | `{}` | | `webapp.livenessProbe.enabled` | Enable livenessProbe on the webapp | `true` | @@ -73,7 +73,7 @@ Helm charts for Airbyte. | `scheduler.replicaCount` | Number of scheduler replicas | `1` | | `scheduler.image.repository` | The repository to use for the airbyte scheduler image. | `airbyte/scheduler` | | `scheduler.image.pullPolicy` | the pull policy to use for the airbyte scheduler image | `IfNotPresent` | -| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.55-alpha` | +| `scheduler.image.tag` | The airbyte scheduler image tag. Defaults to the chart's AppVersion | `0.35.59-alpha` | | `scheduler.podAnnotations` | Add extra annotations to the scheduler pod | `{}` | | `scheduler.resources.limits` | The resources limits for the scheduler container | `{}` | | `scheduler.resources.requests` | The requested resources for the scheduler container | `{}` | @@ -120,7 +120,7 @@ Helm charts for Airbyte. | `server.replicaCount` | Number of server replicas | `1` | | `server.image.repository` | The repository to use for the airbyte server image. | `airbyte/server` | | `server.image.pullPolicy` | the pull policy to use for the airbyte server image | `IfNotPresent` | -| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.55-alpha` | +| `server.image.tag` | The airbyte server image tag. Defaults to the chart's AppVersion | `0.35.59-alpha` | | `server.podAnnotations` | Add extra annotations to the server pod | `{}` | | `server.containerSecurityContext` | Security context for the container | `{}` | | `server.livenessProbe.enabled` | Enable livenessProbe on the server | `true` | @@ -158,7 +158,7 @@ Helm charts for Airbyte. | `worker.replicaCount` | Number of worker replicas | `1` | | `worker.image.repository` | The repository to use for the airbyte worker image. | `airbyte/worker` | | `worker.image.pullPolicy` | the pull policy to use for the airbyte worker image | `IfNotPresent` | -| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.55-alpha` | +| `worker.image.tag` | The airbyte worker image tag. Defaults to the chart's AppVersion | `0.35.59-alpha` | | `worker.podAnnotations` | Add extra annotations to the worker pod(s) | `{}` | | `worker.containerSecurityContext` | Security context for the container | `{}` | | `worker.livenessProbe.enabled` | Enable livenessProbe on the worker | `true` | @@ -190,7 +190,7 @@ Helm charts for Airbyte. | ----------------------------- | -------------------------------------------------------------------- | -------------------- | | `bootloader.image.repository` | The repository to use for the airbyte bootloader image. | `airbyte/bootloader` | | `bootloader.image.pullPolicy` | the pull policy to use for the airbyte bootloader image | `IfNotPresent` | -| `bootloader.image.tag` | The airbyte bootloader image tag. Defaults to the chart's AppVersion | `0.35.55-alpha` | +| `bootloader.image.tag` | The airbyte bootloader image tag. 
Defaults to the chart's AppVersion | `0.35.59-alpha` | ### Temporal parameters diff --git a/charts/airbyte/values.yaml b/charts/airbyte/values.yaml index c37542370218..2ea046cc363b 100644 --- a/charts/airbyte/values.yaml +++ b/charts/airbyte/values.yaml @@ -43,7 +43,7 @@ webapp: image: repository: airbyte/webapp pullPolicy: IfNotPresent - tag: 0.35.55-alpha + tag: 0.35.59-alpha ## @param webapp.podAnnotations [object] Add extra annotations to the webapp pod(s) ## @@ -209,7 +209,7 @@ scheduler: image: repository: airbyte/scheduler pullPolicy: IfNotPresent - tag: 0.35.55-alpha + tag: 0.35.59-alpha ## @param scheduler.podAnnotations [object] Add extra annotations to the scheduler pod ## @@ -440,7 +440,7 @@ server: image: repository: airbyte/server pullPolicy: IfNotPresent - tag: 0.35.55-alpha + tag: 0.35.59-alpha ## @param server.podAnnotations [object] Add extra annotations to the server pod ## @@ -581,7 +581,7 @@ worker: image: repository: airbyte/worker pullPolicy: IfNotPresent - tag: 0.35.55-alpha + tag: 0.35.59-alpha ## @param worker.podAnnotations [object] Add extra annotations to the worker pod(s) ## @@ -699,7 +699,7 @@ bootloader: image: repository: airbyte/bootloader pullPolicy: IfNotPresent - tag: 0.35.55-alpha + tag: 0.35.59-alpha ## @param bootloader.podAnnotations [object] Add extra annotations to the bootloader pod ## diff --git a/docker-compose.yaml b/docker-compose.yaml index 0eccd58cc660..303954df5893 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -93,6 +93,7 @@ services: restart: unless-stopped environment: - AIRBYTE_VERSION=${VERSION} + - AUTO_DISABLE_FAILING_CONNECTIONS=${AUTO_DISABLE_FAILING_CONNECTIONS} - CONFIG_DATABASE_PASSWORD=${CONFIG_DATABASE_PASSWORD:-} - CONFIG_DATABASE_URL=${CONFIG_DATABASE_URL:-} - CONFIG_DATABASE_USER=${CONFIG_DATABASE_USER:-} diff --git a/docs/connector-development/tutorials/adding-incremental-sync.md b/docs/connector-development/tutorials/adding-incremental-sync.md index 605a7fd5d57b..317a2a736a7c 100644 --- a/docs/connector-development/tutorials/adding-incremental-sync.md +++ b/docs/connector-development/tutorials/adding-incremental-sync.md @@ -50,11 +50,11 @@ In this case we might choose something like this: } ``` -The second change we need to make to the `read` method is to use the state object so that we only emit new records. This stock ticker API does not give us control over how we query it, so we will have to filter out records that we already replicated within the Source. +The second change we need to make to the `read` method is to use the state object so that we only emit new records. Lastly, we need to emit an updated state object, so that the next time this Source runs we do not resend messages that we have already sent. -Here's what our updated source would look like. +Here's what our updated `read` method would look like. ```python def read(config, catalog, state): @@ -73,48 +73,101 @@ def read(config, catalog, state): log("No streams selected") return - # If we've made it this far, all the configuration is good and we can pull the last 7 days of market data - api_key = config["api_key"] - stock_ticker = config["stock_ticker"] - response = _call_api(f"/stock/{stock_ticker}/chart/7d", api_key) - # max_date starts at the value from the incoming state object. None if there was no previous state. 
- max_date = state.get("stock_prices") - if response.status_code != 200: - # In a real scenario we'd handle this error better :) - log("Failure occurred when calling IEX API") - sys.exit(1) - else: - # Sort the stock prices ascending by date then output them one by one as AirbyteMessages - prices = sorted(response.json(), key=lambda record: to_datetime(record["date"])) - for price in prices: - data = {"date": price["date"], "stock_ticker": price["symbol"], "price": price["close"]} - record = {"stream": "stock_prices", "data": data, "emitted_at": int(datetime.datetime.now().timestamp()) * 1000} - output_message = {"type": "RECORD", "record": record} - - if stock_prices_stream["sync_mode"] == "incremental": - # Filter records that are older than the last state. - # If no previous state, filter nothing. - state_date = to_datetime(state.get("stock_prices")) - if state_date and state_date > to_datetime(data["date"]): - continue - # If this record has the greatest date value so far, bump - # max_date. - if not max_date or to_datetime(max_date) < to_datetime(data["date"]): - max_date = data["date"] - - print(json.dumps(output_message)) - - # Emit new state message. - if stock_prices_stream["sync_mode"] == "incremental": - output_message = {"type": "STATE", "state": {"data": {"stock_prices": max_date}}} - print(json.dumps(output_message)) - -def to_datetime(date): - if date: - return datetime.datetime.strptime(date, '%Y-%m-%d') - else: - return None + # By default we fetch stock prices for the 7 day period ending with today + today = date.today() + cursor_value = today.strftime("%Y-%m-%d") + from_day = (today - timedelta(days=7)).strftime("%Y-%m-%d") + + # In case of incremental sync, state should contain the last date when we fetched stock prices + if stock_prices_stream["sync_mode"] == "incremental": + if state and state.get("stock_prices"): + from_date = datetime.strptime(state.get("stock_prices"), "%Y-%m-%d") + from_day = (from_date + timedelta(days=1)).strftime("%Y-%m-%d") + + # If the state indicates that we have already ran the sync up to cursor_value, we can skip the sync + if cursor_value != from_day: + # If we've made it this far, all the configuration is good and we can pull the market data + response = _call_api(ticker=config["stock_ticker"], token = config["api_key"], from_day=from_day, to_day=cursor_value) + if response.status_code != 200: + # In a real scenario we'd handle this error better :) + log("Failure occurred when calling Polygon.io API") + sys.exit(1) + else: + # Stock prices are returned sorted by by date in ascending order + # We want to output them one by one as AirbyteMessages + response_json = response.json() + if response_json["resultsCount"] > 0: + results = response_json["results"] + for result in results: + data = {"date": datetime.fromtimestamp(result["t"]/1000, tz=timezone.utc).strftime("%Y-%m-%d"), "stock_ticker": config["stock_ticker"], "price": result["c"]} + record = {"stream": "stock_prices", "data": data, "emitted_at": int(datetime.now().timestamp()) * 1000} + output_message = {"type": "RECORD", "record": record} + print(json.dumps(output_message)) + + # We update the cursor as we print out the data, so that next time sync starts where we stopped printing out results + if stock_prices_stream["sync_mode"] == "incremental": + cursor_value = datetime.fromtimestamp(results[len(results)-1]["t"]/1000, tz=timezone.utc).strftime("%Y-%m-%d") + + # Emit new state message. 
+ if stock_prices_stream["sync_mode"] == "incremental": + output_message = {"type": "STATE", "state": {"data": {"stock_prices": cursor_value}}} + print(json.dumps(output_message)) ``` -That's all you need to do to add incremental functionality to the stock ticker Source. Incremental definitely requires more configurability than full refresh, so your implementation may deviate slightly depending on whether your cursor field is source defined or user-defined. If you think you are running into one of those cases, check out our [incremental](../../understanding-airbyte/connections/incremental-append.md) documentation for more information on different types of configuration. +We will also need to parse `state` argument in the `run` method. In order to do that, we will modify the code that +calls `read` method from `run` method: +```python + elif command == "read": + config = read_json(get_input_file_path(parsed_args.config)) + configured_catalog = read_json(get_input_file_path(parsed_args.catalog)) + state = None + if parsed_args.state: + state = read_json(get_input_file_path(parsed_args.state)) + + read(config, configured_catalog, state) +``` +Finally, we need to pass more arguments to our `_call_api` method in order to fetch only new prices for incremental sync: +```python +def _call_api(ticker, token, from_day, to_day): + return requests.get(f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/1/day/{from_day}/{to_day}?sort=asc&limit=120&apiKey={token}") +``` + +You will notice that in order to test these changes you need a `state` object. If you run an incremental sync +without passing a state object, the new code will output a state object that you can use with the next sync. If you run this: +```bash +python source.py read --config secrets/valid_config.json --catalog incremental_configured_catalog.json +``` + +The output will look like following: +```bash +{"type": "RECORD", "record": {"stream": "stock_prices", "data": {"date": "2022-03-07", "stock_ticker": "TSLA", "price": 804.58}, "emitted_at": 1647294277000}} +{"type": "RECORD", "record": {"stream": "stock_prices", "data": {"date": "2022-03-08", "stock_ticker": "TSLA", "price": 824.4}, "emitted_at": 1647294277000}} +{"type": "RECORD", "record": {"stream": "stock_prices", "data": {"date": "2022-03-09", "stock_ticker": "TSLA", "price": 858.97}, "emitted_at": 1647294277000}} +{"type": "RECORD", "record": {"stream": "stock_prices", "data": {"date": "2022-03-10", "stock_ticker": "TSLA", "price": 838.3}, "emitted_at": 1647294277000}} +{"type": "RECORD", "record": {"stream": "stock_prices", "data": {"date": "2022-03-11", "stock_ticker": "TSLA", "price": 795.35}, "emitted_at": 1647294277000}} +{"type": "STATE", "state": {"data": {"stock_prices": "2022-03-11"}}} +``` + +Notice that the last line of output is the state object. Copy the state object: +```json +{"stock_prices": "2022-03-11"} +``` +and paste it into a new file (i.e. `state.json`). Now you can run an incremental sync: +```bash +python source.py read --config secrets/valid_config.json --catalog incremental_configured_catalog.json --state state.json +``` + +That's all you need to do to add incremental functionality to the stock ticker Source. + +You can deploy the new version of your connector simply by running: +```bash +./gradlew clean :airbyte-integrations:connectors:source-stock-ticker-api:build +``` + +Bonus points: go to Airbyte UI and reconfigure the connection to use incremental sync. 
+
+Incremental definitely requires more configurability than full refresh, so your implementation may deviate slightly depending on whether your cursor
+field is source defined or user-defined. If you think you are running into one of those cases, check out
+our [incremental](../../understanding-airbyte/connections/incremental-append.md) documentation for more information on different types of
+configuration.
diff --git a/docs/deploying-airbyte/local-deployment.md b/docs/deploying-airbyte/local-deployment.md
index a53447022594..278a244d4676 100644
--- a/docs/deploying-airbyte/local-deployment.md
+++ b/docs/deploying-airbyte/local-deployment.md
@@ -38,7 +38,32 @@ VERSION=dev docker-compose up

## Deploy on Windows

-We recommend following [this guide](https://docs.docker.com/docker-for-windows/install/) to install Docker on Windows. After installing the WSL 2 backend and Docker you should be able to run containers using Windows PowerShell. Additionally, as we note frequently, you will need `docker-compose` to build Airbyte from source. The suggested guide already installs `docker-compose` on Windows.
+After installing the WSL 2 backend and Docker you should be able to run containers using Windows PowerShell. Additionally, as we note frequently, you will need `docker-compose` to build Airbyte from source. The suggested guide already installs `docker-compose` on Windows.
+
+### Setup Guide
+
+**1. Check the system requirements in the [Docker documentation](https://docs.docker.com/desktop/windows/install/).**
+
+Follow the steps listed in the system requirements and make sure you download and install the Linux kernel update package.
+
+**2. Install Docker Desktop on Windows.**
+
+Install [Docker Desktop](https://docs.docker.com/desktop/windows/install/).
+
+Make sure to select the options:
+1. *Enable Hyper-V Windows Features*
+2. *Install required Windows components for WSL 2*\
+   when prompted. After installation, you will need to reboot your computer.
+
+**3. You're done!**
+
+```bash
+git clone https://github.com/airbytehq/airbyte.git
+cd airbyte
+docker-compose up
+```
+* In your browser, visit [http://localhost:8000](http://localhost:8000)
+* Start moving some data!

## Troubleshooting
diff --git a/docs/integrations/destinations/bigquery.md b/docs/integrations/destinations/bigquery.md
index c7be00605da3..10da7f02d41e 100644
--- a/docs/integrations/destinations/bigquery.md
+++ b/docs/integrations/destinations/bigquery.md
@@ -145,7 +145,9 @@ When you create a dataset in BigQuery, the dataset name must be unique for each

* Dataset names are case-sensitive: mydataset and MyDataset can coexist in the same project.
* Dataset names cannot contain spaces or special characters such as -, &, @, or %.

-Therefore, Airbyte BigQuery destination will convert any invalid characters into '\_' characters when writing data.
+Therefore, Airbyte BigQuery destination will convert any invalid characters into `_` characters when writing data.
+
+Datasets that begin with `_` are hidden from the BigQuery Explorer panel.
To avoid creating such datasets, the destination will prepend the namespace with `n` if the converted namespace begins with `_`.

## CHANGELOG

@@ -153,6 +155,7 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into

| Version | Date | Pull Request | Subject |
|:--------| :--- | :--- | :--- |
+| 0.6.12 | 2022-03-18 | [10793](https://github.com/airbytehq/airbyte/pull/10793) | Fix namespace with invalid characters |
| 0.6.11 | 2022-03-03 | [10755](https://github.com/airbytehq/airbyte/pull/10755) | Make sure to kill children threads and stop JVM |
| 0.6.8 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option |
| 0.6.6 | 2022-02-01 | [\#9959](https://github.com/airbytehq/airbyte/pull/9959) | Fix null pointer exception from buffered stream consumer. |
@@ -178,6 +181,7 @@ Therefore, Airbyte BigQuery destination will convert any invalid characters into

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:-----------------------------------------------------------| :--- |
+| 0.2.11 | 2022-03-18 | [10793](https://github.com/airbytehq/airbyte/pull/10793) | Fix namespace with invalid characters |
| 0.2.10 | 2022-03-03 | [10755](https://github.com/airbytehq/airbyte/pull/10755) | Make sure to kill children threads and stop JVM |
| 0.2.8 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option |
| 0.2.7 | 2022-02-01 | [\#9959](https://github.com/airbytehq/airbyte/pull/9959) | Fix null pointer exception from buffered stream consumer. |
diff --git a/docs/integrations/destinations/redshift.md b/docs/integrations/destinations/redshift.md
index 17d5c6fb3ed6..252a36bcea06 100644
--- a/docs/integrations/destinations/redshift.md
+++ b/docs/integrations/destinations/redshift.md
@@ -124,6 +124,7 @@ All Redshift connections are encrypted using SSL

| Version | Date | Pull Request | Subject |
|:--------| :-------- | :----- | :------ |
+| 0.3.28 | 2022-03-18 | [\#11254](https://github.com/airbytehq/airbyte/pull/11254) | Fixed missing records during S3 staging |
| 0.3.27 | 2022-02-25 | [10421](https://github.com/airbytehq/airbyte/pull/10421) | Refactor JDBC parameters handling |
| 0.3.25 | 2022-02-14 | [#9920](https://github.com/airbytehq/airbyte/pull/9920) | Updated the size of staging files for S3 staging. Also, added closure of S3 writers to staging files when data has been written to an staging file. |
| 0.3.24 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option |
diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md
index e9ecefd72a41..7b93447d3357 100644
--- a/docs/integrations/destinations/snowflake.md
+++ b/docs/integrations/destinations/snowflake.md
@@ -1,263 +1,225 @@
# Snowflake

-## Overview
+Setting up the Snowflake destination connector involves setting up Snowflake entities (warehouse, database, schema, user, and role) in the Snowflake console, then setting up the data loading method (internal stage, AWS S3, GCS bucket, or Azure Blob Storage), and then configuring the Snowflake destination connector using the Airbyte UI.

-The Airbyte Snowflake destination allows you to sync data to Snowflake.
+This page describes the step-by-step process of setting up the Snowflake destination connector.
-### Sync overview +## Prerequisites -#### Output schema +- A Snowflake account with the[ ACCOUNTADMIN](https://docs.snowflake.com/en/user-guide/security-access-control-considerations.html) role. If you don’t have an account with the `ACCOUNTADMIN` role, contact your Snowflake administrator to set one up for you. +- (Optional) An AWS, Google Cloud Storage, or Azure account. -Each stream will be output into its own table in Snowflake. Each table will contain 3 columns: +## Step 1: Set up Airbyte-specific entities in Snowflake -* `_airbyte_ab_id`: a uuid assigned by Airbyte to each event that is processed. The column type in Snowflake is `VARCHAR`. -* `_airbyte_emitted_at`: a timestamp representing when the event was pulled from the data source. The column type in Snowflake is `TIMESTAMP WITH TIME ZONE`. -* `_airbyte_data`: a json blob representing with the event data. The column type in Snowflake is `VARIANT`. +To set up the Snowflake destination connector, you first need to create Airbyte-specific Snowflake entities (a warehouse, database, schema, user, and role) with the `OWNERSHIP` permission to write data into Snowflake, track costs pertaining to Airbyte, and control permissions at a granular level. -Note that Airbyte will create **permanent** tables. If you prefer to create transient tables (see [Snowflake docs](https://docs.snowflake.com/en/user-guide/tables-temp-transient.html) for a comparison), you will want to create a dedicated transient database for Airbyte (`CREATE TRANSIENT DATABASE airbyte_database`). +You can use the following script in a new [Snowflake worksheet](https://docs.snowflake.com/en/user-guide/ui-worksheet.html) to create the entities: -#### Features +1. [Log into your Snowflake account](https://www.snowflake.com/login/). +2. Edit the following script to change the password to a more secure password and to change the names of other resources if you so desire. -| Feature | Supported?\(Yes/No\) | Notes | -| :--- | :--- | :--- | -| Full Refresh Sync | Yes | | -| Incremental - Append Sync | Yes | | -| Incremental - Deduped History | Yes | | -| Namespaces | Yes | | + **Note:** Make sure you follow the [Snowflake identifier requirements](https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html) while renaming the resources. + + -- set variables (these need to be uppercase) + set airbyte_role = 'AIRBYTE_ROLE'; + set airbyte_username = 'AIRBYTE_USER'; + set airbyte_warehouse = 'AIRBYTE_WAREHOUSE'; + set airbyte_database = 'AIRBYTE_DATABASE'; + set airbyte_schema = 'AIRBYTE_SCHEMA'; -## Getting started + -- set user password + set airbyte_password = 'password'; -### Requirements + begin; -1. Active Snowflake warehouse + -- create Airbyte role + use role securityadmin; + create role if not exists identifier($airbyte_role); + grant role identifier($airbyte_role) to role SYSADMIN; -We recommend creating an Airbyte-specific warehouse, database, schema, user, and role for writing data into Snowflake so it is possible to track costs specifically related to Airbyte \(including the cost of running this warehouse\) and control permissions at a granular level. Since the Airbyte user creates, drops, and alters tables, `OWNERSHIP` permissions are required in Snowflake. If you are not following the recommended script below, please limit the `OWNERSHIP` permissions to only the necessary database and schema for the Airbyte user. 
+ -- create Airbyte user + create user if not exists identifier($airbyte_username) + password = $airbyte_password + default_role = $airbyte_role + default_warehouse = $airbyte_warehouse; -We provide the following script to create these resources. Before running, you must change the password to something secure. You may change the names of the other resources if you desire. -Login into your Snowflake warehouse, copy and paste the following script in a new [worksheet](https://docs.snowflake.com/en/user-guide/ui-worksheet.html). Select the `All Queries` checkbox and then press the `Run` button. + grant role identifier($airbyte_role) to user identifier($airbyte_username); -```text --- set variables (these need to be uppercase) -set airbyte_role = 'AIRBYTE_ROLE'; -set airbyte_username = 'AIRBYTE_USER'; -set airbyte_warehouse = 'AIRBYTE_WAREHOUSE'; -set airbyte_database = 'AIRBYTE_DATABASE'; -set airbyte_schema = 'AIRBYTE_SCHEMA'; + -- change role to sysadmin for warehouse / database steps + use role sysadmin; --- set user password -set airbyte_password = 'password'; + -- create Airbyte warehouse + create warehouse if not exists identifier($airbyte_warehouse) + warehouse_size = xsmall + warehouse_type = standard + auto_suspend = 60 + auto_resume = true + initially_suspended = true; -begin; + -- create Airbyte database + create database if not exists identifier($airbyte_database); --- create Airbyte role -use role securityadmin; -create role if not exists identifier($airbyte_role); -grant role identifier($airbyte_role) to role SYSADMIN; + -- grant Airbyte warehouse access + grant USAGE + on warehouse identifier($airbyte_warehouse) + to role identifier($airbyte_role); --- create Airbyte user -create user if not exists identifier($airbyte_username) -password = $airbyte_password -default_role = $airbyte_role -default_warehouse = $airbyte_warehouse; + -- grant Airbyte database access + grant OWNERSHIP + on database identifier($airbyte_database) + to role identifier($airbyte_role); -grant role identifier($airbyte_role) to user identifier($airbyte_username); + commit; --- change role to sysadmin for warehouse / database steps -use role sysadmin; + begin; --- create Airbyte warehouse -create warehouse if not exists identifier($airbyte_warehouse) -warehouse_size = xsmall -warehouse_type = standard -auto_suspend = 60 -auto_resume = true -initially_suspended = true; + USE DATABASE identifier($airbyte_database); --- create Airbyte database -create database if not exists identifier($airbyte_database); + -- create schema for Airbyte data + CREATE SCHEMA IF NOT EXISTS identifier($airbyte_schema); --- grant Airbyte warehouse access -grant USAGE -on warehouse identifier($airbyte_warehouse) -to role identifier($airbyte_role); + commit; --- grant Airbyte database access -grant OWNERSHIP -on database identifier($airbyte_database) -to role identifier($airbyte_role); + begin; -commit; + -- grant Airbyte schema access + grant OWNERSHIP + on schema identifier($airbyte_schema) + to role identifier($airbyte_role); -begin; + commit; + -USE DATABASE identifier($airbyte_database); +3. Run the script using the [Worksheet page](https://docs.snowflake.com/en/user-guide/ui-worksheet.html) or [Snowlight](https://docs.snowflake.com/en/user-guide/ui-snowsight-gs.html). Make sure to select the **All Queries** checkbox. 
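+
+If you want to double-check the result before moving on, one option is to connect as the new user from Python with the `snowflake-connector-python` package. This snippet is only an illustration and is not required by Airbyte; the account identifier and password are placeholders that you must replace with your own values (or with whatever names you chose in the script above):
+
+```python
+# Optional sanity check: connect as the Airbyte user and print the session context.
+# Requires `pip install snowflake-connector-python`; all values below are placeholders.
+import snowflake.connector
+
+conn = snowflake.connector.connect(
+    account="xy12345.us-east-2.aws",   # your Snowflake account identifier
+    user="AIRBYTE_USER",
+    password="password",               # the password you set in the script above
+    role="AIRBYTE_ROLE",
+    warehouse="AIRBYTE_WAREHOUSE",
+    database="AIRBYTE_DATABASE",
+    schema="AIRBYTE_SCHEMA",
+)
+try:
+    cur = conn.cursor()
+    cur.execute("select current_role(), current_warehouse(), current_database(), current_schema()")
+    print(cur.fetchone())
+finally:
+    conn.close()
+```
+
+If the query prints the role, warehouse, database, and schema you created, the Airbyte user is ready to use.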
--- create schema for Airbyte data -CREATE SCHEMA IF NOT EXISTS identifier($airbyte_schema); +## Step 2: Set up a data loading method -commit; +By default, Airbyte uses Snowflake’s [Internal Stage](https://docs.snowflake.com/en/user-guide/data-load-local-file-system-create-stage.html) to load data. -begin; +Make sure the database and schema have the `USAGE` privilege. --- grant Airbyte schema access -grant OWNERSHIP -on schema identifier($airbyte_schema) -to role identifier($airbyte_role); +You can also store data externally using an [Amazon S3 bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html), a [Google Cloud Storage (GCS) bucket](https://cloud.google.com/storage/docs/introduction), or [Azure Blob Storage](https://docs.microsoft.com/en-us/azure/storage/blobs/). -commit; -``` -### Setup the Snowflake destination in Airbyte +### Using an Amazon S3 bucket -You should now have all the requirements needed to configure Snowflake as a destination in the UI. You'll need the following information to configure the Snowflake destination: +To use an Amazon S3 bucket, [create a new Amazon S3 bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/create-bucket-overview.html) with read/write access for Airbyte to stage data to Snowflake. -#### There are 2 way ways of oauth supported: login\pass and oauth2. -### Login and Password -* **[Host](https://docs.snowflake.com/en/user-guide/admin-account-identifier.html)** : The host domain of the snowflake instance (must include the account, region, cloud environment, and end with snowflakecomputing.com). Be sure to use the correct [account identifier format](https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#account-identifier-formats-by-cloud-platform-and-region) based on the region you are in: - * Example - us-west-1: `xy12345.snowflakecomputing.com` - * Example - us-east-2: `xy12345.us-east-2.aws.snowflakecomputing.com` -* **[Role](https://docs.snowflake.com/en/user-guide/security-access-control-overview.html#roles)** : The role you created for Airbyte to access Snowflake. Example - `AIRBYTE_ROLE` -* **[Warehouse](https://docs.snowflake.com/en/user-guide/warehouses-overview.html#overview-of-warehouses)** : The warehouse you created for Airbyte to sync data into. Example - `AIRBYTE_WAREHOUSE` -* **[Database](https://docs.snowflake.com/en/sql-reference/ddl-database.html#database-schema-share-ddl)** : The database you created for Airbyte to sync data into. Example - `AIRBYTE_DATABASE` -* **[Schema](https://docs.snowflake.com/en/sql-reference/ddl-database.html#database-schema-share-ddl)** : The default schema is used as the target schema for all statements issued from the connection that do not explicitly specify a schema name. Schema name would be transformed to allowed by Snowflake if it not follow [Snowflake Naming Conventions](https://docs.airbyte.io/integrations/destinations/snowflake#notes-about-snowflake-naming-conventions). -* **Username** : The username you created to allow Airbyte to access the database. Example - `AIRBYTE_USER` -* **Password** : The password associated with the username. -* **[JDBC URL Params](https://docs.snowflake.com/en/user-guide/jdbc-parameters.html)** (Optional) : Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3). 
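+
+One way to confirm that the credentials you plan to give Airbyte can actually read and write the staging bucket is a short `boto3` round trip. This is purely illustrative and not something Airbyte requires you to run; the bucket name, region, and credentials below are placeholders:
+
+```python
+# Optional: verify read/write access to the S3 staging bucket before configuring Airbyte.
+# Requires `pip install boto3`; the bucket, region, and credentials are placeholders.
+import boto3
+
+s3 = boto3.client(
+    "s3",
+    aws_access_key_id="YOUR_ACCESS_KEY_ID",
+    aws_secret_access_key="YOUR_SECRET_ACCESS_KEY",
+    region_name="us-east-2",
+)
+
+bucket = "airbyte-staging-example"
+key = "airbyte-connection-test.txt"
+
+s3.put_object(Bucket=bucket, Key=key, Body=b"airbyte staging test")  # write
+print(s3.get_object(Bucket=bucket, Key=key)["Body"].read())          # read
+s3.delete_object(Bucket=bucket, Key=key)                             # clean up
+```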
+### Using a Google Cloud Storage (GCS) bucket -### OAuth 2.0 -* **[Host](https://docs.snowflake.com/en/user-guide/admin-account-identifier.html)** : The host domain of the snowflake instance (must include the account, region, cloud environment, and end with snowflakecomputing.com). Be sure to use the correct [account identifier format](https://docs.snowflake.com/en/user-guide/admin-account-identifier.html#account-identifier-formats-by-cloud-platform-and-region) based on the region you are in: - * Example - us-west-1: `xy12345.snowflakecomputing.com` - * Example - us-east-2: `xy12345.us-east-2.aws.snowflakecomputing.com` -* **[Role](https://docs.snowflake.com/en/user-guide/security-access-control-overview.html#roles)** : The role you created for Airbyte to access Snowflake. Example - `AIRBYTE_ROLE` -* **[Warehouse](https://docs.snowflake.com/en/user-guide/warehouses-overview.html#overview-of-warehouses)** : The warehouse you created for Airbyte to sync data into. Example - `AIRBYTE_WAREHOUSE` -* **[Database](https://docs.snowflake.com/en/sql-reference/ddl-database.html#database-schema-share-ddl)** : The database you created for Airbyte to sync data into. Example - `AIRBYTE_DATABASE` -* **[Schema](https://docs.snowflake.com/en/sql-reference/ddl-database.html#database-schema-share-ddl)** : The default schema is used as the target schema for all statements issued from the connection that do not explicitly specify a schema name. Schema name would be transformed to allowed by Snowflake if it not follow [Snowflake Naming Conventions](https://docs.airbyte.io/integrations/destinations/snowflake#notes-about-snowflake-naming-conventions). -* **Username** : The username you created to allow Airbyte to access the database. Example - `AIRBYTE_USER` -* **OAuth2** : The Login name and password to obtain auth token. -* **[JDBC URL Params](https://docs.snowflake.com/en/user-guide/jdbc-parameters.html)** (Optional) : Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3). +To use a GCS bucket: +1. Navigate to the Google Cloud Console and [create a new GCS bucket](https://cloud.google.com/storage/docs/creating-buckets) with read/write access for Airbyte to stage data to Snowflake. +2. [Generate a JSON key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys#creating_service_account_keys) for your service account. +3. Edit the following script to replace `AIRBYTE_ROLE` with the role you used for Airbyte's Snowflake configuration and `YOURBUCKETNAME` with your GCS bucket name. + ```text + create storage INTEGRATION gcs_airbyte_integration + TYPE = EXTERNAL_STAGE + STORAGE_PROVIDER = GCS + ENABLED = TRUE + STORAGE_ALLOWED_LOCATIONS = ('gcs://YOURBUCKETNAME'); -## Notes about Snowflake Naming Conventions + create stage gcs_airbyte_stage + url = 'gcs://YOURBUCKETNAME' + storage_integration = gcs_airbyte_integration; -From [Snowflake Identifiers syntax](https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html): + GRANT USAGE ON integration gcs_airbyte_integration TO ROLE AIRBYTE_ROLE; + GRANT USAGE ON stage gcs_airbyte_stage TO ROLE AIRBYTE_ROLE; -### Unquoted Identifiers: + DESC STORAGE INTEGRATION gcs_airbyte_integration; + ``` + The final query should show a `STORAGE_GCP_SERVICE_ACCOUNT` property with an email as the property value. Add read/write permissions to your bucket with that email. + +4. 
Navigate to the Snowflake UI and run the script as a [Snowflake account admin](https://docs.snowflake.com/en/user-guide/security-access-control-considerations.html) using the [Worksheet page](https://docs.snowflake.com/en/user-guide/ui-worksheet.html) or [Snowlight](https://docs.snowflake.com/en/user-guide/ui-snowsight-gs.html). -* Start with a letter \(A-Z, a-z\) or an underscore \(“\_”\). -* Contain only letters, underscores, decimal digits \(0-9\), and dollar signs \(“$”\). -* Are case-insensitive. +### Using Azure Blob Storage -When an identifier is unquoted, it is stored and resolved in uppercase. +To use Azure Blob Storage, you will need to [create a storage account](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-create?tabs=azure-portal) and [container](https://docs.microsoft.com/en-us/rest/api/storageservices/create-container), and provide a [SAS Token](https://docs.snowflake.com/en/user-guide/data-load-azure-config.html#option-2-generating-a-sas-token) to access the container. We recommend creating a dedicated container for Airbyte to stage data to Snowflake. Airbyte needs read/write access to interact with this container. -### Quoted Identifiers: -* The identifier is case-sensitive. -* Delimited identifiers \(i.e. identifiers enclosed in double quotes\) can start with and contain any valid characters, including: - * Numbers - * Special characters \(., ', !, @, \#, $, %, ^, &, \*, etc.\) - * Extended ASCII and non-ASCII characters - * Blank spaces +## Step 3: Set up Snowflake as a destination in Airbyte -When an identifier is double-quoted, it is stored and resolved exactly as entered, including case. +Navigate to the Airbyte UI to set up Snowflake as a destination. You'll need the following information to configure the Snowflake destination: -### Note +| Field | Description | +|---|---| +| [Host](https://docs.snowflake.com/en/user-guide/admin-account-identifier.html) | The host domain of the snowflake instance (must include the account, region, cloud environment, and end with snowflakecomputing.com). Example: `accountname.us-east-2.aws.snowflakecomputing.com` | +| [Role](https://docs.snowflake.com/en/user-guide/security-access-control-overview.html#roles) | The role you created in Step 1 for Airbyte to access Snowflake. Example: `AIRBYTE_ROLE` | +| [Warehouse](https://docs.snowflake.com/en/user-guide/warehouses-overview.html#overview-of-warehouses) | The warehouse you created in Step 1 for Airbyte to sync data into. Example: `AIRBYTE_WAREHOUSE` | +| [Database](https://docs.snowflake.com/en/sql-reference/ddl-database.html#database-schema-share-ddl) | The database you created in Step 1 for Airbyte to sync data into. Example: `AIRBYTE_DATABASE` | +| [Schema](https://docs.snowflake.com/en/sql-reference/ddl-database.html#database-schema-share-ddl) | The default schema used as the target schema for all statements issued from the connection that do not explicitly specify a schema name. | +| Username | The username you created in Step 1 to allow Airbyte to access the database. Example: `AIRBYTE_USER` | +| Password | The password associated with the username. | +| [JDBC URL Params](https://docs.snowflake.com/en/user-guide/jdbc-parameters.html) (Optional) | Additional properties to pass to the JDBC URL string when connecting to the database formatted as `key=value` pairs separated by the symbol `&`. 
Example: `key1=value1&key2=value2&key3=value3` | -* Regardless of whether an identifier is unquoted or double-quoted, the maximum number of characters allowed is 255 \(including blank spaces\). -* Identifiers can also be specified using string literals, session variables or bind variables. For details, see SQL Variables. -* If an object is created using a double-quoted identifier, when referenced in a query or any other SQL statement, the identifier must be specified exactly as created, including the double quotes. Failure to include the quotes might result in an Object does not exist error \(or similar type of error\). -* Also, note that the entire identifier must be enclosed in quotes when referenced in a query/SQL statement. This is particularly important if periods \(.\) are used in identifiers because periods are also used in fully-qualified object names to separate each object. +To use AWS S3 as the cloud storage, enter the information for the S3 bucket you created in Step 2: -Therefore, Airbyte Snowflake destination will create tables and schemas using the Unquoted identifiers when possible or fallback to Quoted Identifiers if the names are containing special characters. +| Field | Description | +|---|---| +| S3 Bucket Name | The name of the staging S3 bucket (Example: `airbyte.staging`). Airbyte will write files to this bucket and read them via statements on Snowflake. | +| S3 Bucket Region | The S3 staging bucket region used. | +| S3 Key Id * | The Access Key ID granting access to the S3 staging bucket. Airbyte requires Read and Write permissions for the bucket. | +| S3 Access Key * | The corresponding secret to the S3 Key ID. | +| Stream Part Size (Optional) | Increase this if syncing tables larger than 100GB. Files are streamed to S3 in parts. This determines the size of each part, in MBs. As S3 has a limit of 10,000 parts per file, part size affects the table size. This is 10MB by default, resulting in a default limit of 100GB tables.
Note, a larger part size will result in larger memory requirements. A rule of thumb is to multiply the part size by 10 to get the memory requirement. Modify this with care. (e.g. 5) | +| Purge Staging Files and Tables | Determines whether to delete the staging files from S3 after completing the sync. Specifically, the connector will create CSV files named `bucketPath/namespace/streamName/syncDate_epochMillis_randomUuid.csv` containing three columns (`ab_id`, `data`, `emitted_at`). Normally these files are deleted after sync; if you want to keep them for other purposes, set `purge_staging_data` to false. | -## Loading Method +To use GCS as the cloud storage, enter the information for the GCS bucket you created in Step 2: -By default, Airbyte uses [INTERNAL STAGING](https://docs.airbyte.com/integrations/destinations/snowflake#internal-staging) +| Field | Description | +|---|---| +| GCP Project ID | The name of the GCP project ID for your credentials. (Example: `my-project`) | +| GCP Bucket Name | The name of the staging GCS bucket. Airbyte will write files to this bucket and read them via statements on Snowflake. (Example: `airbyte-staging`) | +| Google Application Credentials | The contents of the JSON key file that has read/write permissions to the staging GCS bucket. You will separately need to grant bucket access to your Snowflake GCP service account. See the [GCP docs](https://cloud.google.com/iam/docs/creating-managing-service-account-keys#creating_service_account_keys) for more information on how to generate a JSON key for your service account. | -### Internal Staging +To use Azure Blob storage, enter the information for the storage you created in Step 2: -Internal named stages are storage location objects within a Snowflake database/schema. Because they are database objects, the same security permissions apply as with any other database objects. No need to provide additional properties for internal staging. This is also the recommended way of using the connector. It doesn't require any external resources and is quick to setup and use. +| Field | Description | +|---|---| +| Endpoint Domain Name | Leave default value `blob.core.windows.net` or [map a custom domain](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-custom-domain-name?tabs=azure-portal) to an Azure Blob Storage endpoint. | +| Azure Blob Storage Account Name | The Azure storage account you created in Step 2. | +| Azure blob storage container (Bucket) Name | The Azure blob storage container you created in Step 2. | +| SAS Token | The SAS Token you provided in Step 2. | -**Operating on a stage also requires the USAGE privilege on the parent database and schema.** -### Azure Blob Storage Staging +## Output schema -For Azure Blob Storage, you will need to create a storage account and container and provide SAS Token to access the container. We recommend creating a container that is only used for Airbyte to stage data to Snowflake. Airbyte needs read/write access to interact with this container. +Airbyte outputs each stream into its own table with the following columns in Snowflake: -Provide the required Azure Blob info. +| Airbyte field | Description | Column type | +|---|---|---| +| _airbyte_ab_id | A UUID assigned to each processed event | VARCHAR | +| _airbyte_emitted_at | A timestamp for when the event was pulled from the data source | TIMESTAMP WITH TIME ZONE | +| _airbyte_data | A JSON blob with the event data. 
| VARIANT | -* **Endpoint Domain Name** - * Leave default value *blob.core.windows.net* or [map a custom domain](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-custom-domain-name?tabs=azure-portal) to an Azure Blob Storage endpoint. -* **Azure Blob Storage Account Name** - * An Azure storage account contains all of your Azure Storage data objects, including blobs, file shares, queues, tables, and disks. The storage account provides a unique namespace for your Azure Storage data. - * See [this](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-create?tabs=azure-portal) to create a storage account. -* **Azure blob storage container (Bucket) Name** - * See [this](https://docs.microsoft.com/en-us/rest/api/storageservices/create-container) to create container with REST API or create container with Azure UI. -* **SAS Token** - * A shared access signature (SAS) enables you to grant limited access to containers and blobs in your storage account. Please pay attention on [read and write permissions](https://docs.snowflake.com/en/user-guide/data-load-azure-config.html#option-2-generating-a-sas-token) to use Snowflake staging +**Note:** By default, Airbyte creates permanent tables. If you prefer transient tables, create a dedicated transient database for Airbyte. For more information, refer to[ Working with Temporary and Transient Tables](https://docs.snowflake.com/en/user-guide/tables-temp-transient.html) -### AWS S3 Staging -For AWS S3, you will need to create a bucket and provide credentials to access the bucket. We recommend creating a bucket that is only used for Airbyte to stage data to Snowflake. Airbyte needs read/write access to interact with this bucket. +## Supported sync modes -Provide the required S3 info. +The Snowflake destination supports the following sync modes: -* **S3 Bucket Name** - * See [this](https://docs.aws.amazon.com/AmazonS3/latest/userguide/create-bucket-overview.html) to create an S3 bucket. -* **S3 Bucket Region** - * Place the S3 bucket and the Redshift cluster in the same region to save on networking costs. -* **Access Key Id** - * See [this](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) on how to generate an access key. - * We recommend creating an Airbyte-specific user. This user will require [read and write permissions](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_examples_s3_rw-bucket.html) to objects in the staging bucket. -* **Secret Access Key** - * Corresponding key to the above key id. -* **Part Size** - * Affects the size limit of an individual Redshift table. Optional. Increase this if syncing tables larger than 100GB. Files are streamed to S3 in parts. This determines the size of each part, in MBs. As S3 has a limit of 10,000 parts per file, part size affects the table size. This is 10MB by default, resulting in a default table limit of 100GB. Note, a larger part size will result in larger memory requirements. A rule of thumb is to multiply the part size by 10 to get the memory requirement. Modify this with care. 
+- [Full Refresh Sync](https://docs.airbyte.com/understanding-airbyte/glossary#full-refresh-sync) +- [Full Refresh - Append](https://docs.airbyte.com/understanding-airbyte/connections/full-refresh-append) +- [Incremental Sync - Append](https://docs.airbyte.com/understanding-airbyte/connections/incremental-append) +- [Incremental Sync - Deduped History](https://docs.airbyte.com/understanding-airbyte/connections/incremental-deduped-history) -Optional parameters: -* **Purge Staging Data** - * Whether to delete the staging files from S3 after completing the sync. Specifically, the connector will create CSV files named `bucketPath/namespace/streamName/syncDate_epochMillis_randomUuid.csv` containing three columns (`ab_id`, `data`, `emitted_at`). Normally these files are deleted after the `COPY` command completes; if you want to keep them for other purposes, set `purge_staging_data` to `false`. +## Snowflake tutorials +Now that you have set up the Snowflake destination connector, check out the following Snowflake tutorials: -### Google Cloud Storage \(GCS\) Staging +- [Build a data ingestion pipeline from Mailchimp to Snowflake](https://airbyte.com/tutorials/data-ingestion-pipeline-mailchimp-snowflake) +- [Replicate data from a PostgreSQL database to Snowflake](https://airbyte.com/tutorials/postgresql-database-to-snowflake) +- [Migrate your data from Redshift to Snowflake](https://airbyte.com/tutorials/redshift-to-snowflake) +- [Orchestrate ELT pipelines with Prefect, Airbyte and dbt](https://airbyte.com/tutorials/elt-pipeline-prefect-airbyte-dbt) -First you will need to create a GCS bucket. - -Then you will need to run the script below: - -* You must run the script as the account admin for Snowflake. -* You should replace `AIRBYTE_ROLE` with the role you used for Airbyte's Snowflake configuration. -* Replace `YOURBUCKETNAME` with your bucket name -* The stage name can be modified to any valid name. -* `gcs_airbyte_integration` must be used - -The script: - -```text -create storage INTEGRATION gcs_airbyte_integration - TYPE = EXTERNAL_STAGE - STORAGE_PROVIDER = GCS - ENABLED = TRUE - STORAGE_ALLOWED_LOCATIONS = ('gcs://YOURBUCKETNAME'); - -create stage gcs_airbyte_stage - url = 'gcs://YOURBUCKETNAME' - storage_integration = gcs_airbyte_integration; - -GRANT USAGE ON integration gcs_airbyte_integration TO ROLE AIRBYTE_ROLE; -GRANT USAGE ON stage gcs_airbyte_stage TO ROLE AIRBYTE_ROLE; - -DESC STORAGE INTEGRATION gcs_airbyte_integration; -``` - -The final query should show a `STORAGE_GCP_SERVICE_ACCOUNT` property with an email as the property value. - -Finally, you need to add read/write permissions to your bucket with that email. 
## Changelog | Version | Date | Pull Request | Subject | |:--------|:-----------| :----- | :------ | +| 0.4.22 | 2022-03-18 | [\#10793](https://github.com/airbytehq/airbyte/pull/10793) | Fix namespace with invalid characters | +| 0.4.21 | 2022-03-18 | [\#11071](https://github.com/airbytehq/airbyte/pull/11071) | Switch to compressed on-disk buffering before staging to s3/internal stage | | 0.4.20 | 2022-03-14 | [\#10341](https://github.com/airbytehq/airbyte/pull/10341) | Add Azure blob staging support | | 0.4.19 | 2022-03-11 | [10699](https://github.com/airbytehq/airbyte/pull/10699) | Added unit tests | | 0.4.17 | 2022-02-25 | [10421](https://github.com/airbytehq/airbyte/pull/10421) | Refactor JDBC parameters handling | diff --git a/docs/integrations/sources/chargify.md b/docs/integrations/sources/chargify.md new file mode 100644 index 000000000000..e78fed1a4a95 --- /dev/null +++ b/docs/integrations/sources/chargify.md @@ -0,0 +1,46 @@ +# Chargify + +## Overview + +The Chargify source supports Full Refresh syncs for Customers and Subscriptions endpoints. + +### Available streams + +Several output streams are available from this source: + +* [Customers](https://developers.chargify.com/docs/api-docs/b3A6MTQxMDgyNzY-list-or-find-customers) +* [Subscriptions](https://developers.chargify.com/docs/api-docs/b3A6MTQxMDgzODk-list-subscriptions) + +If there are more streams you'd like Airbyte to support, please [create an issue.](https://github.com/airbytehq/airbyte/issues/new/choose) + +### Features + +| Feature | Supported? | +| :--- | :--- | +| Full Refresh Sync | Yes | +| Incremental Sync | No | +| Replicate Incremental Deletes | No | +| SSL connection | Yes | +| Namespaces | No | + +### Performance considerations + +The Chargify connector should not run into Chargify API limitations under normal usage. Please [create an issue](https://github.com/airbytehq/airbyte/issues) if you see any rate limit issues that are not automatically retried successfully. + +## Getting started + +### Requirements + +* Chargify API Key +* Chargify domain + +### Setup guide + +Please follow the [Chargify documentation for generating an API key](https://developers.chargify.com/docs/api-docs/YXBpOjE0MTA4MjYx-chargify-api). 
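+
+If you want to sanity-check the API key and domain before configuring the source in Airbyte, a small request against the Customers endpoint is one option. The sketch below is illustrative only; it assumes Chargify's HTTP Basic authentication (the API key as the username and `x` as the password), and the subdomain and key are placeholders:
+
+```python
+# Optional: confirm the Chargify API key and domain respond as expected.
+# Requires `pip install requests`; the subdomain and API key are placeholders.
+import requests
+
+CHARGIFY_DOMAIN = "your-subdomain.chargify.com"
+API_KEY = "your_api_key"
+
+response = requests.get(
+    f"https://{CHARGIFY_DOMAIN}/customers.json",
+    auth=(API_KEY, "x"),        # assumed Basic auth scheme: API key as username, "x" as password
+    params={"per_page": 1},     # keep the response small
+)
+response.raise_for_status()
+print(response.json())
+```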
+ +## Changelog + +| Version | Date | Pull Request | Subject | +| :--- | :--- | :--- | :--- | +| 0.1.0 | 2022-03-16 | [10853](https://github.com/airbytehq/airbyte/pull/10853) | Initial release | + diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 763501a8fa21..3eedda399f0f 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -40,6 +40,7 @@ This connector outputs the following incremental streams: * [Review comments](https://docs.github.com/en/rest/reference/pulls#list-review-comments-in-a-repository) * [Stargazers](https://docs.github.com/en/rest/reference/activity#list-stargazers) * [Deployments](https://docs.github.com/en/rest/reference/deployments#list-deployments) +* [Project cards](https://docs.github.com/en/rest/reference/projects#list-project-cards) * [Project columns](https://docs.github.com/en/rest/reference/projects#list-project-columns) ### Notes diff --git a/docs/integrations/sources/google-ads.md b/docs/integrations/sources/google-ads.md index e89ac94aca97..b83a4aabe6a9 100644 --- a/docs/integrations/sources/google-ads.md +++ b/docs/integrations/sources/google-ads.md @@ -102,21 +102,22 @@ This source is constrained by whatever API limits are set for the Google Ads tha | Version | Date | Pull Request | Subject | |:---------|:-----------| :--- |:---------------------------------------------------------------------------------------------| +| `0.1.29` | 2022-03-22 | [10919](https://github.com/airbytehq/airbyte/pull/10919) | Fix user location report schema and add to acceptance tests | | `0.1.28` | 2022-02-25 | [10372](https://github.com/airbytehq/airbyte/pull/10372) | Add network fields to click view stream | | `0.1.27` | 2022-02-16 | [10315](https://github.com/airbytehq/airbyte/pull/10315) | Make `ad_group_ads` and other streams support incremental sync. | | `0.1.26` | 2022-02-11 | [10150](https://github.com/airbytehq/airbyte/pull/10150) | Add support for multiple customer IDs. | -| `0.1.25` | 2022-02-04 | [9812](https://github.com/airbytehq/airbyte/pull/9812) | Handle `EXPIRED_PAGE_TOKEN` exception and retry with updated state. | -| `0.1.24` | 2022-02-04 | [9996](https://github.com/airbytehq/airbyte/pull/9996) | Use Google Ads API version V9. | -| `0.1.23` | 2022-01-25 | [8669](https://github.com/airbytehq/airbyte/pull/8669) | Add end date parameter in spec. | -| `0.1.22` | 2022-01-24 | [9608](https://github.com/airbytehq/airbyte/pull/9608) | Reduce stream slice date range. | -| `0.1.21` | 2021-12-28 | [9149](https://github.com/airbytehq/airbyte/pull/9149) | Update title and description | -| `0.1.20` | 2021-12-22 | [9071](https://github.com/airbytehq/airbyte/pull/9071) | Fix: Keyword schema enum | -| `0.1.19` | 2021-12-14 | [8431](https://github.com/airbytehq/airbyte/pull/8431) | Add new streams: Geographic and Keyword | -| `0.1.18` | 2021-12-09 | [8225](https://github.com/airbytehq/airbyte/pull/8225) | Include time_zone to sync. Remove streams for manager account. 
| -| `0.1.16` | 2021-11-22 | [8178](https://github.com/airbytehq/airbyte/pull/8178) | clarify setup fields | -| `0.1.15` | 2021-10-07 | [6684](https://github.com/airbytehq/airbyte/pull/6684) | Add new stream `click_view` | -| `0.1.14` | 2021-10-01 | [6565](https://github.com/airbytehq/airbyte/pull/6565) | Fix OAuth Spec File | -| `0.1.13` | 2021-09-27 | [6458](https://github.com/airbytehq/airbyte/pull/6458) | Update OAuth Spec File | +| `0.1.25` | 2022-02-04 | [9812](https://github.com/airbytehq/airbyte/pull/9812) | Handle `EXPIRED_PAGE_TOKEN` exception and retry with updated state. | +| `0.1.24` | 2022-02-04 | [9996](https://github.com/airbytehq/airbyte/pull/9996) | Use Google Ads API version V9. | +| `0.1.23` | 2022-01-25 | [8669](https://github.com/airbytehq/airbyte/pull/8669) | Add end date parameter in spec. | +| `0.1.22` | 2022-01-24 | [9608](https://github.com/airbytehq/airbyte/pull/9608) | Reduce stream slice date range. | +| `0.1.21` | 2021-12-28 | [9149](https://github.com/airbytehq/airbyte/pull/9149) | Update title and description | +| `0.1.20` | 2021-12-22 | [9071](https://github.com/airbytehq/airbyte/pull/9071) | Fix: Keyword schema enum | +| `0.1.19` | 2021-12-14 | [8431](https://github.com/airbytehq/airbyte/pull/8431) | Add new streams: Geographic and Keyword | +| `0.1.18` | 2021-12-09 | [8225](https://github.com/airbytehq/airbyte/pull/8225) | Include time_zone to sync. Remove streams for manager account. | +| `0.1.16` | 2021-11-22 | [8178](https://github.com/airbytehq/airbyte/pull/8178) | clarify setup fields | +| `0.1.15` | 2021-10-07 | [6684](https://github.com/airbytehq/airbyte/pull/6684) | Add new stream `click_view` | +| `0.1.14` | 2021-10-01 | [6565](https://github.com/airbytehq/airbyte/pull/6565) | Fix OAuth Spec File | +| `0.1.13` | 2021-09-27 | [6458](https://github.com/airbytehq/airbyte/pull/6458) | Update OAuth Spec File | | `0.1.11` | 2021-09-22 | [\#6373](https://github.com/airbytehq/airbyte/pull/6373) | Fix inconsistent segments.date field type across all streams | | `0.1.10` | 2021-09-13 | [\#6022](https://github.com/airbytehq/airbyte/pull/6022) | Annotate Oauth2 flow initialization parameters in connector spec | | `0.1.9` | 2021-09-07 | [\#5302](https://github.com/airbytehq/airbyte/pull/5302) | Add custom query stream support | diff --git a/docs/integrations/sources/hubspot.md b/docs/integrations/sources/hubspot.md index 4163ded8f3d3..17dae86ef2b6 100644 --- a/docs/integrations/sources/hubspot.md +++ b/docs/integrations/sources/hubspot.md @@ -21,19 +21,28 @@ This source is capable of syncing the following tables and their data: * [Companies](https://developers.hubspot.com/docs/api/crm/companies) \(Incremental\) * [Contact Lists](http://developers.hubspot.com/docs/methods/lists/get_lists) \(Incremental\) * [Contacts](https://developers.hubspot.com/docs/methods/contacts/get_contacts) \(Incremental\) -* [Contacts list memberships](https://legacydocs.hubspot.com/docs/methods/contacts/get_contacts) +* [Contacts List Memberships](https://legacydocs.hubspot.com/docs/methods/contacts/get_contacts) * [Deal Pipelines](https://developers.hubspot.com/docs/methods/pipelines/get_pipelines_for_object_type) * [Deals](https://developers.hubspot.com/docs/api/crm/deals) \(including Contact associations\) \(Incremental\) * [Email Events](https://developers.hubspot.com/docs/methods/email/get_events) \(Incremental\) -* [Engagements](https://legacydocs.hubspot.com/docs/methods/engagements/get-all-engagements) +* 
[Engagements](https://legacydocs.hubspot.com/docs/methods/engagements/get-all-engagements) \(Incremental\) +* [Engagements Calls](https://developers.hubspot.com/docs/api/crm/calls) \(Incremental\) +* [Engagements Emails](https://developers.hubspot.com/docs/api/crm/email) \(Incremental\) +* [Engagements Meetings](https://developers.hubspot.com/docs/api/crm/meetings) \(Incremental\) +* [Engagements Notes](https://developers.hubspot.com/docs/api/crm/notes) \(Incremental\) +* [Engagements Tasks](https://developers.hubspot.com/docs/api/crm/tasks) \(Incremental\) +* [Feedback Submissions](https://developers.hubspot.com/docs/api/crm/feedback-submissions) \(Incremental\) * [Forms](https://developers.hubspot.com/docs/api/marketing/forms) +* [Form Submissions](https://legacydocs.hubspot.com/docs/methods/forms/get-submissions-for-a-form) * [Line Items](https://developers.hubspot.com/docs/api/crm/line-items) \(Incremental\) * [Marketing Emails](https://legacydocs.hubspot.com/docs/methods/cms_email/get-all-marketing-email-statistics) * [Owners](https://developers.hubspot.com/docs/methods/owners/get_owners) * [Products](https://developers.hubspot.com/docs/api/crm/products) \(Incremental\) +* [Property History](https://legacydocs.hubspot.com/docs/methods/contacts/get_contacts) \(Incremental\) * [Quotes](https://developers.hubspot.com/docs/api/crm/quotes) \(Incremental\) * [Subscription Changes](https://developers.hubspot.com/docs/methods/email/get_subscriptions_timeline) \(Incremental\) * [Tickets](https://developers.hubspot.com/docs/api/crm/tickets) \(Incremental\) +* [Ticket Pipelines](https://developers.hubspot.com/docs/api/crm/pipelines) * [Workflows](https://legacydocs.hubspot.com/docs/methods/workflows/v3/get_workflows) ### A note on the `engagements` stream @@ -48,25 +57,49 @@ Depending on the type of engagement, different properties will be set for that o * A `note` engagement will have a corresponding `engagements_metadata` object with non-null values in the `body` column. * A `task` engagement will have a corresponding `engagements_metadata` object with non-null values in the `body`, `status`, and `forObjectType` columns. -**Note**: HubSpot API currently only supports `quotes` endpoint using API Key, using Oauth it is impossible to access this stream (as reported by [community.hubspot.com](https://community.hubspot.com/t5/APIs-Integrations/Help-with-using-Feedback-CRM-API-and-Quotes-CRM-API/m-p/449104/highlight/true#M44411)). +**Note**: HubSpot API currently only supports `quotes` endpoint using API Key, using OAuth it is impossible to access this stream (as reported by [community.hubspot.com](https://community.hubspot.com/t5/APIs-Integrations/Help-with-using-Feedback-CRM-API-and-Quotes-CRM-API/m-p/449104/highlight/true#M44411)). -## Getting Started \(Airbyte Open-Source / Airbyte Cloud\) +## Getting Started -#### Requirements +### Requirements \(Airbyte Cloud\) + +1. Click `Authenticate your account` to sign in with Google and authorize your account. +2. Fill out a start date +3. You're done. + +{% hint style="info" %} +HubSpot's API will [rate limit](https://developers.hubspot.com/docs/api/usage-details) the amount of records you can sync daily, so make sure that you are on the appropriate plan if you are planning on syncing more than 250,000 records per day. 
+{% endhint %}
+
+### Requirements \(Airbyte Open-Source\)

* HubSpot Account
-* Api credentials
-* If using Oauth, [scopes](https://legacydocs.hubspot.com/docs/methods/oauth2/initiate-oauth-integration#scopes) enabled for the streams you want to sync
+* API or OAuth2.0 Credentials (See below)
+
+#### Using API Credentials
+
+* API Key
+
+To obtain the API Key for the account, go to settings -> integrations \(under the account banner\) -> API Key. If you already have an API Key you can use that. Otherwise, generate a new one. See [docs](https://knowledge.hubspot.com/integrations/how-do-i-get-my-hubspot-api-key) for more details.
+
+#### Using OAuth2.0 Credentials
+
+* Client ID
+* Client Secret
+* Refresh Token
+* If using OAuth, [scopes](https://legacydocs.hubspot.com/docs/methods/oauth2/initiate-oauth-integration#scopes) enabled for the streams you want to sync

-{% hint style="info" %} HubSpot's API will [rate limit](https://developers.hubspot.com/docs/api/usage-details) the amount of records you can sync daily, so make sure that you are on the appropriate plan if you are planning on syncing more than 250,000 records per day. {% endhint %}
+See the HubSpot [docs](https://legacydocs.hubspot.com/docs/methods/oauth2/oauth2-quickstart) if you need help finding these fields.

-This connector supports only authentication with API Key. To obtain API key for the account go to settings -> integrations \(under the account banner\) -> api key. If you already have an api key you can use that. Otherwise generated a new one. See [docs](https://knowledge.hubspot.com/integrations/how-do-i-get-my-hubspot-api-key) for more details.
+{% hint style="info" %}
+HubSpot's API will [rate limit](https://developers.hubspot.com/docs/api/usage-details) the amount of records you can sync daily, so make sure that you are on the appropriate plan if you are planning on syncing more than 250,000 records per day.
+{% endhint %}

## Rate Limiting & Performance

The connector is restricted by normal HubSpot [rate limitations](https://legacydocs.hubspot.com/apps/api_guidelines).

-When connector reads the stream using `API Key` that doesn't have neccessary permissions to read particular stream, like `workflows`, which requires to be enabled in order to be processed, the log message returned to the output and sync operation goes on with other streams available.
+Some streams, such as `workflows`, need to be enabled before they can be read using a connector authenticated with an `API Key`. If a stream that is not enabled is included in a sync, a log message is returned to the output and the sync operation syncs only the other available streams.

Example of the output message when trying to read `workflows` stream with missing permissions for the `API Key`:

@@ -75,14 +108,14 @@ Example of the output message when trying to read `workflows` stream with missin
    "type": "LOG",
    "log": {
        "level": "WARN",
-        "message": 'Stream `workflows` cannot be procced. This hapikey (EXAMPLE_API_KEY) does not have proper permissions! (requires any of [automation-access])'
+        "message": 'Stream `workflows` cannot be processed. This API Key (EXAMPLE_API_KEY) does not have proper permissions! (requires any of [automation-access])'
    }
}
```

-### Required scopes
+## Required scopes

-If you are using Oauth, most of the streams require the appropriate [scopes](https://legacydocs.hubspot.com/docs/methods/oauth2/initiate-oauth-integration#scopes) enabled for the API account.
+If you are using OAuth, most of the streams require the appropriate [scopes](https://legacydocs.hubspot.com/docs/methods/oauth2/initiate-oauth-integration#scopes) enabled for the API account. | Stream | Required Scope | | :--- | :--- | @@ -108,46 +141,49 @@ If you are using Oauth, most of the streams require the appropriate [scopes](htt ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------| :--- |:-----------------------------------------------------------------------------------------------------------------------------------------------| +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:---------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------| +| 0.1.50 | 2022-03-22 | [11266](https://github.com/airbytehq/airbyte/pull/11266) | Fix Engagements Stream Pagination | +| 0.1.49 | 2022-03-17 | [11218](https://github.com/airbytehq/airbyte/pull/11218) | Anchor hyperlink in input configuration | +| 0.1.48 | 2022-03-16 | [11105](https://github.com/airbytehq/airbyte/pull/11105) | Fix float numbers, upd docs | | 0.1.47 | 2022-03-15 | [11121](https://github.com/airbytehq/airbyte/pull/11121) | Add partition keys where appropriate | | 0.1.46 | 2022-03-14 | [10700](https://github.com/airbytehq/airbyte/pull/10700) | Handle 10k+ records reading in Hubspot streams | | 0.1.45 | 2022-03-04 | [10707](https://github.com/airbytehq/airbyte/pull/10707) | Remove stage history from deals stream to increase efficiency | -| 0.1.44 | 2022-02-24 | [9027](https://github.com/airbytehq/airbyte/pull/9027) | Add associations companies to deals, ticket and contact stream | +| 0.1.44 | 2022-02-24 | [9027](https://github.com/airbytehq/airbyte/pull/9027) | Add associations companies to deals, ticket and contact stream | | 0.1.43 | 2022-02-24 | [10576](https://github.com/airbytehq/airbyte/pull/10576) | Cast timestamp to date/datetime | | 0.1.42 | 2022-02-22 | [10492](https://github.com/airbytehq/airbyte/pull/10492) | Add `date-time` format to datetime fields | | 0.1.41 | 2022-02-21 | [10177](https://github.com/airbytehq/airbyte/pull/10177) | Migrate to CDK | | 0.1.40 | 2022-02-10 | [10142](https://github.com/airbytehq/airbyte/pull/10142) | Add associations to ticket stream | | 0.1.39 | 2022-02-10 | [10055](https://github.com/airbytehq/airbyte/pull/10055) | Bug fix: reading not initialized stream | -| 0.1.38 | 2022-02-03 | [9786](https://github.com/airbytehq/airbyte/pull/9786) | Add new streams for engagements(calls, emails, meetings, notes and tasks) | -| 0.1.37 | 2022-01-27 | [9555](https://github.com/airbytehq/airbyte/pull/9555) | Getting form_submission for all forms | -| 0.1.36 | 2022-01-22 | [7784](https://github.com/airbytehq/airbyte/pull/7784) | Add Property History Stream | -| 0.1.35 | 2021-12-24 | [9081](https://github.com/airbytehq/airbyte/pull/9081) | Add Feedback Submissions stream and update Ticket Pipelines stream | -| 0.1.34 | 2022-01-20 | [9641](https://github.com/airbytehq/airbyte/pull/9641) | Add more fields for `email_events` stream | -| 0.1.33 | 2022-01-14 | [8887](https://github.com/airbytehq/airbyte/pull/8887) | More efficient support for incremental updates on Companies, Contact, Deals and Engagement streams | -| 0.1.32 | 2022-01-13 | [8011](https://github.com/airbytehq/airbyte/pull/8011) | Add new stream form_submissions | -| 0.1.31 | 2022-01-11 | [9385](https://github.com/airbytehq/airbyte/pull/9385) | Remove 
auto-generated `properties` from `Engagements` stream | -| 0.1.30 | 2021-01-10 | [9129](https://github.com/airbytehq/airbyte/pull/9129) | Created Contacts list memberships streams | -| 0.1.29 | 2021-12-17 | [8699](https://github.com/airbytehq/airbyte/pull/8699) | Add incremental sync support for `companies`, `contact_lists`, `contacts`, `deals`, `line_items`, `products`, `quotes`, `tickets` streams | -| 0.1.28 | 2021-12-15 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update fields and descriptions | -| 0.1.27 | 2021-12-09 | [8658](https://github.com/airbytehq/airbyte/pull/8658) | Fixed config backward compatibility issue by allowing additional properties in the spec | -| 0.1.26 | 2021-11-30 | [8329](https://github.com/airbytehq/airbyte/pull/8329) | Removed 'skip_dynamic_fields' config param | -| 0.1.25 | 2021-11-23 | [8216](https://github.com/airbytehq/airbyte/pull/8216) | Add skip dynamic fields for testing only | -| 0.1.24 | 2021-11-09 | [7683](https://github.com/airbytehq/airbyte/pull/7683) | Fix name issue 'Hubspot' -> 'HubSpot' | -| 0.1.23 | 2021-11-08 | [7730](https://github.com/airbytehq/airbyte/pull/7730) | Fix oAuth flow schema | -| 0.1.22 | 2021-11-03 | [7562](https://github.com/airbytehq/airbyte/pull/7562) | Migrate Hubspot source to CDK structure | -| 0.1.21 | 2021-10-27 | [7405](https://github.com/airbytehq/airbyte/pull/7405) | Change of package `import` from `urllib` to `urllib.parse` | -| 0.1.20 | 2021-10-26 | [7393](https://github.com/airbytehq/airbyte/pull/7393) | Hotfix for `split_properties` function, add the length of separator symbol `,`(`%2C` in HTTP format) to the checking of the summary URL length | -| 0.1.19 | 2021-10-26 | [6954](https://github.com/airbytehq/airbyte/pull/6954) | Fix issue with getting `414` HTTP error for streams | -| 0.1.18 | 2021-10-18 | [5840](https://github.com/airbytehq/airbyte/pull/5840) | Add new marketing emails (with statistics) stream | -| 0.1.17 | 2021-10-14 | [6995](https://github.com/airbytehq/airbyte/pull/6995) | Update `discover` method: disable `quotes` stream when using OAuth config | -| 0.1.16 | 2021-09-27 | [6465](https://github.com/airbytehq/airbyte/pull/6465) | Implement OAuth support. 
Use CDK authenticator instead of connector specific authenticator | -| 0.1.15 | 2021-09-23 | [6374](https://github.com/airbytehq/airbyte/pull/6374) | Use correct schema for `owners` stream | -| 0.1.14 | 2021-09-08 | [5693](https://github.com/airbytehq/airbyte/pull/5693) | Include deal\_to\_contact association when pulling deal stream and include contact ID in contact stream | -| 0.1.13 | 2021-09-08 | [5834](https://github.com/airbytehq/airbyte/pull/5834) | Fixed array fields without items property in schema | -| 0.1.12 | 2021-09-02 | [5798](https://github.com/airbytehq/airbyte/pull/5798) | Treat empty string values as None for field with format to fix normalization errors | -| 0.1.11 | 2021-08-26 | [5685](https://github.com/airbytehq/airbyte/pull/5685) | Remove all date-time format from schemas | -| 0.1.10 | 2021-08-17 | [5463](https://github.com/airbytehq/airbyte/pull/5463) | Fix fail on reading stream using `API Key` without required permissions | -| 0.1.9 | 2021-08-11 | [5334](https://github.com/airbytehq/airbyte/pull/5334) | Fix empty strings inside float datatype | -| 0.1.8 | 2021-08-06 | [5250](https://github.com/airbytehq/airbyte/pull/5250) | Fix issue with printing exceptions | -| 0.1.7 | 2021-07-27 | [4913](https://github.com/airbytehq/airbyte/pull/4913) | Update fields schema | \ No newline at end of file +| 0.1.38 | 2022-02-03 | [9786](https://github.com/airbytehq/airbyte/pull/9786) | Add new streams for engagements(calls, emails, meetings, notes and tasks) | +| 0.1.37 | 2022-01-27 | [9555](https://github.com/airbytehq/airbyte/pull/9555) | Getting form_submission for all forms | +| 0.1.36 | 2022-01-22 | [7784](https://github.com/airbytehq/airbyte/pull/7784) | Add Property History Stream | +| 0.1.35 | 2021-12-24 | [9081](https://github.com/airbytehq/airbyte/pull/9081) | Add Feedback Submissions stream and update Ticket Pipelines stream | +| 0.1.34 | 2022-01-20 | [9641](https://github.com/airbytehq/airbyte/pull/9641) | Add more fields for `email_events` stream | +| 0.1.33 | 2022-01-14 | [8887](https://github.com/airbytehq/airbyte/pull/8887) | More efficient support for incremental updates on Companies, Contact, Deals and Engagement streams | +| 0.1.32 | 2022-01-13 | [8011](https://github.com/airbytehq/airbyte/pull/8011) | Add new stream form_submissions | +| 0.1.31 | 2022-01-11 | [9385](https://github.com/airbytehq/airbyte/pull/9385) | Remove auto-generated `properties` from `Engagements` stream | +| 0.1.30 | 2021-01-10 | [9129](https://github.com/airbytehq/airbyte/pull/9129) | Created Contacts list memberships streams | +| 0.1.29 | 2021-12-17 | [8699](https://github.com/airbytehq/airbyte/pull/8699) | Add incremental sync support for `companies`, `contact_lists`, `contacts`, `deals`, `line_items`, `products`, `quotes`, `tickets` streams | +| 0.1.28 | 2021-12-15 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Update fields and descriptions | +| 0.1.27 | 2021-12-09 | [8658](https://github.com/airbytehq/airbyte/pull/8658) | Fixed config backward compatibility issue by allowing additional properties in the spec | +| 0.1.26 | 2021-11-30 | [8329](https://github.com/airbytehq/airbyte/pull/8329) | Removed 'skip_dynamic_fields' config param | +| 0.1.25 | 2021-11-23 | [8216](https://github.com/airbytehq/airbyte/pull/8216) | Add skip dynamic fields for testing only | +| 0.1.24 | 2021-11-09 | [7683](https://github.com/airbytehq/airbyte/pull/7683) | Fix name issue 'Hubspot' -> 'HubSpot' | +| 0.1.23 | 2021-11-08 | [7730](https://github.com/airbytehq/airbyte/pull/7730) | Fix OAuth 
flow schema | +| 0.1.22 | 2021-11-03 | [7562](https://github.com/airbytehq/airbyte/pull/7562) | Migrate Hubspot source to CDK structure | +| 0.1.21 | 2021-10-27 | [7405](https://github.com/airbytehq/airbyte/pull/7405) | Change of package `import` from `urllib` to `urllib.parse` | +| 0.1.20 | 2021-10-26 | [7393](https://github.com/airbytehq/airbyte/pull/7393) | Hotfix for `split_properties` function, add the length of separator symbol `,`(`%2C` in HTTP format) to the checking of the summary URL length | +| 0.1.19 | 2021-10-26 | [6954](https://github.com/airbytehq/airbyte/pull/6954) | Fix issue with getting `414` HTTP error for streams | +| 0.1.18 | 2021-10-18 | [5840](https://github.com/airbytehq/airbyte/pull/5840) | Add new marketing emails (with statistics) stream | +| 0.1.17 | 2021-10-14 | [6995](https://github.com/airbytehq/airbyte/pull/6995) | Update `discover` method: disable `quotes` stream when using OAuth config | +| 0.1.16 | 2021-09-27 | [6465](https://github.com/airbytehq/airbyte/pull/6465) | Implement OAuth support. Use CDK authenticator instead of connector specific authenticator | +| 0.1.15 | 2021-09-23 | [6374](https://github.com/airbytehq/airbyte/pull/6374) | Use correct schema for `owners` stream | +| 0.1.14 | 2021-09-08 | [5693](https://github.com/airbytehq/airbyte/pull/5693) | Include deal\_to\_contact association when pulling deal stream and include contact ID in contact stream | +| 0.1.13 | 2021-09-08 | [5834](https://github.com/airbytehq/airbyte/pull/5834) | Fixed array fields without items property in schema | +| 0.1.12 | 2021-09-02 | [5798](https://github.com/airbytehq/airbyte/pull/5798) | Treat empty string values as None for field with format to fix normalization errors | +| 0.1.11 | 2021-08-26 | [5685](https://github.com/airbytehq/airbyte/pull/5685) | Remove all date-time format from schemas | +| 0.1.10 | 2021-08-17 | [5463](https://github.com/airbytehq/airbyte/pull/5463) | Fix fail on reading stream using `API Key` without required permissions | +| 0.1.9 | 2021-08-11 | [5334](https://github.com/airbytehq/airbyte/pull/5334) | Fix empty strings inside float datatype | +| 0.1.8 | 2021-08-06 | [5250](https://github.com/airbytehq/airbyte/pull/5250) | Fix issue with printing exceptions | +| 0.1.7 | 2021-07-27 | [4913](https://github.com/airbytehq/airbyte/pull/4913) | Update fields schema | diff --git a/docs/integrations/sources/instagram.md b/docs/integrations/sources/instagram.md index a87e3c7f0e25..974d744ecb42 100644 --- a/docs/integrations/sources/instagram.md +++ b/docs/integrations/sources/instagram.md @@ -19,13 +19,12 @@ For more information, see the [Instagram API](https://developers.facebook.com/do ### Data type mapping -| Integration Type | Airbyte Type | Notes | -| :--- | :--- | :--- | -| `string` | `string` | | -| `number` | `number` | | -| `array` | `array` | | -| `object` | `object` | | -| Namespaces | No | | +| Integration Type | Airbyte Type | +| :--- | :--- | +| `string` | `string` | +| `number` | `number` | +| `array` | `array` | +| `object` | `object` | ### Features diff --git a/docs/integrations/sources/intercom.md b/docs/integrations/sources/intercom.md index 537dba6b0a3f..7f3232262ad8 100644 --- a/docs/integrations/sources/intercom.md +++ b/docs/integrations/sources/intercom.md @@ -55,6 +55,8 @@ Please read [How to get your Access Token](https://developers.intercom.com/build | Version | Date | Pull Request | Subject | | :--- | :--- | :--- | :--- | +| 0.1.15 | 2022-03-22 | [11176](https://github.com/airbytehq/airbyte/pull/11176) | Correct 
`check_connection` URL |
+| 0.1.14 | 2022-03-16 | [11208](https://github.com/airbytehq/airbyte/pull/11208) | Improve 'conversations' incremental sync speed |
| 0.1.13 | 2022-01-14 | [9513](https://github.com/airbytehq/airbyte/pull/9513) | Added handling of scroll param when it expired |
| 0.1.12 | 2021-12-14 | [8429](https://github.com/airbytehq/airbyte/pull/8429) | Updated fields and descriptions |
| 0.1.11 | 2021-12-13 | [8685](https://github.com/airbytehq/airbyte/pull/8685) | Remove time.sleep for rate limit |
diff --git a/docs/integrations/sources/shopify.md b/docs/integrations/sources/shopify.md
index a64e758c63e4..0c408471531d 100644
--- a/docs/integrations/sources/shopify.md
+++ b/docs/integrations/sources/shopify.md
@@ -13,6 +13,45 @@ This source can sync data for the [Shopify API](https://help.shopify.com/en/api/
This Source Connector is based on a [Airbyte CDK](https://docs.airbyte.io/connector-development/cdk-python).
+## Troubleshooting
+
+Check out common troubleshooting issues for the Shopify source connector on our Discourse [here](https://discuss.airbyte.io/tags/c/connector/11/source-shopify).
+
+### Output schema
+
+This Source is capable of syncing the following core Streams:
+
+* [Abandoned Checkouts](https://help.shopify.com/en/api/reference/orders/abandoned_checkouts)
+* [Collects](https://help.shopify.com/en/api/reference/products/collect)
+* [Custom Collections](https://help.shopify.com/en/api/reference/products/customcollection)
+* [Customers](https://help.shopify.com/en/api/reference/customers)
+* [Draft Orders](https://help.shopify.com/en/api/reference/orders/draftorder)
+* [Discount Codes](https://shopify.dev/docs/admin-api/rest/reference/discounts/discountcode)
+* [Metafields](https://help.shopify.com/en/api/reference/metafield)
+* [Orders](https://help.shopify.com/en/api/reference/orders)
+* [Orders Refunds](https://shopify.dev/api/admin/rest/reference/orders/refund)
+* [Orders Risks](https://shopify.dev/api/admin/rest/reference/orders/order-risk)
+* [Products](https://help.shopify.com/en/api/reference/products)
+* [Transactions](https://help.shopify.com/en/api/reference/orders/transaction)
+* [Balance Transactions](https://shopify.dev/api/admin-rest/2021-07/resources/transactions)
+* [Pages](https://help.shopify.com/en/api/reference/online-store/page)
+* [Price Rules](https://help.shopify.com/en/api/reference/discounts/pricerule)
+* [Locations](https://shopify.dev/api/admin-rest/2021-10/resources/location)
+* [InventoryItems](https://shopify.dev/api/admin-rest/2021-10/resources/inventoryItem)
+* [InventoryLevels](https://shopify.dev/api/admin-rest/2021-10/resources/inventorylevel)
+* [Fulfillment Orders](https://shopify.dev/api/admin-rest/2021-07/resources/fulfillmentorder)
+* [Fulfillments](https://shopify.dev/api/admin-rest/2021-07/resources/fulfillment)
+* [Shop](https://shopify.dev/api/admin-rest/2021-07/resources/shop)
+
+#### NOTE:
+
+For a better experience with `Incremental Refresh`, the following is recommended:
+
+* `Order Refunds`, `Order Risks`, `Transactions` should be synced along with the `Orders` stream.
+* `Discount Codes` should be synced along with the `Price Rules` stream.
+
+If a child stream is synced on its own, without its parent stream, a full sync takes place and the records are filtered out afterwards, as illustrated in the sketch below.
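Below is a minimal, hypothetical Python sketch of that behaviour. It is not the connector's actual code, and the `updated_at` field name is only an assumption for illustration: a standalone child stream is read in full, and the saved state cursor is applied client-side afterwards.

```python
# Illustrative sketch only: shows why a standalone child stream still costs a full read.
# Assumption: records carry an ISO-8601 `updated_at` field used as the incremental cursor.
from datetime import datetime


def filter_after_full_read(records, state_cursor):
    """Drop records at or before the saved cursor, after everything has been fetched."""
    cursor = datetime.fromisoformat(state_cursor)
    return [r for r in records if datetime.fromisoformat(r["updated_at"]) > cursor]


refunds = [  # pretend result of a full read of a child stream such as Order Refunds
    {"id": 1, "updated_at": "2021-06-01T00:00:00+00:00"},
    {"id": 2, "updated_at": "2022-03-01T00:00:00+00:00"},
]
print(filter_after_full_read(refunds, "2022-01-01T00:00:00+00:00"))  # keeps only id 2
```

Syncing the child stream together with its parent avoids this extra pass, because the parent's incremental cursor bounds the API requests up front.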
+
### Data type mapping
| Integration Type | Airbyte Type |
@@ -100,6 +139,7 @@ This is expected when the connector hits the 429 - Rate Limit Exceeded HTTP Erro
| Version | Date | Pull Request | Subject |
| :--- | :--- | :--- | :--- |
+| 0.1.36 | 2022-03-22 | [9850](https://github.com/airbytehq/airbyte/pull/9850) | Added `BalanceTransactions` stream |
| 0.1.35 | 2022-03-07 | [10915](https://github.com/airbytehq/airbyte/pull/10915) | Fix a bug which caused `full-refresh` syncs of child REST entities configured for `incremental` |
| 0.1.34 | 2022-03-02 | [10794](https://github.com/airbytehq/airbyte/pull/10794) | Minor specification re-order, fixed links in documentation |
| 0.1.33 | 2022-02-17 | [10419](https://github.com/airbytehq/airbyte/pull/10419) | Fixed wrong field type for tax_exemptions for `Abandoned_checkouts` stream |
diff --git a/docs/integrations/sources/stripe.md b/docs/integrations/sources/stripe.md
index 727089a7c307..6edb5eac1497 100644
--- a/docs/integrations/sources/stripe.md
+++ b/docs/integrations/sources/stripe.md
@@ -35,8 +35,6 @@ This Source is capable of syncing the following core Streams:
The Stripe API does not allow querying objects which were updated since the last sync. Therefore, this connector uses the `created` field to query for new data in your Stripe account.
-If your data is updated after creation, you can use the Loockback Window option when configuring the connector to always reload data from the past N days. This will allow you to pick up updates to the data.
-
### Data type mapping
The [Stripe API](https://stripe.com/docs/api) uses the same [JSONSchema](https://json-schema.org/understanding-json-schema/reference/index.html) types that Airbyte uses internally \(`string`, `date-time`, `object`, `array`, `boolean`, `integer`, and `number`\), so no type conversions happen as part of this source.
@@ -59,8 +57,15 @@ The Stripe connector should not run into Stripe API limitations under normal usa
### Requirements
-* Stripe Account
-* Stripe API Secret Key
+* Stripe `Account ID` - the `Account ID` of your [Stripe Account](https://dashboard.stripe.com/settings/account)
+* Stripe `Secret Key` - the `Secret Key` to be used with [authorized API calls](https://dashboard.stripe.com/apikeys) to retrieve your Stripe data.
+* `Lookback Window (in days)` (Optional) - the number of days by which the start of the sync is shifted back. If your data is updated after creation, you can use this option to always reload data from the past N days. This allows you to pick up updates to the data.
+Example usage: `Start Date` is set to "2021-01-01T00:00:00Z", then:
+ * Default is 0, meaning data will be synced from the `Start Date`. 
+ * 1 - means (`Start Date` - 1 day), so the start point of the sync will be "2020-12-31T00:00:00Z" + * 7 - means (`Start Date` - 7 days) then `Start Date` will be "2020-12-25T00:00:00Z" + * 30 - means (`Start Date` - 30 days) then `Start Date` will be "2020-12-02T00:00:00Z" + ### Setup guide @@ -72,8 +77,9 @@ If you would like to test Airbyte using test data on Stripe, `sk_test_` and `rk_ ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------| :--- |:--------------------------------------------------------------------------------------------------------| +| Version | Date | Pull Request | Subject | +|:--------|:-----------| :--- |:---------| +| 0.1.30 | 2022-03-21 | [11286](https://github.com/airbytehq/airbyte/pull/11286) | Minor corrections to documentation and connector specification | | 0.1.29 | 2022-03-08 | [10359](https://github.com/airbytehq/airbyte/pull/10359) | Improved performance for streams with substreams: invoice_line_items, subscription_items, bank_accounts | | 0.1.28 | 2022-02-08 | [10165](https://github.com/airbytehq/airbyte/pull/10165) | Improve 404 handling for `CheckoutSessionsLineItems` stream | | 0.1.27 | 2021-12-28 | [9148](https://github.com/airbytehq/airbyte/pull/9148) | Fix `date`, `arrival\_date` fields | diff --git a/docs/integrations/sources/zendesk-support.md b/docs/integrations/sources/zendesk-support.md index 83cdf21b65f5..0cd4228a0754 100644 --- a/docs/integrations/sources/zendesk-support.md +++ b/docs/integrations/sources/zendesk-support.md @@ -10,34 +10,35 @@ This source can sync data for the [Zendesk Support API](https://developer.zendes This Source is capable of syncing the following core Streams: -* [Tickets](https://developer.zendesk.com/rest_api/docs/support/tickets) +* [Brands](https://developer.zendesk.com/api-reference/ticketing/account-configuration/brands/#list-brands) +* [Custom Roles](https://developer.zendesk.com/api-reference/ticketing/account-configuration/custom_roles/#list-custom-roles) * [Groups](https://developer.zendesk.com/rest_api/docs/support/groups) -* [Users](https://developer.zendesk.com/rest_api/docs/support/users) +* [Group Memberships](https://developer.zendesk.com/rest_api/docs/support/group_memberships) +* [Macros](https://developer.zendesk.com/rest_api/docs/support/macros) * [Organizations](https://developer.zendesk.com/rest_api/docs/support/organizations) +* [Satisfaction Ratings](https://developer.zendesk.com/rest_api/docs/support/satisfaction_ratings) +* [Schedules](https://developer.zendesk.com/api-reference/ticketing/ticket-management/schedules/#list-schedules) +* [SLA Policies](https://developer.zendesk.com/rest_api/docs/support/sla_policies) +* [Tags](https://developer.zendesk.com/rest_api/docs/support/tags) +* [Tickets](https://developer.zendesk.com/rest_api/docs/support/tickets) * [Ticket Audits](https://developer.zendesk.com/rest_api/docs/support/ticket_audits) -* [Ticket Comments](https://developer.zendesk.com/rest_api/docs/support/ticket_comments) +* [Ticket Comments](https://developer.zendesk.com/api-reference/ticketing/ticket-management/incremental_exports/#incremental-ticket-event-export) * [Ticket Fields](https://developer.zendesk.com/rest_api/docs/support/ticket_fields) * [Ticket Forms](https://developer.zendesk.com/rest_api/docs/support/ticket_forms) * [Ticket Metrics](https://developer.zendesk.com/rest_api/docs/support/ticket_metrics) * [Ticket Metric Events](https://developer.zendesk.com/api-reference/ticketing/tickets/ticket_metric_events/) -* [Group 
Memberships](https://developer.zendesk.com/rest_api/docs/support/group_memberships) -* [Macros](https://developer.zendesk.com/rest_api/docs/support/macros) -* [Satisfaction Ratings](https://developer.zendesk.com/rest_api/docs/support/satisfaction_ratings) -* [Tags](https://developer.zendesk.com/rest_api/docs/support/tags) -* [SLA Policies](https://developer.zendesk.com/rest_api/docs/support/sla_policies) - - **Not implemented schema** +* [Users](https://developer.zendesk.com/rest_api/docs/support/users) - These Zendesk endpoints are available too. But syncing with them will be implemented in the future. +The streams below are not implemented. Please open a Github issue or request it through Airbyte Cloud's support box if you are interested in them. - **Tickets** +**Tickets** * [Ticket Attachments](https://developer.zendesk.com/api-reference/ticketing/tickets/ticket-attachments/) * [Ticket Requests](https://developer.zendesk.com/api-reference/ticketing/tickets/ticket-requests/) * [Ticket Activities](https://developer.zendesk.com/api-reference/ticketing/tickets/activity_stream/) * [Ticket Skips](https://developer.zendesk.com/api-reference/ticketing/tickets/ticket_skips/) - **Help Center** +**Help Center** * [Articles](https://developer.zendesk.com/api-reference/help_center/help-center-api/articles/) * [Article Attachments](https://developer.zendesk.com/api-reference/help_center/help-center-api/article_attachments/) @@ -54,12 +55,12 @@ This Source is capable of syncing the following core Streams: ### Data type mapping -| Integration Type | Airbyte Type | Notes | -| :--- | :--- | :--- | -| `string` | `string` | | -| `number` | `number` | | -| `array` | `array` | | -| `object` | `object` | | +| Integration Type | Airbyte Type | +| :--- | :--- | +| `string` | `string` | +| `number` | `number` | +| `array` | `array` | +| `object` | `object` | ### Features @@ -85,28 +86,34 @@ The Zendesk connector should not run into Zendesk API limitations under normal u * API Token * Zendesk API Token * Zendesk Email - * oAuth2 \(not implemented\) + * OAuth2.0 (obtain access_token by authorising your Zendesk Account) ### Setup guide +* API Token Generate a API access token using the [Zendesk support](https://support.zendesk.com/hc/en-us/articles/226022787-Generating-a-new-API-token) We recommend creating a restricted, read-only key specifically for Airbyte access. This will allow you to control which resources Airbyte should be able to access. +* OAuth2.0 (Only for Airbyte Cloud) +Simply proceed by pressing "Authenticate your Account" and complete the authentication with your Zendesk credentials. 
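As an optional sanity check of the API Token credentials outside Airbyte, a request like the sketch below can be used. It assumes the `requests` library and placeholder values for the subdomain, email, and token; the `{email}/token:{api_token}` basic-auth format is the one Zendesk documents for API tokens.

```python
# Hypothetical credential check, independent of the connector itself.
import requests

subdomain = "your-subdomain"   # placeholder: your Zendesk subdomain
email = "you@example.com"      # placeholder: the Zendesk email
api_token = "your_api_token"   # placeholder: the generated API token

response = requests.get(
    f"https://{subdomain}.zendesk.com/api/v2/tickets.json",
    params={"per_page": 1},
    auth=(f"{email}/token", api_token),  # Zendesk's email/token basic-auth convention
)
print(response.status_code)  # 200 indicates the email/token pair is accepted
```

A `401` response here usually points at a wrong email/token pair rather than an Airbyte configuration problem.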
+
### CHANGELOG

| Version | Date | Pull Request | Subject |
|:---------|:-----------| :----- |:-------------------------------------------------------|
+| `0.2.2` | 2022-03-17 | [11237](https://github.com/airbytehq/airbyte/pull/11237) | Fixed a bug where the TicketComments stream did not return all records |
+| `0.2.1` | 2022-03-15 | [11162](https://github.com/airbytehq/airbyte/pull/11162) | Added support for the OAuth2.0 authentication method |
| `0.2.0` | 2022-03-01 | [9456](https://github.com/airbytehq/airbyte/pull/9456) | Update source to use future requests |
-| `0.1.12` | 2022-01-25 | [9785](https://github.com/airbytehq/airbyte/pull/9785) | Add log message |
+| `0.1.12` | 2022-01-25 | [9785](https://github.com/airbytehq/airbyte/pull/9785) | Add additional log messages |
| `0.1.11` | 2021-12-21 | [8987](https://github.com/airbytehq/airbyte/pull/8987) | Update connector fields title/description |
-| `0.1.9` | 2021-12-16 | [8616](https://github.com/airbytehq/airbyte/pull/8616) | Adds Brands, CustomRoles and Schedules |
-| `0.1.8` | 2021-11-23 | [8050](https://github.com/airbytehq/airbyte/pull/8168) | Adds TicketMetricEvents |
-| `0.1.7` | 2021-11-23 | [8058](https://github.com/airbytehq/airbyte/pull/8058) | support AccessToken auth |
-| `0.1.6` | 2021-11-18 | [8050](https://github.com/airbytehq/airbyte/pull/8050) | Fix wrong types for schemas, add Transformer |
+| `0.1.9` | 2021-12-16 | [8616](https://github.com/airbytehq/airbyte/pull/8616) | Adds Brands, CustomRoles and Schedules streams |
+| `0.1.8` | 2021-11-23 | [8050](https://github.com/airbytehq/airbyte/pull/8168) | Adds TicketMetricEvents stream |
+| `0.1.7` | 2021-11-23 | [8058](https://github.com/airbytehq/airbyte/pull/8058) | Added support for AccessToken authentication |
+| `0.1.6` | 2021-11-18 | [8050](https://github.com/airbytehq/airbyte/pull/8050) | Fix wrong types for schemas, add TypeTransformer |
| `0.1.5` | 2021-10-26 | [7679](https://github.com/airbytehq/airbyte/pull/7679) | Add ticket_id and ticket_comments |
-| `0.1.4` | 2021-10-26 | [7377](https://github.com/airbytehq/airbyte/pull/7377) | fix initially_assigned_at type in ticket metrics |
-| `0.1.3` | 2021-10-17 | [7097](https://github.com/airbytehq/airbyte/pull/7097) | correction of spec file |
-| `0.1.2` | 2021-10-16 | [6513](https://github.com/airbytehq/airbyte/pull/6513) | fixed comments stream |
-| `0.1.1` | 2021-09-02 | [5787](https://github.com/airbytehq/airbyte/pull/5787) | fixed incremental logic for the ticket_comments stream |
-| `0.1.0` | 2021-07-21 | [4861](https://github.com/airbytehq/airbyte/pull/4861) | created CDK native zendesk connector |
+| `0.1.4` | 2021-10-26 | [7377](https://github.com/airbytehq/airbyte/pull/7377) | Fix initially_assigned_at type in ticket metrics |
+| `0.1.3` | 2021-10-17 | [7097](https://github.com/airbytehq/airbyte/pull/7097) | Corrected the connector's specification |
+| `0.1.2` | 2021-10-16 | [6513](https://github.com/airbytehq/airbyte/pull/6513) | Fixed TicketComments stream |
+| `0.1.1` | 2021-09-02 | [5787](https://github.com/airbytehq/airbyte/pull/5787) | Fixed incremental logic for the ticket_comments stream |
+| `0.1.0` | 2021-07-21 | [4861](https://github.com/airbytehq/airbyte/pull/4861) | Created CDK native zendesk connector |
diff --git a/docs/operator-guides/upgrading-airbyte.md b/docs/operator-guides/upgrading-airbyte.md
index 9f0987b974c5..be0ee2a6989f 100644
--- a/docs/operator-guides/upgrading-airbyte.md
+++ b/docs/operator-guides/upgrading-airbyte.md
@@ -101,7 +101,7 @@ If you are upgrading from \(i.e. 
your current version of Airbyte is\) Airbyte ve Here's an example of what it might look like with the values filled in. It assumes that the downloaded `airbyte_archive.tar.gz` is in `/tmp`. ```bash - docker run --rm -v /tmp:/config airbyte/migration:0.35.55-alpha --\ + docker run --rm -v /tmp:/config airbyte/migration:0.35.59-alpha --\ --input /config/airbyte_archive.tar.gz\ --output /config/airbyte_archive_migrated.tar.gz ``` diff --git a/docs/reference/api/generated-api-html/index.html b/docs/reference/api/generated-api-html/index.html index 6c4e4297bd25..84e32e6e0af0 100644 --- a/docs/reference/api/generated-api-html/index.html +++ b/docs/reference/api/generated-api-html/index.html @@ -259,11 +259,19 @@

Destination

DestinationDefinition

DestinationDefinitionSpecification

@@ -332,11 +340,19 @@

Source

SourceDefinition

SourceDefinitionSpecification

@@ -2570,6 +2586,90 @@

422


DestinationDefinition

+
+
+ Up +
post /v1/destination_definitions/create_custom
+
Creates a custom destinationDefinition for the given workspace (createCustomDestinationDefinition)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
CustomDestinationDefinitionCreate CustomDestinationDefinitionCreate (optional)
+ +
Body Parameter
+ +
+ + + + +

Return type

+ + + + +

Example data

+
Content-Type: application/json
+
{
+  "resourceRequirements" : {
+    "default" : {
+      "cpu_limit" : "cpu_limit",
+      "memory_request" : "memory_request",
+      "memory_limit" : "memory_limit",
+      "cpu_request" : "cpu_request"
+    },
+    "jobSpecific" : [ {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    }, {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    } ]
+  },
+  "documentationUrl" : "https://openapi-generator.tech",
+  "dockerImageTag" : "dockerImageTag",
+  "releaseDate" : "2000-01-23",
+  "dockerRepository" : "dockerRepository",
+  "name" : "name",
+  "icon" : "icon",
+  "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; + the media type will be conveyed by the Content-Type response header. +
    +
  • application/json
  • +
+ +

Responses

+

200

+ Successful operation + DestinationDefinitionRead +

422

+ Input failed validation + InvalidInputExceptionInfo +
+
Up @@ -2654,6 +2754,54 @@

422

InvalidInputExceptionInfo

+
+
+ Up +
post /v1/destination_definitions/delete_custom
+
Delete a custom destination definition for the given workspace (deleteCustomDestinationDefinition)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
DestinationDefinitionIdWithWorkspaceId DestinationDefinitionIdWithWorkspaceId (required)
+ +
Body Parameter
+ +
+ + + + + + + + +

Produces

+ This API call produces the following media types according to the Accept request header; + the media type will be conveyed by the Content-Type response header. +
    +
  • application/json
  • +
+ +

Responses

+

204

+ The destination was deleted successfully. + +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo +

422

+ Input failed validation + InvalidInputExceptionInfo +
+
Up @@ -2789,22 +2937,34 @@

422

InvalidInputExceptionInfo

-
+
Up -
post /v1/destination_definitions/list
-
List all the destinationDefinitions the current Airbyte deployment is configured to use (listDestinationDefinitions)
+
post /v1/destination_definitions/get_for_workspace
+
Get a destinationDefinition that is configured for the given workspace (getDestinationDefinitionForWorkspace)
+

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
DestinationDefinitionIdWithWorkspaceId DestinationDefinitionIdWithWorkspaceId (required)
+ +
Body Parameter
+

Return type

@@ -2813,69 +2973,36 @@

Return type

Example data

Content-Type: application/json
{
-  "destinationDefinitions" : [ {
-    "resourceRequirements" : {
-      "default" : {
+  "resourceRequirements" : {
+    "default" : {
+      "cpu_limit" : "cpu_limit",
+      "memory_request" : "memory_request",
+      "memory_limit" : "memory_limit",
+      "cpu_request" : "cpu_request"
+    },
+    "jobSpecific" : [ {
+      "resourceRequirements" : {
         "cpu_limit" : "cpu_limit",
         "memory_request" : "memory_request",
         "memory_limit" : "memory_limit",
         "cpu_request" : "cpu_request"
-      },
-      "jobSpecific" : [ {
-        "resourceRequirements" : {
-          "cpu_limit" : "cpu_limit",
-          "memory_request" : "memory_request",
-          "memory_limit" : "memory_limit",
-          "cpu_request" : "cpu_request"
-        }
-      }, {
-        "resourceRequirements" : {
-          "cpu_limit" : "cpu_limit",
-          "memory_request" : "memory_request",
-          "memory_limit" : "memory_limit",
-          "cpu_request" : "cpu_request"
-        }
-      } ]
-    },
-    "documentationUrl" : "https://openapi-generator.tech",
-    "dockerImageTag" : "dockerImageTag",
-    "releaseDate" : "2000-01-23",
-    "dockerRepository" : "dockerRepository",
-    "name" : "name",
-    "icon" : "icon",
-    "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
-  }, {
-    "resourceRequirements" : {
-      "default" : {
+      }
+    }, {
+      "resourceRequirements" : {
         "cpu_limit" : "cpu_limit",
         "memory_request" : "memory_request",
         "memory_limit" : "memory_limit",
         "cpu_request" : "cpu_request"
-      },
-      "jobSpecific" : [ {
-        "resourceRequirements" : {
-          "cpu_limit" : "cpu_limit",
-          "memory_request" : "memory_request",
-          "memory_limit" : "memory_limit",
-          "cpu_request" : "cpu_request"
-        }
-      }, {
-        "resourceRequirements" : {
-          "cpu_limit" : "cpu_limit",
-          "memory_request" : "memory_request",
-          "memory_limit" : "memory_limit",
-          "cpu_request" : "cpu_request"
-        }
-      } ]
-    },
-    "documentationUrl" : "https://openapi-generator.tech",
-    "dockerImageTag" : "dockerImageTag",
-    "releaseDate" : "2000-01-23",
-    "dockerRepository" : "dockerRepository",
-    "name" : "name",
-    "icon" : "icon",
-    "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
-  } ]
+      }
+    } ]
+  },
+  "documentationUrl" : "https://openapi-generator.tech",
+  "dockerImageTag" : "dockerImageTag",
+  "releaseDate" : "2000-01-23",
+  "dockerRepository" : "dockerRepository",
+  "name" : "name",
+  "icon" : "icon",
+  "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
 }

Produces

@@ -2888,25 +3015,43 @@

Produces

Responses

200

Successful operation - DestinationDefinitionReadList + DestinationDefinitionRead +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo +

422

+ Input failed validation + InvalidInputExceptionInfo

-
+
Up -
post /v1/destination_definitions/list_latest
-
List the latest destinationDefinitions Airbyte supports (listLatestDestinationDefinitions)
-
Guaranteed to retrieve the latest information on supported destinations.
+
post /v1/destination_definitions/grant_definition
+
grant a private, non-custom destinationDefinition to a given workspace (grantDestinationDefinitionToWorkspace)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+

Request body

+
+
DestinationDefinitionIdWithWorkspaceId DestinationDefinitionIdWithWorkspaceId (required)
+
Body Parameter
+

Return type

@@ -2915,7 +3060,7 @@

Return type

Example data

Content-Type: application/json
{
-  "destinationDefinitions" : [ {
+  "destinationDefinition" : {
     "resourceRequirements" : {
       "default" : {
         "cpu_limit" : "cpu_limit",
@@ -2946,38 +3091,8 @@ 

Example data

"name" : "name", "icon" : "icon", "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91" - }, { - "resourceRequirements" : { - "default" : { - "cpu_limit" : "cpu_limit", - "memory_request" : "memory_request", - "memory_limit" : "memory_limit", - "cpu_request" : "cpu_request" - }, - "jobSpecific" : [ { - "resourceRequirements" : { - "cpu_limit" : "cpu_limit", - "memory_request" : "memory_request", - "memory_limit" : "memory_limit", - "cpu_request" : "cpu_request" - } - }, { - "resourceRequirements" : { - "cpu_limit" : "cpu_limit", - "memory_request" : "memory_request", - "memory_limit" : "memory_limit", - "cpu_request" : "cpu_request" - } - } ] - }, - "documentationUrl" : "https://openapi-generator.tech", - "dockerImageTag" : "dockerImageTag", - "releaseDate" : "2000-01-23", - "dockerRepository" : "dockerRepository", - "name" : "name", - "icon" : "icon", - "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91" - } ] + }, + "granted" : true }

Produces

@@ -2990,37 +3105,31 @@

Produces

Responses

200

Successful operation - DestinationDefinitionReadList + PrivateDestinationDefinitionRead +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo +

422

+ Input failed validation + InvalidInputExceptionInfo

-
+
Up -
post /v1/destination_definitions/update
-
Update destinationDefinition (updateDestinationDefinition)
+
post /v1/destination_definitions/list
+
List all the destinationDefinitions the current Airbyte deployment is configured to use (listDestinationDefinitions)
-

Consumes

- This API call consumes the following media types via the Content-Type request header: -
    -
  • application/json
  • -
- -

Request body

-
-
DestinationDefinitionUpdate DestinationDefinitionUpdate (required)
- -
Body Parameter
-

Return type

@@ -3029,36 +3138,69 @@

Return type

Example data

Content-Type: application/json
{
-  "resourceRequirements" : {
-    "default" : {
-      "cpu_limit" : "cpu_limit",
-      "memory_request" : "memory_request",
-      "memory_limit" : "memory_limit",
-      "cpu_request" : "cpu_request"
-    },
-    "jobSpecific" : [ {
-      "resourceRequirements" : {
+  "destinationDefinitions" : [ {
+    "resourceRequirements" : {
+      "default" : {
         "cpu_limit" : "cpu_limit",
         "memory_request" : "memory_request",
         "memory_limit" : "memory_limit",
         "cpu_request" : "cpu_request"
-      }
-    }, {
-      "resourceRequirements" : {
+      },
+      "jobSpecific" : [ {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      }, {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      } ]
+    },
+    "documentationUrl" : "https://openapi-generator.tech",
+    "dockerImageTag" : "dockerImageTag",
+    "releaseDate" : "2000-01-23",
+    "dockerRepository" : "dockerRepository",
+    "name" : "name",
+    "icon" : "icon",
+    "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  }, {
+    "resourceRequirements" : {
+      "default" : {
         "cpu_limit" : "cpu_limit",
         "memory_request" : "memory_request",
         "memory_limit" : "memory_limit",
         "cpu_request" : "cpu_request"
-      }
-    } ]
-  },
-  "documentationUrl" : "https://openapi-generator.tech",
-  "dockerImageTag" : "dockerImageTag",
-  "releaseDate" : "2000-01-23",
-  "dockerRepository" : "dockerRepository",
-  "name" : "name",
-  "icon" : "icon",
-  "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+      },
+      "jobSpecific" : [ {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      }, {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      } ]
+    },
+    "documentationUrl" : "https://openapi-generator.tech",
+    "dockerImageTag" : "dockerImageTag",
+    "releaseDate" : "2000-01-23",
+    "dockerRepository" : "dockerRepository",
+    "name" : "name",
+    "icon" : "icon",
+    "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  } ]
 }

Produces

@@ -3071,21 +3213,14 @@

Produces

Responses

200

Successful operation - DestinationDefinitionRead -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo -

422

- Input failed validation - InvalidInputExceptionInfo + DestinationDefinitionReadList

-

DestinationDefinitionSpecification

-
+
Up -
post /v1/destination_definition_specifications/get
-
Get specification for a destinationDefinition (getDestinationDefinitionSpecification)
+
post /v1/destination_definitions/list_for_workspace
+
List all the destinationDefinitions the given workspace is configured to use (listDestinationDefinitionsForWorkspace)
@@ -3097,7 +3232,7 @@

Consumes

Request body

-
DestinationDefinitionIdRequestBody DestinationDefinitionIdRequestBody (required)
+
WorkspaceIdRequestBody WorkspaceIdRequestBody (optional)
Body Parameter
@@ -3108,7 +3243,7 @@

Request body

Return type

@@ -3117,40 +3252,69 @@

Return type

Example data

Content-Type: application/json
{
-  "documentationUrl" : "documentationUrl",
-  "supportsNormalization" : true,
-  "connectionSpecification" : {
-    "user" : {
-      "type" : "string"
-    }
-  },
-  "supportedDestinationSyncModes" : [ null, null ],
-  "supportsDbt" : true,
-  "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "advancedAuth" : {
-    "predicateValue" : "predicateValue",
-    "oauthConfigSpecification" : { },
-    "predicateKey" : [ "predicateKey", "predicateKey" ],
-    "authFlowType" : "oauth2.0"
-  },
-  "authSpecification" : {
-    "auth_type" : "oauth2.0",
-    "oauth2Specification" : {
-      "oauthFlowOutputParameters" : [ [ "oauthFlowOutputParameters", "oauthFlowOutputParameters" ], [ "oauthFlowOutputParameters", "oauthFlowOutputParameters" ] ],
-      "rootObject" : [ "path", 1 ],
-      "oauthFlowInitParameters" : [ [ "oauthFlowInitParameters", "oauthFlowInitParameters" ], [ "oauthFlowInitParameters", "oauthFlowInitParameters" ] ]
-    }
-  },
-  "jobInfo" : {
-    "createdAt" : 0,
-    "configId" : "configId",
-    "endedAt" : 6,
-    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "logs" : {
-      "logLines" : [ "logLines", "logLines" ]
+  "destinationDefinitions" : [ {
+    "resourceRequirements" : {
+      "default" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      },
+      "jobSpecific" : [ {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      }, {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      } ]
     },
-    "succeeded" : true
-  }
+    "documentationUrl" : "https://openapi-generator.tech",
+    "dockerImageTag" : "dockerImageTag",
+    "releaseDate" : "2000-01-23",
+    "dockerRepository" : "dockerRepository",
+    "name" : "name",
+    "icon" : "icon",
+    "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  }, {
+    "resourceRequirements" : {
+      "default" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      },
+      "jobSpecific" : [ {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      }, {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      } ]
+    },
+    "documentationUrl" : "https://openapi-generator.tech",
+    "dockerImageTag" : "dockerImageTag",
+    "releaseDate" : "2000-01-23",
+    "dockerRepository" : "dockerRepository",
+    "name" : "name",
+    "icon" : "icon",
+    "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  } ]
 }

Produces

@@ -3163,22 +3327,15 @@

Produces

Responses

200

Successful operation - DestinationDefinitionSpecificationRead -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo -

422

- Input failed validation - InvalidInputExceptionInfo + DestinationDefinitionReadList

-

Health

-
+
Up -
get /v1/health
-
Health Check (getHealthCheck)
-
+
post /v1/destination_definitions/list_latest
+
List the latest destinationDefinitions Airbyte supports (listLatestDestinationDefinitions)
+
Guaranteed to retrieve the latest information on supported destinations.
@@ -3188,7 +3345,7 @@

Health

Return type

@@ -3197,163 +3354,68 @@

Return type

Example data

Content-Type: application/json
{
-  "available" : true
-}
- -

Produces

- This API call produces the following media types according to the Accept request header; - the media type will be conveyed by the Content-Type response header. -
    -
  • application/json
  • -
- -

Responses

-

200

- Successful operation - HealthCheckRead -
-
-

Jobs

-
-
- Up -
post /v1/jobs/cancel
-
Cancels a job (cancelJob)
-
- - -

Consumes

- This API call consumes the following media types via the Content-Type request header: -
    -
  • application/json
  • -
- -

Request body

-
-
JobIdRequestBody JobIdRequestBody (required)
- -
Body Parameter
- -
- - - - -

Return type

-
- JobInfoRead - -
- - - -

Example data

-
Content-Type: application/json
-
{
-  "job" : {
-    "createdAt" : 6,
-    "configId" : "configId",
-    "id" : 0,
-    "updatedAt" : 1
-  },
-  "attempts" : [ {
-    "attempt" : {
-      "totalStats" : {
-        "stateMessagesEmitted" : 7,
-        "recordsCommitted" : 1,
-        "bytesEmitted" : 4,
-        "recordsEmitted" : 2
-      },
-      "failureSummary" : {
-        "failures" : [ {
-          "retryable" : true,
-          "stacktrace" : "stacktrace",
-          "externalMessage" : "externalMessage",
-          "timestamp" : 1
-        }, {
-          "retryable" : true,
-          "stacktrace" : "stacktrace",
-          "externalMessage" : "externalMessage",
-          "timestamp" : 1
-        } ],
-        "partialSuccess" : true
+  "destinationDefinitions" : [ {
+    "resourceRequirements" : {
+      "default" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
       },
-      "createdAt" : 5,
-      "bytesSynced" : 9,
-      "endedAt" : 7,
-      "streamStats" : [ {
-        "stats" : {
-          "stateMessagesEmitted" : 7,
-          "recordsCommitted" : 1,
-          "bytesEmitted" : 4,
-          "recordsEmitted" : 2
-        },
-        "streamName" : "streamName"
+      "jobSpecific" : [ {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
       }, {
-        "stats" : {
-          "stateMessagesEmitted" : 7,
-          "recordsCommitted" : 1,
-          "bytesEmitted" : 4,
-          "recordsEmitted" : 2
-        },
-        "streamName" : "streamName"
-      } ],
-      "id" : 5,
-      "recordsSynced" : 3,
-      "updatedAt" : 2
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      } ]
     },
-    "logs" : {
-      "logLines" : [ "logLines", "logLines" ]
-    }
+    "documentationUrl" : "https://openapi-generator.tech",
+    "dockerImageTag" : "dockerImageTag",
+    "releaseDate" : "2000-01-23",
+    "dockerRepository" : "dockerRepository",
+    "name" : "name",
+    "icon" : "icon",
+    "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
   }, {
-    "attempt" : {
-      "totalStats" : {
-        "stateMessagesEmitted" : 7,
-        "recordsCommitted" : 1,
-        "bytesEmitted" : 4,
-        "recordsEmitted" : 2
-      },
-      "failureSummary" : {
-        "failures" : [ {
-          "retryable" : true,
-          "stacktrace" : "stacktrace",
-          "externalMessage" : "externalMessage",
-          "timestamp" : 1
-        }, {
-          "retryable" : true,
-          "stacktrace" : "stacktrace",
-          "externalMessage" : "externalMessage",
-          "timestamp" : 1
-        } ],
-        "partialSuccess" : true
+    "resourceRequirements" : {
+      "default" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
       },
-      "createdAt" : 5,
-      "bytesSynced" : 9,
-      "endedAt" : 7,
-      "streamStats" : [ {
-        "stats" : {
-          "stateMessagesEmitted" : 7,
-          "recordsCommitted" : 1,
-          "bytesEmitted" : 4,
-          "recordsEmitted" : 2
-        },
-        "streamName" : "streamName"
+      "jobSpecific" : [ {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
       }, {
-        "stats" : {
-          "stateMessagesEmitted" : 7,
-          "recordsCommitted" : 1,
-          "bytesEmitted" : 4,
-          "recordsEmitted" : 2
-        },
-        "streamName" : "streamName"
-      } ],
-      "id" : 5,
-      "recordsSynced" : 3,
-      "updatedAt" : 2
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      } ]
     },
-    "logs" : {
-      "logLines" : [ "logLines", "logLines" ]
-    }
+    "documentationUrl" : "https://openapi-generator.tech",
+    "dockerImageTag" : "dockerImageTag",
+    "releaseDate" : "2000-01-23",
+    "dockerRepository" : "dockerRepository",
+    "name" : "name",
+    "icon" : "icon",
+    "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
   } ]
 }
@@ -3367,20 +3429,14 @@

Produces

Responses

200

Successful operation - JobInfoRead -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo -

422

- Input failed validation - InvalidInputExceptionInfo + DestinationDefinitionReadList

-
+
Up -
post /v1/jobs/get_debug_info
-
Gets all information needed to debug this job (getJobDebugInfo)
+
post /v1/destination_definitions/list_private
+
List all private, non-custom destinationDefinitions, and for each indicate whether the given workspace has a grant for using the definition. Used by admins to view and modify a given workspace's grants. (listPrivateDestinationDefinitions)
@@ -3392,7 +3448,7 @@

Consumes

Request body

-
JobIdRequestBody JobIdRequestBody (required)
+
WorkspaceIdRequestBody WorkspaceIdRequestBody (optional)
Body Parameter
@@ -3403,7 +3459,7 @@

Request body

Return type

@@ -3412,9 +3468,8 @@

Return type

Example data

Content-Type: application/json
{
-  "job" : {
-    "configId" : "configId",
-    "sourceDefinition" : {
+  "destinationDefinitions" : [ {
+    "destinationDefinition" : {
       "resourceRequirements" : {
         "default" : {
           "cpu_limit" : "cpu_limit",
@@ -3444,10 +3499,10 @@ 

Example data

"dockerRepository" : "dockerRepository", "name" : "name", "icon" : "icon", - "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91" + "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91" }, - "airbyteVersion" : "airbyteVersion", - "id" : 0, + "granted" : true + }, { "destinationDefinition" : { "resourceRequirements" : { "default" : { @@ -3479,106 +3534,8 @@

Example data

"name" : "name", "icon" : "icon", "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91" - } - }, - "attempts" : [ { - "attempt" : { - "totalStats" : { - "stateMessagesEmitted" : 7, - "recordsCommitted" : 1, - "bytesEmitted" : 4, - "recordsEmitted" : 2 - }, - "failureSummary" : { - "failures" : [ { - "retryable" : true, - "stacktrace" : "stacktrace", - "externalMessage" : "externalMessage", - "timestamp" : 1 - }, { - "retryable" : true, - "stacktrace" : "stacktrace", - "externalMessage" : "externalMessage", - "timestamp" : 1 - } ], - "partialSuccess" : true - }, - "createdAt" : 5, - "bytesSynced" : 9, - "endedAt" : 7, - "streamStats" : [ { - "stats" : { - "stateMessagesEmitted" : 7, - "recordsCommitted" : 1, - "bytesEmitted" : 4, - "recordsEmitted" : 2 - }, - "streamName" : "streamName" - }, { - "stats" : { - "stateMessagesEmitted" : 7, - "recordsCommitted" : 1, - "bytesEmitted" : 4, - "recordsEmitted" : 2 - }, - "streamName" : "streamName" - } ], - "id" : 5, - "recordsSynced" : 3, - "updatedAt" : 2 - }, - "logs" : { - "logLines" : [ "logLines", "logLines" ] - } - }, { - "attempt" : { - "totalStats" : { - "stateMessagesEmitted" : 7, - "recordsCommitted" : 1, - "bytesEmitted" : 4, - "recordsEmitted" : 2 - }, - "failureSummary" : { - "failures" : [ { - "retryable" : true, - "stacktrace" : "stacktrace", - "externalMessage" : "externalMessage", - "timestamp" : 1 - }, { - "retryable" : true, - "stacktrace" : "stacktrace", - "externalMessage" : "externalMessage", - "timestamp" : 1 - } ], - "partialSuccess" : true - }, - "createdAt" : 5, - "bytesSynced" : 9, - "endedAt" : 7, - "streamStats" : [ { - "stats" : { - "stateMessagesEmitted" : 7, - "recordsCommitted" : 1, - "bytesEmitted" : 4, - "recordsEmitted" : 2 - }, - "streamName" : "streamName" - }, { - "stats" : { - "stateMessagesEmitted" : 7, - "recordsCommitted" : 1, - "bytesEmitted" : 4, - "recordsEmitted" : 2 - }, - "streamName" : "streamName" - } ], - "id" : 5, - "recordsSynced" : 3, - "updatedAt" : 2 }, - "logs" : { - "logLines" : [ "logLines", "logLines" ] - } + "granted" : true } ] }
@@ -3592,7 +3549,49 @@

Produces

Responses

200

Successful operation - JobDebugInfoRead + PrivateDestinationDefinitionReadList +
+
+
+
+ Up +
post /v1/destination_definitions/revoke_definition
+
revoke a grant to a private, non-custom destinationDefinition from a given workspace (revokeDestinationDefinitionFromWorkspace)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
DestinationDefinitionIdWithWorkspaceId DestinationDefinitionIdWithWorkspaceId (required)
+ +
Body Parameter
+ +
+ + + + + + + + +

Produces

+ This API call produces the following media types according to the Accept request header; + the media type will be conveyed by the Content-Type response header. +
    +
  • application/json
  • +
+ +

Responses

+

204

+ The resource was deleted successfully. +

404

Object with given id was not found. NotFoundKnownExceptionInfo @@ -3601,11 +3600,11 @@

422

InvalidInputExceptionInfo

-
+
Up -
post /v1/jobs/get
-
Get information about a job (getJobInfo)
+
post /v1/destination_definitions/update_custom
+
Update a custom destinationDefinition for the given workspace (updateCustomDestinationDefinition)
@@ -3617,7 +3616,7 @@

Consumes

Request body

-
JobIdRequestBody JobIdRequestBody (required)
+
CustomDestinationDefinitionUpdate CustomDestinationDefinitionUpdate (optional)
Body Parameter
@@ -3628,7 +3627,7 @@

Request body

Return type

@@ -3637,111 +3636,36 @@

Return type

Example data

Content-Type: application/json
{
-  "job" : {
-    "createdAt" : 6,
-    "configId" : "configId",
-    "id" : 0,
-    "updatedAt" : 1
-  },
-  "attempts" : [ {
-    "attempt" : {
-      "totalStats" : {
-        "stateMessagesEmitted" : 7,
-        "recordsCommitted" : 1,
-        "bytesEmitted" : 4,
-        "recordsEmitted" : 2
-      },
-      "failureSummary" : {
-        "failures" : [ {
-          "retryable" : true,
-          "stacktrace" : "stacktrace",
-          "externalMessage" : "externalMessage",
-          "timestamp" : 1
-        }, {
-          "retryable" : true,
-          "stacktrace" : "stacktrace",
-          "externalMessage" : "externalMessage",
-          "timestamp" : 1
-        } ],
-        "partialSuccess" : true
-      },
-      "createdAt" : 5,
-      "bytesSynced" : 9,
-      "endedAt" : 7,
-      "streamStats" : [ {
-        "stats" : {
-          "stateMessagesEmitted" : 7,
-          "recordsCommitted" : 1,
-          "bytesEmitted" : 4,
-          "recordsEmitted" : 2
-        },
-        "streamName" : "streamName"
-      }, {
-        "stats" : {
-          "stateMessagesEmitted" : 7,
-          "recordsCommitted" : 1,
-          "bytesEmitted" : 4,
-          "recordsEmitted" : 2
-        },
-        "streamName" : "streamName"
-      } ],
-      "id" : 5,
-      "recordsSynced" : 3,
-      "updatedAt" : 2
-    },
-    "logs" : {
-      "logLines" : [ "logLines", "logLines" ]
-    }
-  }, {
-    "attempt" : {
-      "totalStats" : {
-        "stateMessagesEmitted" : 7,
-        "recordsCommitted" : 1,
-        "bytesEmitted" : 4,
-        "recordsEmitted" : 2
-      },
-      "failureSummary" : {
-        "failures" : [ {
-          "retryable" : true,
-          "stacktrace" : "stacktrace",
-          "externalMessage" : "externalMessage",
-          "timestamp" : 1
-        }, {
-          "retryable" : true,
-          "stacktrace" : "stacktrace",
-          "externalMessage" : "externalMessage",
-          "timestamp" : 1
-        } ],
-        "partialSuccess" : true
-      },
-      "createdAt" : 5,
-      "bytesSynced" : 9,
-      "endedAt" : 7,
-      "streamStats" : [ {
-        "stats" : {
-          "stateMessagesEmitted" : 7,
-          "recordsCommitted" : 1,
-          "bytesEmitted" : 4,
-          "recordsEmitted" : 2
-        },
-        "streamName" : "streamName"
-      }, {
-        "stats" : {
-          "stateMessagesEmitted" : 7,
-          "recordsCommitted" : 1,
-          "bytesEmitted" : 4,
-          "recordsEmitted" : 2
-        },
-        "streamName" : "streamName"
-      } ],
-      "id" : 5,
-      "recordsSynced" : 3,
-      "updatedAt" : 2
+  "resourceRequirements" : {
+    "default" : {
+      "cpu_limit" : "cpu_limit",
+      "memory_request" : "memory_request",
+      "memory_limit" : "memory_limit",
+      "cpu_request" : "cpu_request"
     },
-    "logs" : {
-      "logLines" : [ "logLines", "logLines" ]
-    }
-  } ]
+    "jobSpecific" : [ {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    }, {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    } ]
+  },
+  "documentationUrl" : "https://openapi-generator.tech",
+  "dockerImageTag" : "dockerImageTag",
+  "releaseDate" : "2000-01-23",
+  "dockerRepository" : "dockerRepository",
+  "name" : "name",
+  "icon" : "icon",
+  "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
 }

Produces

@@ -3754,7 +3678,7 @@

Produces

Responses

200

Successful operation - JobInfoRead + DestinationDefinitionRead

404

Object with given id was not found. NotFoundKnownExceptionInfo @@ -3763,11 +3687,11 @@

422

InvalidInputExceptionInfo

-
+
Up -
post /v1/jobs/list
-
Returns recent jobs for a connection. Jobs are returned in descending order by createdAt. (listJobsFor)
+
post /v1/destination_definitions/update
+
Update destinationDefinition (updateDestinationDefinition)
@@ -3779,7 +3703,7 @@

Consumes

Request body

-
JobListRequestBody JobListRequestBody (required)
+
DestinationDefinitionUpdate DestinationDefinitionUpdate (required)
Body Parameter
@@ -3790,7 +3714,7 @@

Request body

Return type

@@ -3799,14 +3723,235 @@

Return type

Example data

Content-Type: application/json
{
-  "jobs" : [ {
-    "job" : {
-      "createdAt" : 6,
-      "configId" : "configId",
-      "id" : 0,
-      "updatedAt" : 1
+  "resourceRequirements" : {
+    "default" : {
+      "cpu_limit" : "cpu_limit",
+      "memory_request" : "memory_request",
+      "memory_limit" : "memory_limit",
+      "cpu_request" : "cpu_request"
     },
-    "attempts" : [ {
+    "jobSpecific" : [ {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    }, {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    } ]
+  },
+  "documentationUrl" : "https://openapi-generator.tech",
+  "dockerImageTag" : "dockerImageTag",
+  "releaseDate" : "2000-01-23",
+  "dockerRepository" : "dockerRepository",
+  "name" : "name",
+  "icon" : "icon",
+  "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; + the media type will be conveyed by the Content-Type response header. +
    +
  • application/json
  • +
+ +

Responses

+

200

+ Successful operation + DestinationDefinitionRead +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo +

422

+ Input failed validation + InvalidInputExceptionInfo +
+
+

DestinationDefinitionSpecification

+
+
+ Up +
post /v1/destination_definition_specifications/get
+
Get specification for a destinationDefinition (getDestinationDefinitionSpecification)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
DestinationDefinitionIdRequestBody DestinationDefinitionIdRequestBody (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+ + + + +

Example data

+
Content-Type: application/json
+
{
+  "documentationUrl" : "documentationUrl",
+  "supportsNormalization" : true,
+  "connectionSpecification" : {
+    "user" : {
+      "type" : "string"
+    }
+  },
+  "supportedDestinationSyncModes" : [ null, null ],
+  "supportsDbt" : true,
+  "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "advancedAuth" : {
+    "predicateValue" : "predicateValue",
+    "oauthConfigSpecification" : { },
+    "predicateKey" : [ "predicateKey", "predicateKey" ],
+    "authFlowType" : "oauth2.0"
+  },
+  "authSpecification" : {
+    "auth_type" : "oauth2.0",
+    "oauth2Specification" : {
+      "oauthFlowOutputParameters" : [ [ "oauthFlowOutputParameters", "oauthFlowOutputParameters" ], [ "oauthFlowOutputParameters", "oauthFlowOutputParameters" ] ],
+      "rootObject" : [ "path", 1 ],
+      "oauthFlowInitParameters" : [ [ "oauthFlowInitParameters", "oauthFlowInitParameters" ], [ "oauthFlowInitParameters", "oauthFlowInitParameters" ] ]
+    }
+  },
+  "jobInfo" : {
+    "createdAt" : 0,
+    "configId" : "configId",
+    "endedAt" : 6,
+    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "logs" : {
+      "logLines" : [ "logLines", "logLines" ]
+    },
+    "succeeded" : true
+  }
+}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; + the media type will be conveyed by the Content-Type response header. +
    +
  • application/json
  • +
+ +

Responses

+

200

+ Successful operation + DestinationDefinitionSpecificationRead +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo +

422

+ Input failed validation + InvalidInputExceptionInfo +
+
+

Health

+
+
+ Up +
get /v1/health
+
Health Check (getHealthCheck)
+
+ + + + + + + +

Return type

+ + + + +

Example data

+
Content-Type: application/json
+
{
+  "available" : true
+}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; + the media type will be conveyed by the Content-Type response header. +
    +
  • application/json
  • +
+ +

Responses

+

200

+ Successful operation + HealthCheckRead +
+
+

Jobs

+
+
+ Up +
post /v1/jobs/cancel
+
Cancels a job (cancelJob)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
JobIdRequestBody JobIdRequestBody (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+
+ JobInfoRead + +
+ + + +

Example data

+
Content-Type: application/json
+
{
+  "job" : {
+    "createdAt" : 6,
+    "configId" : "configId",
+    "id" : 0,
+    "updatedAt" : 1
+  },
+  "attempts" : [ {
+    "attempt" : {
       "totalStats" : {
         "stateMessagesEmitted" : 7,
         "recordsCommitted" : 1,
@@ -3850,7 +3995,12 @@ 

Example data

"id" : 5, "recordsSynced" : 3, "updatedAt" : 2 - }, { + }, + "logs" : { + "logLines" : [ "logLines", "logLines" ] + } + }, { + "attempt" : { "totalStats" : { "stateMessagesEmitted" : 7, "recordsCommitted" : 1, @@ -3894,20 +4044,144 @@

Example data

"id" : 5, "recordsSynced" : 3, "updatedAt" : 2 - } ] - }, { - "job" : { - "createdAt" : 6, - "configId" : "configId", - "id" : 0, - "updatedAt" : 1 }, - "attempts" : [ { - "totalStats" : { - "stateMessagesEmitted" : 7, - "recordsCommitted" : 1, - "bytesEmitted" : 4, - "recordsEmitted" : 2 + "logs" : { + "logLines" : [ "logLines", "logLines" ] + } + } ] +}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • application/json

Responses

200
+ Successful operation: JobInfoRead

404
+ Object with given id was not found: NotFoundKnownExceptionInfo

422
+ Input failed validation: InvalidInputExceptionInfo
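A minimal cancellation sketch in Python, assuming JobIdRequestBody wraps the numeric job id in an id field (the field name is an assumption; a real job id would come from a prior /v1/jobs/list call):

import requests

API_ROOT = "http://localhost:8000/api"  # assumed local deployment

job_id = 0  # placeholder; substitute a real id from /v1/jobs/list
resp = requests.post(f"{API_ROOT}/v1/jobs/cancel", json={"id": job_id})
resp.raise_for_status()
info = resp.json()  # JobInfoRead: the job plus its attempts and their logs
print(info["job"]["id"], [entry["attempt"]["id"] for entry in info["attempts"]])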
+
+ Up +
post /v1/jobs/get_debug_info
+
Gets all information needed to debug this job (getJobDebugInfo)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
JobIdRequestBody JobIdRequestBody (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+ + + + +

Example data

+
Content-Type: application/json
+
{
+  "job" : {
+    "configId" : "configId",
+    "sourceDefinition" : {
+      "resourceRequirements" : {
+        "default" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        },
+        "jobSpecific" : [ {
+          "resourceRequirements" : {
+            "cpu_limit" : "cpu_limit",
+            "memory_request" : "memory_request",
+            "memory_limit" : "memory_limit",
+            "cpu_request" : "cpu_request"
+          }
+        }, {
+          "resourceRequirements" : {
+            "cpu_limit" : "cpu_limit",
+            "memory_request" : "memory_request",
+            "memory_limit" : "memory_limit",
+            "cpu_request" : "cpu_request"
+          }
+        } ]
+      },
+      "documentationUrl" : "https://openapi-generator.tech",
+      "dockerImageTag" : "dockerImageTag",
+      "releaseDate" : "2000-01-23",
+      "dockerRepository" : "dockerRepository",
+      "name" : "name",
+      "icon" : "icon",
+      "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+    },
+    "airbyteVersion" : "airbyteVersion",
+    "id" : 0,
+    "destinationDefinition" : {
+      "resourceRequirements" : {
+        "default" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        },
+        "jobSpecific" : [ {
+          "resourceRequirements" : {
+            "cpu_limit" : "cpu_limit",
+            "memory_request" : "memory_request",
+            "memory_limit" : "memory_limit",
+            "cpu_request" : "cpu_request"
+          }
+        }, {
+          "resourceRequirements" : {
+            "cpu_limit" : "cpu_limit",
+            "memory_request" : "memory_request",
+            "memory_limit" : "memory_limit",
+            "cpu_request" : "cpu_request"
+          }
+        } ]
+      },
+      "documentationUrl" : "https://openapi-generator.tech",
+      "dockerImageTag" : "dockerImageTag",
+      "releaseDate" : "2000-01-23",
+      "dockerRepository" : "dockerRepository",
+      "name" : "name",
+      "icon" : "icon",
+      "destinationDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+    }
+  },
+  "attempts" : [ {
+    "attempt" : {
+      "totalStats" : {
+        "stateMessagesEmitted" : 7,
+        "recordsCommitted" : 1,
+        "bytesEmitted" : 4,
+        "recordsEmitted" : 2
       },
       "failureSummary" : {
         "failures" : [ {
@@ -3946,7 +4220,12 @@ 

Example data

"id" : 5, "recordsSynced" : 3, "updatedAt" : 2 - }, { + }, + "logs" : { + "logLines" : [ "logLines", "logLines" ] + } + }, { + "attempt" : { "totalStats" : { "stateMessagesEmitted" : 7, "recordsCommitted" : 1, @@ -3990,7 +4269,10 @@

Example data

"id" : 5, "recordsSynced" : 3, "updatedAt" : 2 - } ] + }, + "logs" : { + "logLines" : [ "logLines", "logLines" ] + } } ] }
@@ -4004,7 +4286,7 @@

Produces

Responses

200

Successful operation - JobReadList + JobDebugInfoRead

404

Object with given id was not found. NotFoundKnownExceptionInfo @@ -4013,12 +4295,11 @@

422

InvalidInputExceptionInfo

-

Logs

-
+
Up -
post /v1/logs/get
-
Get logs (getLogs)
+
post /v1/jobs/get
+
Get information about a job (getJobInfo)
@@ -4030,7 +4311,7 @@

Consumes

Request body

-
LogsRequestBody LogsRequestBody (required)
+
JobIdRequestBody JobIdRequestBody (required)
Body Parameter
@@ -4041,99 +4322,867 @@

Request body

Return type

+ JobInfoRead - File
+

Example data

+
Content-Type: application/json
+
{
+  "job" : {
+    "createdAt" : 6,
+    "configId" : "configId",
+    "id" : 0,
+    "updatedAt" : 1
+  },
+  "attempts" : [ {
+    "attempt" : {
+      "totalStats" : {
+        "stateMessagesEmitted" : 7,
+        "recordsCommitted" : 1,
+        "bytesEmitted" : 4,
+        "recordsEmitted" : 2
+      },
+      "failureSummary" : {
+        "failures" : [ {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        }, {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        } ],
+        "partialSuccess" : true
+      },
+      "createdAt" : 5,
+      "bytesSynced" : 9,
+      "endedAt" : 7,
+      "streamStats" : [ {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      }, {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      } ],
+      "id" : 5,
+      "recordsSynced" : 3,
+      "updatedAt" : 2
+    },
+    "logs" : {
+      "logLines" : [ "logLines", "logLines" ]
+    }
+  }, {
+    "attempt" : {
+      "totalStats" : {
+        "stateMessagesEmitted" : 7,
+        "recordsCommitted" : 1,
+        "bytesEmitted" : 4,
+        "recordsEmitted" : 2
+      },
+      "failureSummary" : {
+        "failures" : [ {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        }, {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        } ],
+        "partialSuccess" : true
+      },
+      "createdAt" : 5,
+      "bytesSynced" : 9,
+      "endedAt" : 7,
+      "streamStats" : [ {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      }, {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      } ],
+      "id" : 5,
+      "recordsSynced" : 3,
+      "updatedAt" : 2
+    },
+    "logs" : {
+      "logLines" : [ "logLines", "logLines" ]
+    }
+  } ]
+}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • application/json

Responses

200
+ Successful operation: JobInfoRead

404
+ Object with given id was not found: NotFoundKnownExceptionInfo

422
+ Input failed validation: InvalidInputExceptionInfo
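Polling a single job, sketched in Python under the same assumptions as the earlier sketches (local base URL, an id field in JobIdRequestBody):

import requests

API_ROOT = "http://localhost:8000/api"  # assumed local deployment

resp = requests.post(f"{API_ROOT}/v1/jobs/get", json={"id": 0})  # placeholder job id
resp.raise_for_status()
job_info = resp.json()
# Each attempt carries its stats and its own log lines, mirroring the example data above.
for entry in job_info["attempts"]:
    attempt = entry["attempt"]
    print(attempt["id"], attempt["recordsSynced"], len(entry["logs"]["logLines"]))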
+
+ Up +
post /v1/jobs/list
+
Returns recent jobs for a connection. Jobs are returned in descending order by createdAt. (listJobsFor)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
JobListRequestBody JobListRequestBody (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+
+ JobReadList + +
+ + + +

Example data

+
Content-Type: application/json
+
{
+  "jobs" : [ {
+    "job" : {
+      "createdAt" : 6,
+      "configId" : "configId",
+      "id" : 0,
+      "updatedAt" : 1
+    },
+    "attempts" : [ {
+      "totalStats" : {
+        "stateMessagesEmitted" : 7,
+        "recordsCommitted" : 1,
+        "bytesEmitted" : 4,
+        "recordsEmitted" : 2
+      },
+      "failureSummary" : {
+        "failures" : [ {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        }, {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        } ],
+        "partialSuccess" : true
+      },
+      "createdAt" : 5,
+      "bytesSynced" : 9,
+      "endedAt" : 7,
+      "streamStats" : [ {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      }, {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      } ],
+      "id" : 5,
+      "recordsSynced" : 3,
+      "updatedAt" : 2
+    }, {
+      "totalStats" : {
+        "stateMessagesEmitted" : 7,
+        "recordsCommitted" : 1,
+        "bytesEmitted" : 4,
+        "recordsEmitted" : 2
+      },
+      "failureSummary" : {
+        "failures" : [ {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        }, {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        } ],
+        "partialSuccess" : true
+      },
+      "createdAt" : 5,
+      "bytesSynced" : 9,
+      "endedAt" : 7,
+      "streamStats" : [ {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      }, {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      } ],
+      "id" : 5,
+      "recordsSynced" : 3,
+      "updatedAt" : 2
+    } ]
+  }, {
+    "job" : {
+      "createdAt" : 6,
+      "configId" : "configId",
+      "id" : 0,
+      "updatedAt" : 1
+    },
+    "attempts" : [ {
+      "totalStats" : {
+        "stateMessagesEmitted" : 7,
+        "recordsCommitted" : 1,
+        "bytesEmitted" : 4,
+        "recordsEmitted" : 2
+      },
+      "failureSummary" : {
+        "failures" : [ {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        }, {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        } ],
+        "partialSuccess" : true
+      },
+      "createdAt" : 5,
+      "bytesSynced" : 9,
+      "endedAt" : 7,
+      "streamStats" : [ {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      }, {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      } ],
+      "id" : 5,
+      "recordsSynced" : 3,
+      "updatedAt" : 2
+    }, {
+      "totalStats" : {
+        "stateMessagesEmitted" : 7,
+        "recordsCommitted" : 1,
+        "bytesEmitted" : 4,
+        "recordsEmitted" : 2
+      },
+      "failureSummary" : {
+        "failures" : [ {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        }, {
+          "retryable" : true,
+          "stacktrace" : "stacktrace",
+          "externalMessage" : "externalMessage",
+          "timestamp" : 1
+        } ],
+        "partialSuccess" : true
+      },
+      "createdAt" : 5,
+      "bytesSynced" : 9,
+      "endedAt" : 7,
+      "streamStats" : [ {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      }, {
+        "stats" : {
+          "stateMessagesEmitted" : 7,
+          "recordsCommitted" : 1,
+          "bytesEmitted" : 4,
+          "recordsEmitted" : 2
+        },
+        "streamName" : "streamName"
+      } ],
+      "id" : 5,
+      "recordsSynced" : 3,
+      "updatedAt" : 2
+    } ]
+  } ]
+}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • application/json

Responses

200
+ Successful operation: JobReadList

404
+ Object with given id was not found: NotFoundKnownExceptionInfo

422
+ Input failed validation: InvalidInputExceptionInfo
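A sketch of listing sync jobs for one connection. JobListRequestBody's exact fields are not shown on this page, so the configTypes and configId names below are assumptions, and the UUID is a placeholder:

import requests

API_ROOT = "http://localhost:8000/api"  # assumed local deployment

body = {
    "configTypes": ["sync"],                             # assumed field name
    "configId": "046b6c7f-0b8a-43b9-b35d-6489e6daee91",  # assumed: the connection id
}
resp = requests.post(f"{API_ROOT}/v1/jobs/list", json=body)
resp.raise_for_status()
for entry in resp.json()["jobs"]:  # newest first, per the endpoint description
    print(entry["job"]["id"], entry["job"]["createdAt"])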

Logs

+
+
+ Up +
post /v1/logs/get
+
Get logs (getLogs)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
LogsRequestBody LogsRequestBody (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+
+ + File +
+ + + + +

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • text/plain
+   • application/json

Responses

200
+ Returns the log file: File

404
+ Object with given id was not found: NotFoundKnownExceptionInfo

422
+ Input failed validation: InvalidInputExceptionInfo
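Fetching a log file, sketched in Python; the logType field and the "server" value below are assumptions about what LogsRequestBody accepts:

import requests

API_ROOT = "http://localhost:8000/api"  # assumed local deployment

resp = requests.post(f"{API_ROOT}/v1/logs/get", json={"logType": "server"})  # assumed field/value
resp.raise_for_status()
# The endpoint returns the raw log file (text/plain), not JSON, so write the bytes to disk.
with open("server.log", "wb") as fh:
    fh.write(resp.content)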

Notifications

+
+
+ Up +
post /v1/notifications/try
+
Try sending a notification (tryNotificationConfig)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
Notification Notification (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+ + + + +

Example data

+
Content-Type: application/json
+
{
+  "message" : "message",
+  "status" : "succeeded"
+}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • application/json

Responses

200
+ Successful operation: NotificationRead

404
+ Object with given id was not found: NotFoundKnownExceptionInfo

422
+ Input failed validation: InvalidInputExceptionInfo
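A sketch of test-firing a Slack notification. The Notification body shape below (notificationType, sendOnSuccess/sendOnFailure, slackConfiguration.webhook) is an assumption, and the webhook URL is a placeholder:

import requests

API_ROOT = "http://localhost:8000/api"  # assumed local deployment

notification = {
    "notificationType": "slack",   # assumed enum value
    "sendOnSuccess": False,
    "sendOnFailure": True,
    "slackConfiguration": {"webhook": "https://hooks.slack.com/services/PLACEHOLDER"},  # placeholder
}
resp = requests.post(f"{API_ROOT}/v1/notifications/try", json=notification)
resp.raise_for_status()
print(resp.json())  # NotificationRead, e.g. {"status": "succeeded", "message": "..."}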

Oauth

+
+
+ Up +
post /v1/destination_oauths/complete_oauth
+
Given a destination definition ID, generate an access/refresh token, etc. (completeDestinationOAuth)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
CompleteDestinationOAuthRequest CompleteDestinationOAuthRequest (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+
+ + map[String, Object] +
+ + + + +

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • application/json

Responses

200
+ Successful operation: map[String, Object]

404
+ Object with given id was not found: NotFoundKnownExceptionInfo

422
+ Input failed validation: InvalidInputExceptionInfo
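A sketch of exchanging the OAuth callback output for credentials. The CompleteDestinationOAuthRequest fields used below (destinationDefinitionId, workspaceId, redirectUrl, queryParams) are assumptions, and all values are placeholders:

import requests

API_ROOT = "http://localhost:8000/api"  # assumed local deployment

body = {
    "destinationDefinitionId": "046b6c7f-0b8a-43b9-b35d-6489e6daee91",  # placeholder
    "workspaceId": "046b6c7f-0b8a-43b9-b35d-6489e6daee91",              # placeholder
    "redirectUrl": "https://example.com/oauth_callback",                # placeholder
    "queryParams": {"code": "authorization-code-from-provider"},        # placeholder
}
resp = requests.post(f"{API_ROOT}/v1/destination_oauths/complete_oauth", json=body)
resp.raise_for_status()
secrets = resp.json()  # free-form map[String, Object], e.g. access/refresh tokens

The source-side endpoint below, /v1/source_oauths/complete_oauth, takes the analogous CompleteSourceOauthRequest.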
+
+ Up +
post /v1/source_oauths/complete_oauth
+
Given a source definition ID, generate an access/refresh token, etc. (completeSourceOAuth)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
CompleteSourceOauthRequest CompleteSourceOauthRequest (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+
+ + map[String, Object] +
+ + + + +

Produces

+ This API call produces the following media types according to the Accept request header; + the media type will be conveyed by the Content-Type response header. +
    +
  • application/json
  • +
+ +

Responses

+

200

+ Successful operation + map[String, Object] +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo +

422

+ Input failed validation + InvalidInputExceptionInfo +
+
+
+
+ Up +
post /v1/destination_oauths/get_consent_url
+
Given a destination connector definition ID, return the URL of the consent screen to which to redirect the user. (getDestinationOAuthConsent)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
DestinationOauthConsentRequest DestinationOauthConsentRequest (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+ + + + +

Example data

+
Content-Type: application/json
+
{
+  "consentUrl" : "consentUrl"
+}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • application/json

Responses

200
+ Successful operation: OAuthConsentRead

404
+ Object with given id was not found: NotFoundKnownExceptionInfo

422
+ Input failed validation: InvalidInputExceptionInfo
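A sketch of starting the OAuth flow by fetching the consent URL. The DestinationOauthConsentRequest fields used below (destinationDefinitionId, workspaceId, redirectUrl) are assumptions, and the values are placeholders:

import webbrowser
import requests

API_ROOT = "http://localhost:8000/api"  # assumed local deployment

body = {
    "destinationDefinitionId": "046b6c7f-0b8a-43b9-b35d-6489e6daee91",  # placeholder
    "workspaceId": "046b6c7f-0b8a-43b9-b35d-6489e6daee91",              # placeholder
    "redirectUrl": "https://example.com/oauth_callback",                # placeholder
}
resp = requests.post(f"{API_ROOT}/v1/destination_oauths/get_consent_url", json=body)
resp.raise_for_status()
webbrowser.open(resp.json()["consentUrl"])  # send the user to the provider's consent screen

The source-side endpoint below, /v1/source_oauths/get_consent_url, behaves the same way with a SourceOauthConsentRequest.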
+
+ Up +
post /v1/source_oauths/get_consent_url
+
Given a source connector definition ID, return the URL of the consent screen to which to redirect the user. (getSourceOAuthConsent)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
SourceOauthConsentRequest SourceOauthConsentRequest (required)
+ +
Body Parameter
+ +
+ + + + +

Return type

+ + + + +

Example data

+
Content-Type: application/json
+
{
+  "consentUrl" : "consentUrl"
+}
+ +

Produces

+ This API call produces the following media types according to the Accept request header; + the media type will be conveyed by the Content-Type response header. +
    +
  • application/json
  • +
+ +

Responses

+

200

+ Successful operation + OAuthConsentRead +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo +

422

+ Input failed validation + InvalidInputExceptionInfo +
+
+
+
+ Up +
post /v1/destination_oauths/oauth_params/create
+
Sets instance-wide variables to be used for the OAuth flow when creating this destination. When set, these variables will be injected into a connector's configuration before any interaction with the connector image itself. This enables running OAuth flows with consistent variables, e.g. the company's Google Ads developer_token, client_id, and client_secret, without the user having to know about these variables. (setInstancewideDestinationOauthParams)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
SetInstancewideDestinationOauthParamsRequestBody SetInstancewideDestinationOauthParamsRequestBody (required)
+ +
Body Parameter
+ +
+ + + + + + + + +

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • application/json

Responses

200
+ Successful

400
+ Exception occurred; see message for details: KnownExceptionInfo

404
+ Object with given id was not found: NotFoundKnownExceptionInfo
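A sketch of seeding instance-wide OAuth parameters so individual users never have to supply them. The params field name in SetInstancewideDestinationOauthParamsRequestBody is an assumption, and the credential values are placeholders:

import requests

API_ROOT = "http://localhost:8000/api"  # assumed local deployment

body = {
    "destinationDefinitionId": "046b6c7f-0b8a-43b9-b35d-6489e6daee91",  # placeholder
    "params": {                                   # assumed field name
        "client_id": "my-oauth-client-id",        # placeholder credentials
        "client_secret": "my-oauth-client-secret",
    },
}
resp = requests.post(f"{API_ROOT}/v1/destination_oauths/oauth_params/create", json=body)
resp.raise_for_status()  # 200 on success; no response body is documented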
+
+ Up +
post /v1/source_oauths/oauth_params/create
+
Sets instance-wide variables to be used for the OAuth flow when creating this source. When set, these variables will be injected into a connector's configuration before any interaction with the connector image itself. This enables running OAuth flows with consistent variables, e.g. the company's Google Ads developer_token, client_id, and client_secret, without the user having to know about these variables. (setInstancewideSourceOauthParams)
+
+ + +

Consumes

+ This API call consumes the following media types via the Content-Type request header: +
    +
  • application/json
  • +
+ +

Request body

+
+
SetInstancewideSourceOauthParamsRequestBody SetInstancewideSourceOauthParamsRequestBody (required)
+ +
Body Parameter
+ +
+ + + + + + + + +

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • application/json

Responses

200
+ Successful

400
+ Exception occurred; see message for details: KnownExceptionInfo

404
+ Object with given id was not found: NotFoundKnownExceptionInfo
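The source-side call is symmetric; a minimal sketch under the same assumptions (assumed params field name, placeholder credentials):

import requests

API_ROOT = "http://localhost:8000/api"  # assumed local deployment

body = {
    "sourceDefinitionId": "046b6c7f-0b8a-43b9-b35d-6489e6daee91",  # placeholder
    "params": {  # assumed field name; placeholder credentials
        "developer_token": "my-developer-token",
        "client_id": "my-oauth-client-id",
        "client_secret": "my-oauth-client-secret",
    },
}
requests.post(f"{API_ROOT}/v1/source_oauths/oauth_params/create", json=body).raise_for_status()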

Openapi

+
+
+ Up +
get /v1/openapi
+
Returns the openapi specification (getOpenApiSpec)
+
-

Produces

- This API call produces the following media types according to the Accept request header; - the media type will be conveyed by the Content-Type response header. -
    -
  • text/plain
  • -
  • application/json
  • -
- -

Responses

-

200

- Returns the log file - File -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo -

422

- Input failed validation - InvalidInputExceptionInfo -
-
-

Notifications

-
-
- Up -
post /v1/notifications/try
-
Try sending a notifications (tryNotificationConfig)
-
- - -

Consumes

- This API call consumes the following media types via the Content-Type request header: -
    -
  • application/json
  • -
- -

Request body

-
-
Notification Notification (required)
-
Body Parameter
-

Return type

- NotificationRead + File
-

Example data

-
Content-Type: application/json
-
{
-  "message" : "message",
-  "status" : "succeeded"
-}

Produces

This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
    -
  • application/json
  • +
  • text/plain

Responses

200

- Successful operation - NotificationRead -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo -

422

- Input failed validation - InvalidInputExceptionInfo + Returns the openapi specification file + File

-

Oauth

-
+

Operation

+
Up -
post /v1/destination_oauths/complete_oauth
-
Given a destination def ID generate an access/refresh token etc. (completeDestinationOAuth)
+
post /v1/operations/check
+
Check if an operation to be created is valid (checkOperation)
@@ -4145,7 +5194,7 @@

Consumes

Request body

-
CompleteDestinationOAuthRequest CompleteDestinationOAuthRequest (required)
+
OperatorConfiguration OperatorConfiguration (required)
Body Parameter
@@ -4156,12 +5205,18 @@

Request body

Return type

+ CheckOperationRead - map[String, Object]
+

Example data

+
Content-Type: application/json
+
{
+  "message" : "message",
+  "status" : "succeeded"
+}

Produces

This API call produces the following media types according to the Accept request header; @@ -4173,20 +5228,17 @@

Produces

Responses

200

Successful operation - map[String, Object] -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo + CheckOperationRead

422

Input failed validation InvalidInputExceptionInfo

-
+
Up -
post /v1/source_oauths/complete_oauth
-
Given a source def ID generate an access/refresh token etc. (completeSourceOAuth)
+
post /v1/operations/create
+
Create an operation to be applied as part of a connection pipeline (createOperation)
@@ -4198,7 +5250,7 @@

Consumes

Request body

-
CompleteSourceOauthRequest CompleteSourceOauthRequest (required)
+
OperationCreate OperationCreate (required)
Body Parameter
@@ -4209,12 +5261,30 @@

Request body

Return type

+ OperationRead - map[String, Object]
+

Example data

+
Content-Type: application/json
+
{
+  "name" : "name",
+  "operationId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "operatorConfiguration" : {
+    "normalization" : {
+      "option" : "basic"
+    },
+    "dbt" : {
+      "gitRepoBranch" : "gitRepoBranch",
+      "dockerImage" : "dockerImage",
+      "dbtArguments" : "dbtArguments",
+      "gitRepoUrl" : "gitRepoUrl"
+    }
+  },
+  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+}

Produces

This API call produces the following media types according to the Accept request header; @@ -4226,20 +5296,17 @@

Produces

Responses

200

Successful operation - map[String, Object] -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo + OperationRead

422

Input failed validation InvalidInputExceptionInfo

-
+
Up -
post /v1/destination_oauths/get_consent_url
-
Given a destination connector definition ID, return the URL to the consent screen where to redirect the user to. (getDestinationOAuthConsent)
+
post /v1/operations/delete
+
Delete an operation (deleteOperation)
@@ -4251,7 +5318,7 @@

Consumes

Request body

-
DestinationOauthConsentRequest DestinationOauthConsentRequest (required)
+
OperationIdRequestBody OperationIdRequestBody (required)
Body Parameter
@@ -4260,19 +5327,9 @@

Request body

-

Return type

- -

Example data

-
Content-Type: application/json
-
{
-  "consentUrl" : "consentUrl"
-}

Produces

This API call produces the following media types according to the Accept request header; @@ -4282,9 +5339,9 @@

Produces

Responses

-

200

- Successful operation - OAuthConsentRead +

204

+ The resource was deleted successfully. +

404

Object with given id was not found. NotFoundKnownExceptionInfo @@ -4293,11 +5350,11 @@

422

InvalidInputExceptionInfo

-
+
Up -
post /v1/source_oauths/get_consent_url
-
Given a source connector definition ID, return the URL to the consent screen where to redirect the user to. (getSourceOAuthConsent)
+
post /v1/operations/get
+
Returns an operation (getOperation)
@@ -4309,7 +5366,7 @@

Consumes

Request body

-
SourceOauthConsentRequest SourceOauthConsentRequest (required)
+
OperationIdRequestBody OperationIdRequestBody (required)
Body Parameter
@@ -4320,7 +5377,7 @@

Request body

Return type

@@ -4329,7 +5386,20 @@

Return type

Example data

Content-Type: application/json
{
-  "consentUrl" : "consentUrl"
+  "name" : "name",
+  "operationId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "operatorConfiguration" : {
+    "normalization" : {
+      "option" : "basic"
+    },
+    "dbt" : {
+      "gitRepoBranch" : "gitRepoBranch",
+      "dockerImage" : "dockerImage",
+      "dbtArguments" : "dbtArguments",
+      "gitRepoUrl" : "gitRepoUrl"
+    }
+  },
+  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
 }

Produces

@@ -4342,7 +5412,7 @@

Produces

Responses

200

Successful operation - OAuthConsentRead + OperationRead

404

Object with given id was not found. NotFoundKnownExceptionInfo @@ -4351,12 +5421,12 @@

422

InvalidInputExceptionInfo

-
+
Up -
post /v1/destination_oauths/oauth_params/create
-
Sets instancewide variables to be used for the oauth flow when creating this destination. When set, these variables will be injected into a connector's configuration before any interaction with the connector image itself. This enables running oauth flows with consistent variables e.g: the company's Google Ads developer_token, client_id, and client_secret without the user having to know about these variables. (setInstancewideDestinationOauthParams)
-
+
post /v1/operations/list
+
Returns all operations for a connection. (listOperationsForConnection)
+
List operations for connection.

Consumes

@@ -4367,7 +5437,7 @@

Consumes

Request body

-
SetInstancewideDestinationOauthParamsRequestBody SetInstancewideDestinationOauthParamsRequestBody (required)
+
ConnectionIdRequestBody ConnectionIdRequestBody (required)
Body Parameter
@@ -4376,9 +5446,49 @@

Request body

+

Return type

+ +

Example data

+
Content-Type: application/json
+
{
+  "operations" : [ {
+    "name" : "name",
+    "operationId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "operatorConfiguration" : {
+      "normalization" : {
+        "option" : "basic"
+      },
+      "dbt" : {
+        "gitRepoBranch" : "gitRepoBranch",
+        "dockerImage" : "dockerImage",
+        "dbtArguments" : "dbtArguments",
+        "gitRepoUrl" : "gitRepoUrl"
+      }
+    },
+    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  }, {
+    "name" : "name",
+    "operationId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "operatorConfiguration" : {
+      "normalization" : {
+        "option" : "basic"
+      },
+      "dbt" : {
+        "gitRepoBranch" : "gitRepoBranch",
+        "dockerImage" : "dockerImage",
+        "dbtArguments" : "dbtArguments",
+        "gitRepoUrl" : "gitRepoUrl"
+      }
+    },
+    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  } ]
+}

Produces

This API call produces the following media types according to the Accept request header; @@ -4389,21 +5499,21 @@

Produces

Responses

200

- Successful - -

400

- Exception occurred; see message for details. - KnownExceptionInfo + Successful operation + OperationReadList

404

Object with given id was not found. NotFoundKnownExceptionInfo +

422

+ Input failed validation + InvalidInputExceptionInfo

-
+
Up -
post /v1/source_oauths/oauth_params/create
-
Sets instancewide variables to be used for the oauth flow when creating this source. When set, these variables will be injected into a connector's configuration before any interaction with the connector image itself. This enables running oauth flows with consistent variables e.g: the company's Google Ads developer_token, client_id, and client_secret without the user having to know about these variables. (setInstancewideSourceOauthParams)
+
post /v1/operations/update
+
Update an operation (updateOperation)
@@ -4415,7 +5525,7 @@

Consumes

Request body

-
SetInstancewideSourceOauthParamsRequestBody SetInstancewideSourceOauthParamsRequestBody (required)
+
OperationUpdate OperationUpdate (required)
Body Parameter
@@ -4424,71 +5534,55 @@

Request body

- - - - -

Produces

- This API call produces the following media types according to the Accept request header; - the media type will be conveyed by the Content-Type response header. -
    -
  • application/json
  • -
- -

Responses

-

200

- Successful - -

400

- Exception occurred; see message for details. - KnownExceptionInfo -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo -
-
-

Openapi

-
-
- Up -
get /v1/openapi
-
Returns the openapi specification (getOpenApiSpec)
-
- - - - - - -

Return type

+ OperationRead - File
+

Example data

+
Content-Type: application/json
+
{
+  "name" : "name",
+  "operationId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "operatorConfiguration" : {
+    "normalization" : {
+      "option" : "basic"
+    },
+    "dbt" : {
+      "gitRepoBranch" : "gitRepoBranch",
+      "dockerImage" : "dockerImage",
+      "dbtArguments" : "dbtArguments",
+      "gitRepoUrl" : "gitRepoUrl"
+    }
+  },
+  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+}

Produces

This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
    -
  • text/plain
  • +
  • application/json

Responses

200

- Returns the openapi specification file - File + Successful operation + OperationRead +

422

+ Input failed validation + InvalidInputExceptionInfo

-

Operation

-
+

Scheduler

+
Up -
post /v1/operations/check
-
Check if an operation to be created is valid (checkOperation)
+
post /v1/scheduler/destinations/check_connection
+
Run check connection for a given destination configuration (executeDestinationCheckConnection)
@@ -4500,7 +5594,7 @@

Consumes

Request body

-
OperatorConfiguration OperatorConfiguration (required)
+
DestinationCoreConfig DestinationCoreConfig (required)
Body Parameter
@@ -4511,7 +5605,7 @@

Request body

Return type

@@ -4521,6 +5615,16 @@

Example data

Content-Type: application/json
{
   "message" : "message",
+  "jobInfo" : {
+    "createdAt" : 0,
+    "configId" : "configId",
+    "endedAt" : 6,
+    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "logs" : {
+      "logLines" : [ "logLines", "logLines" ]
+    },
+    "succeeded" : true
+  },
   "status" : "succeeded"
 }
@@ -4534,17 +5638,17 @@

Produces

Responses

200

Successful operation - CheckOperationRead + CheckConnectionRead

422

Input failed validation InvalidInputExceptionInfo

-
+
Up -
post /v1/operations/create
-
Create an operation to be applied as part of a connection pipeline (createOperation)
+
post /v1/scheduler/sources/check_connection
+
Run check connection for a given source configuration (executeSourceCheckConnection)
@@ -4556,7 +5660,7 @@

Consumes

Request body

-
OperationCreate OperationCreate (required)
+
SourceCoreConfig SourceCoreConfig (required)
Body Parameter
@@ -4567,7 +5671,7 @@

Request body

Return type

@@ -4576,20 +5680,18 @@

Return type

Example data

Content-Type: application/json
{
-  "name" : "name",
-  "operationId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "operatorConfiguration" : {
-    "normalization" : {
-      "option" : "basic"
+  "message" : "message",
+  "jobInfo" : {
+    "createdAt" : 0,
+    "configId" : "configId",
+    "endedAt" : 6,
+    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "logs" : {
+      "logLines" : [ "logLines", "logLines" ]
     },
-    "dbt" : {
-      "gitRepoBranch" : "gitRepoBranch",
-      "dockerImage" : "dockerImage",
-      "dbtArguments" : "dbtArguments",
-      "gitRepoUrl" : "gitRepoUrl"
-    }
+    "succeeded" : true
   },
-  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  "status" : "succeeded"
 }

Produces

@@ -4602,17 +5704,17 @@

Produces

Responses

200

Successful operation - OperationRead + CheckConnectionRead

422

Input failed validation InvalidInputExceptionInfo

-
+
Up -
post /v1/operations/delete
-
Delete an operation (deleteOperation)
+
post /v1/scheduler/sources/discover_schema
+
Run discover schema for a given source configuration (executeSourceDiscoverSchema)
@@ -4624,7 +5726,7 @@

Consumes

Request body

-
OperationIdRequestBody OperationIdRequestBody (required)
+
SourceCoreConfig SourceCoreConfig (required)
Body Parameter
@@ -4633,9 +5735,61 @@

Request body

+

Return type

+ +

Example data

+
Content-Type: application/json
+
{
+  "catalog" : {
+    "streams" : [ {
+      "stream" : {
+        "sourceDefinedPrimaryKey" : [ [ "sourceDefinedPrimaryKey", "sourceDefinedPrimaryKey" ], [ "sourceDefinedPrimaryKey", "sourceDefinedPrimaryKey" ] ],
+        "supportedSyncModes" : [ null, null ],
+        "sourceDefinedCursor" : true,
+        "name" : "name",
+        "namespace" : "namespace",
+        "defaultCursorField" : [ "defaultCursorField", "defaultCursorField" ]
+      },
+      "config" : {
+        "aliasName" : "aliasName",
+        "cursorField" : [ "cursorField", "cursorField" ],
+        "selected" : true,
+        "primaryKey" : [ [ "primaryKey", "primaryKey" ], [ "primaryKey", "primaryKey" ] ]
+      }
+    }, {
+      "stream" : {
+        "sourceDefinedPrimaryKey" : [ [ "sourceDefinedPrimaryKey", "sourceDefinedPrimaryKey" ], [ "sourceDefinedPrimaryKey", "sourceDefinedPrimaryKey" ] ],
+        "supportedSyncModes" : [ null, null ],
+        "sourceDefinedCursor" : true,
+        "name" : "name",
+        "namespace" : "namespace",
+        "defaultCursorField" : [ "defaultCursorField", "defaultCursorField" ]
+      },
+      "config" : {
+        "aliasName" : "aliasName",
+        "cursorField" : [ "cursorField", "cursorField" ],
+        "selected" : true,
+        "primaryKey" : [ [ "primaryKey", "primaryKey" ], [ "primaryKey", "primaryKey" ] ]
+      }
+    } ]
+  },
+  "jobInfo" : {
+    "createdAt" : 0,
+    "configId" : "configId",
+    "endedAt" : 6,
+    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "logs" : {
+      "logLines" : [ "logLines", "logLines" ]
+    },
+    "succeeded" : true
+  }
+}

Produces

This API call produces the following media types according to the Accept request header; @@ -4645,22 +5799,20 @@

Produces

Responses

-

204

- The resource was deleted successfully. - -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo +

200

+ Successful operation + SourceDiscoverSchemaRead

422

Input failed validation InvalidInputExceptionInfo

-
+

Source

+
Up -
post /v1/operations/get
-
Returns an operation (getOperation)
+
post /v1/sources/check_connection
+
Check connection to the source (checkConnectionToSource)
@@ -4672,7 +5824,7 @@

Consumes

Request body

-
OperationIdRequestBody OperationIdRequestBody (required)
+
SourceIdRequestBody SourceIdRequestBody (required)
Body Parameter
@@ -4683,7 +5835,7 @@

Request body

Return type

@@ -4692,20 +5844,18 @@

Return type

Example data

Content-Type: application/json
{
-  "name" : "name",
-  "operationId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "operatorConfiguration" : {
-    "normalization" : {
-      "option" : "basic"
+  "message" : "message",
+  "jobInfo" : {
+    "createdAt" : 0,
+    "configId" : "configId",
+    "endedAt" : 6,
+    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "logs" : {
+      "logLines" : [ "logLines", "logLines" ]
     },
-    "dbt" : {
-      "gitRepoBranch" : "gitRepoBranch",
-      "dockerImage" : "dockerImage",
-      "dbtArguments" : "dbtArguments",
-      "gitRepoUrl" : "gitRepoUrl"
-    }
+    "succeeded" : true
   },
-  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  "status" : "succeeded"
 }

Produces

@@ -4718,7 +5868,7 @@

Produces

Responses

200

Successful operation - OperationRead + CheckConnectionRead

404

Object with given id was not found. NotFoundKnownExceptionInfo @@ -4727,12 +5877,12 @@

422

InvalidInputExceptionInfo

-
+
Up -
post /v1/operations/list
-
Returns all operations for a connection. (listOperationsForConnection)
-
List operations for connection.
+
post /v1/sources/check_connection_for_update
+
Check connection for a proposed update to a source (checkConnectionToSourceForUpdate)
+

Consumes

@@ -4743,7 +5893,7 @@

Consumes

Request body

-
ConnectionIdRequestBody ConnectionIdRequestBody (required)
+
SourceUpdate SourceUpdate (required)
Body Parameter
@@ -4754,7 +5904,7 @@

Request body

Return type

@@ -4763,37 +5913,18 @@

Return type

Example data

Content-Type: application/json
{
-  "operations" : [ {
-    "name" : "name",
-    "operationId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "operatorConfiguration" : {
-      "normalization" : {
-        "option" : "basic"
-      },
-      "dbt" : {
-        "gitRepoBranch" : "gitRepoBranch",
-        "dockerImage" : "dockerImage",
-        "dbtArguments" : "dbtArguments",
-        "gitRepoUrl" : "gitRepoUrl"
-      }
-    },
-    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
-  }, {
-    "name" : "name",
-    "operationId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "operatorConfiguration" : {
-      "normalization" : {
-        "option" : "basic"
-      },
-      "dbt" : {
-        "gitRepoBranch" : "gitRepoBranch",
-        "dockerImage" : "dockerImage",
-        "dbtArguments" : "dbtArguments",
-        "gitRepoUrl" : "gitRepoUrl"
-      }
+  "message" : "message",
+  "jobInfo" : {
+    "createdAt" : 0,
+    "configId" : "configId",
+    "endedAt" : 6,
+    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "logs" : {
+      "logLines" : [ "logLines", "logLines" ]
     },
-    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
-  } ]
+    "succeeded" : true
+  },
+  "status" : "succeeded"
 }

Produces

@@ -4806,7 +5937,7 @@

Produces

Responses

200

Successful operation - OperationReadList + CheckConnectionRead

404

Object with given id was not found. NotFoundKnownExceptionInfo @@ -4815,11 +5946,11 @@

422

InvalidInputExceptionInfo

-
+
Up -
post /v1/operations/update
-
Update an operation (updateOperation)
+
post /v1/sources/clone
+
Clone source (cloneSource)
@@ -4831,7 +5962,7 @@

Consumes

Request body

-
OperationUpdate OperationUpdate (required)
+
SourceIdRequestBody SourceIdRequestBody (required)
Body Parameter
@@ -4842,7 +5973,7 @@

Request body

Return type

@@ -4851,19 +5982,13 @@

Return type

Example data

Content-Type: application/json
{
-  "name" : "name",
-  "operationId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "operatorConfiguration" : {
-    "normalization" : {
-      "option" : "basic"
-    },
-    "dbt" : {
-      "gitRepoBranch" : "gitRepoBranch",
-      "dockerImage" : "dockerImage",
-      "dbtArguments" : "dbtArguments",
-      "gitRepoUrl" : "gitRepoUrl"
-    }
+  "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "connectionConfiguration" : {
+    "user" : "charles"
   },
+  "name" : "name",
+  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "sourceName" : "sourceName",
   "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
 }
@@ -4877,18 +6002,20 @@

Produces

Responses

200

Successful operation - OperationRead + SourceRead +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo

422

Input failed validation InvalidInputExceptionInfo

-

Scheduler

-
+
Up -
post /v1/scheduler/destinations/check_connection
-
Run check connection for a given destination configuration (executeDestinationCheckConnection)
+
post /v1/sources/create
+
Create a source (createSource)
@@ -4900,7 +6027,7 @@

Consumes

Request body

-
DestinationCoreConfig DestinationCoreConfig (required)
+
SourceCreate SourceCreate (required)
Body Parameter
@@ -4911,7 +6038,7 @@

Request body

Return type

@@ -4920,18 +6047,14 @@

Return type

Example data

Content-Type: application/json
{
-  "message" : "message",
-  "jobInfo" : {
-    "createdAt" : 0,
-    "configId" : "configId",
-    "endedAt" : 6,
-    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "logs" : {
-      "logLines" : [ "logLines", "logLines" ]
-    },
-    "succeeded" : true
+  "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "connectionConfiguration" : {
+    "user" : "charles"
   },
-  "status" : "succeeded"
+  "name" : "name",
+  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "sourceName" : "sourceName",
+  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
 }

Produces

@@ -4944,17 +6067,17 @@

Produces

Responses

200

Successful operation - CheckConnectionRead + SourceRead

422

Input failed validation InvalidInputExceptionInfo

-
+
Up -
post /v1/scheduler/sources/check_connection
-
Run check connection for a given source configuration (executeSourceCheckConnection)
+
post /v1/sources/delete
+
Delete a source (deleteSource)
@@ -4966,7 +6089,7 @@

Consumes

Request body

-
SourceCoreConfig SourceCoreConfig (required)
+
SourceIdRequestBody SourceIdRequestBody (required)
Body Parameter
@@ -4975,30 +6098,9 @@

Request body

-

Return type

- -

Example data

-
Content-Type: application/json
-
{
-  "message" : "message",
-  "jobInfo" : {
-    "createdAt" : 0,
-    "configId" : "configId",
-    "endedAt" : 6,
-    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "logs" : {
-      "logLines" : [ "logLines", "logLines" ]
-    },
-    "succeeded" : true
-  },
-  "status" : "succeeded"
-}

Produces

This API call produces the following media types according to the Accept request header; @@ -5008,19 +6110,22 @@

Produces

Responses

-

200

- Successful operation - CheckConnectionRead +

204

+ The resource was deleted successfully. + +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo

422

Input failed validation InvalidInputExceptionInfo

-
+
Up -
post /v1/scheduler/sources/discover_schema
-
Run discover schema for a given source a source configuration (executeSourceDiscoverSchema)
+
post /v1/sources/discover_schema
+
Discover the schema catalog of the source (discoverSchemaForSource)
@@ -5032,7 +6137,7 @@

Consumes

Request body

-
SourceCoreConfig SourceCoreConfig (required)
+
SourceDiscoverSchemaRequestBody SourceDiscoverSchemaRequestBody (required)
Body Parameter
@@ -5108,73 +6213,6 @@

Responses

200

Successful operation SourceDiscoverSchemaRead -

422

- Input failed validation - InvalidInputExceptionInfo -
-
-

Source

-
-
- Up -
post /v1/sources/check_connection
-
Check connection to the source (checkConnectionToSource)
-
- - -

Consumes

- This API call consumes the following media types via the Content-Type request header: -
    -
  • application/json
  • -
- -

Request body

-
-
SourceIdRequestBody SourceIdRequestBody (required)
- -
Body Parameter
- -
- - - - -

Return type

- - - - -

Example data

-
Content-Type: application/json
-
{
-  "message" : "message",
-  "jobInfo" : {
-    "createdAt" : 0,
-    "configId" : "configId",
-    "endedAt" : 6,
-    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "logs" : {
-      "logLines" : [ "logLines", "logLines" ]
-    },
-    "succeeded" : true
-  },
-  "status" : "succeeded"
-}
- -

Produces

- This API call produces the following media types according to the Accept request header; - the media type will be conveyed by the Content-Type response header. -
    -
  • application/json
  • -
- -

Responses

-

200

- Successful operation - CheckConnectionRead

404

Object with given id was not found. NotFoundKnownExceptionInfo @@ -5183,11 +6221,11 @@

422

InvalidInputExceptionInfo

-
+
Up -
post /v1/sources/check_connection_for_update
-
Check connection for a proposed update to a source (checkConnectionToSourceForUpdate)
+
post /v1/sources/get
+
Get source (getSource)
@@ -5199,7 +6237,7 @@

Consumes

Request body

-
SourceUpdate SourceUpdate (required)
+
SourceIdRequestBody SourceIdRequestBody (required)
Body Parameter
@@ -5210,7 +6248,7 @@

Request body

Return type

@@ -5219,18 +6257,14 @@

Return type

Example data

Content-Type: application/json
{
-  "message" : "message",
-  "jobInfo" : {
-    "createdAt" : 0,
-    "configId" : "configId",
-    "endedAt" : 6,
-    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "logs" : {
-      "logLines" : [ "logLines", "logLines" ]
-    },
-    "succeeded" : true
+  "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "connectionConfiguration" : {
+    "user" : "charles"
   },
-  "status" : "succeeded"
+  "name" : "name",
+  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "sourceName" : "sourceName",
+  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
 }

Produces

@@ -5243,7 +6277,7 @@

Produces

Responses

200

Successful operation - CheckConnectionRead + SourceRead

404

Object with given id was not found. NotFoundKnownExceptionInfo @@ -5252,12 +6286,12 @@

422

InvalidInputExceptionInfo

-
+
Up -
post /v1/sources/clone
-
Clone source (cloneSource)
-
+
post /v1/sources/list
+
List sources for workspace (listSourcesForWorkspace)
+
List sources for workspace. Does not return deleted sources.

Consumes

@@ -5268,7 +6302,7 @@

Consumes

Request body

-
SourceIdRequestBody SourceIdRequestBody (required)
+
WorkspaceIdRequestBody WorkspaceIdRequestBody (required)
Body Parameter
@@ -5279,7 +6313,7 @@

Request body

Return type

@@ -5288,14 +6322,25 @@

Return type

Example data

Content-Type: application/json
{
-  "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "connectionConfiguration" : {
-    "user" : "charles"
-  },
-  "name" : "name",
-  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "sourceName" : "sourceName",
-  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  "sources" : [ {
+    "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "connectionConfiguration" : {
+      "user" : "charles"
+    },
+    "name" : "name",
+    "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "sourceName" : "sourceName",
+    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  }, {
+    "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "connectionConfiguration" : {
+      "user" : "charles"
+    },
+    "name" : "name",
+    "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "sourceName" : "sourceName",
+    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  } ]
 }

Produces

@@ -5308,7 +6353,7 @@

Produces

Responses

200

Successful operation - SourceRead + SourceReadList

404

Object with given id was not found. NotFoundKnownExceptionInfo @@ -5317,11 +6362,11 @@

422

InvalidInputExceptionInfo

-
+
Up -
post /v1/sources/create
-
Create a source (createSource)
+
post /v1/sources/search
+
Search sources (searchSources)
@@ -5333,7 +6378,7 @@

Consumes

Request body

-
SourceCreate SourceCreate (required)
+
SourceSearch SourceSearch (required)
Body Parameter
@@ -5344,7 +6389,7 @@

Request body

Return type

@@ -5353,14 +6398,25 @@

Return type

Example data

Content-Type: application/json
{
-  "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "connectionConfiguration" : {
-    "user" : "charles"
-  },
-  "name" : "name",
-  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "sourceName" : "sourceName",
-  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  "sources" : [ {
+    "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "connectionConfiguration" : {
+      "user" : "charles"
+    },
+    "name" : "name",
+    "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "sourceName" : "sourceName",
+    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  }, {
+    "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "connectionConfiguration" : {
+      "user" : "charles"
+    },
+    "name" : "name",
+    "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+    "sourceName" : "sourceName",
+    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  } ]
 }

Produces

@@ -5373,17 +6429,17 @@

Produces

Responses

200

Successful operation - SourceRead + SourceReadList

422

Input failed validation InvalidInputExceptionInfo

-
+
Up -
post /v1/sources/delete
-
Delete a source (deleteSource)
+
post /v1/sources/update
+
Update a source (updateSource)
@@ -5395,7 +6451,7 @@

Consumes

Request body

-
SourceIdRequestBody SourceIdRequestBody (required)
+
SourceUpdate SourceUpdate (required)
Body Parameter
@@ -5404,9 +6460,26 @@

Request body

+

Return type

+
+ SourceRead + +
+

Example data

+
Content-Type: application/json
+
{
+  "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "connectionConfiguration" : {
+    "user" : "charles"
+  },
+  "name" : "name",
+  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
+  "sourceName" : "sourceName",
+  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+}

Produces

This API call produces the following media types according to the Accept request header; @@ -5416,9 +6489,9 @@

Produces

Responses

-

204

- The resource was deleted successfully. - +

200

+ Successful operation + SourceRead

404

Object with given id was not found. NotFoundKnownExceptionInfo @@ -5427,11 +6500,12 @@

422

InvalidInputExceptionInfo

-
+

SourceDefinition

+
Up -
post /v1/sources/discover_schema
-
Discover the schema catalog of the source (discoverSchemaForSource)
+
post /v1/source_definitions/create_custom
+
Creates a custom sourceDefinition for the given workspace (createCustomSourceDefinition)
@@ -5443,7 +6517,7 @@

Consumes

Request body

-
SourceIdRequestBody SourceIdRequestBody (required)
+
CustomSourceDefinitionCreate CustomSourceDefinitionCreate (optional)
Body Parameter
@@ -5454,7 +6528,7 @@

Request body

Return type

@@ -5463,49 +6537,36 @@

Return type

Example data

Content-Type: application/json
{
-  "catalog" : {
-    "streams" : [ {
-      "stream" : {
-        "sourceDefinedPrimaryKey" : [ [ "sourceDefinedPrimaryKey", "sourceDefinedPrimaryKey" ], [ "sourceDefinedPrimaryKey", "sourceDefinedPrimaryKey" ] ],
-        "supportedSyncModes" : [ null, null ],
-        "sourceDefinedCursor" : true,
-        "name" : "name",
-        "namespace" : "namespace",
-        "defaultCursorField" : [ "defaultCursorField", "defaultCursorField" ]
-      },
-      "config" : {
-        "aliasName" : "aliasName",
-        "cursorField" : [ "cursorField", "cursorField" ],
-        "selected" : true,
-        "primaryKey" : [ [ "primaryKey", "primaryKey" ], [ "primaryKey", "primaryKey" ] ]
+  "resourceRequirements" : {
+    "default" : {
+      "cpu_limit" : "cpu_limit",
+      "memory_request" : "memory_request",
+      "memory_limit" : "memory_limit",
+      "cpu_request" : "cpu_request"
+    },
+    "jobSpecific" : [ {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
       }
     }, {
-      "stream" : {
-        "sourceDefinedPrimaryKey" : [ [ "sourceDefinedPrimaryKey", "sourceDefinedPrimaryKey" ], [ "sourceDefinedPrimaryKey", "sourceDefinedPrimaryKey" ] ],
-        "supportedSyncModes" : [ null, null ],
-        "sourceDefinedCursor" : true,
-        "name" : "name",
-        "namespace" : "namespace",
-        "defaultCursorField" : [ "defaultCursorField", "defaultCursorField" ]
-      },
-      "config" : {
-        "aliasName" : "aliasName",
-        "cursorField" : [ "cursorField", "cursorField" ],
-        "selected" : true,
-        "primaryKey" : [ [ "primaryKey", "primaryKey" ], [ "primaryKey", "primaryKey" ] ]
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
       }
     } ]
   },
-  "jobInfo" : {
-    "createdAt" : 0,
-    "configId" : "configId",
-    "endedAt" : 6,
-    "id" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "logs" : {
-      "logLines" : [ "logLines", "logLines" ]
-    },
-    "succeeded" : true
-  }
+  "documentationUrl" : "https://openapi-generator.tech",
+  "dockerImageTag" : "dockerImageTag",
+  "releaseDate" : "2000-01-23",
+  "dockerRepository" : "dockerRepository",
+  "name" : "name",
+  "icon" : "icon",
+  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
 }

Produces

@@ -5518,20 +6579,17 @@

Produces

Responses

200

Successful operation - SourceDiscoverSchemaRead -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo + SourceDefinitionRead

422

Input failed validation InvalidInputExceptionInfo

-
+
Up -
post /v1/sources/get
-
Get source (getSource)
+
post /v1/source_definitions/create
+
Creates a sourceDefinition (createSourceDefinition)
@@ -5543,7 +6601,7 @@

Consumes

Request body

-
SourceIdRequestBody SourceIdRequestBody (required)
+
SourceDefinitionCreate SourceDefinitionCreate (optional)
Body Parameter
@@ -5554,7 +6612,7 @@

Request body

Return type

@@ -5563,14 +6621,36 @@

Return type

Example data

Content-Type: application/json
{
-  "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "connectionConfiguration" : {
-    "user" : "charles"
+  "resourceRequirements" : {
+    "default" : {
+      "cpu_limit" : "cpu_limit",
+      "memory_request" : "memory_request",
+      "memory_limit" : "memory_limit",
+      "cpu_request" : "cpu_request"
+    },
+    "jobSpecific" : [ {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    }, {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    } ]
   },
+  "documentationUrl" : "https://openapi-generator.tech",
+  "dockerImageTag" : "dockerImageTag",
+  "releaseDate" : "2000-01-23",
+  "dockerRepository" : "dockerRepository",
   "name" : "name",
-  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "sourceName" : "sourceName",
-  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  "icon" : "icon",
+  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
 }

Produces

@@ -5583,21 +6663,18 @@

Produces

Responses

200

Successful operation - SourceRead -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo + SourceDefinitionRead

422

Input failed validation InvalidInputExceptionInfo
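As an illustrative sketch (not part of the generated reference), the create endpoint can be exercised with `curl`. The base URL `http://localhost:8000/api` and the example repository, tag, and documentation URL are assumptions; the field names follow the `SourceDefinitionCreate` body documented above.

```bash
# Hypothetical example: register a custom source definition.
# Host, port and the /api prefix are assumptions about your deployment.
curl -s -X POST http://localhost:8000/api/v1/source_definitions/create \
  -H "Content-Type: application/json" \
  -d '{
        "name": "my-custom-source",
        "dockerRepository": "myorg/source-custom",
        "dockerImageTag": "0.1.0",
        "documentationUrl": "https://docs.example.com/source-custom"
      }'
```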

-
+
Up -
post /v1/sources/list
-
List sources for workspace (listSourcesForWorkspace)
-
List sources for workspace. Does not return deleted sources.
+
post /v1/source_definitions/delete_custom
+
Delete a custom source definition for the given workspace (deleteCustomSourceDefinition)
+

Consumes

@@ -5608,7 +6685,7 @@

Consumes

Request body

-
WorkspaceIdRequestBody WorkspaceIdRequestBody (required)
+
SourceDefinitionIdWithWorkspaceId SourceDefinitionIdWithWorkspaceId (required)
Body Parameter
@@ -5617,37 +6694,9 @@

Request body

-

Return type


Example data

-
Content-Type: application/json
-
{
-  "sources" : [ {
-    "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "connectionConfiguration" : {
-      "user" : "charles"
-    },
-    "name" : "name",
-    "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "sourceName" : "sourceName",
-    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
-  }, {
-    "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "connectionConfiguration" : {
-      "user" : "charles"
-    },
-    "name" : "name",
-    "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "sourceName" : "sourceName",
-    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
-  } ]
-}

Produces

This API call produces the following media types according to the Accept request header;
@@ -5657,9 +6706,9 @@

Produces

Responses

-

200

- Successful operation - SourceReadList +

204

+ The resource was deleted successfully. +

404

Object with given id was not found. NotFoundKnownExceptionInfo
@@ -5668,11 +6717,11 @@

422

InvalidInputExceptionInfo
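A minimal `curl` sketch of the delete_custom call, assuming the same `http://localhost:8000/api` base URL and placeholder UUIDs; a 204 status with an empty body indicates success.

```bash
# Hypothetical example: remove a custom source definition from a workspace.
# Prints only the HTTP status code; expect 204 on success.
curl -s -o /dev/null -w "%{http_code}\n" \
  -X POST http://localhost:8000/api/v1/source_definitions/delete_custom \
  -H "Content-Type: application/json" \
  -d '{"sourceDefinitionId": "<definition-uuid>", "workspaceId": "<workspace-uuid>"}'
```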

-
+
Up -
post /v1/sources/search
-
Search sources (searchSources)
+
post /v1/source_definitions/delete
+
Delete a source definition (deleteSourceDefinition)
@@ -5684,7 +6733,7 @@

Consumes

Request body

-
SourceSearch SourceSearch (required)
+
SourceDefinitionIdRequestBody SourceDefinitionIdRequestBody (required)
Body Parameter
@@ -5693,37 +6742,9 @@

Request body

-

Return type


Example data

-
Content-Type: application/json
-
{
-  "sources" : [ {
-    "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "connectionConfiguration" : {
-      "user" : "charles"
-    },
-    "name" : "name",
-    "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "sourceName" : "sourceName",
-    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
-  }, {
-    "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "connectionConfiguration" : {
-      "user" : "charles"
-    },
-    "name" : "name",
-    "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-    "sourceName" : "sourceName",
-    "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
-  } ]
-}

Produces

This API call produces the following media types according to the Accept request header;
@@ -5733,19 +6754,22 @@

Produces

Responses

-

200

- Successful operation - SourceReadList +

204

+ The resource was deleted successfully. + +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo

422

Input failed validation InvalidInputExceptionInfo

-
+
Up -
post /v1/sources/update
-
Update a source (updateSource)
+
post /v1/source_definitions/get
+
Get source (getSourceDefinition)
@@ -5757,7 +6781,7 @@

Consumes

Request body

-
SourceUpdate SourceUpdate (required)
+
SourceDefinitionIdRequestBody SourceDefinitionIdRequestBody (required)
Body Parameter
@@ -5768,7 +6792,7 @@

Request body

Return type

@@ -5776,15 +6800,37 @@

Return type

Example data

Content-Type: application/json
-
{
-  "sourceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "connectionConfiguration" : {
-    "user" : "charles"
+    
{
+  "resourceRequirements" : {
+    "default" : {
+      "cpu_limit" : "cpu_limit",
+      "memory_request" : "memory_request",
+      "memory_limit" : "memory_limit",
+      "cpu_request" : "cpu_request"
+    },
+    "jobSpecific" : [ {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    }, {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    } ]
   },
+  "documentationUrl" : "https://openapi-generator.tech",
+  "dockerImageTag" : "dockerImageTag",
+  "releaseDate" : "2000-01-23",
+  "dockerRepository" : "dockerRepository",
   "name" : "name",
-  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91",
-  "sourceName" : "sourceName",
-  "workspaceId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  "icon" : "icon",
+  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
 }

Produces

@@ -5797,7 +6843,7 @@

Produces

Responses

200

Successful operation - SourceRead + SourceDefinitionRead

404

Object with given id was not found. NotFoundKnownExceptionInfo
@@ -5806,12 +6852,11 @@

422

InvalidInputExceptionInfo

-

SourceDefinition

-
+
Up -
post /v1/source_definitions/create
-
Creates a sourceDefinition (createSourceDefinition)
+
post /v1/source_definitions/get_for_workspace
+
Get a sourceDefinition that is configured for the given workspace (getSourceDefinitionForWorkspace)
@@ -5823,7 +6868,7 @@

Consumes

Request body

-
SourceDefinitionCreate SourceDefinitionCreate (optional)
+
SourceDefinitionIdWithWorkspaceId SourceDefinitionIdWithWorkspaceId (required)
Body Parameter
@@ -5886,16 +6931,19 @@

Responses

200

Successful operation SourceDefinitionRead +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo

422

Input failed validation InvalidInputExceptionInfo
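A minimal `curl` sketch of this call; the body matches the `SourceDefinitionIdWithWorkspaceId` parameter above, while the base URL and UUID placeholders are assumptions.

```bash
# Hypothetical example: fetch a source definition as it is configured for one workspace.
curl -s -X POST http://localhost:8000/api/v1/source_definitions/get_for_workspace \
  -H "Content-Type: application/json" \
  -d '{"sourceDefinitionId": "<definition-uuid>", "workspaceId": "<workspace-uuid>"}'
```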

-
+
Up -
post /v1/source_definitions/delete
-
Delete a source definition (deleteSourceDefinition)
+
post /v1/source_definitions/grant_definition
+
grant a private, non-custom sourceDefinition to a given workspace (grantSourceDefinitionToWorkspace)
@@ -5907,7 +6955,7 @@

Consumes

Request body

-
SourceDefinitionIdRequestBody SourceDefinitionIdRequestBody (required)
+
SourceDefinitionIdWithWorkspaceId SourceDefinitionIdWithWorkspaceId (required)
Body Parameter
@@ -5916,9 +6964,51 @@

Request body

+

Return type


Example data

+
Content-Type: application/json
+
{
+  "sourceDefinition" : {
+    "resourceRequirements" : {
+      "default" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      },
+      "jobSpecific" : [ {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      }, {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      } ]
+    },
+    "documentationUrl" : "https://openapi-generator.tech",
+    "dockerImageTag" : "dockerImageTag",
+    "releaseDate" : "2000-01-23",
+    "dockerRepository" : "dockerRepository",
+    "name" : "name",
+    "icon" : "icon",
+    "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  },
+  "granted" : true
+}

Produces

This API call produces the following media types according to the Accept request header;
@@ -5928,9 +7018,9 @@

Produces

Responses

-

204

- The resource was deleted successfully. - +

200

+ Successful operation + PrivateSourceDefinitionRead

404

Object with given id was not found. NotFoundKnownExceptionInfo
@@ -5939,11 +7029,113 @@

422

InvalidInputExceptionInfo
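A minimal `curl` sketch of granting a definition; on success the response should be a `PrivateSourceDefinitionRead` with `granted` set to true. The base URL and UUIDs are placeholders.

```bash
# Hypothetical example: grant a private source definition to a workspace.
curl -s -X POST http://localhost:8000/api/v1/source_definitions/grant_definition \
  -H "Content-Type: application/json" \
  -d '{"sourceDefinitionId": "<definition-uuid>", "workspaceId": "<workspace-uuid>"}'
```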

-
+
Up -
post /v1/source_definitions/get
-
Get source (getSourceDefinition)
+
post /v1/source_definitions/list_latest
+
List the latest sourceDefinitions Airbyte supports (listLatestSourceDefinitions)
+
Guaranteed to retrieve the latest information on supported sources.

Return type


Example data

+
Content-Type: application/json
+
{
+  "sourceDefinitions" : [ {
+    "resourceRequirements" : {
+      "default" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      },
+      "jobSpecific" : [ {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      }, {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      } ]
+    },
+    "documentationUrl" : "https://openapi-generator.tech",
+    "dockerImageTag" : "dockerImageTag",
+    "releaseDate" : "2000-01-23",
+    "dockerRepository" : "dockerRepository",
+    "name" : "name",
+    "icon" : "icon",
+    "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  }, {
+    "resourceRequirements" : {
+      "default" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      },
+      "jobSpecific" : [ {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      }, {
+        "resourceRequirements" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        }
+      } ]
+    },
+    "documentationUrl" : "https://openapi-generator.tech",
+    "dockerImageTag" : "dockerImageTag",
+    "releaseDate" : "2000-01-23",
+    "dockerRepository" : "dockerRepository",
+    "name" : "name",
+    "icon" : "icon",
+    "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+  } ]
+}

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • application/json

Responses

+

200

+ Successful operation + SourceDefinitionReadList +
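Since no request body is documented for this call, an illustrative sketch is a bare POST; the trailing `jq` filter is only for readability and assumes `jq` is installed, and the base URL is an assumption.

```bash
# Hypothetical example: list the latest source definitions and print name plus docker image.
curl -s -X POST http://localhost:8000/api/v1/source_definitions/list_latest \
  -H "Content-Type: application/json" \
  | jq -r '.sourceDefinitions[] | "\(.name)  \(.dockerRepository):\(.dockerImageTag)"'
```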
+
+
+
+ Up +
post /v1/source_definitions/list_private
+
List all private, non-custom sourceDefinitions, and for each indicate whether the given workspace has a grant for using the definition. Used by admins to view and modify a given workspace's grants. (listPrivateSourceDefinitions)
@@ -5955,7 +7147,7 @@

Consumes

Request body

-
SourceDefinitionIdRequestBody SourceDefinitionIdRequestBody (required)
+
WorkspaceIdRequestBody WorkspaceIdRequestBody (optional)
Body Parameter
@@ -5966,7 +7158,7 @@

Request body

Return type

@@ -5975,36 +7167,75 @@

Return type

Example data

Content-Type: application/json
{
-  "resourceRequirements" : {
-    "default" : {
-      "cpu_limit" : "cpu_limit",
-      "memory_request" : "memory_request",
-      "memory_limit" : "memory_limit",
-      "cpu_request" : "cpu_request"
+  "sourceDefinitions" : [ {
+    "sourceDefinition" : {
+      "resourceRequirements" : {
+        "default" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        },
+        "jobSpecific" : [ {
+          "resourceRequirements" : {
+            "cpu_limit" : "cpu_limit",
+            "memory_request" : "memory_request",
+            "memory_limit" : "memory_limit",
+            "cpu_request" : "cpu_request"
+          }
+        }, {
+          "resourceRequirements" : {
+            "cpu_limit" : "cpu_limit",
+            "memory_request" : "memory_request",
+            "memory_limit" : "memory_limit",
+            "cpu_request" : "cpu_request"
+          }
+        } ]
+      },
+      "documentationUrl" : "https://openapi-generator.tech",
+      "dockerImageTag" : "dockerImageTag",
+      "releaseDate" : "2000-01-23",
+      "dockerRepository" : "dockerRepository",
+      "name" : "name",
+      "icon" : "icon",
+      "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+    },
+    "granted" : true
+  }, {
+    "sourceDefinition" : {
+      "resourceRequirements" : {
+        "default" : {
+          "cpu_limit" : "cpu_limit",
+          "memory_request" : "memory_request",
+          "memory_limit" : "memory_limit",
+          "cpu_request" : "cpu_request"
+        },
+        "jobSpecific" : [ {
+          "resourceRequirements" : {
+            "cpu_limit" : "cpu_limit",
+            "memory_request" : "memory_request",
+            "memory_limit" : "memory_limit",
+            "cpu_request" : "cpu_request"
+          }
+        }, {
+          "resourceRequirements" : {
+            "cpu_limit" : "cpu_limit",
+            "memory_request" : "memory_request",
+            "memory_limit" : "memory_limit",
+            "cpu_request" : "cpu_request"
+          }
+        } ]
+      },
+      "documentationUrl" : "https://openapi-generator.tech",
+      "dockerImageTag" : "dockerImageTag",
+      "releaseDate" : "2000-01-23",
+      "dockerRepository" : "dockerRepository",
+      "name" : "name",
+      "icon" : "icon",
+      "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
     },
-    "jobSpecific" : [ {
-      "resourceRequirements" : {
-        "cpu_limit" : "cpu_limit",
-        "memory_request" : "memory_request",
-        "memory_limit" : "memory_limit",
-        "cpu_request" : "cpu_request"
-      }
-    }, {
-      "resourceRequirements" : {
-        "cpu_limit" : "cpu_limit",
-        "memory_request" : "memory_request",
-        "memory_limit" : "memory_limit",
-        "cpu_request" : "cpu_request"
-      }
-    } ]
-  },
-  "documentationUrl" : "https://openapi-generator.tech",
-  "dockerImageTag" : "dockerImageTag",
-  "releaseDate" : "2000-01-23",
-  "dockerRepository" : "dockerRepository",
-  "name" : "name",
-  "icon" : "icon",
-  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+    "granted" : true
+  } ]
 }

Produces

@@ -6017,21 +7248,15 @@

Produces

Responses

200

Successful operation - SourceDefinitionRead -

404

- Object with given id was not found. - NotFoundKnownExceptionInfo -

422

- Input failed validation - InvalidInputExceptionInfo + PrivateSourceDefinitionReadList
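A sketch of this call with the optional `WorkspaceIdRequestBody`; the `jq` step (an assumed, optional dependency) prints each definition name with its grant status, and the base URL is a placeholder.

```bash
# Hypothetical example: list private source definitions and whether this workspace has a grant.
curl -s -X POST http://localhost:8000/api/v1/source_definitions/list_private \
  -H "Content-Type: application/json" \
  -d '{"workspaceId": "<workspace-uuid>"}' \
  | jq -r '.sourceDefinitions[] | "\(.sourceDefinition.name)  granted=\(.granted)"'
```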

-
+
Up -
post /v1/source_definitions/list_latest
-
List the latest sourceDefinitions Airbyte supports (listLatestSourceDefinitions)
-
Guaranteed to retrieve the latest information on supported sources.
+
post /v1/source_definitions/list
+
List all the sourceDefinitions the current Airbyte deployment is configured to use (listSourceDefinitions)
+
@@ -6128,15 +7353,27 @@

200

SourceDefinitionReadList

-
+
Up -
post /v1/source_definitions/list
-
List all the sourceDefinitions the current Airbyte deployment is configured to use (listSourceDefinitions)
+
post /v1/source_definitions/list_for_workspace
+
List all the sourceDefinitions the given workspace is configured to use (listSourceDefinitionsForWorkspace)
+

Consumes

+ This API call consumes the following media types via the Content-Type request header:
+   • application/json

Request body

+
+
WorkspaceIdRequestBody WorkspaceIdRequestBody (optional)
+ +
Body Parameter
+
@@ -6230,6 +7467,141 @@

200

SourceDefinitionReadList

+
+
+ Up +
post /v1/source_definitions/revoke_definition
+
revoke a grant to a private, non-custom sourceDefinition from a given workspace (revokeSourceDefinitionFromWorkspace)
+

Consumes

+ This API call consumes the following media types via the Content-Type request header:
+   • application/json

Request body

+
+
SourceDefinitionIdWithWorkspaceId SourceDefinitionIdWithWorkspaceId (required)
+ +
Body Parameter
+ +

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • application/json

Responses

+

204

+ The resource was deleted successfully. + +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo +

422

+ Input failed validation + InvalidInputExceptionInfo +
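A sketch mirroring the grant example above: the same `SourceDefinitionIdWithWorkspaceId` body, with a 204 status expected on success; the URL and IDs are placeholders.

```bash
# Hypothetical example: revoke a previously granted private source definition.
curl -s -o /dev/null -w "%{http_code}\n" \
  -X POST http://localhost:8000/api/v1/source_definitions/revoke_definition \
  -H "Content-Type: application/json" \
  -d '{"sourceDefinitionId": "<definition-uuid>", "workspaceId": "<workspace-uuid>"}'
```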
+
+
+
+ Up +
post /v1/source_definitions/update_custom
+
Update a custom sourceDefinition for the given workspace (updateCustomSourceDefinition)
+

Consumes

+ This API call consumes the following media types via the Content-Type request header:
+   • application/json

Request body

+
+
CustomSourceDefinitionUpdate CustomSourceDefinitionUpdate (optional)
+ +
Body Parameter
+ +

Return type


Example data

+
Content-Type: application/json
+
{
+  "resourceRequirements" : {
+    "default" : {
+      "cpu_limit" : "cpu_limit",
+      "memory_request" : "memory_request",
+      "memory_limit" : "memory_limit",
+      "cpu_request" : "cpu_request"
+    },
+    "jobSpecific" : [ {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    }, {
+      "resourceRequirements" : {
+        "cpu_limit" : "cpu_limit",
+        "memory_request" : "memory_request",
+        "memory_limit" : "memory_limit",
+        "cpu_request" : "cpu_request"
+      }
+    } ]
+  },
+  "documentationUrl" : "https://openapi-generator.tech",
+  "dockerImageTag" : "dockerImageTag",
+  "releaseDate" : "2000-01-23",
+  "dockerRepository" : "dockerRepository",
+  "name" : "name",
+  "icon" : "icon",
+  "sourceDefinitionId" : "046b6c7f-0b8a-43b9-b35d-6489e6daee91"
+}

Produces

+ This API call produces the following media types according to the Accept request header; the media type will be conveyed by the Content-Type response header.
+   • application/json

Responses

+

200

+ Successful operation + SourceDefinitionRead +

404

+ Object with given id was not found. + NotFoundKnownExceptionInfo +

422

+ Input failed validation + InvalidInputExceptionInfo +
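A sketch of a typical update, for example bumping the image tag of a custom definition; the nested `sourceDefinition` fields are an assumption, so check the `CustomSourceDefinitionUpdate` model below before relying on them.

```bash
# Hypothetical example: bump the docker image tag of a custom source definition.
# The nested "sourceDefinition" fields are assumed; verify against CustomSourceDefinitionUpdate.
curl -s -X POST http://localhost:8000/api/v1/source_definitions/update_custom \
  -H "Content-Type: application/json" \
  -d '{
        "workspaceId": "<workspace-uuid>",
        "sourceDefinition": {
          "sourceDefinitionId": "<definition-uuid>",
          "dockerImageTag": "0.2.0"
        }
      }'
```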

CustomDestinationDefinitionCreate
workspaceId (UUID format: uuid)
destinationDefinition

CustomDestinationDefinitionUpdate
workspaceId (UUID format: uuid)
destinationDefinition

CustomSourceDefinitionCreate
workspaceId (UUID format: uuid)
sourceDefinition

CustomSourceDefinitionUpdate
workspaceId (UUID format: uuid)
sourceDefinition

DestinationDefinitionIdWithWorkspaceId
destinationDefinitionId (UUID format: uuid)
workspaceId (UUID format: uuid)

PrivateDestinationDefinitionRead
destinationDefinition
granted

PrivateSourceDefinitionRead
sourceDefinition
granted

SourceDefinitionIdWithWorkspaceId
sourceDefinitionId (UUID format: uuid)
workspaceId (UUID format: uuid)

SourceDiscoverSchemaRequestBody
sourceId (UUID format: uuid)
disable_cache (optional)

SourceIdRequestBody

diff --git a/kube/overlays/stable-with-resource-limits/.env b/kube/overlays/stable-with-resource-limits/.env index a5100a8b70ed..63f654bb6532 100644 --- a/kube/overlays/stable-with-resource-limits/.env +++ b/kube/overlays/stable-with-resource-limits/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.55-alpha +AIRBYTE_VERSION=0.35.59-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable-with-resource-limits/kustomization.yaml b/kube/overlays/stable-with-resource-limits/kustomization.yaml index 270e969e32f4..7e29c645ab97 100644 --- a/kube/overlays/stable-with-resource-limits/kustomization.yaml +++ b/kube/overlays/stable-with-resource-limits/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: airbyte/bootloader - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: airbyte/scheduler - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: airbyte/server - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: airbyte/webapp - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: airbyte/worker - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/kube/overlays/stable/.env b/kube/overlays/stable/.env index 8efa4dec694d..fa2f878e6916 100644 --- a/kube/overlays/stable/.env +++ b/kube/overlays/stable/.env @@ -1,4 +1,4 @@ -AIRBYTE_VERSION=0.35.55-alpha +AIRBYTE_VERSION=0.35.59-alpha # Airbyte Internal Database, see https://docs.airbyte.io/operator-guides/configuring-airbyte-db DATABASE_HOST=airbyte-db-svc diff --git a/kube/overlays/stable/kustomization.yaml b/kube/overlays/stable/kustomization.yaml index c35ad31c38c0..fc1b3f52f70c 100644 --- a/kube/overlays/stable/kustomization.yaml +++ b/kube/overlays/stable/kustomization.yaml @@ -8,17 +8,17 @@ bases: images: - name: airbyte/db - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: airbyte/bootloader - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: airbyte/scheduler - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: airbyte/server - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: airbyte/webapp - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: airbyte/worker - newTag: 0.35.55-alpha + newTag: 0.35.59-alpha - name: temporalio/auto-setup newTag: 1.7.0 diff --git a/octavia-cli/README.md b/octavia-cli/README.md index 606b3fbad0f4..b227f092162c 100644 --- a/octavia-cli/README.md +++ b/octavia-cli/README.md @@ -1,71 +1,355 @@ # 🐙 Octavia CLI -Octavia CLI is a tool to manage Airbyte configuration in YAML. -It has the following features: -* Scaffolding of a readable directory architecture that will host the YAML configs. -* Auto-generation of YAML config file that matches the resources' schemas. -* Manage Airbyte resources with YAML config files. -* Safe resources update through diff display and validation. -* Simple secret management to avoid versioning credentials. - ## Disclaimer -The project is in **alpha** version. + +The project is in **alpha** version. Readers can refer to our [opened GitHub issues](https://github.com/airbytehq/airbyte/issues?q=is%3Aopen+is%3Aissue+label%3Aarea%2Foctavia-cli) to check the ongoing work on this project. -# Install -## 1. Install and run Docker -We are packaging this CLI as a Docker image to avoid dependency hell, **[please install and run Docker if you are not](https://docs.docker.com/get-docker/)**. +## What is `octavia` CLI? 
+ +Octavia CLI is a tool to manage Airbyte configurations in YAML. +It has the following features: + +- Scaffolding of a readable directory architecture that will host the YAML configs (`octavia init`). +- Auto-generation of YAML config file that matches the resources' schemas (`octavia generate`). +- Manage Airbyte resources with YAML config files. +- Safe resources update through diff display and validation (`octavia apply`). +- Simple secret management to avoid versioning credentials. + +## Why should I use `octavia` CLI? + +A CLI provides freedom to users to use the tool in whatever context and use case they have. +These are non-exhaustive use cases `octavia` can be convenient for: + +- Managing Airbyte configurations with a CLI instead of a web UI. +- Versioning Airbyte configurations in Git. +- Updating of Airbyte configurations in an automated deployment pipeline. +- Integrating the Airbyte configuration deployment in a dev ops tooling stack: Helm, Ansible etc. +- Streamlining the deployment of Airbyte configurations to multiple Airbyte instance. + +Feel free to share your use cases with the community in [#octavia-cli](https://airbytehq.slack.com/archives/C02RRUG9CP5) or on [Discourse](https://discuss.airbyte.io/). + +## Table of content + +- [Workflow](#workflow) +- [Secret management](#secret-management) +- [Install](#install) +- [Commands reference](#commands-reference) +- [Contributing](#contributing) +- [Changelog](#changelog) + +## Workflow + +### 1. Generate local YAML files for sources or destination + +1. Retrieve the *definition id* of the connector you want to use using `octavia list command`. +2. Generate YAML configuration running `octavia generate source ` or `octavia generate destination `. + +### 2. Edit your local YAML configurations + +1. Edit the generated YAML configurations according to your need. +2. Use the [secret management feature](#secret-management) feature to avoid storing credentials in the YAML files. + +### 3. Create the declared sources or destinations on your Airbyte instance + +1. Run `octavia apply` to create the **sources** and **destinations** + +### 4. Generate connections + +1. Run `octavia octavia generate connection --source --destination ` to create a YAML configuration for a new connection. +2. Edit the created configuration file according to your need: change the scheduling or the replicated streams list. + +### 5. Create the declared connections + +1. Run `octavia apply` to create the newly declared connection on your Airbyte instance. + +### 6. Update your configurations + +Changes in your local configurations can be propagated to your Airbyte instance using `octavia apply`. You will be prompted for validation of changes. You can bypass the validation step using the `--force` flag. + +## Secret management + +Sources and destinations configurations have credential fields that you **do not want to store as plain text in your VCS**. +`octavia` offers secret management through environment variables expansion: + +```yaml +configuration: + password: ${MY_PASSWORD} +``` + +If you have set a `MY_PASSWORD` environment variable, `octavia apply` will load its value into the `password` field. + +## Install + +### Requirements + +We decided to package the CLI in a docker image with portability in mind. +**[Please install and run Docker if you are not](https://docs.docker.com/get-docker/)**. 
+ +### As a command available in your bash profile -## 2.a If you are using ZSH / Bash: ```bash curl -o- https://raw.githubusercontent.com/airbytehq/airbyte/master/octavia-cli/install.sh | bash ``` This script: + 1. Pulls the [octavia-cli image](https://hub.docker.com/r/airbyte/octavia-cli/tags) from our Docker registry. 2. Creates an `octavia` alias in your profile. 3. Creates a `~/.octavia` file whose values are mapped to the octavia container's environment variables. -## 2.b If you want to directly run the CLI without alias in your current directory: +### Using `docker run` + ```bash +touch ~/.octavia # Create a file to store env variables that will be mapped the octavia-cli container mkdir my_octavia_project_directory # Create your octavia project directory where YAML configurations will be stored. -docker run -i --rm -v ./my_octavia_project_directory:/home/octavia-project --network host -e AIRBYTE_URL="http://localhost:8000" airbyte/octavia-cli:dev +docker run --name octavia-cli -i --rm -v ./my_octavia_project_directory:/home/octavia-project --network host --env-file ~/.octavia airbyte/octavia-cli:latest ``` +### Using `docker-compose` + +Using octavia in docker-compose could be convenient for automatic `apply` on start-up. + +Add another entry in the services key of your Airbyte `docker-compose.yml` -# Secret management -Sources and destinations configurations have credential fields that you **do not want to store as plain text and version on Git**. -`octavia` offers secret management through environment variables expansion: ```yaml -configuration: - password: ${MY_PASSWORD} +services: + # . . . + octavia-cli: + image: airbyte/octavia-cli:latest + command: apply --force + env_file: + - ~/.octavia # Use a local env file to store variables that will be mapped the octavia-cli container + volumes: + - :/home/octavia-project + depends_on: + - webapp ``` -If you have set a `MY_PASSWORD` environment variable, `octavia apply` will load its value into the `password` field. +Other commands besides `apply` can be run like so: -# Developing locally -1. Install Python 3.8.12. We suggest doing it through `pyenv` -2. Create a virtualenv: `python -m venv .venv` -3. Activate the virtualenv: `source .venv/bin/activate` -4. Install dev dependencies: `pip install -e .\[tests\]` -5. Install `pre-commit` hooks: `pre-commit install` -6. Run the unittest suite: `pytest --cov=octavia_cli` -7. Iterate: please check the [Contributing](#contributing) for instructions on contributing. +```bash +docker-compose run octavia-cli ` +``` + +## Commands reference + +### `octavia` command flags + +| **Flag** | **Description** | **Env Variable** | **Default** | +|------------------|-----------------------|------------------------|--------------------------------------------------------| +| `--airbyte-url` | Airbyte instance URL. | `AIRBYTE_URL` | `http://localhost:8000` | +| `--workspace-id` | Airbyte workspace id. | `AIRBYTE_WORKSPACE_ID` | The first workspace id found on your Airbyte instance. | + +### `octavia` subcommands + +| **Command** | **Usage** | +|-----------------------------------------|-------------------------------------------------------------------------------------| +| **`octavia init`** | Initialize required directories for the project. | +| **`octavia list connectors sources`** | List all sources connectors available on the remote Airbyte instance. | +| **`octavia list connectors destination`** | List all destinations connectors available on the remote Airbyte instance. 
| +| **`octavia list workspace sources`** | List existing sources in current the Airbyte workspace. | +| **`octavia list workspace destinations`** | List existing destinations in the current Airbyte workspace. | +| **`octavia list workspace connections`** | List existing connections in the current Airbyte workspace. | +| **`octavia generate source`** | Generate a local YAML configuration for a new source. | +| **`octavia generate destination`** | Generate a local YAML configuration for a new destination. | +| **`octavia generate connection`** | Generate a local YAML configuration for a new connection. | +| **`octavia apply`** | Create or update Airbyte remote resources according to local YAML configurations. | + +#### `octavia init` + +The `octavia init` commands scaffolds the required directory architecture for running `octavia generate` and `octavia apply` commands. + +**Example**: -## Build -Build the project locally (from the root of the repo): ```bash -SUB_BUILD=OCTAVIA_CLI ./gradlew build # from the root directory of the repo +$ mkdir my_octavia_project && cd my_octavia_project +$ octavia init +🐙 - Octavia is targetting your Airbyte instance running at http://localhost:8000 on workspace e1f46f7d-5354-4200-aed6-7816015ca54b. +🐙 - Project is not yet initialized. +🔨 - Initializing the project. +✅ - Created the following directories: sources, destinations, connections. +$ ls +connections destinations sources ``` -# Contributing + +#### `octavia list connectors sources` + +List all the source connectors currently available on your Airbyte instance. + +**Example**: + +```bash +$ octavia list connectors sources +NAME DOCKER REPOSITORY DOCKER IMAGE TAG SOURCE DEFINITION ID +Airtable airbyte/source-airtable 0.1.1 14c6e7ea-97ed-4f5e-a7b5-25e9a80b8212 +AWS CloudTrail airbyte/source-aws-cloudtrail 0.1.4 6ff047c0-f5d5-4ce5-8c81-204a830fa7e1 +Amazon Ads airbyte/source-amazon-ads 0.1.3 c6b0a29e-1da9-4512-9002-7bfd0cba2246 +Amazon Seller Partner airbyte/source-amazon-seller-partner 0.2.15 e55879a8-0ef8-4557-abcf-ab34c53ec460 +``` + +#### `octavia list connectors destinations` + +List all the destinations connectors currently available on your Airbyte instance. + +**Example**: + +```bash +$ octavia list connectors destinations +NAME DOCKER REPOSITORY DOCKER IMAGE TAG DESTINATION DEFINITION ID +Azure Blob Storage airbyte/destination-azure-blob-storage 0.1.3 b4c5d105-31fd-4817-96b6-cb923bfc04cb +Amazon SQS airbyte/destination-amazon-sqs 0.1.0 0eeee7fb-518f-4045-bacc-9619e31c43ea +BigQuery airbyte/destination-bigquery 0.6.11 22f6c74f-5699-40ff-833c-4a879ea40133 +BigQuery (denormalized typed struct) airbyte/destination-bigquery-denormalized 0.2.10 079d5540-f236-4294-ba7c-ade8fd918496 +``` + +#### `octavia list workspace sources` + +List all the sources existing on your targeted Airbyte instance. + +**Example**: + +```bash +$ octavia list workspace sources +NAME SOURCE NAME SOURCE ID +weather OpenWeather c4aa8550-2122-4a33-9a21-adbfaa638544 +``` + +#### `octavia list workspace destinations` + +List all the destinations existing on your targeted Airbyte instance. + +**Example**: + +```bash +$ octavia list workspace destinations +NAME DESTINATION NAME DESTINATION ID +my_db Postgres c0c977c2-48e7-46fe-9f57-576285c26d42 +``` + +#### `octavia list workspace connections` + +List all the connections existing on your targeted Airbyte instance. 
+ +**Example**: + +```bash +$ octavia list workspace connections +NAME CONNECTION ID STATUS SOURCE ID DESTINATION ID +weather_to_pg a4491317-153e-436f-b646-0b39338f9aab active c4aa8550-2122-4a33-9a21-adbfaa638544 c0c977c2-48e7-46fe-9f57-576285c26d42 +``` + +#### `octavia generate source ` + +Generate a YAML configuration for a source. +The YAML file will be stored at `./sources//configuration.yaml`. + +| **Argument** | **Description** | +|-----------------|-----------------------------------------------------------------------------------------------| +| `DEFINITION_ID` | The source connector definition id. Can be retrieved using `octavia list connectors sources`. | +| `SOURCE_NAME` | The name you want to give to this source in Airbyte. | + +**Example**: + +```bash +$ octavia generate source d8540a80-6120-485d-b7d6-272bca477d9b weather +✅ - Created the source template for weather in ./sources/weather/configuration.yaml. +``` + +#### `octavia generate destination ` + +Generate a YAML configuration for a destination. +The YAML file will be stored at `./destinations//configuration.yaml`. + +| **Argument** | **Description** | +|--------------------|---------------------------------------------------------------------------------------------------------| +| `DEFINITION_ID` | The destination connector definition id. Can be retrieved using `octavia list connectors destinations`. | +| `DESTINATION_NAME` | The name you want to give to this destination in Airbyte. | + +**Example**: + +```bash +$ octavia generate destination 25c5221d-dce2-4163-ade9-739ef790f503 my_db +✅ - Created the destination template for my_db in ./destinations/my_db/configuration.yaml. +``` + +#### `octavia generate connection --source --destination ` + +Generate a YAML configuration for a connection. +The YAML file will be stored at `./connections//configuration.yaml`. + +| **Option** | **Required** | **Description** | +|-----------------|--------------|--------------------------------------------------------------------------------------------| +| `--source` | Yes | Path to the YAML configuration file of the source you want to create a connection from. | +| `--destination` | Yes | Path to the YAML configuration file of the destination you want to create a connection to. | + +| **Argument** | **Description** | +|-------------------|----------------------------------------------------------| +| `CONNECTION_NAME` | The name you want to give to this connection in Airbyte. | + +**Example**: + +```bash +$ octavia generate connection --source sources/weather/configuration.yaml --destination destinations/my_db/configuration.yaml weather_to_pg +✅ - Created the connection template for weather_to_pg in ./connections/weather_to_pg/configuration.yaml. +``` + +#### `octavia apply` + +Create or update the resource on your Airbyte instance according to local configurations found in your octavia project directory. +If the resource was not found on your Airbyte instance, **apply** will **create** the remote resource. +If the resource was found on your Airbyte instance, **apply** will prompt you for validation of the changes and will run an **update** of your resource. +Please note that if a secret field was updated on your configuration, **apply** will run this change without prompt. + +| **Option** | **Required** | **Description** | +|-----------------|--------------|--------------------------------------------------------------------------------------------| +| `--file` | No | Path to the YAML configuration files you want to create or update. 
| +| `--force` | No | Run update without prompting for changes validation. | + +**Example**: + +```bash +$ octavia apply +🐙 - weather exists on your Airbyte instance, let's check if we need to update it! +👀 - Here's the computed diff (🚨 remind that diff on secret fields are not displayed): + E - Value of root['lat'] changed from "46.7603" to "45.7603". +❓ - Do you want to update weather? [y/N]: y +✍️ - Running update because a diff was detected between local and remote resource. +🎉 - Successfully updated weather on your Airbyte instance! +💾 - New state for weather stored at ./sources/weather/state.yaml. +🐙 - my_db exists on your Airbyte instance, let's check if we need to update it! +😴 - Did not update because no change detected. +🐙 - weather_to_pg exists on your Airbyte instance, let's check if we need to update it! +👀 - Here's the computed diff (🚨 remind that diff on secret fields are not displayed): + E - Value of root['schedule']['timeUnit'] changed from "days" to "hours". +❓ - Do you want to update weather_to_pg? [y/N]: y +✍️ - Running update because a diff was detected between local and remote resource. +🎉 - Successfully updated weather_to_pg on your Airbyte instance! +💾 - New state for weather_to_pg stored at ./connections/weather_to_pg/state.yaml. +``` + +## Contributing + 1. Please sign up to [Airbyte's Slack workspace](https://slack.airbyte.io/) and join the `#octavia-cli`. We'll sync up community efforts in this channel. -2. Read the [execution plan](https://docs.google.com/spreadsheets/d/1weB9nf0Zx3IR_QvpkxtjBAzyfGb7B0PWpsVt6iMB5Us/edit#gid=0) and find a task you'd like to work on. -3. Open a PR, make sure to test your code thoroughly. +2. Pick an existing [GitHub issues](https://github.com/airbytehq/airbyte/issues?q=is%3Aopen+is%3Aissue+label%3Aarea%2Foctavia-cli) or **open** a new one to explain what you'd like to implement. +3. Assign the GitHub issue to yourself. +4. Fork Airbyte's repo, code and test thoroughly. +5. Open a PR on our Airbyte repo from your fork. + +### Developing locally +0. Build the project locally (from the root of Airbyte's repo): `SUB_BUILD=OCTAVIA_CLI ./gradlew build # from the root directory of the repo`. +1. Install Python 3.8.12. We suggest doing it through `pyenv`. +2. Create a virtualenv: `python -m venv .venv`. +3. Activate the virtualenv: `source .venv/bin/activate`. +4. Install dev dependencies: `pip install -e .\[tests\]`. +5. Install `pre-commit` hooks: `pre-commit install`. +6. Run the unittest suite: `pytest --cov=octavia_cli`. +7. Make sure the build passes (step 0) before opening a PR. 
-# Changelog +## Changelog | Version | Date | Description | PR | |---------|------------|------------------|----------------------------------------------------------| -| 0.1.0 | 2022-03-15 | Alpha release | [EPIC](https://github.com/airbytehq/airbyte/issues/10704)| +| 0.1.0 | 2022-04-07 | Alpha release | [EPIC](https://github.com/airbytehq/airbyte/issues/10704)| diff --git a/octavia-cli/publish.sh b/octavia-cli/publish.sh index 884d141aeae3..557b411ebc2c 100755 --- a/octavia-cli/publish.sh +++ b/octavia-cli/publish.sh @@ -1,6 +1,11 @@ #!/usr/bin/env bash -set -eux +set -ux VERSION=$1 -docker tag airbyte/octavia-cli:dev airbyte/octavia-cli:${VERSION} -docker push airbyte/octavia-cli:${VERSION} +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +docker buildx create --name octavia_builder > /dev/null 2>&1 +set -e +docker buildx use octavia_builder +docker buildx inspect --bootstrap +docker buildx build --push --tag airbyte/octavia-cli:${VERSION} --platform=linux/arm64,linux/amd64 ${SCRIPT_DIR}