From 5149108020645cb8e3ae47045eb13e270cd85e59 Mon Sep 17 00:00:00 2001 From: jerome DOUCET Date: Mon, 5 Dec 2022 12:33:23 +0100 Subject: [PATCH] :bug: Source ElasticSearch: avoid too_long_frame_exception (#18134) * Fix: (elasticsearch source) avoid too_long_frame_exception Batch the mapping queries with an arbitrary (but reasonable) chunk size to avoid reaching the 4096-byte URL size limit. * bump connector * auto-bump connector version Co-authored-by: Marcos Marx Co-authored-by: marcosmarxm Co-authored-by: Octavia Squidington III --- .../main/resources/seed/source_definitions.yaml | 2 +- .../src/main/resources/seed/source_specs.yaml | 10 +++++----- .../connectors/source-elasticsearch/Dockerfile | 2 +- .../connectors/source-elasticsearch/README.md | 4 ++-- .../elasticsearch/ElasticsearchConnection.java | 17 ++++++++++++----- docs/integrations/sources/elasticsearch.md | 1 + 6 files changed, 22 insertions(+), 14 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 4216aa386801..6c21b051ebaf 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -1952,7 +1952,7 @@ - name: Elasticsearch sourceDefinitionId: 7cf88806-25f5-4e1a-b422-b2fa9e1b0090 dockerRepository: airbyte/source-elasticsearch - dockerImageTag: 0.1.0 + dockerImageTag: 0.1.1 documentationUrl: https://docs.airbyte.com/integrations/sources/elasticsearch sourceType: api releaseStage: alpha diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 7fceeed59030..146cbda082c9 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -16939,7 +16939,7 @@ supportsNormalization: false supportsDBT: false 
supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-elasticsearch:0.1.0" +- dockerImage: "airbyte/source-elasticsearch:0.1.1" spec: documentationUrl: "https://docs.airbyte.com/integrations/source/elasticsearch" connectionSpecification: @@ -16948,7 +16948,7 @@ type: "object" required: - "endpoint" - additionalProperties: false + additionalProperties: true properties: endpoint: title: "Server Endpoint" @@ -16960,7 +16960,7 @@ description: "The type of authentication to be used" oneOf: - title: "None" - additionalProperties: false + additionalProperties: true description: "No authentication will be used" required: - "method" @@ -16969,7 +16969,7 @@ type: "string" const: "none" - title: "Api Key/Secret" - additionalProperties: false + additionalProperties: true description: "Use a api key and secret combination to authenticate" required: - "method" @@ -16990,7 +16990,7 @@ type: "string" airbyte_secret: true - title: "Username/Password" - additionalProperties: false + additionalProperties: true description: "Basic auth header with a username and password" required: - "method" diff --git a/airbyte-integrations/connectors/source-elasticsearch/Dockerfile b/airbyte-integrations/connectors/source-elasticsearch/Dockerfile index bb08a5f841b1..cebf561d59d1 100644 --- a/airbyte-integrations/connectors/source-elasticsearch/Dockerfile +++ b/airbyte-integrations/connectors/source-elasticsearch/Dockerfile @@ -17,5 +17,5 @@ ENV ENABLE_SENTRY true COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.version=0.1.1 LABEL io.airbyte.name=airbyte/source-elasticsearch diff --git a/airbyte-integrations/connectors/source-elasticsearch/README.md b/airbyte-integrations/connectors/source-elasticsearch/README.md index 4881e6583321..fbd9835d8656 100644 --- a/airbyte-integrations/connectors/source-elasticsearch/README.md +++ b/airbyte-integrations/connectors/source-elasticsearch/README.md @@ -52,11 +52,11 @@ Airbyte has a standard test suite that 
all destination connectors must pass. See All commands should be run from airbyte project root. To run unit tests: ``` -./gradlew :airbyte-integrations:connectors:sources-elasticsearch:unitTest +./gradlew :airbyte-integrations:connectors:source-elasticsearch:unitTest ``` To run acceptance and custom integration tests: ``` -./gradlew :airbyte-integrations:connectors:sources-elasticsearch:integrationTest +./gradlew :airbyte-integrations:connectors:source-elasticsearch:integrationTest ``` ## Dependency Management diff --git a/airbyte-integrations/connectors/source-elasticsearch/src/main/java/io/airbyte/integrations/source/elasticsearch/ElasticsearchConnection.java b/airbyte-integrations/connectors/source-elasticsearch/src/main/java/io/airbyte/integrations/source/elasticsearch/ElasticsearchConnection.java index e677cfa53496..e5aca6e1dba4 100644 --- a/airbyte-integrations/connectors/source-elasticsearch/src/main/java/io/airbyte/integrations/source/elasticsearch/ElasticsearchConnection.java +++ b/airbyte-integrations/connectors/source-elasticsearch/src/main/java/io/airbyte/integrations/source/elasticsearch/ElasticsearchConnection.java @@ -146,11 +146,18 @@ private RuntimeException unwrappedApiException(String message, ApiException e) { * @throws IOException throws IOException if Elasticsearch request fails */ public Map getMappings(final List indices) throws IOException { - GetMappingsRequest request = new GetMappingsRequest(); - String[] copiedIndices = indices.toArray(String[]::new); - request.indices(copiedIndices); - GetMappingsResponse getMappingResponse = client.indices().getMapping(request, RequestOptions.DEFAULT); - return getMappingResponse.mappings(); + int chunk = 15; + Map mappings = new HashMap<>(); + // Avoid too_long_frame_exception error by "batching" + // the indexes mapping calls + for(int i = 0; i < indices.size(); i += chunk){ + String[] copiedIndices = indices.subList(i, Math.min(indices.size(), i + chunk)).toArray(String[]::new); + 
GetMappingsRequest request = new GetMappingsRequest(); + request.indices(copiedIndices); + GetMappingsResponse getMappingResponse = client.indices().getMapping(request, RequestOptions.DEFAULT); + mappings.putAll(getMappingResponse.mappings()); + } + return mappings; } /** diff --git a/docs/integrations/sources/elasticsearch.md b/docs/integrations/sources/elasticsearch.md index b83665bfe2e7..8767d5a7118a 100644 --- a/docs/integrations/sources/elasticsearch.md +++ b/docs/integrations/sources/elasticsearch.md @@ -84,4 +84,5 @@ all values in the array must be of the same data type. Hence, every field can be | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :-------------- | +| `0.1.1` | 2022-12-02 | [18118](https://github.com/airbytehq/airbyte/pull/18118) | Avoid too_long_frame_exception | | `0.1.0` | 2022-07-12 | [14118](https://github.com/airbytehq/airbyte/pull/14118) | Initial Release |