diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 7f20aa1e57f7..cf2f720f292f 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -271,7 +271,7 @@ - name: File sourceDefinitionId: 778daa7c-feaf-4db6-96f3-70fd645acc77 dockerRepository: airbyte/source-file - dockerImageTag: 0.2.14 + dockerImageTag: 0.2.15 documentationUrl: https://docs.airbyte.io/integrations/sources/file icon: file.svg sourceType: file diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index ae7c5e63a0d5..8e024f3e1192 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2255,14 +2255,14 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-file:0.2.14" +- dockerImage: "airbyte/source-file:0.2.15" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/file" connectionSpecification: $schema: "http://json-schema.org/draft-07/schema#" title: "File Source Spec" type: "object" - additionalProperties: false + additionalProperties: true required: - "dataset_name" - "format" diff --git a/airbyte-integrations/connectors/source-file/Dockerfile b/airbyte-integrations/connectors/source-file/Dockerfile index d865b475089b..1a0769dd0cc6 100644 --- a/airbyte-integrations/connectors/source-file/Dockerfile +++ b/airbyte-integrations/connectors/source-file/Dockerfile @@ -17,5 +17,5 @@ COPY source_file ./source_file ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.14 +LABEL io.airbyte.version=0.2.15 LABEL io.airbyte.name=airbyte/source-file diff --git a/airbyte-integrations/connectors/source-file/setup.py b/airbyte-integrations/connectors/source-file/setup.py index ce832cc95d44..352340b965f0 100644 --- a/airbyte-integrations/connectors/source-file/setup.py +++ b/airbyte-integrations/connectors/source-file/setup.py @@ -7,23 +7,23 @@ MAIN_REQUIREMENTS = [ "airbyte-cdk~=0.1", - "gcsfs==0.7.1", + "gcsfs==2022.7.1", "genson==1.2.2", - "google-cloud-storage==1.35.0", + "google-cloud-storage==2.5.0", "pandas==1.4.3", - "paramiko==2.7.2", - "s3fs==0.4.2", - "smart-open[all]==4.1.2", - "lxml==4.6.5", + "paramiko==2.11.0", + "s3fs==2022.7.1", + "smart-open[all]==6.0.0", + "lxml==4.9.1", "html5lib==1.1", - "beautifulsoup4==4.9.3", - "pyarrow==8.0.0", + "beautifulsoup4==4.11.1", + "pyarrow==9.0.0", "xlrd==2.0.1", - "openpyxl==3.0.6", - "pyxlsb==1.0.8", + "openpyxl==3.0.10", + "pyxlsb==1.0.9", ] -TEST_REQUIREMENTS = ["boto3==1.16.57", "pytest==6.1.2", "pytest-docker==0.10.1", "pytest-mock~=3.7.0"] +TEST_REQUIREMENTS = ["boto3==1.21.21", "pytest==7.1.2", "pytest-docker==1.0.0", "pytest-mock~=3.8.2"] setup( name="source_file", diff --git a/airbyte-integrations/connectors/source-file/source_file/client.py b/airbyte-integrations/connectors/source-file/source_file/client.py index 36cd98e674b2..cda6b8c40db0 100644 --- a/airbyte-integrations/connectors/source-file/source_file/client.py +++ b/airbyte-integrations/connectors/source-file/source_file/client.py @@ -9,14 +9,14 @@ from typing import Iterable from urllib.parse import urlparse +import boto3 +import botocore import google import pandas as pd import smart_open from airbyte_cdk.entrypoint import logger from airbyte_cdk.models import AirbyteStream, SyncMode from azure.storage.blob import BlobServiceClient -from botocore import UNSIGNED -from botocore.config import Config from genson import SchemaBuilder from google.cloud.storage import Client as GCSClient from google.oauth2 import service_account @@ -200,10 +200,8 @@ def _open_aws_url(self, binary): aws_secret_access_key = self._provider.get("aws_secret_access_key", "") result = smart_open.open(f"{self.storage_scheme}{aws_access_key_id}:{aws_secret_access_key}@{self.url}", mode=mode) else: - config = Config(signature_version=UNSIGNED) - params = { - "resource_kwargs": {"config": config}, - } + config = botocore.client.Config(signature_version=botocore.UNSIGNED) + params = {"client": boto3.client("s3", config=config)} result = smart_open.open(self.full_url, transport_params=params, mode=mode) return result diff --git a/airbyte-integrations/connectors/source-file/source_file/spec.json b/airbyte-integrations/connectors/source-file/source_file/spec.json index 1081ad2f93e9..161a1219e15a 100644 --- a/airbyte-integrations/connectors/source-file/source_file/spec.json +++ b/airbyte-integrations/connectors/source-file/source_file/spec.json @@ -5,7 +5,7 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "File Source Spec", "type": "object", - "additionalProperties": false, + "additionalProperties": true, "required": ["dataset_name", "format", "url", "provider"], "properties": { "dataset_name": { diff --git a/docs/integrations/sources/file.md b/docs/integrations/sources/file.md index 81604e9085fc..34511122f630 100644 --- a/docs/integrations/sources/file.md +++ b/docs/integrations/sources/file.md @@ -127,6 +127,7 @@ In order to read large files from a remote location, this connector uses the [sm | Version | Date | Pull Request | Subject | |---------|------------|----------------------------------------------------------| ------------------------------------------------- | +| 0.2.15 | 2022-08-05 | [15269](https://github.com/airbytehq/airbyte/pull/15269) | Bump `smart-open` version to 6.0.0 | | 0.2.12 | 2022-07-12 | [14535](https://github.com/airbytehq/airbyte/pull/14535) | Fix invalid schema generation for JSON files | | 0.2.11 | 2022-07-12 | [9974](https://github.com/airbytehq/airbyte/pull/14588) | Add support to YAML format | | 0.2.9 | 2022-02-01 | [9974](https://github.com/airbytehq/airbyte/pull/9974) | Update airbyte-cdk 0.1.47 |