From 2507f21e27bcfde63bb176d8aa98bd11c7cd6e52 Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Thu, 21 Jul 2022 12:18:46 +0300 Subject: [PATCH 1/3] decimal added for parquet Signed-off-by: Sergey Chvalyuk --- airbyte-integrations/connectors/source-s3/Dockerfile | 2 +- .../source_s3/source_files_abstract/formats/parquet_parser.py | 1 + .../connectors/source-s3/unit_tests/abstract_test_parser.py | 3 +++ .../connectors/source-s3/unit_tests/test_parquet_parser.py | 3 +++ 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-s3/Dockerfile b/airbyte-integrations/connectors/source-s3/Dockerfile index 7faf458805eb..0229bab4038c 100644 --- a/airbyte-integrations/connectors/source-s3/Dockerfile +++ b/airbyte-integrations/connectors/source-s3/Dockerfile @@ -17,5 +17,5 @@ COPY source_s3 ./source_s3 ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.16 +LABEL io.airbyte.version=0.1.17 LABEL io.airbyte.name=airbyte/source-s3 diff --git a/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/parquet_parser.py b/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/parquet_parser.py index 7401e889b97f..a274502a0ef1 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/parquet_parser.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/source_files_abstract/formats/parquet_parser.py @@ -18,6 +18,7 @@ "boolean": ("boolean", ["BOOLEAN"], None), "number": ("number", ["DOUBLE", "FLOAT"], None), "integer": ("integer", ["INT32", "INT64", "INT96"], None), + "decimal": ("number", ["INT32", "INT64", "FIXED_LEN_BYTE_ARRAY"], None), # supported by PyArrow types "timestamp": ("string", ["INT32", "INT64", "INT96"], lambda v: v.isoformat()), "date": ("string", ["INT32", "INT64", "INT96"], lambda v: v.isoformat()), diff --git a/airbyte-integrations/connectors/source-s3/unit_tests/abstract_test_parser.py b/airbyte-integrations/connectors/source-s3/unit_tests/abstract_test_parser.py index 3b8e35260c7b..433120053b4b 100644 --- a/airbyte-integrations/connectors/source-s3/unit_tests/abstract_test_parser.py +++ b/airbyte-integrations/connectors/source-s3/unit_tests/abstract_test_parser.py @@ -9,6 +9,7 @@ import tracemalloc from abc import ABC, abstractmethod from datetime import datetime, timedelta +from decimal import Decimal from functools import lru_cache, wraps from typing import Any, Callable, List, Mapping @@ -106,6 +107,8 @@ def _generate_value(cls, typ: str) -> Any: elif typ == "time": dt = cls._generate_value("timestamp") return dt.time() if dt else None + elif typ == "decimal": + return Decimal((0, tuple([random.randint(1, 9) for _ in range(10)]), -4)) raise Exception(f"not supported type: {typ}") @classmethod diff --git a/airbyte-integrations/connectors/source-s3/unit_tests/test_parquet_parser.py b/airbyte-integrations/connectors/source-s3/unit_tests/test_parquet_parser.py index d8ccb1abf919..f9133b7ca9bb 100644 --- a/airbyte-integrations/connectors/source-s3/unit_tests/test_parquet_parser.py +++ b/airbyte-integrations/connectors/source-s3/unit_tests/test_parquet_parser.py @@ -76,6 +76,7 @@ def cases(cls) -> Mapping[str, Any]: "degrees": "number", "birthday": "string", "last_seen": "string", + "salary": "decimal", "created_at": "timestamp", "created_date_at": "date", "created_time_at": "time", @@ -200,12 +201,14 @@ def cases(cls) -> Mapping[str, Any]: "degrees": -9.2, "birthday": cls._generate_value("string"), "last_seen": cls._generate_value("string"), + "salary": cls._generate_value("decimal"), "created_at": cls._generate_value("timestamp"), "created_date_at": cls._generate_value("date"), "created_time_at": cls._generate_value("time"), } expected_record = copy.deepcopy(test_record) + expected_record["salary"] = ParquetParser.convert_field_data("decimal", expected_record["salary"]) expected_record["created_date_at"] = ParquetParser.convert_field_data("date", expected_record["created_date_at"]) expected_record["created_time_at"] = ParquetParser.convert_field_data("time", expected_record["created_time_at"]) expected_record["created_at"] = ParquetParser.convert_field_data("timestamp", expected_record["created_at"]) From f1826fa5f33bfc86b1e3cbfcaf5e630abe64e62b Mon Sep 17 00:00:00 2001 From: Sergey Chvalyuk Date: Thu, 21 Jul 2022 13:15:39 +0300 Subject: [PATCH 2/3] s3.md updated Signed-off-by: Sergey Chvalyuk --- docs/integrations/sources/s3.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/integrations/sources/s3.md b/docs/integrations/sources/s3.md index 3f637c1facde..5e3d07e219ab 100644 --- a/docs/integrations/sources/s3.md +++ b/docs/integrations/sources/s3.md @@ -195,8 +195,9 @@ The avro parser uses [fastavro](https://fastavro.readthedocs.io/en/latest/). Cur | Version | Date | Pull Request | Subject | |:--------|:-----------|:----------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------| -| 0.1.16 | 2022-07-13 | [14669](https://github.com/airbytehq/airbyte/pull/14669) | Fixed bug when extra columns apeared to be non-present in master schema | -| 0.1.15 | 2022-05-31 | [12568](https://github.com/airbytehq/airbyte/pull/12568) | Fixed possible case of files being missed during incremental syncs | +| 0.1.17 | 2022-07-21 | [14911](https://github.com/airbytehq/airbyte/pull/14911) | "decimal" type added for parquet | +| 0.1.16 | 2022-07-13 | [14669](https://github.com/airbytehq/airbyte/pull/14669) | Fixed bug when extra columns apeared to be non-present in master schema | +| 0.1.15 | 2022-05-31 | [12568](https://github.com/airbytehq/airbyte/pull/12568) | Fixed possible case of files being missed during incremental syncs | | 0.1.14 | 2022-05-23 | [11967](https://github.com/airbytehq/airbyte/pull/11967) | Increase unit test coverage up to 90% | | 0.1.13 | 2022-05-11 | [12730](https://github.com/airbytehq/airbyte/pull/12730) | Fixed empty options issue | | 0.1.12 | 2022-05-11 | [12602](https://github.com/airbytehq/airbyte/pull/12602) | Added support for Avro file format | From 45e618fc525db5b462a1c785e1123218066c0ece Mon Sep 17 00:00:00 2001 From: Octavia Squidington III Date: Thu, 21 Jul 2022 22:01:21 +0000 Subject: [PATCH 3/3] auto-bump connector version [ci skip] --- .../init/src/main/resources/seed/source_definitions.yaml | 2 +- airbyte-config/init/src/main/resources/seed/source_specs.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index 0b0b32aec14c..283e3fadd370 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -825,7 +825,7 @@ - name: S3 sourceDefinitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2 dockerRepository: airbyte/source-s3 - dockerImageTag: 0.1.16 + dockerImageTag: 0.1.17 documentationUrl: https://docs.airbyte.io/integrations/sources/s3 icon: s3.svg sourceType: file diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index c67c13e16535..c8df57da22d5 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -7823,7 +7823,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-s3:0.1.16" +- dockerImage: "airbyte/source-s3:0.1.17" spec: documentationUrl: "https://docs.airbyte.io/integrations/sources/s3" changelogUrl: "https://docs.airbyte.io/integrations/sources/s3"