From 2568d6107cd693730bc2d8037bf3ddf9f0e5e4fb Mon Sep 17 00:00:00 2001 From: Maxime Carbonneau-Leclerc Date: Tue, 6 Dec 2022 18:55:00 -0500 Subject: [PATCH] Issue 19733 cdk clarify config error message for config files (#20019) * [ISSUE-19733] clarify error message when reading config files * [ISSUE #19733] code review and adding validation for spec file as well * [ISSUE #19733] updating typing of read_json_file * [ISSUE #19733] fix flake8 error * [ISSUE #19733] fix linting error * [ISSUE #19733] remove breaking change * [ISSUE #19733] bump airbyte cdk version * [ISSUE #19733] add test for invalid json file on read_state * [ISSUE #19733] bump version --- airbyte-cdk/python/CHANGELOG.md | 8 +++++- airbyte-cdk/python/airbyte_cdk/connector.py | 27 +++++++++++++++---- .../airbyte_cdk/sources/singer/source.py | 2 +- .../python/airbyte_cdk/sources/source.py | 5 ++-- airbyte-cdk/python/setup.py | 2 +- .../python/unit_tests/sources/test_source.py | 8 ++++++ .../python/unit_tests/test_connector.py | 25 +++++++++++++++++ 7 files changed, 66 insertions(+), 11 deletions(-) diff --git a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index f7a2b466e97c..2b48a25ca5de 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,6 +1,12 @@ # Changelog -## 0.11.2 +## 0.12.2 +Revert breaking change on `read_config` while keeping the improvement on the error message + +## 0.12.0 +Improve error readability when reading JSON config files + +## 0.11.3 Low-code: Log response error message on failure ## 0.11.2 diff --git a/airbyte-cdk/python/airbyte_cdk/connector.py b/airbyte-cdk/python/airbyte_cdk/connector.py index bd47f188c907..a130c6a3ea90 100644 --- a/airbyte-cdk/python/airbyte_cdk/connector.py +++ b/airbyte-cdk/python/airbyte_cdk/connector.py @@ -8,7 +8,7 @@ import os import pkgutil from abc import ABC, abstractmethod -from typing import Any, Generic, Mapping, Optional, Protocol, TypeVar +from typing import Any, Generic, List, Mapping, Optional, Protocol, TypeVar, Union import yaml from airbyte_cdk.models import AirbyteConnectionStatus, ConnectorSpecification @@ -47,10 +47,24 @@ def configure(self, config: Mapping[str, Any], temp_dir: str) -> TConfig: """ @staticmethod - def read_config(config_path: str) -> TConfig: - with open(config_path, "r") as file: + def read_config(config_path: str) -> Mapping[str, Any]: + config = BaseConnector._read_json_file(config_path) + if isinstance(config, Mapping): + return config + else: + raise ValueError( + f"The content of {config_path} is not an object and therefore is not a valid config. Please ensure the file represent a config." + ) + + @staticmethod + def _read_json_file(file_path: str) -> Union[None, bool, float, int, str, List[Any], Mapping[str, Any]]: + with open(file_path, "r") as file: contents = file.read() - return json.loads(contents) + + try: + return json.loads(contents) + except json.JSONDecodeError as error: + raise ValueError(f"Could not read json file {file_path}: {error}. Please ensure that it is a valid JSON.") @staticmethod def write_config(config: TConfig, config_path: str): @@ -74,7 +88,10 @@ def spec(self, logger: logging.Logger) -> ConnectorSpecification: if yaml_spec: spec_obj = yaml.load(yaml_spec, Loader=yaml.SafeLoader) elif json_spec: - spec_obj = json.loads(json_spec) + try: + spec_obj = json.loads(json_spec) + except json.JSONDecodeError as error: + raise ValueError(f"Could not read json spec file: {error}. Please ensure that it is a valid JSON.") else: raise FileNotFoundError("Unable to find spec.yaml or spec.json in the package.") diff --git a/airbyte-cdk/python/airbyte_cdk/sources/singer/source.py b/airbyte-cdk/python/airbyte_cdk/sources/singer/source.py index d70beaa2a43f..7048803f408b 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/singer/source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/singer/source.py @@ -98,7 +98,7 @@ def read(self, logger: logging.Logger, config: ConfigContainer, catalog_path: st Implements the parent class read method. """ catalogs = self._discover_internal(logger, config.config_path) - masked_airbyte_catalog = ConfiguredAirbyteCatalog.parse_obj(self.read_config(catalog_path)) + masked_airbyte_catalog = ConfiguredAirbyteCatalog.parse_obj(self._read_json_file(catalog_path)) selected_singer_catalog_path = SingerHelper.create_singer_catalog_with_selection(masked_airbyte_catalog, catalogs.singer_catalog) read_cmd = self.read_cmd(logger, config.config_path, selected_singer_catalog_path, state_path) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/source.py b/airbyte-cdk/python/airbyte_cdk/sources/source.py index f73f6819548c..fd2650cc79ac 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/source.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/source.py @@ -3,7 +3,6 @@ # -import json import logging from abc import ABC, abstractmethod from collections import defaultdict @@ -54,7 +53,7 @@ def read_state(self, state_path: str) -> Union[List[AirbyteStateMessage], Mutabl :return: The complete stream state based on the connector's previous sync """ if state_path: - state_obj = json.loads(open(state_path, "r").read()) + state_obj = self._read_json_file(state_path) if not state_obj: return self._emit_legacy_state_format({}) is_per_stream_state = isinstance(state_obj, List) @@ -87,4 +86,4 @@ def _emit_legacy_state_format(self, state_obj) -> Union[List[AirbyteStateMessage # can be overridden to change an input catalog def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog: - return ConfiguredAirbyteCatalog.parse_obj(self.read_config(catalog_path)) + return ConfiguredAirbyteCatalog.parse_obj(self._read_json_file(catalog_path)) diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index 8111b989ae33..efd60bfc3c93 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -15,7 +15,7 @@ setup( name="airbyte-cdk", - version="0.12.1", + version="0.12.2", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", diff --git a/airbyte-cdk/python/unit_tests/sources/test_source.py b/airbyte-cdk/python/unit_tests/sources/test_source.py index 1034975c1892..5b67d57444eb 100644 --- a/airbyte-cdk/python/unit_tests/sources/test_source.py +++ b/airbyte-cdk/python/unit_tests/sources/test_source.py @@ -288,6 +288,14 @@ def test_read_state(source, incoming_state, expected_state, expected_error): assert actual == expected_state +def test_read_invalid_state(source): + with tempfile.NamedTemporaryFile("w") as state_file: + state_file.write("invalid json content") + state_file.flush() + with pytest.raises(ValueError, match="Could not read json file"): + source.read_state(state_file.name) + + def test_read_state_sends_new_legacy_format_if_source_does_not_implement_read(): expected_state = [ AirbyteStateMessage( diff --git a/airbyte-cdk/python/unit_tests/test_connector.py b/airbyte-cdk/python/unit_tests/test_connector.py index 06e9dd16ead1..0474946697e5 100644 --- a/airbyte-cdk/python/unit_tests/test_connector.py +++ b/airbyte-cdk/python/unit_tests/test_connector.py @@ -65,6 +65,14 @@ def nonempty_file(mock_config): yield file +@pytest.fixture +def nonjson_file(mock_config): + with tempfile.NamedTemporaryFile("w") as file: + file.write("the content of this file is not JSON") + file.flush() + yield file + + @pytest.fixture def integration(): return MockConnector() @@ -75,6 +83,11 @@ def test_read_config(nonempty_file, integration: Connector, mock_config): assert mock_config == actual +def test_read_non_json_config(nonjson_file, integration: Connector): + with pytest.raises(ValueError, match="Could not read json file"): + integration.read_config(nonjson_file.name) + + def test_write_config(integration, mock_config): config_path = Path(tempfile.gettempdir()) / "config.json" integration.write_config(mock_config, str(config_path)) @@ -103,6 +116,14 @@ def use_json_spec(self): yield os.remove(json_path) + @pytest.fixture + def use_invalid_json_spec(self): + json_path = os.path.join(SPEC_ROOT, "spec.json") + with open(json_path, "w") as f: + f.write("the content of this file is not JSON") + yield + os.remove(json_path) + @pytest.fixture def use_yaml_spec(self): spec = {"documentationUrl": "https://airbyte.com/#yaml", "connectionSpecification": self.CONNECTION_SPECIFICATION} @@ -118,6 +139,10 @@ def test_spec_from_json_file(self, integration, use_json_spec): assert connector_spec.documentationUrl == "https://airbyte.com/#json" assert connector_spec.connectionSpecification == self.CONNECTION_SPECIFICATION + def test_spec_from_improperly_formatted_json_file(self, integration, use_invalid_json_spec): + with pytest.raises(ValueError, match="Could not read json spec file"): + integration.spec(logger) + def test_spec_from_yaml_file(self, integration, use_yaml_spec): connector_spec = integration.spec(logger) assert connector_spec.documentationUrl == "https://airbyte.com/#yaml"