diff --git a/airbyte-cdk/python/CHANGELOG.md b/airbyte-cdk/python/CHANGELOG.md index 52d72007fd6e..42dc59ff18a2 100644 --- a/airbyte-cdk/python/CHANGELOG.md +++ b/airbyte-cdk/python/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 0.1.85 +- Make TypeTransformer more robust to incorrect incoming records + ## 0.1.84 - Emit legacy format when state is unspecified for read override connectors diff --git a/airbyte-cdk/python/airbyte_cdk/sources/utils/transform.py b/airbyte-cdk/python/airbyte_cdk/sources/utils/transform.py index 1759c316dc17..46e3f0e8f46f 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/utils/transform.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/utils/transform.py @@ -145,14 +145,18 @@ def resolve(subschema): return subschema # Transform object and array values before running json schema type checking for each element. - if schema_key == "properties": + # Recursively normalize every value of the "instance" sub-object. + # If "instance" is not a dict (the record does not match the schema), skip the recursion. + if schema_key == "properties" and isinstance(instance, dict): for k, subschema in property_value.items(): - if k in (instance or {}): + if k in instance: subschema = resolve(subschema) instance[k] = self.__normalize(instance[k], subschema) - elif schema_key == "items": + # Recursively normalize every item of the "instance" sub-array. + # If "instance" is not a list (the record does not match the schema), skip the recursion. + elif schema_key == "items" and isinstance(instance, list): subschema = resolve(property_value) - for index, item in enumerate((instance or [])): + for index, item in enumerate(instance): instance[index] = self.__normalize(item, subschema) # Running native jsonschema traverse algorithm after field normalization is done. 
diff --git a/airbyte-cdk/python/setup.py b/airbyte-cdk/python/setup.py index 7fb67d3c8513..4798a04f06af 100644 --- a/airbyte-cdk/python/setup.py +++ b/airbyte-cdk/python/setup.py @@ -15,7 +15,7 @@ setup( name="airbyte-cdk", - version="0.1.84", + version="0.1.85", description="A framework for writing Airbyte Connectors.", long_description=README, long_description_content_type="text/markdown", diff --git a/airbyte-cdk/python/unit_tests/sources/utils/test_transform.py b/airbyte-cdk/python/unit_tests/sources/utils/test_transform.py index d61a6dfa17dc..9a219dd05b22 100644 --- a/airbyte-cdk/python/unit_tests/sources/utils/test_transform.py +++ b/airbyte-cdk/python/unit_tests/sources/utils/test_transform.py @@ -149,6 +149,18 @@ {"cpc": 6.6666}, None, ), + ( + {"type": "object", "properties": {"value": {"type": "array", "items": {"type": "string"}}}}, + {"value": {"key": "value"}}, + {"value": {"key": "value"}}, + "{'key': 'value'} is not of type 'array'", + ), + ( + {"type": "object", "properties": {"value1": {"type": "object", "properties": {"value2": {"type": "string"}}}}}, + {"value1": "value2"}, + {"value1": "value2"}, + "'value2' is not of type 'object'", + ), ], ) def test_transform(schema, actual, expected, expected_warns, caplog):