diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py index 164161de63e6..01387c7ad690 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py @@ -70,7 +70,10 @@ def extract_records(self, response: requests.Response) -> List[Record]: extracted = response_body else: pointer = [pointer.eval(self.config) for pointer in self.field_pointer] - extracted = dpath.util.get(response_body, pointer, default=[]) + if "*" in pointer: + extracted = dpath.util.values(response_body, pointer) + else: + extracted = dpath.util.get(response_body, pointer, default=[]) if isinstance(extracted, list): return extracted elif extracted: diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_dpath_extractor.py b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_dpath_extractor.py index 5b15fb5fbb6a..4e9c575ad957 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_dpath_extractor.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_dpath_extractor.py @@ -26,6 +26,8 @@ ("test_field_in_config", ["{{ config['field'] }}"], {"record_array": [{"id": 1}, {"id": 2}]}, [{"id": 1}, {"id": 2}]), ("test_field_in_options", ["{{ options['options_field'] }}"], {"record_array": [{"id": 1}, {"id": 2}]}, [{"id": 1}, {"id": 2}]), ("test_field_does_not_exist", ["record"], {"id": 1}, []), + ("test_nested_list", ["list", "*", "item"], {"list": [{"item": {"id": "1"}}]}, [{"id": "1"}]), + ("test_complex_nested_list", ['data', '*', 'list', 'data2', '*'], {"data": [{"list": {"data2": [{"id": 1}, {"id": 2}]}},{"list": {"data2": [{"id": 3}, {"id": 4}]}}]}, [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}]) ], ) def test_dpath_extractor(test_name, field_pointer, body, expected_records): diff --git a/docs/connector-development/config-based/understanding-the-yaml-file/record-selector.md b/docs/connector-development/config-based/understanding-the-yaml-file/record-selector.md index dd729ca78ff9..bf48dcf9b60e 100644 --- a/docs/connector-development/config-based/understanding-the-yaml-file/record-selector.md +++ b/docs/connector-development/config-based/understanding-the-yaml-file/record-selector.md @@ -22,6 +22,7 @@ Schema: ``` The current record extraction implementation uses [dpath](https://pypi.org/project/dpath/) to select records from the json-decoded HTTP response. +For nested structures `*` can be used to iterate over array elements. Schema: ```yaml @@ -152,6 +153,50 @@ The selected records will be ] ``` +### Selecting fields nested in arrays + +Given a response body of the form + +```json + +{ + "data": [ + { + "record": { + "id": "1" + } + }, + { + "record": { + "id": "2" + } + } + ] +} + +``` + +and a selector + +```yaml +selector: + extractor: + field_pointer: [ "data", "*", "record" ] +``` + +The selected records will be + +```json +[ + { + "id": 1 + }, + { + "id": 2 + } +] +``` + ## Filtering records Records can be filtered by adding a record_filter to the selector.