-
Notifications
You must be signed in to change notification settings - Fork 4.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Low-code CDK] Add ability to remove fields (#14402)
- Loading branch information
1 parent
f4524e3
commit 743e6c2
Showing
11 changed files
with
238 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
16 changes: 16 additions & 0 deletions
16
airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# | ||
# Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
|
||
# RecordTransformation is depended upon by every class in this module (since it's the ABC everything implements). For this reason, | ||
# the order of imports matters, i.e. this file must fully import RecordTransformation before importing anything which depends on RecordTransformation. | ||
# Otherwise there will be a circular dependency: the load order would be __init__.py --> RemoveFields (which tries to import RecordTransformation) --> | ||
# __init__.py --> circular dependency error, since loading this file causes it to try to import itself down the line. | ||
# So we add the split directive below to tell isort to sort imports while keeping RecordTransformation as the first import. | ||
from .transformation import RecordTransformation | ||
|
||
# isort: split | ||
from .remove_fields import RemoveFields | ||
|
||
__all__ = ["RecordTransformation", "RemoveFields"] |
56 changes: 56 additions & 0 deletions
56
airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# | ||
# Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
from typing import Any, List, Mapping | ||
|
||
import dpath.exceptions | ||
import dpath.util | ||
from airbyte_cdk.sources.declarative.transformations import RecordTransformation | ||
from airbyte_cdk.sources.declarative.types import FieldPointer | ||
|
||
|
||
class RemoveFields(RecordTransformation):
    """
    Record transformation that strips a configured set of fields out of a record.

    Each field to remove is identified by a FieldPointer, i.e. the path of keys (and list indices) leading to it.
    Pointers that do not resolve, because the field itself or one of its parents is missing, are skipped silently.

    When a pointer targets an element of a list (e.g: ["k", 0] in the object {"k": ["a", "b", "c"]}), the element
    is set to None rather than being entirely removed from the list. TODO change this behavior.
    Fields of objects nested inside lists can be removed, e.g: removing [".", 0, "k"] from {".": [{"k": "V"}]}
    results in {".": [{}]}

    Usage syntax:

    ```yaml
        my_stream:
            <other parameters..>
            transformations:
                - type: RemoveFields
                  field_pointers:
                    - ["path", "to", "field1"]
                    - ["path2"]
    ```
    """

    def __init__(self, field_pointers: List[FieldPointer]):
        """
        :param field_pointers: pointers to the fields that should be removed
        """
        self._field_pointers = field_pointers

    def transform(self, record: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        Remove every configured field from the record. The record is modified in place and the same object is returned.

        :param record: The record to be transformed
        :return: the input record with the requested fields removed
        """
        for field_pointer in self._field_pointers:
            try:
                # the dpath library by default doesn't delete fields from arrays
                dpath.util.delete(record, field_pointer)
            except dpath.exceptions.PathNotFound:
                # the (potentially nested) property is absent from this record: nothing to remove
                continue

        return record
22 changes: 22 additions & 0 deletions
22
airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# | ||
# Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
from abc import ABC, abstractmethod | ||
from typing import Any, Mapping | ||
|
||
|
||
class RecordTransformation(ABC):
    """
    Implementations of this class define transformations that can be applied to records of a stream.
    """

    @abstractmethod
    def transform(self, record: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        :param record: the input record to be transformed
        :return: the transformed record
        """

    def __eq__(self, other: object) -> bool:
        """
        Two transformations are equal when their instance attributes are equal.

        :param other: the object to compare against
        :return: True if both objects hold the same attributes; NotImplemented when `other` is not a
            RecordTransformation, so that `==` evaluates to False instead of raising
        """
        # Objects such as None, ints or strings have no __dict__; without this guard the comparison
        # below would raise AttributeError instead of simply reporting "not equal".
        if not isinstance(other, RecordTransformation):
            return NotImplemented
        # NOTE: defining __eq__ without __hash__ leaves instances unhashable (standard Python behavior).
        return other.__dict__ == self.__dict__
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_remove_fields.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# | ||
# Copyright (c) 2022 Airbyte, Inc., all rights reserved. | ||
# | ||
|
||
from typing import Any, List, Mapping | ||
|
||
import pytest | ||
from airbyte_cdk.sources.declarative.transformations import RemoveFields | ||
from airbyte_cdk.sources.declarative.types import FieldPointer | ||
|
||
|
||
@pytest.mark.parametrize(
    ["input_record", "field_pointers", "expected"],
    [
        pytest.param({"k1": "v", "k2": "v"}, [["k1"]], {"k2": "v"}, id="remove a field that exists (flat dict)"),
        pytest.param({"k1": "v", "k2": "v"}, [["k3"]], {"k1": "v", "k2": "v"}, id="remove a field that doesn't exist (flat dict)"),
        pytest.param({"k1": "v", "k2": "v"}, [["k1"], ["k2"]], {}, id="remove multiple fields that exist (flat dict)"),
        # TODO: should we instead splice the element out of the array? I think that's the more intuitive solution
        #  Otherwise one could just set the field's value to null.
        pytest.param({"k1": [1, 2]}, [["k1", 0]], {"k1": [None, 2]}, id="remove field inside array (int index)"),
        pytest.param({"k1": [1, 2]}, [["k1", "0"]], {"k1": [None, 2]}, id="remove field inside array (string index)"),
        pytest.param(
            {"k1": "v", "k2": "v", "k3": [0, 1], "k4": "v"},
            [["k1"], ["k2"], ["k3", 0]],
            {"k3": [None, 1], "k4": "v"},
            id="test all cases (flat)",
        ),
        # The pointer must address the flat record directly; a leading "." would make this a missing-parent case instead.
        pytest.param({"k1": [0, 1]}, [["k1", 10]], {"k1": [0, 1]}, id="remove array index that doesn't exist (flat)"),
        pytest.param({"k1": [0, 1]}, [[".", "k1", 10]], {"k1": [0, 1]}, id="remove field whose parent doesn't exist"),
        pytest.param({".": {"k1": [0, 1]}}, [[".", "k1", 10]], {".": {"k1": [0, 1]}}, id="remove array index that doesn't exist (nested)"),
        pytest.param({".": {"k2": "v", "k1": "v"}}, [[".", "k1"]], {".": {"k2": "v"}}, id="remove nested field that exists"),
        pytest.param(
            {".": {"k2": "v", "k1": "v"}}, [[".", "k3"]], {".": {"k2": "v", "k1": "v"}}, id="remove field that doesn't exist (nested)"
        ),
        pytest.param({".": {"k2": "v", "k1": "v"}}, [[".", "k1"], [".", "k2"]], {".": {}}, id="remove multiple fields that exist (nested)"),
        pytest.param({".": {"k1": [0, 1]}}, [[".", "k1", 0]], {".": {"k1": [None, 1]}}, id="remove field that exists in array (nested)"),
        pytest.param(
            {".": {"k1": [{"k2": "v", "k3": "v"}, {"k4": "v"}]}},
            [[".", "k1", 0, "k2"], [".", "k1", 1, "k4"]],
            {".": {"k1": [{"k3": "v"}, {}]}},
            id="remove fields that exist in arrays (deeply nested)",
        ),
    ],
)
def test_remove_fields(input_record: Mapping[str, Any], field_pointers: List[FieldPointer], expected: Mapping[str, Any]):
    """
    Check that RemoveFields removes exactly the pointed-to fields and ignores pointers that do not resolve.

    :param input_record: the record handed to the transformation
    :param field_pointers: pointers to the fields that should be removed
    :param expected: the record expected back from the transformation
    """
    transformation = RemoveFields(field_pointers)
    assert transformation.transform(input_record) == expected