-
Notifications
You must be signed in to change notification settings - Fork 323
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Model and store column lineage in Marquez DB (#2096)
* Create database representation, model classes Signed-off-by: mzareba <mzareba382@gmail.com> * Implement ColumnLevelLineageDao Signed-off-by: mzareba <mzareba382@gmail.com> * Instantiate ColumnLevelLineageDao in updateBaseMarquezModel Signed-off-by: mzareba <mzareba382@gmail.com> * Upsert ColumnLevelLineageRow to db, model representation in LineageEvent Signed-off-by: mzareba <mzareba382@gmail.com> * Fix problems in OpenLineageDao, add a list of ColumnLevelLineageRow to DatasetRecord, write test for createLineageRow() invocation Signed-off-by: mzareba <mzareba382@gmail.com> * Change wildcard imports to single class imports Signed-off-by: mzareba <mzareba382@gmail.com> * Change wildcard imports to single class imports Signed-off-by: mzareba <mzareba382@gmail.com> * Change wildcard imports to single class imports Signed-off-by: mzareba <mzareba382@gmail.com> * Apply spotless Signed-off-by: mzareba <mzareba382@gmail.com> * Check for ds.getFacets not null Signed-off-by: mzareba <mzareba382@gmail.com> * Format fix Signed-off-by: mzareba <mzareba382@gmail.com> * Update testUpdateMarquezModelDatasetWithColumnLineageFacet Signed-off-by: mzareba <mzareba382@gmail.com> * Test for column_level_lineage upsert. Signed-off-by: mzareba <mzareba382@gmail.com> * Apply spotless Signed-off-by: mzareba <mzareba382@gmail.com> * switch to data field references Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com> * fix broken tests Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com> * test when dataset_field is missing Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com> * add input_dataset_version_uuid field Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com> * increase db file version Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com> * increase db file version Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com> * rename ColumnLevelLineage -> ColumnLineage Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com> Signed-off-by: mzareba <mzareba382@gmail.com> Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com> Co-authored-by: Mariusz Zaręba <mzareba382@getindata.com> Co-authored-by: Pawel Leszczynski <leszczynski.pawel@gmail.com>
- Loading branch information
1 parent
2909864
commit b6544ec
Showing
15 changed files
with
856 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
/* | ||
* Copyright 2018-2022 contributors to the Marquez project | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package marquez.db; | ||
|
||
import java.time.Instant; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.UUID; | ||
import java.util.stream.Collectors; | ||
import marquez.db.mappers.ColumnLineageRowMapper; | ||
import marquez.db.models.ColumnLineageRow; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
import org.jdbi.v3.sqlobject.config.RegisterRowMapper; | ||
import org.jdbi.v3.sqlobject.customizer.BindBeanList; | ||
import org.jdbi.v3.sqlobject.statement.SqlQuery; | ||
import org.jdbi.v3.sqlobject.statement.SqlUpdate; | ||
|
||
@RegisterRowMapper(ColumnLineageRowMapper.class) | ||
public interface ColumnLineageDao extends BaseDao { | ||
|
||
default List<ColumnLineageRow> upsertColumnLineageRow( | ||
UUID outputDatasetVersionUuid, | ||
UUID outputDatasetFieldUuid, | ||
List<Pair<UUID, UUID>> inputs, | ||
String transformationDescription, | ||
String transformationType, | ||
Instant now) { | ||
|
||
if (inputs.isEmpty()) { | ||
return Collections.emptyList(); | ||
} | ||
|
||
doUpsertColumnLineageRow( | ||
inputs.stream() | ||
.map( | ||
input -> | ||
new ColumnLineageRow( | ||
outputDatasetVersionUuid, | ||
outputDatasetFieldUuid, | ||
input.getLeft(), // input_dataset_version_uuid | ||
input.getRight(), // input_dataset_field_uuid | ||
transformationDescription, | ||
transformationType, | ||
now, | ||
now)) | ||
.collect(Collectors.toList())); | ||
return findColumnLineageByDatasetVersionColumnAndOutputDatasetField( | ||
outputDatasetVersionUuid, outputDatasetFieldUuid); | ||
} | ||
|
||
@SqlQuery( | ||
"SELECT * FROM column_lineage WHERE output_dataset_version_uuid = :datasetVersionUuid AND output_dataset_field_uuid = :outputDatasetFieldUuid") | ||
List<ColumnLineageRow> findColumnLineageByDatasetVersionColumnAndOutputDatasetField( | ||
UUID datasetVersionUuid, UUID outputDatasetFieldUuid); | ||
|
||
@SqlUpdate( | ||
""" | ||
INSERT INTO column_lineage ( | ||
output_dataset_version_uuid, | ||
output_dataset_field_uuid, | ||
input_dataset_version_uuid, | ||
input_dataset_field_uuid, | ||
transformation_description, | ||
transformation_type, | ||
created_at, | ||
updated_at | ||
) VALUES <values> | ||
ON CONFLICT (output_dataset_version_uuid, output_dataset_field_uuid, input_dataset_version_uuid, input_dataset_field_uuid) | ||
DO UPDATE SET | ||
transformation_description = EXCLUDED.transformation_description, | ||
transformation_type = EXCLUDED.transformation_type, | ||
updated_at = EXCLUDED.updated_at | ||
""") | ||
void doUpsertColumnLineageRow( | ||
@BindBeanList( | ||
propertyNames = { | ||
"outputDatasetVersionUuid", | ||
"outputDatasetFieldUuid", | ||
"inputDatasetVersionUuid", | ||
"inputDatasetFieldUuid", | ||
"transformationDescription", | ||
"transformationType", | ||
"createdAt", | ||
"updatedAt" | ||
}, | ||
value = "values") | ||
List<ColumnLineageRow> rows); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
37 changes: 37 additions & 0 deletions
37
api/src/main/java/marquez/db/mappers/ColumnLineageRowMapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
* Copyright 2018-2022 contributors to the Marquez project | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package marquez.db.mappers; | ||
|
||
import static marquez.db.Columns.TRANSFORMATION_DESCRIPTION; | ||
import static marquez.db.Columns.TRANSFORMATION_TYPE; | ||
import static marquez.db.Columns.stringOrThrow; | ||
import static marquez.db.Columns.timestampOrThrow; | ||
import static marquez.db.Columns.uuidOrThrow; | ||
|
||
import java.sql.ResultSet; | ||
import java.sql.SQLException; | ||
import lombok.NonNull; | ||
import marquez.db.Columns; | ||
import marquez.db.models.ColumnLineageRow; | ||
import org.jdbi.v3.core.mapper.RowMapper; | ||
import org.jdbi.v3.core.statement.StatementContext; | ||
|
||
public class ColumnLineageRowMapper implements RowMapper<ColumnLineageRow> { | ||
|
||
@Override | ||
public ColumnLineageRow map(@NonNull ResultSet results, @NonNull StatementContext context) | ||
throws SQLException { | ||
return new ColumnLineageRow( | ||
uuidOrThrow(results, Columns.OUTPUT_DATASET_VERSION_UUID), | ||
uuidOrThrow(results, Columns.OUTPUT_DATASET_FIELD_UUID), | ||
uuidOrThrow(results, Columns.INPUT_DATASET_VERSION_UUID), | ||
uuidOrThrow(results, Columns.INPUT_DATASET_FIELD_UUID), | ||
stringOrThrow(results, TRANSFORMATION_DESCRIPTION), | ||
stringOrThrow(results, TRANSFORMATION_TYPE), | ||
timestampOrThrow(results, Columns.CREATED_AT), | ||
timestampOrThrow(results, Columns.UPDATED_AT)); | ||
} | ||
} |
Oops, something went wrong.