Skip to content

Commit

Permalink
fix column lineage returning multiple entries for job run multiple times
Browse files Browse the repository at this point in the history
Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com>
  • Loading branch information
pawel-big-lebowski committed Oct 10, 2022
1 parent 496566e commit 5e496ec
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 3 deletions.
9 changes: 6 additions & 3 deletions api/src/main/java/marquez/db/ColumnLineageDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,12 @@ dataset_fields_view AS (
INNER JOIN datasets_view d ON d.uuid = df.dataset_uuid
),
column_lineage_recursive AS (
SELECT *, 0 as depth
FROM column_lineage
WHERE output_dataset_field_uuid IN (<datasetFieldUuids>) AND created_at <= :createdAtUntil
(
SELECT DISTINCT ON (output_dataset_field_uuid, input_dataset_field_uuid) *, 0 as depth
FROM column_lineage
WHERE output_dataset_field_uuid IN (<datasetFieldUuids>) AND created_at <= :createdAtUntil
ORDER BY output_dataset_field_uuid, input_dataset_field_uuid, updated_at DESC, updated_at
)
UNION
SELECT
upstream_node.output_dataset_version_uuid,
Expand Down
28 changes: 28 additions & 0 deletions api/src/test/java/marquez/db/ColumnLineageDaoTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -565,4 +565,32 @@ void testGetLineagePointInTime() {
20, Collections.singletonList(field_col_b), columnLineageCreatedAt.plusSeconds(1)))
.hasSize(1);
}

@Test
void testGetLineageWhenJobRunMultipleTimes() {
Dataset dataset_A = getDatasetA();
Dataset dataset_B = getDatasetB();

LineageTestUtils.createLineageRow(
openLineageDao,
"job1",
"COMPLETE",
jobFacet,
Arrays.asList(dataset_A),
Arrays.asList(dataset_B));
UpdateLineageRow lineageRow =
LineageTestUtils.createLineageRow(
openLineageDao,
"job1",
"COMPLETE",
jobFacet,
Arrays.asList(dataset_A),
Arrays.asList(dataset_B));

UpdateLineageRow.DatasetRecord datasetRecord_b = lineageRow.getOutputs().get().get(0);
UUID field_col_b = fieldDao.findUuid(datasetRecord_b.getDatasetRow().getUuid(), "col_c").get();

assertThat(dao.getLineage(20, Collections.singletonList(field_col_b), Instant.now()))
.hasSize(1);
}
}

0 comments on commit 5e496ec

Please sign in to comment.