From 90b2eff1a4b3d94fcf17907a579e814c067c94b0 Mon Sep 17 00:00:00 2001 From: "pawel.leszczynski" Date: Tue, 13 Dec 2022 17:34:13 +0100 Subject: [PATCH 01/13] java client: point-in-time for column-level lineage (#2269) Signed-off-by: Pawel Leszczynski Signed-off-by: Pawel Leszczynski --- CHANGELOG.md | 2 + .../marquez/ColumnLineageIntegrationTest.java | 17 ++- .../marquez/service/models/NodeIdTest.java | 22 ++- .../java/marquez/client/MarquezClient.java | 47 +----- .../main/java/marquez/client/MarquezUrl.java | 25 +--- .../client/models/DatasetFieldVersionId.java | 24 +++ .../client/models/DatasetVersionId.java | 22 +++ .../java/marquez/client/models/NodeId.java | 73 +++++++++ .../marquez/client/MarquezClientTest.java | 139 +++++------------- .../java/marquez/client/MarquezUrlTest.java | 55 ++++++- .../marquez/client/models/NodeIdTest.java | 74 ++++++++++ 11 files changed, 315 insertions(+), 185 deletions(-) create mode 100644 clients/java/src/main/java/marquez/client/models/DatasetFieldVersionId.java create mode 100644 clients/java/src/main/java/marquez/client/models/DatasetVersionId.java diff --git a/CHANGELOG.md b/CHANGELOG.md index c23deeca8b..aba9f74b12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ * Column-lineage endpoints supports point-in-time requests [`#2265`](https://github.com/MarquezProject/marquez/pull/2265) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski) *Enable requesting `column-lineage` endpoint by a dataset version, job version or dataset field of a specific dataset version.* +* Column lineage point in time java client [`#2269`](https://github.com/MarquezProject/marquez/pull/2269) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski) + *Java client methods to retrieve point in time `column-lineage`. 
Please note that existing methods `getColumnLineageByDataset`, `getColumnLineageByDatasetField` and `getColumnLineageByJob` were replaced by a single `getColumnLineage` taking `NodeId` as a parameter.* ### Fixed diff --git a/api/src/test/java/marquez/ColumnLineageIntegrationTest.java b/api/src/test/java/marquez/ColumnLineageIntegrationTest.java index 91d0c886fd..bbdc8e2ae5 100644 --- a/api/src/test/java/marquez/ColumnLineageIntegrationTest.java +++ b/api/src/test/java/marquez/ColumnLineageIntegrationTest.java @@ -14,7 +14,11 @@ import java.util.Optional; import marquez.api.JdbiUtils; import marquez.client.MarquezClient; +import marquez.client.models.DatasetFieldId; +import marquez.client.models.DatasetId; +import marquez.client.models.JobId; import marquez.client.models.Node; +import marquez.client.models.NodeId; import marquez.db.LineageTestUtils; import marquez.db.OpenLineageDao; import marquez.jdbi.MarquezJdbiExternalPostgresExtension; @@ -64,7 +68,8 @@ public void tearDown(Jdbi jdbi) { @Test public void testColumnLineageEndpointByDataset() { - MarquezClient.Lineage lineage = client.getColumnLineageByDataset("namespace", "dataset_b"); + MarquezClient.Lineage lineage = + client.getColumnLineage(NodeId.of(new DatasetId("namespace", "dataset_b"))); assertThat(lineage.getGraph()).hasSize(3); assertThat(getNodeByFieldName(lineage, "col_a")).isPresent(); @@ -75,7 +80,7 @@ public void testColumnLineageEndpointByDataset() { @Test public void testColumnLineageEndpointByDatasetField() { MarquezClient.Lineage lineage = - client.getColumnLineageByDataset("namespace", "dataset_b", "col_c"); + client.getColumnLineage(NodeId.of(new DatasetFieldId("namespace", "dataset_b", "col_c"))); assertThat(lineage.getGraph()).hasSize(3); assertThat(getNodeByFieldName(lineage, "col_a")).isPresent(); @@ -86,7 +91,8 @@ public void testColumnLineageEndpointByDatasetField() { @Test public void testColumnLineageEndpointWithDepthLimit() { MarquezClient.Lineage lineage = -
client.getColumnLineageByDatasetField("namespace", "dataset_c", "col_d", 1, false); + client.getColumnLineage( + NodeId.of(new DatasetFieldId("namespace", "dataset_c", "col_d")), 1, false); assertThat(lineage.getGraph()).hasSize(2); assertThat(getNodeByFieldName(lineage, "col_c")).isPresent(); @@ -96,7 +102,7 @@ public void testColumnLineageEndpointWithDepthLimit() { @Test public void testColumnLineageEndpointWithDownstream() { MarquezClient.Lineage lineage = - client.getColumnLineageByDatasetField("namespace", "dataset_b", "col_c", 10, true); + client.getColumnLineage(NodeId.of(new JobId("namespace", "job1")), 10, true); assertThat(lineage.getGraph()).hasSize(4); assertThat(getNodeByFieldName(lineage, "col_d")).isPresent(); @@ -104,7 +110,8 @@ public void testColumnLineageEndpointWithDownstream() { @Test public void testColumnLineageEndpointByJob() { - MarquezClient.Lineage lineage = client.getColumnLineageByJob("namespace", "job1"); + MarquezClient.Lineage lineage = + client.getColumnLineage(NodeId.of(new JobId("namespace", "job1")), 1, false); assertThat(lineage.getGraph()).hasSize(3); assertThat(getNodeByFieldName(lineage, "col_a")).isPresent(); diff --git a/api/src/test/java/marquez/service/models/NodeIdTest.java b/api/src/test/java/marquez/service/models/NodeIdTest.java index a65b76dbd1..7162af0aad 100644 --- a/api/src/test/java/marquez/service/models/NodeIdTest.java +++ b/api/src/test/java/marquez/service/models/NodeIdTest.java @@ -10,7 +10,9 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.UUID; import marquez.common.models.DatasetFieldId; +import marquez.common.models.DatasetFieldVersionId; import marquez.common.models.DatasetId; import marquez.common.models.DatasetName; import marquez.common.models.FieldName; @@ -150,12 +152,16 @@ public void testDatasetField(String namespace, String dataset, String field) { 
"gs://bucket$/path/to/data$col_A#aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" }, delimiter = '$') - public void testDatasetFieldVersion(String namespace, String dataset, String field) { + public void testDatasetFieldVersion(String namespace, String dataset, String fieldWithVersion) { + String version = fieldWithVersion.split(VERSION_DELIM)[1]; + String field = fieldWithVersion.split(VERSION_DELIM)[0]; + NamespaceName namespaceName = NamespaceName.of(namespace); - FieldName fieldName = FieldName.of(field); + FieldName fieldName = FieldName.of(field.split(VERSION_DELIM)[0]); DatasetName datasetName = DatasetName.of(dataset); DatasetId dsId = new DatasetId(namespaceName, datasetName); - DatasetFieldId dsfId = new DatasetFieldId(dsId, fieldName); + DatasetFieldVersionId dsfId = + new DatasetFieldVersionId(dsId, fieldName, UUID.fromString(version)); NodeId nodeId = NodeId.of(dsfId); assertFalse(nodeId.isRunType()); assertFalse(nodeId.isJobType()); @@ -163,12 +169,12 @@ public void testDatasetFieldVersion(String namespace, String dataset, String fie assertTrue(nodeId.hasVersion()); assertTrue(nodeId.isDatasetFieldVersionType()); - assertEquals(dsfId, nodeId.asDatasetFieldId()); + assertEquals(dsfId, nodeId.asDatasetFieldVersionId()); assertEquals(nodeId, NodeId.of(nodeId.getValue())); - assertEquals(namespace, nodeId.asDatasetFieldId().getDatasetId().getNamespace().getValue()); - assertEquals(dataset, nodeId.asDatasetFieldId().getDatasetId().getName().getValue()); - assertEquals(field, nodeId.asDatasetFieldId().getFieldName().getValue()); assertEquals( - field.split(VERSION_DELIM)[1], nodeId.asDatasetFieldVersionId().getVersion().toString()); + namespace, nodeId.asDatasetFieldVersionId().getDatasetId().getNamespace().getValue()); + assertEquals(dataset, nodeId.asDatasetFieldVersionId().getDatasetId().getName().getValue()); + assertEquals(field, nodeId.asDatasetFieldVersionId().getFieldName().getValue()); + assertEquals(version, 
nodeId.asDatasetFieldVersionId().getVersion().toString()); } } diff --git a/clients/java/src/main/java/marquez/client/MarquezClient.java b/clients/java/src/main/java/marquez/client/MarquezClient.java index 3972f868ac..3af0ed433e 100644 --- a/clients/java/src/main/java/marquez/client/MarquezClient.java +++ b/clients/java/src/main/java/marquez/client/MarquezClient.java @@ -44,6 +44,7 @@ import marquez.client.models.Namespace; import marquez.client.models.NamespaceMeta; import marquez.client.models.Node; +import marquez.client.models.NodeId; import marquez.client.models.Run; import marquez.client.models.RunMeta; import marquez.client.models.RunState; @@ -115,50 +116,12 @@ public enum SortDirection { @Getter public final String value; } - public Lineage getColumnLineageByDataset( - @NonNull String namespaceName, @NonNull String datasetName) { - return getColumnLineageByDataset( - namespaceName, datasetName, DEFAULT_LINEAGE_GRAPH_DEPTH, false); - } - - public Lineage getColumnLineageByDataset( - @NonNull String namespaceName, @NonNull String datasetName, @NonNull String field) { - return getColumnLineageByDatasetField( - namespaceName, datasetName, field, DEFAULT_LINEAGE_GRAPH_DEPTH, false); - } - - public Lineage getColumnLineageByDataset( - @NonNull String namespaceName, - @NonNull String datasetName, - int depth, - boolean withDownstream) { - final String bodyAsJson = - http.get( - url.toColumnLineageUrlByDataset(namespaceName, datasetName, depth, withDownstream)); - return Lineage.fromJson(bodyAsJson); + public Lineage getColumnLineage(NodeId nodeId) { + return getColumnLineage(nodeId, DEFAULT_LINEAGE_GRAPH_DEPTH, false); } - public Lineage getColumnLineageByDatasetField( - @NonNull String namespaceName, - @NonNull String datasetName, - @NonNull String field, - int depth, - boolean withDownstream) { - final String bodyAsJson = - http.get( - url.toColumnLineageUrlByDatasetField( - namespaceName, datasetName, field, depth, withDownstream)); - return 
Lineage.fromJson(bodyAsJson); - } - - public Lineage getColumnLineageByJob(@NonNull String namespaceName, @NonNull String jobName) { - return getColumnLineageByJob(namespaceName, jobName, DEFAULT_LINEAGE_GRAPH_DEPTH, false); - } - - public Lineage getColumnLineageByJob( - @NonNull String namespaceName, @NonNull String jobName, int depth, boolean withDownstream) { - final String bodyAsJson = - http.get(url.toColumnLineageUrlByJob(namespaceName, jobName, depth, withDownstream)); + public Lineage getColumnLineage(NodeId nodeId, int depth, boolean withDownstream) { + final String bodyAsJson = http.get(url.toColumnLineageUrl(nodeId, depth, withDownstream)); return Lineage.fromJson(bodyAsJson); } diff --git a/clients/java/src/main/java/marquez/client/MarquezUrl.java b/clients/java/src/main/java/marquez/client/MarquezUrl.java index 242d85e907..29a704c753 100644 --- a/clients/java/src/main/java/marquez/client/MarquezUrl.java +++ b/clients/java/src/main/java/marquez/client/MarquezUrl.java @@ -42,9 +42,6 @@ import java.util.Map; import javax.annotation.Nullable; import lombok.NonNull; -import marquez.client.models.DatasetFieldId; -import marquez.client.models.DatasetId; -import marquez.client.models.JobId; import marquez.client.models.NodeId; import marquez.client.models.RunState; import marquez.client.models.SearchFilter; @@ -211,27 +208,9 @@ URL toSearchUrl( return from(searchPath(), queryParams.build()); } - URL toColumnLineageUrlByDatasetField( - String namespace, String dataset, String field, int depth, boolean withDownstream) { + URL toColumnLineageUrl(NodeId nodeId, int depth, boolean withDownstream) { final ImmutableMap.Builder queryParams = new ImmutableMap.Builder(); - queryParams.put("nodeId", NodeId.of(new DatasetFieldId(namespace, dataset, field)).getValue()); - queryParams.put("depth", String.valueOf(depth)); - queryParams.put("withDownstream", String.valueOf(withDownstream)); - return from(columnLineagePath(), queryParams.build()); - } - - URL 
toColumnLineageUrlByDataset( - String namespace, String dataset, int depth, boolean withDownstream) { - final ImmutableMap.Builder queryParams = new ImmutableMap.Builder(); - queryParams.put("nodeId", NodeId.of(new DatasetId(namespace, dataset)).getValue()); - queryParams.put("depth", String.valueOf(depth)); - queryParams.put("withDownstream", String.valueOf(withDownstream)); - return from(columnLineagePath(), queryParams.build()); - } - - URL toColumnLineageUrlByJob(String namespace, String job, int depth, boolean withDownstream) { - final ImmutableMap.Builder queryParams = new ImmutableMap.Builder(); - queryParams.put("nodeId", NodeId.of(new JobId(namespace, job)).getValue()); + queryParams.put("nodeId", nodeId.getValue()); queryParams.put("depth", String.valueOf(depth)); queryParams.put("withDownstream", String.valueOf(withDownstream)); return from(columnLineagePath(), queryParams.build()); diff --git a/clients/java/src/main/java/marquez/client/models/DatasetFieldVersionId.java b/clients/java/src/main/java/marquez/client/models/DatasetFieldVersionId.java new file mode 100644 index 0000000000..8e6a744c82 --- /dev/null +++ b/clients/java/src/main/java/marquez/client/models/DatasetFieldVersionId.java @@ -0,0 +1,24 @@ +/* + * Copyright 2018-2022 contributors to the Marquez project + * SPDX-License-Identifier: Apache-2.0 + */ + +package marquez.client.models; + +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; + +/** ID for {@code DatasetField} with a version of {@code Dataset}. 
*/ +@EqualsAndHashCode +@AllArgsConstructor +@ToString +public class DatasetFieldVersionId { + + @Getter private final String namespace; + @Getter private final String name; + @Getter private final String field; + @Getter private final UUID version; +} diff --git a/clients/java/src/main/java/marquez/client/models/DatasetVersionId.java b/clients/java/src/main/java/marquez/client/models/DatasetVersionId.java new file mode 100644 index 0000000000..cc468efd4e --- /dev/null +++ b/clients/java/src/main/java/marquez/client/models/DatasetVersionId.java @@ -0,0 +1,22 @@ +/* + * Copyright 2018-2022 contributors to the Marquez project + * SPDX-License-Identifier: Apache-2.0 + */ + +package marquez.client.models; + +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.NonNull; +import lombok.Value; + +/** Version ID for {@code Dataset}. */ +@Value +@Builder +@AllArgsConstructor +public class DatasetVersionId { + @NonNull String namespace; + @NonNull String name; + @NonNull UUID version; +} diff --git a/clients/java/src/main/java/marquez/client/models/NodeId.java b/clients/java/src/main/java/marquez/client/models/NodeId.java index c7235ec667..c655903ff1 100644 --- a/clients/java/src/main/java/marquez/client/models/NodeId.java +++ b/clients/java/src/main/java/marquez/client/models/NodeId.java @@ -12,8 +12,10 @@ import com.fasterxml.jackson.databind.annotation.JsonSerialize; import com.fasterxml.jackson.databind.util.StdConverter; import com.google.common.base.Joiner; +import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.annotation.Nullable; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NonNull; @@ -74,6 +76,31 @@ public static NodeId of(@NonNull JobId jobId) { return of(ID_JOINER.join(ID_PREFX_JOB, jobId.getNamespace(), jobId.getName())); } + public static NodeId of(@NonNull DatasetFieldVersionId datasetFieldVersionId) { + return of( + appendVersionTo( + 
ID_JOINER.join( + ID_PREFX_DATASET_FIELD, + datasetFieldVersionId.getNamespace(), + datasetFieldVersionId.getName(), + datasetFieldVersionId.getField()), + datasetFieldVersionId.getVersion())); + } + + public static NodeId of(@NonNull JobVersionId jobVersionId) { + return NodeId.of( + new JobId( + jobVersionId.getNamespace(), + appendVersionTo(jobVersionId.getName(), jobVersionId.getVersion()))); + } + + public static NodeId of(@NonNull DatasetVersionId versionId) { + return NodeId.of( + new DatasetId( + versionId.getNamespace(), + appendVersionTo(versionId.getName(), versionId.getVersion()))); + } + @JsonIgnore public boolean isDatasetFieldType() { return value.startsWith(ID_PREFX_DATASET_FIELD); @@ -89,6 +116,26 @@ public boolean isJobType() { return value.startsWith(ID_PREFX_JOB); } + @JsonIgnore + public boolean isDatasetFieldVersionType() { + return value.startsWith(ID_PREFX_DATASET_FIELD) && hasVersion(); + } + + @JsonIgnore + public boolean isDatasetVersionType() { + return value.startsWith(ID_PREFX_DATASET) && hasVersion(); + } + + @JsonIgnore + public boolean isJobVersionType() { + return value.startsWith(ID_PREFX_JOB) && hasVersion(); + } + + @JsonIgnore + public boolean hasVersion() { + return value.contains(VERSION_DELIM); + } + @JsonIgnore private String[] parts(int expectedParts, String expectedType) { @@ -139,6 +186,28 @@ public JobId asJobId() { return new JobId(parts[1], parts[2]); } + @JsonIgnore + public DatasetFieldVersionId asDatasetFieldVersionId() { + String[] parts = parts(4, ID_PREFX_DATASET_FIELD); + String[] nameAndVersion = parts[3].split(VERSION_DELIM); + return new DatasetFieldVersionId( + parts[1], parts[2], nameAndVersion[0], UUID.fromString(nameAndVersion[1])); + } + + @JsonIgnore + public JobVersionId asJobVersionId() { + String[] parts = parts(3, ID_PREFX_JOB); + String[] nameAndVersion = parts[2].split(VERSION_DELIM); + return new JobVersionId(parts[1], nameAndVersion[0], UUID.fromString(nameAndVersion[1])); + } + + @JsonIgnore + 
public DatasetVersionId asDatasetVersionId() { + String[] parts = parts(3, ID_PREFX_DATASET); + String[] nameAndVersion = parts[2].split(VERSION_DELIM); + return new DatasetVersionId(parts[1], nameAndVersion[0], UUID.fromString(nameAndVersion[1])); + } + public static class FromValue extends StdConverter { @Override public NodeId convert(@NonNull String value) { @@ -157,4 +226,8 @@ public String convert(@NonNull NodeId id) { public int compareTo(NodeId o) { return value.compareTo(o.getValue()); } + + private static String appendVersionTo(@NonNull final String value, @Nullable final UUID version) { + return (version == null) ? value : (value + VERSION_DELIM + version); + } } diff --git a/clients/java/src/test/java/marquez/client/MarquezClientTest.java b/clients/java/src/test/java/marquez/client/MarquezClientTest.java index a9ba5c54f5..a041ce310a 100644 --- a/clients/java/src/test/java/marquez/client/MarquezClientTest.java +++ b/clients/java/src/test/java/marquez/client/MarquezClientTest.java @@ -393,6 +393,34 @@ public class MarquezClientTest { private static final DatasetFieldId DATASET_FIELD_ID = new DatasetFieldId(NAMESPACE_NAME, DB_TABLE_NAME, FIELD_NAME); + private static final DatasetFieldId DATASET_FIELD_VERSION_ID = + new DatasetFieldId(NAMESPACE_NAME, DB_TABLE_NAME, FIELD_NAME); + + private static final Node LINEAGE_NODE = + new Node( + NodeId.of(DATASET_FIELD_ID), + NodeType.DATASET_FIELD, + new ColumnLineageNodeData( + NAMESPACE_NAME, + DB_TABLE_NAME, + FIELD_NAME, + "String", + Collections.singletonList( + new ColumnLineageInputField( + "namespace", + "inDataset", + "some-col1", + "transformationDescription", + "transformationType"))), + ImmutableSet.of( + Edge.of( + NodeId.of(DATASET_FIELD_ID), + NodeId.of(new DatasetFieldId("namespace", "inDataset", "some-col1")))), + ImmutableSet.of( + Edge.of( + NodeId.of(new DatasetFieldId("namespace", "outDataset", "some-col2")), + NodeId.of(DATASET_FIELD_ID)))); + private final MarquezUrl marquezUrl = 
MarquezUrl.create(DEFAULT_BASE_URL); @Mock private MarquezHttp http; private MarquezClient client; @@ -955,31 +983,7 @@ public void testCreateTag() throws Exception { @Test public void testGetColumnLineage() throws Exception { - Node node = - new Node( - NodeId.of(DATASET_FIELD_ID), - NodeType.DATASET_FIELD, - new ColumnLineageNodeData( - NAMESPACE_NAME, - DB_TABLE_NAME, - FIELD_NAME, - "String", - Collections.singletonList( - new ColumnLineageInputField( - "namespace", - "inDataset", - "some-col1", - "transformationDescription", - "transformationType"))), - ImmutableSet.of( - Edge.of( - NodeId.of(DATASET_FIELD_ID), - NodeId.of(new DatasetFieldId("namespace", "inDataset", "some-col1")))), - ImmutableSet.of( - Edge.of( - NodeId.of(new DatasetFieldId("namespace", "outDataset", "some-col2")), - NodeId.of(DATASET_FIELD_ID)))); - MarquezClient.Lineage lineage = new MarquezClient.Lineage(ImmutableSet.of(node)); + MarquezClient.Lineage lineage = new MarquezClient.Lineage(ImmutableSet.of(LINEAGE_NODE)); String lineageJson = lineage.toJson(); when(http.get( buildUrlFor( @@ -987,88 +991,13 @@ public void testGetColumnLineage() throws Exception { .thenReturn(lineageJson); Node retrievedNode = - client.getColumnLineageByDataset("namespace", "dataset").getGraph().stream() + client + .getColumnLineage(NodeId.of(new DatasetId("namespace", "dataset"))) + .getGraph() + .stream() .findAny() .get(); - assertThat(retrievedNode).isEqualTo(node); - } - - @Test - public void testGetColumnLineageByField() throws Exception { - Node node = - new Node( - NodeId.of(DATASET_FIELD_ID), - NodeType.DATASET_FIELD, - new ColumnLineageNodeData( - NAMESPACE_NAME, - DB_TABLE_NAME, - FIELD_NAME, - "String", - Collections.singletonList( - new ColumnLineageInputField( - "namespace", - "inDataset", - "some-col1", - "transformationDescription", - "transformationType"))), - ImmutableSet.of( - Edge.of( - NodeId.of(DATASET_FIELD_ID), - NodeId.of(new DatasetFieldId("namespace", "inDataset", "some-col1")))), - 
ImmutableSet.of( - Edge.of( - NodeId.of(new DatasetFieldId("namespace", "outDataset", "some-col2")), - NodeId.of(DATASET_FIELD_ID)))); - MarquezClient.Lineage lineage = new MarquezClient.Lineage(ImmutableSet.of(node)); - String lineageJson = lineage.toJson(); - when(http.get( - buildUrlFor( - "/column-lineage?nodeId=datasetField%3Anamespace%3Adataset%3Asome-col1&depth=20&withDownstream=false"))) - .thenReturn(lineageJson); - - Node retrievedNode = - client.getColumnLineageByDataset("namespace", "dataset", "some-col1").getGraph().stream() - .findAny() - .get(); - assertThat(retrievedNode).isEqualTo(node); - } - - @Test - public void testGetColumnLineageByJob() throws Exception { - Node node = - new Node( - NodeId.of(DATASET_FIELD_ID), - NodeType.DATASET_FIELD, - new ColumnLineageNodeData( - NAMESPACE_NAME, - DB_TABLE_NAME, - FIELD_NAME, - "String", - Collections.singletonList( - new ColumnLineageInputField( - "namespace", - "inDataset", - "some-col1", - "transformationDescription", - "transformationType"))), - ImmutableSet.of( - Edge.of( - NodeId.of(DATASET_FIELD_ID), - NodeId.of(new DatasetFieldId("namespace", "inDataset", "some-col1")))), - ImmutableSet.of( - Edge.of( - NodeId.of(new DatasetFieldId("namespace", "outDataset", "some-col2")), - NodeId.of(DATASET_FIELD_ID)))); - MarquezClient.Lineage lineage = new MarquezClient.Lineage(ImmutableSet.of(node)); - String lineageJson = lineage.toJson(); - when(http.get( - buildUrlFor( - "/column-lineage?nodeId=job%3Anamespace%3Ajob&depth=20&withDownstream=false"))) - .thenReturn(lineageJson); - - Node retrievedNode = - client.getColumnLineageByJob("namespace", "job").getGraph().stream().findAny().get(); - assertThat(retrievedNode).isEqualTo(node); + assertThat(retrievedNode).isEqualTo(LINEAGE_NODE); } private URL buildUrlFor(String pathTemplate) throws Exception { diff --git a/clients/java/src/test/java/marquez/client/MarquezUrlTest.java b/clients/java/src/test/java/marquez/client/MarquezUrlTest.java index 
195f756981..20308e0c15 100644 --- a/clients/java/src/test/java/marquez/client/MarquezUrlTest.java +++ b/clients/java/src/test/java/marquez/client/MarquezUrlTest.java @@ -7,6 +7,14 @@ import java.net.MalformedURLException; import java.net.URL; +import java.util.UUID; +import marquez.client.models.DatasetFieldId; +import marquez.client.models.DatasetFieldVersionId; +import marquez.client.models.DatasetId; +import marquez.client.models.DatasetVersionId; +import marquez.client.models.JobId; +import marquez.client.models.JobVersionId; +import marquez.client.models.NodeId; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -15,6 +23,7 @@ public class MarquezUrlTest { static String basePath = "http://marquez:5000"; static MarquezUrl marquezUrl; + static String version = UUID.randomUUID().toString(); @BeforeAll static void beforeAll() throws MalformedURLException { @@ -38,12 +47,54 @@ void testEncodedMarquezUrl() { void testToColumnLineageUrl() { Assertions.assertEquals( "http://marquez:5000/api/v1/column-lineage?nodeId=dataset%3Anamespace%3Adataset&depth=20&withDownstream=true", - marquezUrl.toColumnLineageUrlByDataset("namespace", "dataset", 20, true).toString()); + marquezUrl + .toColumnLineageUrl(NodeId.of(new DatasetId("namespace", "dataset")), 20, true) + .toString()); Assertions.assertEquals( "http://marquez:5000/api/v1/column-lineage?nodeId=datasetField%3Anamespace%3Adataset%3Afield&depth=20&withDownstream=true", marquezUrl - .toColumnLineageUrlByDatasetField("namespace", "dataset", "field", 20, true) + .toColumnLineageUrl( + NodeId.of(new DatasetFieldId("namespace", "dataset", "field")), 20, true) + .toString()); + + Assertions.assertEquals( + "http://marquez:5000/api/v1/column-lineage?nodeId=job%3Anamespace%3Ajob&depth=20&withDownstream=true", + marquezUrl + .toColumnLineageUrl(NodeId.of(new JobId("namespace", "job")), 20, true) + .toString()); + + Assertions.assertEquals( + 
"http://marquez:5000/api/v1/column-lineage?nodeId=dataset%3Anamespace%3Adataset%23" + + version + + "&depth=20&withDownstream=true", + marquezUrl + .toColumnLineageUrl( + NodeId.of(new DatasetVersionId("namespace", "dataset", UUID.fromString(version))), + 20, + true) + .toString()); + + Assertions.assertEquals( + "http://marquez:5000/api/v1/column-lineage?nodeId=datasetField%3Anamespace%3Adataset%3Afield%23" + + version + + "&depth=20&withDownstream=true", + marquezUrl + .toColumnLineageUrl( + NodeId.of( + new DatasetFieldVersionId( + "namespace", "dataset", "field", UUID.fromString(version))), + 20, + true) + .toString()); + + Assertions.assertEquals( + "http://marquez:5000/api/v1/column-lineage?nodeId=job%3Anamespace%3Ajob%23" + + version + + "&depth=20&withDownstream=true", + marquezUrl + .toColumnLineageUrl( + NodeId.of(new JobVersionId("namespace", "job", UUID.fromString(version))), 20, true) .toString()); } } diff --git a/clients/java/src/test/java/marquez/client/models/NodeIdTest.java b/clients/java/src/test/java/marquez/client/models/NodeIdTest.java index 359c5e59a5..8a1ee55581 100644 --- a/clients/java/src/test/java/marquez/client/models/NodeIdTest.java +++ b/clients/java/src/test/java/marquez/client/models/NodeIdTest.java @@ -5,10 +5,12 @@ package marquez.client.models; +import static marquez.client.models.NodeId.VERSION_DELIM; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.util.UUID; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; @@ -62,4 +64,76 @@ public void testJob(String namespace, String job) { assertEquals(namespace, nodeId.asJobId().getNamespace()); assertEquals(job, nodeId.asJobId().getName()); } + + @ParameterizedTest(name = "testJobWithVersion-{index} {argumentsWithNames}") + @CsvSource( + value = { + 
"my-namespace$my-job#aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee", + "org://team$my-job#aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + }, + delimiter = '$') + public void testJobWithVersion(String namespace, String job) { + JobId jobId = new JobId(namespace, job); + NodeId nodeId = NodeId.of(jobId); + assertTrue(nodeId.isJobType()); + assertFalse(nodeId.isDatasetType()); + assertTrue(nodeId.hasVersion()); + assertEquals(jobId, nodeId.asJobId()); + assertEquals(nodeId, NodeId.of(nodeId.getValue())); + assertEquals(namespace, nodeId.asJobId().getNamespace()); + assertEquals(job, nodeId.asJobId().getName()); + assertEquals(job.split(VERSION_DELIM)[1], nodeId.asJobVersionId().getVersion().toString()); + } + + @ParameterizedTest(name = "testDatasetWithVersion-{index} {argumentsWithNames}") + @CsvSource( + value = { + "my-namespace$my-dataset#aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee", + "gs://bucket$/path/to/data#aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee", + "postgresql://hostname:5432/database$my_table#aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee", + "my-namespace$my_struct#aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + }, + delimiter = '$') + public void testDatasetWithVersion(String namespace, String dataset) { + DatasetId dsId = new DatasetId(namespace, dataset); + NodeId nodeId = NodeId.of(dsId); + assertFalse(nodeId.isJobType()); + assertTrue(nodeId.isDatasetType()); + assertTrue(nodeId.isDatasetVersionType()); + assertTrue(nodeId.hasVersion()); + assertEquals(dsId, nodeId.asDatasetId()); + assertEquals(namespace, nodeId.asDatasetId().getNamespace()); + assertEquals(dataset, nodeId.asDatasetId().getName()); + assertEquals( + dataset.split(VERSION_DELIM)[1], nodeId.asDatasetVersionId().getVersion().toString()); + } + + @ParameterizedTest(name = "testDatasetFieldWithVersion-{index} {argumentsWithNames}") + @CsvSource( + value = { + "my-namespace$my-dataset$colA#aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee", + "gs://bucket$/path/to/data$colA#aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee", + 
"gs://bucket$/path/to/data$col_A#aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee" + }, + delimiter = '$') + public void testDatasetFieldWithVersion( + String namespace, String dataset, String fieldWithVersion) { + String version = fieldWithVersion.split(VERSION_DELIM)[1]; + String field = fieldWithVersion.split(VERSION_DELIM)[0]; + + DatasetFieldVersionId dsfId = + new DatasetFieldVersionId(namespace, dataset, field, UUID.fromString(version)); + NodeId nodeId = NodeId.of(dsfId); + assertFalse(nodeId.isJobType()); + assertFalse(nodeId.isDatasetType()); + assertTrue(nodeId.hasVersion()); + assertTrue(nodeId.isDatasetFieldVersionType()); + + assertEquals(dsfId, nodeId.asDatasetFieldVersionId()); + assertEquals(nodeId, NodeId.of(nodeId.getValue())); + assertEquals(namespace, nodeId.asDatasetFieldVersionId().getNamespace()); + assertEquals(dataset, nodeId.asDatasetFieldVersionId().getName()); + assertEquals(field, nodeId.asDatasetFieldVersionId().getField()); + assertEquals(version, nodeId.asDatasetFieldVersionId().getVersion().toString()); + } } From f66007e964b5f08988f9905da84d24335b6594cb Mon Sep 17 00:00:00 2001 From: Willy Lulciuc Date: Tue, 13 Dec 2022 17:41:08 +0100 Subject: [PATCH 02/13] Make name and type required for datasets in spec (#2305) Signed-off-by: wslulciuc Signed-off-by: wslulciuc --- spec/openapi.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/spec/openapi.yml b/spec/openapi.yml index ab32949c6c..f85ca6cfa5 100644 --- a/spec/openapi.yml +++ b/spec/openapi.yml @@ -774,8 +774,8 @@ components: schema: type: string example: dataset:food_delivery:public.delivery_7_days - description: The ID of the node. A node can either be a dataset node, a dataset field node or a job node. - The format of nodeId for dataset is `dataset::`, for dataset field + description: The ID of the node. A node can either be a dataset node, a dataset field node or a job node. 
+ The format of nodeId for dataset is `dataset::`, for dataset field is `datasetField:::`, and for job is `job::`. required: true @@ -1165,6 +1165,9 @@ components: description: description: The description of the field. type: string + required: + - name + - type tags: description: List of tags. type: array From 78725b147067313068a0c84ab56c620428d03b20 Mon Sep 17 00:00:00 2001 From: Willy Lulciuc Date: Tue, 13 Dec 2022 16:35:57 -0800 Subject: [PATCH 03/13] Remove unused filter on RunDao.updateStartState() (#2319) Signed-off-by: wslulciuc Signed-off-by: wslulciuc --- api/src/main/java/marquez/db/RunDao.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/main/java/marquez/db/RunDao.java b/api/src/main/java/marquez/db/RunDao.java index c916e4279a..0611ecf0c3 100644 --- a/api/src/main/java/marquez/db/RunDao.java +++ b/api/src/main/java/marquez/db/RunDao.java @@ -62,7 +62,7 @@ public interface RunDao extends BaseDao { + "SET updated_at = :transitionedAt, " + " start_run_state_uuid = :startRunStateUuid," + " started_at = :transitionedAt " - + "WHERE uuid = :rowUuid AND (updated_at < :transitionedAt or start_run_state_uuid is null)") + + "WHERE uuid = :rowUuid") void updateStartState(UUID rowUuid, Instant transitionedAt, UUID startRunStateUuid); @SqlUpdate( From 26f12c4ef4daf8f24f0ce3dad0f12a1b3cc1379a Mon Sep 17 00:00:00 2001 From: Michael Robinson <68482867+merobi-hub@users.noreply.github.com> Date: Wed, 14 Dec 2022 16:12:31 -0500 Subject: [PATCH 04/13] remove eslint errors on accented characters by editing .eslintrc (#2325) Signed-off-by: Michael Robinson Signed-off-by: Michael Robinson --- web/.eslintrc.js | 13 +++++++++++-- web/src/i18n/config.ts | 12 ++++++------ web/src/types/i18next.d.ts | 3 +-- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/web/.eslintrc.js b/web/.eslintrc.js index 5969a0a9ba..25f6418ccd 100644 --- a/web/.eslintrc.js +++ b/web/.eslintrc.js @@ -57,6 +57,15 @@ module.exports = { "ignoreCase": false, 
"ignoreMemberSort": false, "memberSyntaxSortOrder": ["none", "all", "multiple", "single"] - }] - } + }] + }, + 'overrides': [ + { + 'files': './src/i18n/config.ts', + 'rules': { + '@typescript-eslint/quotes': 'off' + } + } + ] + } diff --git a/web/src/i18n/config.ts b/web/src/i18n/config.ts index baf948d5f5..13427dc162 100644 --- a/web/src/i18n/config.ts +++ b/web/src/i18n/config.ts @@ -8,7 +8,7 @@ const DETECTION_OPTIONS = { order: ['localStorage'], lookupLocalStorage: 'lng', caches: ['localStorage'] -}; +} i18next .use(LanguageDetector) @@ -123,16 +123,16 @@ i18next }, jobs: { latest_tab: 'DERNIÈRE COURSE', - history_tab: 'HISTORIQUE D\'EXECUTION', + history_tab: "HISTORIQUE D'EXECUTION", location: 'EMPLACEMENT', empty_title: 'Pas de Course les Informations Disponibles', - empty_body: 'Essayez d\'ajouter quelques exécutions pour ce travail.', + empty_body: "Essayez d'ajouter quelques exécutions pour ce travail.", runinfo_subhead: 'FACETTES', runs_subhead: 'FACETTES' }, search: { search: 'Recherche', - jobs: 'd\'Emplois', + jobs: "d'Emplois", and: 'et', datasets: 'Jeux de Données' }, @@ -151,7 +151,7 @@ i18next }, dataset_info: { empty_title: 'Aucun jeu de données trouvé', - empty_body: 'Essayez d\'ajouter des champs de jeu de données.', + empty_body: "Essayez d'ajouter des champs de jeu de données.", facets_subhead: 'FACETTES', run_subhead: 'Créé par Run', duration: 'Durée' @@ -178,7 +178,7 @@ i18next name_col: 'NOM', namespace_col: 'ESPACE DE NOMS', updated_col: 'MISE À JOUR À', - latest_run_col: 'DERNIÈRE DURÉE D\'EXÉCUTION' + latest_run_col: "DERNIÈRE DURÉE D'EXÉCUTION" }, runs_columns: { id: 'ID', diff --git a/web/src/types/i18next.d.ts b/web/src/types/i18next.d.ts index 7198cf83e4..baa8eefe9c 100644 --- a/web/src/types/i18next.d.ts +++ b/web/src/types/i18next.d.ts @@ -1,5 +1,4 @@ -import i18next from '../i18n/config' -import { resources, defaultNS } from '../i18n/config' +import { defaultNS, resources } from '../i18n/config' declare module 'i18next' { 
interface CustomTypeOptions { From bc27201827d2c97fcb27d2eb0622e4d2590d954b Mon Sep 17 00:00:00 2001 From: Michael Robinson <68482867+merobi-hub@users.noreply.github.com> Date: Wed, 14 Dec 2022 17:40:29 -0500 Subject: [PATCH 05/13] redo event viewer icon fix (#2321) Signed-off-by: Michael Robinson Signed-off-by: Michael Robinson Co-authored-by: Willy Lulciuc --- web/src/components/sidenav/Sidenav.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/web/src/components/sidenav/Sidenav.tsx b/web/src/components/sidenav/Sidenav.tsx index 1463c854e3..b100211038 100644 --- a/web/src/components/sidenav/Sidenav.tsx +++ b/web/src/components/sidenav/Sidenav.tsx @@ -90,7 +90,10 @@ class Sidenav extends React.Component { title={i18next.t('sidenav.events')} active={this.props.location.pathname === '/events'} > - + From ef68ee5727510deee1f29e4d87ebf77c31903390 Mon Sep 17 00:00:00 2001 From: Peter Hicks Date: Thu, 15 Dec 2022 09:24:36 -0800 Subject: [PATCH 06/13] Web/update lint (#2322) * Updates for eslint-fix run. 
* Adding lint to ci check Co-authored-by: phix Co-authored-by: Willy Lulciuc --- .circleci/config.yml | 1 + .../components/core/date-picker/MqDatePicker.tsx | 10 +++++----- web/src/components/jobs/Runs.tsx | 1 - web/src/components/search/SearchPlaceholder.tsx | 4 +--- web/src/helpers/index.ts | 8 ++++---- web/src/helpers/time.ts | 6 +++--- web/src/store/actionCreators/index.ts | 13 +++++++++++-- web/src/store/reducers/events.ts | 6 +----- web/src/store/reducers/index.ts | 2 +- web/src/store/requests/events.ts | 4 ++-- web/src/store/sagas/index.ts | 13 ++++++++++--- web/src/types/api.ts | 8 ++++---- web/src/types/index.ts | 2 +- 13 files changed, 44 insertions(+), 34 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8fcb3e11d8..8b5bf0727f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -83,6 +83,7 @@ jobs: - v1-web-{{ .Branch }} - run: npm install - run: npm run test + - run: npm run eslint-fix - run: npm run build - save_cache: paths: diff --git a/web/src/components/core/date-picker/MqDatePicker.tsx b/web/src/components/core/date-picker/MqDatePicker.tsx index c314026721..14f9eb3737 100644 --- a/web/src/components/core/date-picker/MqDatePicker.tsx +++ b/web/src/components/core/date-picker/MqDatePicker.tsx @@ -1,11 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 -import React from 'react' +import { DateTimePicker } from '@material-ui/pickers' import { Theme } from '@material-ui/core' +import { alpha } from '@material-ui/core/styles' +import React from 'react' import createStyles from '@material-ui/core/styles/createStyles' import withStyles, { WithStyles } from '@material-ui/core/styles/withStyles' -import { DateTimePicker } from '@material-ui/pickers' -import { alpha } from '@material-ui/core/styles' const styles = (theme: Theme) => createStyles({ @@ -57,7 +57,7 @@ type DatePickerProps = WithStyles & OwnProps class MqDatePicker extends React.Component { render() { - const { classes, value, onChange, label = '', format = "MMM 
DD yyyy hh:mm a" } = this.props + const { classes, value, onChange, label = '', format = 'MMM DD yyyy hh:mm a' } = this.props return ( { } } -export default withStyles(styles)(MqDatePicker) \ No newline at end of file +export default withStyles(styles)(MqDatePicker) diff --git a/web/src/components/jobs/Runs.tsx b/web/src/components/jobs/Runs.tsx index d36ca210c2..3fd6ff22f7 100644 --- a/web/src/components/jobs/Runs.tsx +++ b/web/src/components/jobs/Runs.tsx @@ -27,7 +27,6 @@ import RunInfo from './RunInfo' import RunStatus from './RunStatus' import transitions from '@material-ui/core/styles/transitions' - const styles = (theme: Theme) => { return createStyles({ status: { diff --git a/web/src/components/search/SearchPlaceholder.tsx b/web/src/components/search/SearchPlaceholder.tsx index c8f0602f78..c0649ff7b0 100644 --- a/web/src/components/search/SearchPlaceholder.tsx +++ b/web/src/components/search/SearchPlaceholder.tsx @@ -21,9 +21,7 @@ const styles = (theme: Theme) => const importI18next = () => { const i18next = require('i18next') - return ( - i18next - ) + return i18next } const SearchPlaceholder: React.FC> = ({ classes }) => { diff --git a/web/src/helpers/index.ts b/web/src/helpers/index.ts index f311cf8435..307a166f2b 100644 --- a/web/src/helpers/index.ts +++ b/web/src/helpers/index.ts @@ -22,9 +22,9 @@ export const formatUpdatedAt = (updatedAt: string) => { } export const fileSize = (data: string) => { - const size = encodeURI(data).split(/%..|./).length - 1; + const size = encodeURI(data).split(/%..|./).length - 1 return { - kiloBytes: size / 1024, - megaBytes: (size / 1024) / 1024 + kiloBytes: size / 1024, + megaBytes: size / 1024 / 1024 } -} \ No newline at end of file +} diff --git a/web/src/helpers/time.ts b/web/src/helpers/time.ts index 637542695a..fceeac4bf0 100644 --- a/web/src/helpers/time.ts +++ b/web/src/helpers/time.ts @@ -28,9 +28,9 @@ export function stopWatchDuration(durationMs: number) { } export function formatDatePicker(val: string) { - 
return moment(val).format("YYYY-MM-DDTHH:mm:ss") + return moment(val).format('YYYY-MM-DDTHH:mm:ss') } export function formatDateAPIQuery(val: string) { - return moment(val).format("YYYY-MM-DDTHH:mm:ss[.000Z]") -} \ No newline at end of file + return moment(val).format('YYYY-MM-DDTHH:mm:ss[.000Z]') +} diff --git a/web/src/store/actionCreators/index.ts b/web/src/store/actionCreators/index.ts index f888f19c42..c993054de5 100644 --- a/web/src/store/actionCreators/index.ts +++ b/web/src/store/actionCreators/index.ts @@ -2,7 +2,16 @@ import * as actionTypes from './actionTypes' -import { Event, Dataset, DatasetVersion, Job, LineageGraph, Namespace, Run, Search } from '../../types/api' +import { + Dataset, + DatasetVersion, + Event, + Job, + LineageGraph, + Namespace, + Run, + Search +} from '../../types/api' import { JobOrDataset } from '../../components/lineage/types' export const fetchEvents = (after: string, before: string, limit: number) => ({ @@ -14,7 +23,7 @@ export const fetchEvents = (after: string, before: string, limit: number) => ({ } }) -export const fetchEventsSuccess = (events: Event[]) => ({ +export const fetchEventsSuccess = (events: Event[]) => ({ type: actionTypes.FETCH_EVENTS_SUCCESS, payload: { events diff --git a/web/src/store/reducers/events.ts b/web/src/store/reducers/events.ts index 13425856e2..7fd47c998c 100644 --- a/web/src/store/reducers/events.ts +++ b/web/src/store/reducers/events.ts @@ -1,11 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 import { Event } from '../../types/api' -import { - FETCH_EVENTS, - FETCH_EVENTS_SUCCESS, - RESET_EVENTS -} from '../actionCreators/actionTypes' +import { FETCH_EVENTS, FETCH_EVENTS_SUCCESS, RESET_EVENTS } from '../actionCreators/actionTypes' import { fetchEventsSuccess } from '../actionCreators' export type IEventsState = { isLoading: boolean; result: Event[]; init: boolean } diff --git a/web/src/store/reducers/index.ts b/web/src/store/reducers/index.ts index 32220728c8..b8be4e015b 100644 --- 
a/web/src/store/reducers/index.ts +++ b/web/src/store/reducers/index.ts @@ -5,8 +5,8 @@ import { Reducer, combineReducers } from 'redux' import { connectRouter } from 'connected-react-router' import datasetVersions, { IDatasetVersionsState } from './datasetVersions' import datasets, { IDatasetsState } from './datasets' -import events, { IEventsState } from './events' import display, { IDisplayState } from './display' +import events, { IEventsState } from './events' import jobs, { IJobsState } from './jobs' import lineage, { ILineageState } from './lineage' import namespaces, { INamespacesState } from './namespaces' diff --git a/web/src/store/requests/events.ts b/web/src/store/requests/events.ts index 4315aef6d7..dcaca7a9b4 100644 --- a/web/src/store/requests/events.ts +++ b/web/src/store/requests/events.ts @@ -7,6 +7,6 @@ import { genericFetchWrapper } from './index' export const getEvents = async (after = '', before = '', limit = 100, sortDirection = 'desc') => { const url = `${API_URL}/events/lineage?limit=${limit}&before=${before}&after=${after}&sortDirection=${sortDirection}` return genericFetchWrapper(url, { method: 'GET' }, 'fetchEvents').then((r: Events) => { - return r.events.map((d) => ({ ...d })) + return r.events.map(d => ({ ...d })) }) -} \ No newline at end of file +} diff --git a/web/src/store/sagas/index.ts b/web/src/store/sagas/index.ts index 0ed2578c4a..20bc3ced57 100644 --- a/web/src/store/sagas/index.ts +++ b/web/src/store/sagas/index.ts @@ -4,11 +4,11 @@ import * as Effects from 'redux-saga/effects' import { FETCH_DATASETS, FETCH_DATASET_VERSIONS, + FETCH_EVENTS, FETCH_JOBS, FETCH_LINEAGE, FETCH_RUNS, - FETCH_SEARCH, - FETCH_EVENTS + FETCH_SEARCH } from '../actionCreators/actionTypes' import { Namespaces } from '../../types/api' import { all, put, take } from 'redux-saga/effects' @@ -26,7 +26,14 @@ import { fetchRunsSuccess, fetchSearchSuccess } from '../actionCreators' -import { getDatasetVersions, getDatasets, getEvents, getJobs, 
getNamespaces, getRuns } from '../requests' +import { + getDatasetVersions, + getDatasets, + getEvents, + getJobs, + getNamespaces, + getRuns +} from '../requests' import { getLineage } from '../requests/lineage' import { getSearch } from '../requests/search' diff --git a/web/src/types/api.ts b/web/src/types/api.ts index 1c95b14e2f..7ef3267414 100644 --- a/web/src/types/api.ts +++ b/web/src/types/api.ts @@ -42,13 +42,13 @@ export interface Event { facets: object } inputs: { - name: string, - namespace: string, + name: string + namespace: string facets: object }[] outputs: { - name: string, - namespace: string, + name: string + namespace: string facets: object }[] } diff --git a/web/src/types/index.ts b/web/src/types/index.ts index 31ddea72b6..2e42864914 100644 --- a/web/src/types/index.ts +++ b/web/src/types/index.ts @@ -10,4 +10,4 @@ export interface IJob extends Job { latestRuns?: Run[] } -export type IFilterByDisplay = 'namespace' | 'sourceName' \ No newline at end of file +export type IFilterByDisplay = 'namespace' | 'sourceName' From a2a63ae17f82e0694b3cecdb60472956c83df597 Mon Sep 17 00:00:00 2001 From: Peter Hicks Date: Thu, 15 Dec 2022 09:53:28 -0800 Subject: [PATCH 07/13] Fix asset loading for web (#2323) * Updates for assets. * Newline. 
Co-authored-by: phix Co-authored-by: Willy Lulciuc --- web/src/components/header/Header.tsx | 9 ++------- web/src/{img => components/header}/marquez_logo.svg | 0 web/styles.d.ts | 2 ++ web/tsconfig.json | 5 ++++- web/webpack.common.js | 11 +++-------- 5 files changed, 11 insertions(+), 16 deletions(-) rename web/src/{img => components/header}/marquez_logo.svg (100%) diff --git a/web/src/components/header/Header.tsx b/web/src/components/header/Header.tsx index 79037fe15e..d65ab42bd7 100644 --- a/web/src/components/header/Header.tsx +++ b/web/src/components/header/Header.tsx @@ -10,6 +10,7 @@ import MqText from '../core/text/MqText' import NamespaceSelect from '../namespace-select/NamespaceSelect' import React, { ReactElement } from 'react' import Search from '../search/Search' +import marquez_logo from './marquez_logo.svg' const styles = (theme: Theme) => { return createStyles({ @@ -47,13 +48,7 @@ const Header = (props: HeaderProps): ReactElement => { - Marquez Logo + Marquez Logo diff --git a/web/src/img/marquez_logo.svg b/web/src/components/header/marquez_logo.svg similarity index 100% rename from web/src/img/marquez_logo.svg rename to web/src/components/header/marquez_logo.svg diff --git a/web/styles.d.ts b/web/styles.d.ts index de8f2c7dd7..cefbbd72f3 100644 --- a/web/styles.d.ts +++ b/web/styles.d.ts @@ -2,3 +2,5 @@ declare module '*.css' { const content: any; export = content; } + +declare module '*.svg'; diff --git a/web/tsconfig.json b/web/tsconfig.json index f420bd0957..e6ee8872e5 100644 --- a/web/tsconfig.json +++ b/web/tsconfig.json @@ -14,6 +14,9 @@ "downlevelIteration": true, "resolveJsonModule": false }, - "include": ["./src/**/*"], + "include": [ + "./src/**/*", + "styles.d.ts" + ], "exclude": ["./src/__tests__/*"] } diff --git a/web/webpack.common.js b/web/webpack.common.js index c95616ef90..73757e3d2a 100644 --- a/web/webpack.common.js +++ b/web/webpack.common.js @@ -29,13 +29,7 @@ module.exports = { }, { test: 
/\.(png|jpe?g|gif|svg)(\?v=\d+\.\d+\.\d+)?$/, - use: [{ - loader: 'file-loader', - options: { - name: '[name].[ext]', - outputPath: 'img/' - } - }] + loader: 'file-loader' }, { test: /\.(woff(2)?|ttf|eot|otf)(\?v=\d+\.\d+\.\d+)?$/, @@ -60,6 +54,7 @@ module.exports = { }, output: { filename: 'bundle.js', - path: path.resolve(__dirname, 'dist') + path: path.resolve(__dirname, 'dist'), + publicPath: '/' } }; From bc98a0bc22468e285e575eebf5c040e8e1a183e8 Mon Sep 17 00:00:00 2001 From: Peter Hicks Date: Fri, 16 Dec 2022 10:04:13 -0800 Subject: [PATCH 08/13] Updates for some styling synchronization. (#2324) * Updates for some styling synchronization. * Adding another page navigator for the bottom of the page. Co-authored-by: phix Co-authored-by: Willy Lulciuc --- .../core/date-picker/MqDatePicker.tsx | 3 +- .../core/input-base/MqInputBase.tsx | 16 ++++ web/src/routes/events/Events.tsx | 80 +++++++++++++------ 3 files changed, 75 insertions(+), 24 deletions(-) diff --git a/web/src/components/core/date-picker/MqDatePicker.tsx b/web/src/components/core/date-picker/MqDatePicker.tsx index 14f9eb3737..1fce20613e 100644 --- a/web/src/components/core/date-picker/MqDatePicker.tsx +++ b/web/src/components/core/date-picker/MqDatePicker.tsx @@ -14,8 +14,9 @@ const styles = (theme: Theme) => cursor: 'pointer', backgroundColor: 'transparent', border: `2px solid ${theme.palette.common.white}`, - padding: `${theme.spacing(1)}px ${theme.spacing(1)}px`, + padding: `${theme.spacing(1)}px ${theme.spacing(3)}px`, transition: theme.transitions.create(['border-color', 'box-shadow']), + borderRadius: theme.spacing(4), '& *': { cursor: 'pointer' }, diff --git a/web/src/components/core/input-base/MqInputBase.tsx b/web/src/components/core/input-base/MqInputBase.tsx index fe10c8701c..da7bfc9968 100644 --- a/web/src/components/core/input-base/MqInputBase.tsx +++ b/web/src/components/core/input-base/MqInputBase.tsx @@ -18,6 +18,14 @@ export const MqInputBase = withStyles((theme: Theme) => 
borderColor: theme.palette.primary.main, boxShadow: `${alpha(theme.palette.primary.main, 0.25)} 0 0 0 3px`, borderRadius: theme.spacing(4) + }, + '&:hover': { + borderColor: theme.palette.primary.main, + boxShadow: `${alpha(theme.palette.primary.main, 0.25)} 0 0 0 3px`, + '& > label': { + color: theme.palette.primary.main, + transition: theme.transitions.create(['color']) + } } } }) @@ -39,6 +47,14 @@ export const MqInputNoIcon = withStyles((theme: Theme) => borderColor: theme.palette.primary.main, boxShadow: `${alpha(theme.palette.primary.main, 0.25)} 0 0 0 3px`, borderRadius: theme.spacing(4) + }, + '&:hover': { + borderColor: theme.palette.primary.main, + boxShadow: `${alpha(theme.palette.primary.main, 0.25)} 0 0 0 3px`, + '& > label': { + color: theme.palette.primary.main, + transition: theme.transitions.create(['color']) + } } } }) diff --git a/web/src/routes/events/Events.tsx b/web/src/routes/events/Events.tsx index 738f4b8ff8..e8d38204c3 100644 --- a/web/src/routes/events/Events.tsx +++ b/web/src/routes/events/Events.tsx @@ -9,8 +9,10 @@ import { TableCell, TableHead, TableRow, - Theme + Theme, + Tooltip } from '@material-ui/core' +import { ChevronLeftRounded, ChevronRightRounded } from '@material-ui/icons' import { Event } from '../../types/api' import { IState } from '../../store/reducers' import { MqScreenLoad } from '../../components/core/screen-load/MqScreenLoad' @@ -22,6 +24,7 @@ import { formatDateAPIQuery, formatDatePicker } from '../../helpers/time' import { saveAs } from 'file-saver' import { theme } from '../../helpers/theme' import Box from '@material-ui/core/Box' +import IconButton from '@material-ui/core/IconButton' import MqDatePicker from '../../components/core/date-picker/MqDatePicker' import MqEmpty from '../../components/core/empty/MqEmpty' import MqJson from '../../components/core/code/MqJson' @@ -49,13 +52,16 @@ const styles = (theme: Theme) => { borderRadius: '50%' }, table: { - marginBottom: '100px' + marginBottom: theme.spacing(2) }, 
row: { cursor: 'pointer', '&:hover': { backgroundColor: theme.palette.action.hover } + }, + ml2: { + marginLeft: theme.spacing(2) } }) } @@ -129,7 +135,7 @@ class Events extends React.Component { if (eventsProps !== eventsState) { this.setState({ events: eventsProps, - pageIsLast: eventsProps.length < page * this.pageSize ? true : false + pageIsLast: eventsProps.length < page * this.pageSize }) } } @@ -140,7 +146,7 @@ class Events extends React.Component { getEvents() { const { events, page } = this.state - return events.slice(0 + (page - 1) * this.pageSize, this.pageSize + (page - 1) * this.pageSize) + return events.slice((page - 1) * this.pageSize, this.pageSize + (page - 1) * this.pageSize) } pageNavigation() { @@ -192,9 +198,32 @@ class Events extends React.Component { <> - - {i18next.t('events_route.title')} - Page: {this.pageNavigation()} + + + {i18next.t('events_route.title')} + Page: {this.pageNavigation()} + + + + this.handleClickPage('prev')} + > + + + + + this.handleClickPage('next')} + > + + + + { value={formatDatePicker(dateTo)} onChange={(e: any) => this.handleChangeDatepicker(e, 'to')} /> - - {events.length === 0 ? 
( @@ -324,6 +337,27 @@ class Events extends React.Component { )} + + + this.handleClickPage('prev')} + > + + + + + this.handleClickPage('next')} + > + + + + From 30bc79efb73468bcb92466a89ad43855758894ad Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 19 Dec 2022 07:38:38 +0100 Subject: [PATCH 09/13] fix(deps): update dependency pmd to v6.52.0 (#2311) Signed-off-by: Renovate Bot Signed-off-by: Renovate Bot Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index baebfc39f7..8b1c284627 100644 --- a/build.gradle +++ b/build.gradle @@ -106,7 +106,7 @@ subprojects { pmd { consoleOutput = true - toolVersion = "6.46.0" + toolVersion = "6.52.0" rulesMinimumPriority = 5 ruleSetFiles = rootProject.files("pmd-marquez.xml") ruleSets = [] From b3f1ae3c8d24059fcdd1265a9f8f88dd499c541f Mon Sep 17 00:00:00 2001 From: Michael Robinson <68482867+merobi-hub@users.noreply.github.com> Date: Mon, 19 Dec 2022 14:03:56 -0500 Subject: [PATCH 10/13] update changelog for 0.29.0 release (#2327) Signed-off-by: Michael Robinson Signed-off-by: Michael Robinson --- CHANGELOG.md | 51 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aba9f74b12..0f18616f5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,21 +1,54 @@ # Changelog -## [Unreleased](https://github.com/MarquezProject/marquez/compare/0.28.0...HEAD) +## [Unreleased](https://github.com/MarquezProject/marquez/compare/0.29.0...HEAD) + +## [0.29.0](https://github.com/MarquezProject/marquez/compare/0.28.0...0.29.0) - 2022-12-19 ### Added -* Column-lineage endpoints supports point-in-time requests [`#2265`](https://github.com/MarquezProject/marquez/pull/2265) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski) - *Enable requesting 
`column-lineage` endpoint by a dataset version, job version or dataset field of a specific dataset version.* -* Column lineage point in time java client [`#2269`](https://github.com/MarquezProject/marquez/pull/2269) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski) - *Java client methods to retrieve point in time `column-lineage`. Please note that existing methods `getColumnLineageByDataset`, `getColumnLineageByDataset` and `getColumnLineageByDatasetField` were replaced by a single `getColumnLineage` taking `NodeId` as a parameter.* +* Add point-in-time requests support to column-lineage endpoints [`#2265`](https://github.com/MarquezProject/marquez/pull/2265) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski) + *Enables requesting `column-lineage` endpoint by a dataset version, job version or dataset field of a specific dataset version.* +* Add column lineage point-in-time Java client methods [`#2269`](https://github.com/MarquezProject/marquez/pull/2269) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski) + *Java client methods to retrieve point-in-time `column-lineage`. 
Please note that the existing methods `getColumnLineageByDataset`, `getColumnLineageByDataset` and `getColumnLineageByDatasetField` are replaced by a single `getColumnLineage` method taking `NodeId` as a parameter.* +* Add raw event viewer to UI [`#2249`](https://github.com/MarquezProject/marquez/pull/2249) [@tito12](https://github.com/tito12) + *A new events page enables filtering events by date and expanding the payload by clicking on each event.* +* Update events page with styling synchronization [`#2324`](https://github.com/MarquezProject/marquez/pull/2324) [@phixMe](https://github.com/phixMe) + *Makes some updates to the new page to make it conform better to the overall design system.* +* Update helm Ingress template to be cross-compatible with recent k8s versions [`#2275`](https://github.com/MarquezProject/marquez/pull/2275) [@jlukenoff](https://github.com/jlukenoff) + *Certain components of the Ingress schema have changed in recent versions of Kubernetes. This change updates the Ingress helm template to render based on the semantic Kubernetes version.* +* Add delete namespace endpoint doc to OpenAPI docs [`#2295`](https://github.com/MarquezProject/marquez/pull/2295) [@mobuchowski](https://github.com/mobuchowski) + *Adds a doc about the delete namespace endpoint.* +* Add i18next and language switcher for i18n of UI [`#2254`](https://github.com/MarquezProject/marquez/pull/2254) [@merobi-hub](https://github.com/merobi-hub) [@phixMe](https://github.com/phixMe) + *Adds i18next framework, language switcher, and translations for i18n of UI.* +* Add indexed `created_at` column to lineage events table [`#2299`](https://github.com/MarquezProject/marquez/pull/2299) [@prachim-collab](https://github.com/prachim-collab) + *A new timestamp column in the database supports analytics use cases by allowing for identification of incrementally created events (backwards-compatible).* ### Fixed -* Allow null column type in column-lineage 
[`#2272`](https://github.com/MarquezProject/marquez/pull/2272) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski) +* Allow null column type in column lineage [`#2272`](https://github.com/MarquezProject/marquez/pull/2272) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski) + *The column-lineage endpoint was throwing an exception when no data type of the field was provided. Includes a test.* * Include error message for JSON processing exception [`#2271`](https://github.com/MarquezProject/marquez/pull/2271) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski) - *In case of JSON processing exceptions Marquez API should return exception message to a client.* + *In case of JSON processing exceptions, the Marquez API now returns an exception message to a client.* * Fix column lineage when multiple jobs write to same dataset [`#2289`](https://github.com/MarquezProject/marquez/pull/2289) [@pawel-big-lebowski](https://github.com/pawel-big-lebowski) - *The fix deprecates the way fields `transformationDescription` and `transformationType` are returned. The depracated way of returning those fields will be removed in 0.30.0.* + *The fix deprecates the way the fields `transformationDescription` and `transformationType` are returned. 
The deprecated way of returning those fields will be removed in 0.30.0.* +* Use raw link for `iconSearchArrow.svg` [`#2280`](https://github.com/MarquezProject/marquez/pull/2280) [@wslulciuc](https://github.com/wslulciuc) + *Using a direct link to the events viewer icon fixes a loading issue.* +* Fill run state of parent run when created by child run [`#2296`](https://github.com/MarquezProject/marquez/pull/2296) [@fm100](https://github.com/fm100) + *Adds a run state to the parent at creation time to address a missing run state issue in Airflow integration.* +* Update migration query to make it work with existing view [`#2308`](https://github.com/MarquezProject/marquez/pull/2308) [@fm100](https://github.com/fm100) + *Changes the V52 migration query to drop the view before `ALTER`. Because repeatable migration runs only when its checksum changes, it was necessary to get the view definition first then drop and recreate it.* +* Fix lineage for orphaned datasets [`#2314`](https://github.com/MarquezProject/marquez/pull/2314) [@collado-mike](https://github.com/collado-mike) + *Fixes lineage for datasets generated by jobs whose current versions no longer write to the databases in question.* +* Ensure job data in lineage query is not null or empty [`#2253`](https://github.com/MarquezProject/marquez/pull/2253) [@wslulciuc](https://github.com/wslulciuc) + *Changes the API to return an empty graph in the edge case of a job UUID that has no lineage when calling `LineageDao.getLineage()` yet is associated with a dataset. This case formerly resulted in an empty set and backend exception. 
Also includes logging and an API check for a `nodeID`.* +* Make `name` and `type` required for datasets [`#2305`](https://github.com/MarquezProject/marquez/pull/2305) [@wslulciuc](https://github.com/wslulciuc) + *When generating TypeScript from the OpenAPI spec, `name` and `type` were not required but should have been.* +* Remove unused filter on `RunDao.updateStartState()` [`#2319`](https://github.com/MarquezProject/marquez/pull/2319) [@wslulciuc](https://github.com/wslulciuc) + *Removes the conditions `updated_at < transitionedAt` and `start_run_state_uuid is null` to allow for updating the run state.* +* Update linter [`#2322`](https://github.com/MarquezProject/marquez/pull/2322) [@phixMe](https://github.com/phixMe) + *Adds `npm run eslint-fix` to the CI config to fail if it does not return with a RC 0.* +* Fix asset loading for web [`#2323`](https://github.com/MarquezProject/marquez/pull/2323) [@phixMe](https://github.com/phixMe) + *Fixes the webpack config and allows files to be imported in a modern capacity that enforces the assets exist.* ## [0.28.0](https://github.com/MarquezProject/marquez/compare/0.27.0...0.28.0) - 2022-11-21 @@ -23,7 +56,7 @@ * Optimize current runs query for lineage API [`#2211`](https://github.com/MarquezProject/marquez/pull/2211) [@prachim-collab](https://github.com/prachim-collab) *Add a simpler, alternate `getCurrentRuns` query that gets only simple runs from the database without the additional data from tables such as `run_args`, `job_context`, `facets`, etc., which required extra table joins.* -* Add Code Quality, DCO and Governance docs to project [`#2237`](https://github.com/MarquezProject/marquez/pull/2237) [`#2241`](https://github.com/MarquezProject/marquez/pull/2241) [@merobi-hub](https://github.com/MarquezProject/marquez/commits?author=merobi-hub) +* Add Code Quality, DCO and Governance docs to project [`#2237`](https://github.com/MarquezProject/marquez/pull/2237) 
[`#2241`](https://github.com/MarquezProject/marquez/pull/2241) [@merobi-hub](https://github.com/merobi-hub) *Adds a number of standard governance and procedure docs to the project.* * Add possibility to soft-delete namespaces [`#2244`](https://github.com/MarquezProject/marquez/pull/2244) [@mobuchowski](https://github.com/mobuchowski) *Adds the ability to "hide" inactive namespaces. The namespaces are undeleted when a relevant OL event is received.* From 7aa6ed003b61a89239fcc1a9637b6fabda67e1ab Mon Sep 17 00:00:00 2001 From: Michael Robinson Date: Mon, 19 Dec 2022 14:09:26 -0500 Subject: [PATCH 11/13] Prepare for release 0.29.0 Signed-off-by: Michael Robinson --- .env.example | 2 +- chart/Chart.yaml | 2 +- chart/values.yaml | 6 ++-- clients/java/README.md | 4 +-- docker/up.sh | 2 +- docs/openapi.html | 76 ++++++++++++++++++++++-------------------- gradle.properties | 2 +- spec/openapi.yml | 2 +- 8 files changed, 50 insertions(+), 46 deletions(-) diff --git a/.env.example b/.env.example index 6a4373070a..2981260fbd 100644 --- a/.env.example +++ b/.env.example @@ -1,4 +1,4 @@ API_PORT=5000 API_ADMIN_PORT=5001 WEB_PORT=3000 -TAG=0.28.0 +TAG=0.29.0 diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 7efda6839f..d7f6d39ff9 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -29,4 +29,4 @@ name: marquez sources: - https://github.com/MarquezProject/marquez - https://marquezproject.github.io/marquez/ -version: 0.28.0 +version: 0.29.0 diff --git a/chart/values.yaml b/chart/values.yaml index 074a271d62..0e0985100c 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -17,7 +17,7 @@ marquez: image: registry: docker.io repository: marquezproject/marquez - tag: 0.28.0 + tag: 0.29.0 pullPolicy: IfNotPresent ## Name of the existing secret containing credentials for the Marquez installation. ## When this is specified, it will take precedence over the values configured in the 'db' section. 
@@ -75,7 +75,7 @@ web: image: registry: docker.io repository: marquezproject/marquez-web - tag: 0.28.0 + tag: 0.29.0 pullPolicy: IfNotPresent ## Marquez website will run on this port ## @@ -107,7 +107,7 @@ postgresql: ## @param image.tag PostgreSQL image tag (immutable tags are recommended) ## image: - tag: 12.1.0 + tag: 12.1.0 ## Authentication parameters ## ref: https://github.com/bitnami/bitnami-docker-postgresql/blob/master/README.md#setting-the-root-password-on-first-run ## ref: https://github.com/bitnami/bitnami-docker-postgresql/blob/master/README.md#creating-a-database-on-first-run diff --git a/clients/java/README.md b/clients/java/README.md index 2ac38ca746..ddb6ddf80b 100644 --- a/clients/java/README.md +++ b/clients/java/README.md @@ -10,14 +10,14 @@ Maven: io.github.marquezproject marquez-java - 0.28.0 + 0.29.0 ``` or Gradle: ```groovy -implementation 'io.github.marquezproject:marquez-java:0.28.0 +implementation 'io.github.marquezproject:marquez-java:0.29.0' ``` ## Usage diff --git a/docker/up.sh b/docker/up.sh index b69065ebf2..43794e7c35 100755 --- a/docker/up.sh +++ b/docker/up.sh @@ -5,7 +5,7 @@ set -e -VERSION=0.28.0 +VERSION=0.29.0 DOCKER_DIR=$(dirname $0) title() { diff --git a/docs/openapi.html b/docs/openapi.html index 4ffd54cf00..fd98eb02c7 100644 --- a/docs/openapi.html +++ b/docs/openapi.html @@ -2156,7 +2156,7 @@ -

Marquez (0.28.0)

Download OpenAPI specification:Download

License: Apache 2.0

Marquez is an open source metadata service for the collection, aggregation, and visualization of a data ecosystem's metadata.

+ " fill="currentColor">

Marquez (0.29.0)

Download OpenAPI specification:Download

License: Apache 2.0

Marquez is an open source metadata service for the collection, aggregation, and visualization of a data ecosystem's metadata.

Namespaces

Create a namespace

Creates a new namespace object. A namespace enables the contextual grouping of related jobs and datasets. Namespaces must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), dashes (-), colons (:), slashes (/), or dots (.). A namespace is case-insensitive with a maximum length of 1024 characters. Note jobs and datasets will be unique within a namespace, but not across namespaces.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

Request Body schema: application/json
ownerName
required
string

The owner of the namespace.

@@ -2185,35 +2185,39 @@
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

Responses

Response samples

Content type
application/json
{
  • "name": "my-namespace",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "ownerName": "me",
  • "description": "My first namespace!"
}

List all namespaces

Returns a list of namespaces.

+
http://localhost:5000/api/v1/namespaces/{namespace}

Response samples

Content type
application/json
{
  • "name": "my-namespace",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "ownerName": "me",
  • "description": "My first namespace!"
}

Deletes a namespace

Soft deletes a namespace, and every job and dataset inside. On next event containing this namespace, the namespace will be undeleted.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+

Responses

Response samples

Content type
application/json
{
  • "name": "my-namespace",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "ownerName": "me",
  • "description": "My first namespace!"
}

List all namespaces

Returns a list of namespaces.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

offset
integer
Default: 0

The initial position from which to return results

Responses

Response samples

Content type
application/json
{
  • "namespaces": [
    ]
}

Events

List all received OpenLineage events.

Returns a list of OpenLineage events, sorted in direction of passed sort parameter. By default it is desc.

+
http://localhost:5000/api/v1/namespaces

Response samples

Content type
application/json
{
  • "namespaces": [
    ]
}

Events

List all received OpenLineage events.

Returns a list of OpenLineage events, sorted in the direction given by the sortDirection parameter. The default direction is desc.

query Parameters
sortDirection
string
Example: sortDirection=name

Sorts the results of your query by indicated direction asc or desc.

before
string <date-time>
Example: before=2022-09-15T07:47:19Z

Returns events before passed date.

after
string <date-time>
Example: after=2022-09-15T07:47:19Z

Returns events after passed date.

limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

Responses

Response samples

Content type
application/json
{}

Sources

Create a source Deprecated

Creates a new source object. A source is the physical location of a dataset such as a table in PostgreSQL, or topic in Kafka. A source enables the grouping of physical datasets to their physical source.

+
http://localhost:5000/api/v1/events/lineage

Response samples

Content type
application/json
{}

Sources

Create a source Deprecated

Creates a new source object. A source is the physical location of a dataset such as a table in PostgreSQL, or topic in Kafka. A source enables the grouping of physical datasets to their physical source.

path Parameters
source
required
string <= 1024 characters
Example: my-source

The name of the source.

Request Body schema: application/json
type
required
string

The type of the source.

connectionUrl
required
string <URL>

The URL to the location of the source.

description
string

The description of the source.

Responses

Request samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

Response samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "name": "my-source",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

Retrieve a source

Returns a source.

+
http://localhost:5000/api/v1/sources/{source}

Request samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

Response samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "name": "my-source",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

Retrieve a source

Returns a source.

path Parameters
source
required
string <= 1024 characters
Example: my-source

The name of the source.

Responses

Response samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "name": "my-source",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

List all sources

Returns a list of sources.

+
http://localhost:5000/api/v1/sources/{source}

Response samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "name": "my-source",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

List all sources

Returns a list of sources.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

offset
integer
Default: 0

The initial position from which to return results

Responses

Response samples

Content type
application/json
{
  • "sources": [
    ]
}

Datasets

Create a dataset Deprecated

Creates a new dataset.

+
http://localhost:5000/api/v1/sources

Response samples

Content type
application/json
{
  • "sources": [
    ]
}

Datasets

Create a dataset Deprecated

Creates a new dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

Request Body schema: application/json
Any of
type
required
string
Value: "DB_TABLE"

The type of the dataset.

@@ -2225,48 +2229,48 @@
runId
string

The ID associated with the run modifying the table.

Responses

Request samples

Content type
application/json
Example
{
  • "type": "DB_TABLE",
  • "physicalName": "public.mytable",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "description": "My first dataset!"
}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "upodatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a dataset

Returns a dataset.

+
http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}

Request samples

Content type
application/json
Example
{
  • "type": "DB_TABLE",
  • "physicalName": "public.mytable",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "description": "My first dataset!"
}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a dataset

Returns a dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "upodatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Soft deletes dataset.

Soft deletes dataset. It will be un-deleted if new OpenLineage event containing this dataset comes.

+
http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Soft deletes dataset.

Soft deletes a dataset. It will be un-deleted if a new OpenLineage event containing this dataset is received.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "upodatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a version for a dataset

Returns a version for a dataset.

+
http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a version for a dataset

Returns a version for a dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

version
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the job or dataset version.

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "version": "d224dac0-35d7-4d9b-bbbe-6fff1a8485ad",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "description": "My first dataset!",
  • "createdByRun": {
    }
}

List all versions for a dataset

Returns a list of versions for a dataset.

+
http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}/versions/{version}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "version": "d224dac0-35d7-4d9b-bbbe-6fff1a8485ad",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "description": "My first dataset!",
  • "createdByRun": {
    }
}

List all versions for a dataset

Returns a list of versions for a dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

Responses

Response samples

Content type
application/json
{
  • "versions": [
    ]
}

List all datasets

Returns a list of datasets.

+
http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}/versions

Response samples

Content type
application/json
{
  • "versions": [
    ]
}

List all datasets

Returns a list of datasets.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

offset
integer
Default: 0

The initial position from which to return results

Responses

Response samples

Content type
application/json
{
  • "datasets": [
    ],
  • "totalCount": 0
}

Tag a dataset

Tag an existing dataset.

+
http://localhost:5000/api/v1/namespaces/{namespace}/datasets

Response samples

Content type
application/json
{
  • "datasets": [
    ],
  • "totalCount": 0
}

Tag a dataset

Tag an existing dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

tag
required
string
Example: SENSITIVE

The name of the tag.

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "upodatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Tag a field

Tag an existing field of a dataset.

+
http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}/tags/{tag}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Tag a field

Tag an existing field of a dataset.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

field
required
string
Example: my_field

The name of the field.

tag
required
string
Example: SENSITIVE

The name of the tag.

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "upodatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Jobs

Create a job Deprecated

Creates a new job object. All job objects are immutable and are uniquely identified by a generated ID. Marquez will create a version of a job each time the contents of the object is modified. For example, the location of a job may change over time resulting in new versions. The accumulated versions can be listed, used to rerun a specific job version or possibly help debug a failed job run.

+
http://localhost:5000/api/v1/namespaces/{namespace}/datasets/{dataset}/fields/{field}/tags/{tag}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Jobs

Create a job Deprecated

Creates a new job object. All job objects are immutable and are uniquely identified by a generated ID. Marquez will create a version of a job each time the contents of the object is modified. For example, the location of a job may change over time resulting in new versions. The accumulated versions can be listed, used to rerun a specific job version or possibly help debug a failed job run.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

Request Body schema: application/json
object

The ID of the job.

@@ -2279,34 +2283,34 @@
runId
string

An optional run ID used to associate a job version to an existing job run.

Responses

Request samples

Content type
application/json
{}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a job

Retrieve a job.

+
http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}

Request samples

Content type
application/json
{}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a job

Retrieve a job.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Soft deletes job.

Soft deletes job. It will be un-deleted if new OpenLineage event containing this job comes.

+
http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Soft deletes job.

Soft deletes a job. It will be un-deleted if a new OpenLineage event containing this job is received.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

List all jobs

Returns a list of jobs.

+
http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

List all jobs

Returns a list of jobs.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

offset
integer
Default: 0

The initial position from which to return results

Responses

Response samples

Content type
application/json
{
  • "jobs": [
    ],
  • "totalCount": 0
}

Retrieve a version for a job

Returns a version for a job.

+
http://localhost:5000/api/v1/namespaces/{namespace}/jobs

Response samples

Content type
application/json
{
  • "jobs": [
    ],
  • "totalCount": 0
}

Retrieve a version for a job

Returns a version for a job.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

version
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the job or dataset version.

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "version": "56472c57-a2ef-4218-b7b7-d2af02a343fd",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "facets": { }
}

List all versions for a job

Returns a list of versions for a job.

+
http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}/versions/{version}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "version": "56472c57-a2ef-4218-b7b7-d2af02a343fd",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "facets": { }
}

List all versions for a job

Returns a list of versions for a job.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

Responses

Response samples

Content type
application/json
{
  • "versions": [
    ]
}

Create a run Deprecated

Creates a new run object for a job.

+
http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}/versions

Response samples

Content type
application/json
{
  • "versions": [
    ]
}

Create a run Deprecated

Creates a new run object for a job.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

Request Body schema: application/json
id
string <uuid>

An optional user-provided unique ID of the run. A run ID must be an UUID. If an ID for the run is not provided, a random UUID will be generated for the given run.

@@ -2315,69 +2319,69 @@
args
object

The arguments of the run.

Responses

Request samples

Content type
application/json
{
  • "args": {
    }
}

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "COMPLETED",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": "2019-05-09T20:05:46.815920Z",
  • "durationMs": 4250894125,
  • "args": {
    },
  • "context": {
    },
  • "facets": { }
}

List all runs

Returns a list of runs for a job.

+
http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}/runs

Request samples

Content type
application/json
{
  • "args": {
    }
}

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "COMPLETED",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": "2019-05-09T20:05:46.815920Z",
  • "durationMs": 4250894125,
  • "args": {
    },
  • "context": {
    },
  • "facets": { }
}

List all runs

Returns a list of runs for a job.

path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

job
required
string <= 1024 characters
Example: my-job

The name of the job.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset

offset
integer
Default: 0

The initial position from which to return results

Responses

Response samples

Content type
application/json
{
  • "runs": [
    ]
}

Retrieve a run

Retrieve a run.

+
http://localhost:5000/api/v1/namespaces/{namespace}/jobs/{job}/runs

Response samples

Content type
application/json
{
  • "runs": [
    ]
}

Retrieve a run

Retrieve a run.

path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

Responses

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Start a run Deprecated

Marks the run as RUNNING.

+
http://localhost:5000/api/v1/jobs/runs/{id}

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Start a run Deprecated

Marks the run as RUNNING.

path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

Responses

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Complete a run Deprecated

Marks the run as COMPLETED.

+
http://localhost:5000/api/v1/jobs/runs/{id}/start

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Complete a run Deprecated

Marks the run as COMPLETED.

path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

Responses

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "COMPLETED",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": "2019-05-09T20:05:46.815920Z",
  • "durationMs": 4250894125,
  • "args": {
    },
  • "context": {
    },
  • "facets": { }
}

Fail a run Deprecated

Marks the run as FAILED.

+
http://localhost:5000/api/v1/jobs/runs/{id}/complete

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "COMPLETED",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": "2019-05-09T20:05:46.815920Z",
  • "durationMs": 4250894125,
  • "args": {
    },
  • "context": {
    },
  • "facets": { }
}

Fail a run Deprecated

Marks the run as FAILED.

path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

Responses

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Abort a run Deprecated

Marks the run as ABORTED.

+
http://localhost:5000/api/v1/jobs/runs/{id}/fail

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Abort a run Deprecated

Marks the run as ABORTED.

path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

Responses

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Lineage

Record a single lineage event

Receive, process, and store lineage metadata using the OpenLineage standard.

+
http://localhost:5000/api/v1/jobs/runs/{id}/abort

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Lineage

Record a single lineage event

Receive, process, and store lineage metadata using the OpenLineage standard.

Request Body schema: application/json
any (LineageEvent)

Responses

Request samples

Content type
application/json
{}

Get a lineage graph

query Parameters
nodeId
required
string
Example: nodeId=dataset:food_delivery:public.delivery_7_days

The ID of the node. A node can be either a dataset node, a dataset field node, or a job node. The format of nodeId is dataset:<namespace_of_dataset>:<name_of_the_dataset> for a dataset, datasetField:<namespace_of_dataset>:<name_of_the_dataset>:<name_of_field> for a dataset field, and job:<namespace_of_the_job>:<name_of_the_job> for a job.

+
http://localhost:5000/api/v1/lineage

Request samples

Content type
application/json
{}

Get a lineage graph

query Parameters
nodeId
required
string
Example: nodeId=dataset:food_delivery:public.delivery_7_days

The ID of the node. A node can be either a dataset node, a dataset field node, or a job node. The format of nodeId is dataset:<namespace_of_dataset>:<name_of_the_dataset> for a dataset, datasetField:<namespace_of_dataset>:<name_of_the_dataset>:<name_of_field> for a dataset field, and job:<namespace_of_the_job>:<name_of_the_job> for a job.

depth
integer
Default: 20

Depth of the lineage graph to create.

Responses

Response samples

Content type
application/json
{
  • "graph": [
    ]
}

Column lineage

Get a column-lineage graph

query Parameters
nodeId
required
string
Example: nodeId=dataset:food_delivery:public.delivery_7_days

The ID of the node. A node can be either a dataset node, a dataset field node, or a job node. The format of nodeId is dataset:<namespace_of_dataset>:<name_of_the_dataset> for a dataset, datasetField:<namespace_of_dataset>:<name_of_the_dataset>:<name_of_field> for a dataset field, and job:<namespace_of_the_job>:<name_of_the_job> for a job.

+
http://localhost:5000/api/v1/lineage

Response samples

Content type
application/json
{
  • "graph": [
    ]
}

Column lineage

Get a column-lineage graph

query Parameters
nodeId
required
string
Example: nodeId=dataset:food_delivery:public.delivery_7_days

The ID of the node. A node can be either a dataset node, a dataset field node, or a job node. The format of nodeId is dataset:<namespace_of_dataset>:<name_of_the_dataset> for a dataset, datasetField:<namespace_of_dataset>:<name_of_the_dataset>:<name_of_field> for a dataset field, and job:<namespace_of_the_job>:<name_of_the_job> for a job.

depth
integer
Default: 20

Depth of the lineage graph to create.

withDownstream
boolean
Default: false

Determines if downstream lineage should be returned.

Responses

Response samples

Content type
application/json
{
  • "graph": [
    ]
}

Tags

Create a tag

Creates a new tag object.

+
http://localhost:5000/api/v1/column-lineage

Response samples

Content type
application/json
{
  • "graph": [
    ]
}

Tags

Create a tag

Creates a new tag object.

path Parameters
tag
required
string
Example: SENSITIVE

The name of the tag.

Request Body schema: application/json
description
string

The description of the tag.

Responses

Request samples

Content type
application/json
{
  • "description": "My first tag!"
}

Response samples

Content type
application/json
{
  • "tags": [
    ]
}

List all tags

Returns a list of tags.

+
http://localhost:5000/api/v1/tags/{tag}

Request samples

Content type
application/json
{
  • "description": "My first tag!"
}

Response samples

Content type
application/json
{
  • "tags": [
    ]
}

List all tags

Returns a list of tags.

query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return, starting from the offset.

offset
integer
Default: 0

The initial position from which to return results.

Responses

Response samples

Content type
application/json
{
  • "tags": [
    ]
}

Search

Query all datasets and jobs

Returns the datasets and jobs that match your query.

+
http://localhost:5000/api/v1/tags

Response samples

Content type
application/json
{
  • "tags": [
    ]
}

Search

Query all datasets and jobs

Returns the datasets and jobs that match your query.

query Parameters
q
required
string
Example: q=my-dataset

Query containing the pattern to match; pattern matching for datasets and jobs is string-based and case-insensitive. Use a percent sign (%) to match any string of zero or more characters (my-job%), or an underscore (_) to match a single character (_job_).

filter
string
Example: filter=dataset

Filters the results of your query by dataset or job.

sort
string
Example: sort=name

Sorts the results of your query by name or updated_at.

limit
integer
Default: 100
Example: limit=25

The number of results to return, starting from the offset.

Responses

Response samples

Content type
application/json
{
  • "totalCount": 1,
  • "results": [
    ]
}
+
http://localhost:5000/api/v1/search

Response samples

Content type
application/json
{
  • "totalCount": 1,
  • "results": [
    ]
}