diff --git a/.circleci/api-load-test.sh b/.circleci/api-load-test.sh index 510a84ca92..d8e1295efa 100755 --- a/.circleci/api-load-test.sh +++ b/.circleci/api-load-test.sh @@ -14,7 +14,7 @@ set -e # Build version of Marquez -readonly MARQUEZ_VERSION=0.50.0-SNAPSHOT +readonly MARQUEZ_VERSION=0.51.0-SNAPSHOT # Fully qualified path to marquez.jar readonly MARQUEZ_JAR="api/build/libs/marquez-api-${MARQUEZ_VERSION}.jar" diff --git a/.circleci/db-migration.sh b/.circleci/db-migration.sh index 7bcc68659d..e91b3fbe75 100755 --- a/.circleci/db-migration.sh +++ b/.circleci/db-migration.sh @@ -13,7 +13,7 @@ # Version of PostgreSQL readonly POSTGRES_VERSION="14" # Version of Marquez -readonly MARQUEZ_VERSION=0.49.0 +readonly MARQUEZ_VERSION=0.50.0 # Build version of Marquez readonly MARQUEZ_BUILD_VERSION="$(git log --pretty=format:'%h' -n 1)" # SHA1 readonly POSTGRES_PORT=5432 diff --git a/.env.example b/.env.example index 6cb62675e4..0b1cc7b81c 100644 --- a/.env.example +++ b/.env.example @@ -3,4 +3,4 @@ API_ADMIN_PORT=5001 WEB_PORT=3000 POSTGRES_PORT=5432 SEARCH_PORT=9200 -TAG=0.49.0 +TAG=0.50.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c23771cc8..ca14da1c02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,45 @@ # Changelog -## [Unreleased](https://github.com/MarquezProject/marquez/compare/0.48.0...HEAD) +## [Unreleased](https://github.com/MarquezProject/marquez/compare/0.50.0...HEAD) + +## [0.50.0](https://github.com/MarquezProject/marquez/compare/0.49.0...0.50.0) - 2024-10-23 + +### Added + +* Web: **New** _Data Observability_ dashboard for _stats_ on OpenLineage events (`24hrs`, past `7.days`); views are also available for _sources_, _datasets_, and _jobs_; **new** job list view has also been introduced displaying the latest `N` runs (and duration) for a given job [`#2913`](https://github.com/MarquezProject/marquez/pull/2913) [@phixMe](https://github.com/phixMe) +* Web: `404` page [`#2890`](https://github.com/MarquezProject/marquez/pull/2890) 
[@phixMe](https://github.com/phixMe) +* Web: Display _parent_ job (if present) in _job_ panel [`#2868`](https://github.com/MarquezProject/marquez/pull/2868) [@phixMe](https://github.com/phixMe) +* Web: Allow override of `web.port` via `WEB_PORT` environment variable [`#2838`](https://github.com/MarquezProject/marquez/pull/2838) [@bidlako](https://github.com/bidlako) +* Web: Allow _nullable_ columns for schema in _dataset_ panel (use `N/A`) [`#2896`](https://github.com/MarquezProject/marquez/pull/2896) [@phixMe](https://github.com/phixMe) +* Web: Better feedback when lineage events are loading [`#2916`](https://github.com/MarquezProject/marquez/pull/2916) [@NisargChokshi45](https://github.com/NisargChokshi45) +* API: `Job` object will now return `Job.latestRuns` (latest `N` runs) and `Job.latestRun` (last run to execute) [`#2901`](https://github.com/MarquezProject/marquez/pull/2901) [@phixMe](https://github.com/phixMe) +* API: Use `io.openlineage.server.*` pkg and **class** [`Metadata`](https://github.com/MarquezProject/marquez/blob/main/api/src/main/java/marquez/api/models/Metadata.java) (utility class for `OpenLineage.RunEvent`) [`#2853`](https://github.com/MarquezProject/marquez/pull/2853) [@wslulciuc](https://github.com/wslulciuc) +* API: Use `TIMESTAMPTZ` for _timestamps_ in database; supports _Data Observability_ dashboard with timezone of user [`#2924`](https://github.com/MarquezProject/marquez/pull/2924) [@wslulciuc](https://github.com/wslulciuc) +* API: Set `current_run_uuid` in **table** `jobs` optimizing query for `JobDao.findAll()` [`#2929`](https://github.com/MarquezProject/marquez/pull/2929) [@wslulciuc](https://github.com/wslulciuc) +* API: **New** `GET` `/api/v1/jobs` [`#2930`](https://github.com/MarquezProject/marquez/pull/2930) [@wslulciuc](https://github.com/wslulciuc) +* CLI: **New** cmd args for [`cli.MetadataCommand`](https://github.com/MarquezProject/marquez/blob/main/api/src/main/java/marquez/cli/MetadataCommand.java) 
[`#2923`](https://github.com/MarquezProject/marquez/pull/2923) [@wslulciuc](https://github.com/wslulciuc) + * `--jobs`: _limits OL jobs up to N (default: 5)_ + * `--runs-per-job`: _limits OL run executions per job up to N (default: 10)_ + * `--runs-active`: _limits OL run executions marked as active (='RUNNING') up to N_ + * `--max-run-fails-per-job`: _maximum OL run fails per job (default: 2)_ + * `--min-run-duration`: _minimum OL run duration (in seconds) per execution (default: 300)_ + * `--run-start-time`: _specifies the OL run start time in UTC ISO ('YYYY-MM-DDTHH:MM:SSZ'); used for the initial OL run, with subsequent runs starting relative to the initial start time. (default: 2024-10-15T01:00:11.080828Z)_ + * `--run-end-time`: _specifies the OL run end time in UTC ISO ('YYYY-MM-DDTHH:MM:SSZ'); used for the initial OL run, with subsequent runs ending relative to the initial end time. (default: 2024-10-15T01:07:25.080828Z)_ + +### Fixed + +* Web: Better rendering of long text [`#2942`](https://github.com/MarquezProject/marquez/pull/2942) [@phixMe](https://github.com/phixMe) +* Web: Display full `runID` and check icon when copied [`#2940`](https://github.com/MarquezProject/marquez/pull/2940) [`#2941`](https://github.com/MarquezProject/marquez/pull/2941) [@wslulciuc](https://github.com/wslulciuc) [@phixMe](https://github.com/phixMe) +* Web: Use **DatasetVersionAPI** to display latest schema and remove extra job facets API call in _dataset_ panel [`#2938`](https://github.com/MarquezProject/marquez/pull/2938) [@phixMe](https://github.com/phixMe) +* Web: Use **DatasetAPI** for data quality assertions in _dataset_ panel [`#2937`](https://github.com/MarquezProject/marquez/pull/2937) [@phixMe](https://github.com/phixMe) +* Web: Fill-in _job_ node in lineage graph with correct color for `JobEvent`s [`#2934`](https://github.com/MarquezProject/marquez/pull/2934) [@phixMe](https://github.com/phixMe) +* Web: Fill-in _job_ node in lineage graph with correct color for run 
states `RUNNING`, `COMPLETED`, etc [`#2897`](https://github.com/MarquezProject/marquez/pull/2897) [@phixMe](https://github.com/phixMe) +* API: Pagination for `DatasetVersion.findAll()`; not all dataset versions were returned for `GET` `/api/v1/namespaces/{namespace}/datasets/{dataset}/versions` [`#2944`](https://github.com/MarquezProject/marquez/pull/2945) [@inanalper](https://github.com/inanalper) +* API: `null` namespace and dataset name in **view** `dataset_view` for old versions; use **table** `dataset_versions` instead in column lineage query [`#2881`](https://github.com/MarquezProject/marquez/pull/2881) [@sophiely](https://github.com/sophiely) +* API: Missing `DELETE CASCADE` on **table** `job_facets` [`#2878`](https://github.com/MarquezProject/marquez/pull/2878) [@mattwparas](https://github.com/mattwparas) +* API: Ensure `Job.latestRun` in `Job` object is set for runs in a `RUNNING` state; before `Job.latestRun` was set only for a run in a _done_ state (`COMPLETED` / `FAILED`) [`#2933`](https://github.com/MarquezProject/marquez/pull/2933) [@phixMe](https://github.com/phixMe) +* CLI: Repurpose cmd `db-migrate` to run all pending database migrations, no longer coupling migrations with HTTP server startup [`#2936`](https://github.com/MarquezProject/marquez/pull/2936) [@davidjgoss](https://github.com/davidjgoss) +* Chart: Missing common `labels` for `deployment.replicas` [`#2877`](https://github.com/MarquezProject/marquez/pull/2877) [@alaturqua](https://github.com/alaturqua) ## [0.49.0](https://github.com/MarquezProject/marquez/compare/0.48.0...0.49.0) - 2024-08-07 diff --git a/README.md b/README.md index 0baa0ad58b..66beec2a66 100644 --- a/README.md +++ b/README.md @@ -98,8 +98,8 @@ Versions of Marquez are compatible with OpenLineage unless noted otherwise. 
We e | **Marquez** | **OpenLineage** | **Status** | |--------------------------------------------------------------------------------------------------|---------------------------------------------------------------|---------------| | [`UNRELEASED`](https://github.com/MarquezProject/marquez/blob/main/CHANGELOG.md#unreleased) | [`2-0-2`](https://openlineage.io/spec/2-0-2/OpenLineage.json) | `CURRENT` | -| [`0.49.0`](https://github.com/MarquezProject/marquez/blob/0.49.0/CHANGELOG.md#0490---2024-08-07) | [`2-0-2`](https://openlineage.io/spec/2-0-2/OpenLineage.json) | `RECOMMENDED` | -| [`0.48.0`](https://github.com/MarquezProject/marquez/blob/0.45.0/CHANGELOG.md#0480---2024-08-05) | [`2-0-2`](https://openlineage.io/spec/2-0-2/OpenLineage.json) | `MAINTENANCE` | +| [`0.50.0`](https://github.com/MarquezProject/marquez/blob/main/CHANGELOG.md#0500---2024-10-23) | [`2-0-2`](https://openlineage.io/spec/2-0-2/OpenLineage.json) | `RECOMMENDED` | +| [`0.49.0`](https://github.com/MarquezProject/marquez/blob/0.49.0/CHANGELOG.md#0490---2024-08-07) | [`2-0-2`](https://openlineage.io/spec/2-0-2/OpenLineage.json) | `MAINTENANCE` | > **Note:** The [`openlineage-python`](https://pypi.org/project/openlineage-python) and [`openlineage-java`](https://central.sonatype.com/artifact/io.openlineage/openlineage-java) libraries will a higher version than the OpenLineage [specification](https://github.com/OpenLineage/OpenLineage/tree/main/spec) as they have different version requirements. 
diff --git a/api/build.gradle b/api/build.gradle index 2d90f41958..3965a751e5 100644 --- a/api/build.gradle +++ b/api/build.gradle @@ -51,8 +51,8 @@ dependencies { implementation 'com.graphql-java:graphql-java:20.9' implementation 'com.graphql-java-kickstart:graphql-java-servlet:12.0.0' - implementation 'org.opensearch.client:opensearch-rest-client:2.15.0' - implementation 'org.opensearch.client:opensearch-java:2.6.0' + implementation 'org.opensearch.client:opensearch-rest-client:2.17.1' + implementation 'org.opensearch.client:opensearch-java:2.16.0' testImplementation "io.dropwizard:dropwizard-testing:${dropwizardVersion}" testImplementation "org.jdbi:jdbi3-testing:${jdbi3Version}" diff --git a/api/src/main/java/marquez/MarquezApp.java b/api/src/main/java/marquez/MarquezApp.java index 37a7b2c16e..15406d95c2 100644 --- a/api/src/main/java/marquez/MarquezApp.java +++ b/api/src/main/java/marquez/MarquezApp.java @@ -28,7 +28,7 @@ import marquez.api.filter.JobRedirectFilter; import marquez.api.filter.exclusions.Exclusions; import marquez.api.filter.exclusions.ExclusionsConfig; -import marquez.cli.DbMigrationCommand; +import marquez.cli.DbMigrateCommand; import marquez.cli.DbRetentionCommand; import marquez.cli.MetadataCommand; import marquez.cli.SeedCommand; @@ -90,6 +90,7 @@ public void initialize(@NonNull Bootstrap bootstrap) { new EnvironmentVariableSubstitutor(ERROR_ON_UNDEFINED))); // Add CLI commands + bootstrap.addCommand(new DbMigrateCommand()); bootstrap.addCommand(new DbRetentionCommand()); bootstrap.addCommand(new MetadataCommand()); bootstrap.addCommand(new SeedCommand()); @@ -202,12 +203,6 @@ public void registerResources( } } - @Override - protected void addDefaultCommands(Bootstrap bootstrap) { - bootstrap.addCommand(new DbMigrationCommand<>(this)); - super.addDefaultCommands(bootstrap); - } - private void registerServlets(@NonNull Environment env) { log.debug("Registering servlets..."); diff --git a/api/src/main/java/marquez/MarquezContext.java 
b/api/src/main/java/marquez/MarquezContext.java index ad3a5b4b04..8478c40752 100644 --- a/api/src/main/java/marquez/MarquezContext.java +++ b/api/src/main/java/marquez/MarquezContext.java @@ -155,7 +155,7 @@ private MarquezContext( this.tagService = new TagService(baseDao); this.tagService.init(tags); this.openLineageService = new OpenLineageService(baseDao, runService); - this.lineageService = new LineageService(lineageDao, jobDao); + this.lineageService = new LineageService(lineageDao, jobDao, runDao); this.columnLineageService = new ColumnLineageService(columnLineageDao, datasetFieldDao); this.searchService = new SearchService(searchConfig); this.statsService = new StatsService(statsDao); diff --git a/api/src/main/java/marquez/cli/DbMigrateCommand.java b/api/src/main/java/marquez/cli/DbMigrateCommand.java new file mode 100644 index 0000000000..2b295ff51a --- /dev/null +++ b/api/src/main/java/marquez/cli/DbMigrateCommand.java @@ -0,0 +1,42 @@ +/* + * Copyright 2018-2022 contributors to the Marquez project + * SPDX-License-Identifier: Apache-2.0 + */ + +package marquez.cli; + +import io.dropwizard.cli.ConfiguredCommand; +import io.dropwizard.db.DataSourceFactory; +import io.dropwizard.db.ManagedDataSource; +import io.dropwizard.setup.Bootstrap; +import lombok.NonNull; +import lombok.extern.slf4j.Slf4j; +import marquez.MarquezConfig; +import marquez.db.DbMigration; +import net.sourceforge.argparse4j.inf.Namespace; + +/** + * A command to manually run database migrations. This command is to be used to run migrations + * decoupled from application deployment. 
+ */ +@Slf4j +public class DbMigrateCommand extends ConfiguredCommand { + + public DbMigrateCommand() { + super("db-migrate", "A command to manually run database migrations."); + } + + @Override + protected void run( + @NonNull Bootstrap bootstrap, + @NonNull Namespace namespace, + @NonNull MarquezConfig configuration) + throws Exception { + + final DataSourceFactory sourceFactory = configuration.getDataSourceFactory(); + final ManagedDataSource source = + sourceFactory.build(bootstrap.getMetricRegistry(), "MarquezApp-source"); + + DbMigration.migrateDbOrError(configuration.getFlywayFactory(), source, true); + } +} diff --git a/api/src/main/java/marquez/cli/DbMigrationCommand.java b/api/src/main/java/marquez/cli/DbMigrationCommand.java deleted file mode 100644 index 34953fddcd..0000000000 --- a/api/src/main/java/marquez/cli/DbMigrationCommand.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright 2018-2022 contributors to the Marquez project - * SPDX-License-Identifier: Apache-2.0 - */ - -package marquez.cli; - -import io.dropwizard.Application; -import io.dropwizard.cli.EnvironmentCommand; -import io.dropwizard.db.DataSourceFactory; -import io.dropwizard.db.ManagedDataSource; -import io.dropwizard.jdbi3.JdbiFactory; -import io.dropwizard.setup.Environment; -import javax.sql.DataSource; -import lombok.extern.slf4j.Slf4j; -import marquez.db.migrations.V57_1__BackfillFacets; -import net.sourceforge.argparse4j.inf.Namespace; -import net.sourceforge.argparse4j.inf.Subparser; -import org.jdbi.v3.core.Jdbi; -import org.jdbi.v3.jackson2.Jackson2Plugin; -import org.jdbi.v3.postgres.PostgresPlugin; -import org.jdbi.v3.sqlobject.SqlObjectPlugin; - -/** - * A command to manually run database migrations when needed. This migration requires a heavy DB - * operation which can be done asynchronously (with limited API downtime) due to separate migration - * command. 
- */ -@Slf4j -public class DbMigrationCommand extends EnvironmentCommand { - - private static final String DB_MIGRATE = "db-migrate"; - private static final String MIGRATION_V57_DESCRIPTION = - """ - A command to manually run V57 database migration. - Please refer to https://github.com/MarquezProject/marquez/blob/main/api/src/main/resources/marquez/db/migration/V57__readme.md for more details. - """; - - private static final String COMMAND_DESCRIPTION = - """ - A command to manually run database migrations. - Extra parameters are required to specify the migration to run. - """; - - /** - * Creates a new environment command. - * - * @param application the application providing this command - */ - public DbMigrationCommand(Application application) { - super(application, DB_MIGRATE, COMMAND_DESCRIPTION); - } - - @Override - public void configure(Subparser subparser) { - subparser - .addArgument("--chunkSize") - .dest("chunkSize") - .type(Integer.class) - .required(false) - .setDefault(V57_1__BackfillFacets.DEFAULT_CHUNK_SIZE) - .help("amount of lineage_events rows processed in a single SQL query and transaction."); - - subparser - .addArgument("--version") - .dest("version") - .type(String.class) - .required(true) - .help("migration version to apply like 'v57'"); - - addFileArgument(subparser); - } - - @Override - protected void run( - Environment environment, Namespace namespace, marquez.MarquezConfig configuration) - throws Exception { - - final DataSourceFactory sourceFactory = configuration.getDataSourceFactory(); - final DataSource source = sourceFactory.build(environment.metrics(), "MarquezApp-source"); - final JdbiFactory factory = new JdbiFactory(); - - Jdbi jdbi = - factory - .build( - environment, - configuration.getDataSourceFactory(), - (ManagedDataSource) source, - "postgresql-command") - .installPlugin(new SqlObjectPlugin()) - .installPlugin(new PostgresPlugin()) - .installPlugin(new Jackson2Plugin()); - - 
MarquezMigrations.valueOf(namespace.getString("version")).run(jdbi, namespace); - } - - enum MarquezMigrations { - v57 { - public void run(Jdbi jdbi, Namespace namespace) throws Exception { - log.info("Running V57_1__BackfillFacets migration"); - V57_1__BackfillFacets migration = new V57_1__BackfillFacets(); - migration.setManual(true); - migration.setJdbi(jdbi); - migration.setChunkSize(namespace.getInt("chunkSize")); - migration.migrate(null); - } - }; - - public void run(Jdbi jdbi, Namespace namespace) throws Exception { - throw new UnsupportedOperationException(); - } - } -} diff --git a/api/src/main/java/marquez/db/DatasetVersionDao.java b/api/src/main/java/marquez/db/DatasetVersionDao.java index 0f4d4d0fd5..df06e714b4 100644 --- a/api/src/main/java/marquez/db/DatasetVersionDao.java +++ b/api/src/main/java/marquez/db/DatasetVersionDao.java @@ -168,14 +168,14 @@ default void updateDatasetVersionMetric( WITH selected_dataset_versions AS ( SELECT dv.* FROM dataset_versions dv - WHERE dv.version = :version + WHERE dv.uuid = :version ), selected_dataset_version_facets AS ( SELECT dv.uuid, dv.dataset_name, dv.namespace_name, df.run_uuid, df.lineage_event_time, df.facet FROM selected_dataset_versions dv LEFT JOIN dataset_facets_view df ON df.dataset_version_uuid = dv.uuid ) SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state,\s - dv.created_at, dv.version, dv.dataset_schema_version_uuid, dv.fields, dv.run_uuid AS createdByRunUuid, + dv.created_at, dv.uuid AS current_version_uuid, dv.version, dv.dataset_schema_version_uuid, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location, t.tags, f.facets FROM selected_dataset_versions dv LEFT JOIN datasets_view d ON d.uuid = dv.dataset_uuid @@ -206,7 +206,7 @@ WITH selected_dataset_versions AS ( LEFT JOIN dataset_facets_view df ON df.dataset_version_uuid = dv.uuid AND (df.type ILIKE 'dataset' OR df.type ILIKE 'unknown' OR df.type ILIKE 'input') ) SELECT d.type, 
d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state,\s - dv.created_at, dv.version, dv.dataset_schema_version_uuid, dv.fields, dv.run_uuid AS createdByRunUuid, + dv.created_at, dv.uuid AS current_version_uuid, dv.version, dv.dataset_schema_version_uuid, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location, t.tags, f.facets FROM selected_dataset_versions dv LEFT JOIN datasets_view d ON d.uuid = dv.dataset_uuid @@ -255,7 +255,7 @@ default Optional findByWithRun(UUID version) { """ WITH dataset_info AS ( SELECT d.type, d.name, d.physical_name, d.namespace_name, d.source_name, d.description, dv.lifecycle_state, - dv.created_at, dv.version, dv.dataset_schema_version_uuid, dv.fields, dv.run_uuid AS createdByRunUuid, + dv.created_at, dv.uuid AS current_version_uuid, dv.version, dv.dataset_schema_version_uuid, dv.fields, dv.run_uuid AS createdByRunUuid, sv.schema_location, t.tags, f.facets, f.lineage_event_time, f.dataset_version_uuid, facet_name FROM dataset_versions dv LEFT JOIN datasets_view d ON d.uuid = dv.dataset_uuid @@ -277,7 +277,6 @@ LEFT JOIN ( ) f ON f.dataset_version_uuid = dv.uuid WHERE dv.namespace_name = :namespaceName AND dv.dataset_name = :datasetName - LIMIT :limit OFFSET :offset ), dataset_symlinks_names as ( SELECT DISTINCT dataset_uuid, name @@ -286,15 +285,16 @@ dataset_symlinks_names as ( ) SELECT type, name, physical_name, namespace_name, source_name, description, lifecycle_state, - created_at, version, dataset_schema_version_uuid, fields, createdByRunUuid, schema_location, + created_at, current_version_uuid, version, dataset_schema_version_uuid, fields, createdByRunUuid, schema_location, tags, dataset_version_uuid, JSONB_AGG(facets ORDER BY lineage_event_time ASC) AS facets FROM dataset_info WHERE name NOT IN (SELECT name FROM dataset_symlinks_names) GROUP BY type, name, physical_name, namespace_name, source_name, description, lifecycle_state, - created_at, version, dataset_schema_version_uuid, 
fields, createdByRunUuid, schema_location, + created_at, current_version_uuid, version, dataset_schema_version_uuid, fields, createdByRunUuid, schema_location, tags, dataset_version_uuid ORDER BY created_at DESC + LIMIT :limit OFFSET :offset """) List findAll(String namespaceName, String datasetName, int limit, int offset); diff --git a/api/src/main/java/marquez/db/DbMigration.java b/api/src/main/java/marquez/db/DbMigration.java index c46ea11bdb..a1c139d8b1 100644 --- a/api/src/main/java/marquez/db/DbMigration.java +++ b/api/src/main/java/marquez/db/DbMigration.java @@ -25,7 +25,7 @@ public static void migrateDbOrError(@NonNull final DataSource source) { public static void migrateDbOrError( @NonNull final FlywayFactory flywayFactory, @NonNull final DataSource source, - final boolean migrateDbOnStartup) { + final boolean migrateNow) { final Flyway flyway = flywayFactory.build(source); // Only attempt a database migration if there are pending changes to be applied, // or on the initialization of a new database. Otherwise, error on pending changes @@ -33,7 +33,7 @@ public static void migrateDbOrError( if (!hasPendingDbMigrations(flyway)) { log.info("No pending migrations found, skipping..."); return; - } else if (!migrateDbOnStartup && hasDbMigrationsApplied(flyway)) { + } else if (!migrateNow && hasDbMigrationsApplied(flyway)) { errorOnPendingDbMigrations(flyway); } // Attempt to perform a database migration. 
An exception is thrown on failed migration attempts diff --git a/api/src/main/java/marquez/db/mappers/DatasetVersionMapper.java b/api/src/main/java/marquez/db/mappers/DatasetVersionMapper.java index 77e5242719..4faa72eca6 100644 --- a/api/src/main/java/marquez/db/mappers/DatasetVersionMapper.java +++ b/api/src/main/java/marquez/db/mappers/DatasetVersionMapper.java @@ -52,7 +52,7 @@ public DatasetVersion map(@NonNull ResultSet results, @NonNull StatementContext DatasetName.of(stringOrThrow(results, Columns.NAME)), DatasetName.of(stringOrThrow(results, Columns.PHYSICAL_NAME)), timestampOrThrow(results, Columns.CREATED_AT), - Version.of(uuidOrThrow(results, Columns.VERSION)), + Version.of(uuidOrThrow(results, Columns.CURRENT_VERSION_UUID)), SourceName.of(stringOrThrow(results, Columns.SOURCE_NAME)), toFields(results, "fields"), columnNames.contains("tags") ? toTags(results, "tags") : null, @@ -70,7 +70,7 @@ public DatasetVersion map(@NonNull ResultSet results, @NonNull StatementContext DatasetName.of(stringOrThrow(results, Columns.NAME)), DatasetName.of(stringOrThrow(results, Columns.PHYSICAL_NAME)), timestampOrThrow(results, Columns.CREATED_AT), - Version.of(uuidOrThrow(results, Columns.VERSION)), + Version.of(uuidOrThrow(results, Columns.CURRENT_VERSION_UUID)), SourceName.of(stringOrThrow(results, Columns.SOURCE_NAME)), toURL(stringOrThrow(results, Columns.SCHEMA_LOCATION)), toFields(results, "fields"), diff --git a/api/src/main/java/marquez/db/mappers/JobDataMapper.java b/api/src/main/java/marquez/db/mappers/JobDataMapper.java index ca68f5deaa..2e93f48292 100644 --- a/api/src/main/java/marquez/db/mappers/JobDataMapper.java +++ b/api/src/main/java/marquez/db/mappers/JobDataMapper.java @@ -41,6 +41,7 @@ public JobData map(@NonNull ResultSet results, @NonNull StatementContext context stringOrThrow(results, Columns.SIMPLE_NAME), stringOrNull(results, Columns.PARENT_JOB_NAME), uuidOrNull(results, Columns.PARENT_JOB_UUID), + uuidOrNull(results, Columns.CURRENT_RUN_UUID), 
timestampOrThrow(results, Columns.CREATED_AT), timestampOrThrow(results, Columns.UPDATED_AT), NamespaceName.of(stringOrThrow(results, Columns.NAMESPACE_NAME)), diff --git a/api/src/main/java/marquez/service/LineageService.java b/api/src/main/java/marquez/service/LineageService.java index 3d6c7a3ea9..f9b19cc654 100644 --- a/api/src/main/java/marquez/service/LineageService.java +++ b/api/src/main/java/marquez/service/LineageService.java @@ -36,6 +36,7 @@ import marquez.db.LineageDao.DatasetSummary; import marquez.db.LineageDao.JobSummary; import marquez.db.LineageDao.RunSummary; +import marquez.db.RunDao; import marquez.db.models.JobRow; import marquez.service.DelegatingDaos.DelegatingLineageDao; import marquez.service.LineageService.UpstreamRunLineage; @@ -58,9 +59,12 @@ public record UpstreamRun(JobSummary job, RunSummary run, List i private final JobDao jobDao; - public LineageService(LineageDao delegate, JobDao jobDao) { + private final RunDao runDao; + + public LineageService(LineageDao delegate, JobDao jobDao, RunDao runDao) { super(delegate); this.jobDao = jobDao; + this.runDao = runDao; } // TODO make input parameters easily extendable if adding more options like 'withJobFacets' @@ -89,20 +93,11 @@ public Lineage lineage(NodeId nodeId, int depth) { return toLineageWithOrphanDataset(nodeId.asDatasetId()); } - List runs = - getCurrentRuns(jobData.stream().map(JobData::getUuid).collect(Collectors.toSet())); - for (JobData j : jobData) { - if (j.getLatestRun().isEmpty()) { - for (Run run : runs) { - if (j.getName().getValue().equalsIgnoreCase(run.getJobName()) - && j.getNamespace().getValue().equalsIgnoreCase(run.getNamespaceName())) { - j.setLatestRun(run); - break; - } - } - } + Optional run = runDao.findRunByUuid(j.getCurrentRunUuid()); + run.ifPresent(j::setLatestRun); } + Set datasetIds = jobData.stream() .flatMap(jd -> Stream.concat(jd.getInputUuids().stream(), jd.getOutputUuids().stream())) diff --git a/api/src/main/java/marquez/service/models/JobData.java 
b/api/src/main/java/marquez/service/models/JobData.java index 8495545347..f7f625a9b0 100644 --- a/api/src/main/java/marquez/service/models/JobData.java +++ b/api/src/main/java/marquez/service/models/JobData.java @@ -35,6 +35,7 @@ public class JobData implements NodeData { @NonNull String simpleName; @Nullable String parentJobName; @Nullable UUID parentJobUuid; + @Getter @Nullable UUID currentRunUuid; @NonNull Instant createdAt; @NonNull Instant updatedAt; @NonNull NamespaceName namespace; diff --git a/api/src/test/java/marquez/service/LineageServiceTest.java b/api/src/test/java/marquez/service/LineageServiceTest.java index c439a4b171..74ec7ccd44 100644 --- a/api/src/test/java/marquez/service/LineageServiceTest.java +++ b/api/src/test/java/marquez/service/LineageServiceTest.java @@ -36,6 +36,7 @@ import marquez.db.LineageTestUtils.DatasetConsumerJob; import marquez.db.LineageTestUtils.JobLineage; import marquez.db.OpenLineageDao; +import marquez.db.RunDao; import marquez.db.models.UpdateLineageRow; import marquez.jdbi.MarquezJdbiExternalPostgresExtension; import marquez.service.LineageService.UpstreamRunLineage; @@ -86,7 +87,8 @@ public class LineageServiceTest { public static void setUpOnce(Jdbi jdbi) { LineageServiceTest.jdbi = jdbi; lineageDao = jdbi.onDemand(LineageDao.class); - lineageService = new LineageService(lineageDao, jdbi.onDemand(JobDao.class)); + lineageService = + new LineageService(lineageDao, jdbi.onDemand(JobDao.class), jdbi.onDemand(RunDao.class)); openLineageDao = jdbi.onDemand(OpenLineageDao.class); datasetDao = jdbi.onDemand(DatasetDao.class); jobDao = jdbi.onDemand(JobDao.class); diff --git a/build.gradle b/build.gradle index 2d9cea24fb..8eba0bd320 100644 --- a/build.gradle +++ b/build.gradle @@ -51,15 +51,15 @@ subprojects { } ext { - assertjVersion = '3.25.3' + assertjVersion = '3.26.3' dropwizardVersion = '2.1.12' jacocoVersion = '0.8.12' junit5Version = '5.10.2' lombokVersion = '1.18.34' mockitoVersion = '5.4.0' - openlineageVersion = 
'1.13.1' + openlineageVersion = '1.23.0' slf4jVersion = '1.7.36' - postgresqlVersion = '42.6.0' + postgresqlVersion = '42.7.4' } dependencies { diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 36cd9ba259..6d4c87ec0e 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -29,4 +29,4 @@ name: marquez sources: - https://github.com/MarquezProject/marquez - https://marquezproject.github.io/marquez/ -version: 0.49.0 +version: 0.50.0 diff --git a/chart/values.yaml b/chart/values.yaml index 6f91df8ff6..e70dbffd40 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -20,7 +20,7 @@ marquez: image: registry: docker.io repository: marquezproject/marquez - tag: 0.49.0 + tag: 0.50.0 pullPolicy: IfNotPresent ## Name of the existing secret containing credentials for the Marquez installation. ## When this is specified, it will take precedence over the values configured in the 'db' section. @@ -80,7 +80,7 @@ web: image: registry: docker.io repository: marquezproject/marquez-web - tag: 0.49.0 + tag: 0.50.0 pullPolicy: IfNotPresent ## Marquez website will run on this port ## diff --git a/clients/java/README.md b/clients/java/README.md index 2dad915123..58132fe73c 100644 --- a/clients/java/README.md +++ b/clients/java/README.md @@ -10,14 +10,14 @@ Maven: io.github.marquezproject marquez-java - 0.49.0 + 0.50.0 ``` or Gradle: ```groovy -implementation 'io.github.marquezproject:marquez-java:0.49.0 +implementation 'io.github.marquezproject:marquez-java:0.50.0 ``` ## Usage diff --git a/clients/java/build.gradle b/clients/java/build.gradle index 2010b197dd..c982f917fe 100644 --- a/clients/java/build.gradle +++ b/clients/java/build.gradle @@ -22,7 +22,7 @@ plugins { dependencies { implementation "io.dropwizard:dropwizard-jackson:${dropwizardVersion}" implementation "org.slf4j:slf4j-api:${slf4jVersion}" - implementation 'org.apache.commons:commons-lang3:3.14.0' + implementation 'org.apache.commons:commons-lang3:3.17.0' implementation 'org.apache.httpcomponents:httpclient:4.5.14' 
testImplementation "org.slf4j:slf4j-simple:${slf4jVersion}" diff --git a/clients/python/marquez_client/__init__.py b/clients/python/marquez_client/__init__.py index 1e6154d35d..5b3b798c43 100644 --- a/clients/python/marquez_client/__init__.py +++ b/clients/python/marquez_client/__init__.py @@ -4,7 +4,7 @@ # -*- coding: utf-8 -*- __author__ = """Marquez Project""" -__version__ = "0.50.0" +__version__ = "0.51.0" from marquez_client.client import MarquezClient # noqa: F401 from marquez_client.clients import Clients # noqa: F401 diff --git a/clients/python/setup.cfg b/clients/python/setup.cfg index a97116a777..c5d1b85de1 100644 --- a/clients/python/setup.cfg +++ b/clients/python/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.50.0 +current_version = 0.51.0 commit = False tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(?P.*) diff --git a/clients/python/setup.py b/clients/python/setup.py index b6ac6f4213..d8cc7a854e 100644 --- a/clients/python/setup.py +++ b/clients/python/setup.py @@ -24,7 +24,7 @@ setup( name="marquez-python", - version="0.50.0", + version="0.51.0", description="Marquez Python Client", long_description=readme, long_description_content_type="text/markdown", diff --git a/codecov.yml b/codecov.yml index 80cbe3cb66..0b4c95b468 100644 --- a/codecov.yml +++ b/codecov.yml @@ -9,4 +9,4 @@ ignore: - "api/src/main/java/marquez/db/migrations/V44_1__UpdateRunsWithJobUUID.java" - "api/src/main/java/marquez/db/migrations/V44_2__BackfillAirflowParentRuns.java" - "api/src/main/java/marquez/db/migrations/V44_3_BackfillJobsWithParents.java" - - "api/src/main/java/marquez/cli/DbMigrationCommand.java" + - "api/src/main/java/marquez/cli/DbMigrateCommand.java" diff --git a/docker-compose.seed.yml b/docker-compose.seed.yml index e1fb0e1ab2..c7b024dee7 100644 --- a/docker-compose.seed.yml +++ b/docker-compose.seed.yml @@ -9,7 +9,7 @@ services: volumes: - ./docker/wait-for-it.sh:/usr/src/app/wait-for-it.sh - ./docker/seed.sh:/usr/src/app/seed.sh - - 
./docker/metadata.json:/usr/src/app/metadata.json + - ./docker/metadata.template.json:/usr/src/app/metadata.template.json links: - "db:postgres" depends_on: diff --git a/docker/metadata.json b/docker/metadata.template.json similarity index 98% rename from docker/metadata.json rename to docker/metadata.template.json index 4bebf89faf..3babdf0002 100644 --- a/docker/metadata.json +++ b/docker/metadata.template.json @@ -1,7 +1,7 @@ [ { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "4d3b8069-69b6-4708-ade0-3275112c9f04", "facets": { @@ -82,7 +82,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:45:52.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_8_MINUTES}}", "run": { "runId": "4d3b8069-69b6-4708-ade0-3275112c9f04" }, @@ -94,7 +94,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "6f0c13a5-f29b-46a5-90c1-0ffbebbbd1aa", "facets": { @@ -171,7 +171,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:44:52.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_9_MINUTES}}", "run": { "runId": "6f0c13a5-f29b-46a5-90c1-0ffbebbbd1aa" }, @@ -183,7 +183,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "e05901b1-3a06-4b98-8d9c-aaf188c9a28c", "facets": { @@ -265,7 +265,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:44:54.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_8_MINUTES}}", "run": { "runId": "e05901b1-3a06-4b98-8d9c-aaf188c9a28c" }, @@ -277,7 +277,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "a43a8523-349f-4296-807f-3354ac491990", "facets": { @@ -364,7 +364,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:44:52.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_5_MINUTES}}", "run": { "runId": 
"a43a8523-349f-4296-807f-3354ac491990" }, @@ -376,7 +376,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "ffba2c14-4170-48da-bec3-ab5fd4ec9a3f", "facets": { @@ -570,7 +570,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:44:02.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_7_MINUTES}}", "run": { "runId": "ffba2c14-4170-48da-bec3-ab5fd4ec9a3f" }, @@ -582,7 +582,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "182a9eaf-881a-4d49-860c-f7e260b8bf60", "facets": { @@ -679,7 +679,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:44:55.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_10_MINUTES}}", "run": { "runId": "182a9eaf-881a-4d49-860c-f7e260b8bf60" }, @@ -691,7 +691,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "b7098939-87f0-4207-878f-dfd8e8804d8a", "facets": { @@ -788,7 +788,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:44:52.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_7_MINUTES}}", "run": { "runId": "b7098939-87f0-4207-878f-dfd8e8804d8a" }, @@ -800,7 +800,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "9f3db1c5-5e9a-4280-8184-18aca4592c77", "facets": { @@ -923,7 +923,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:44:52.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_5_MINUTES}}", "run": { "runId": "9f3db1c5-5e9a-4280-8184-18aca4592c77" }, @@ -935,7 +935,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "8ddfb1d9-415f-4850-bcd6-01d02f011abe", "facets": { @@ -1052,7 +1052,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:44:56.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_10_MINUTES}}", "run": { 
"runId": "8ddfb1d9-415f-4850-bcd6-01d02f011abe" }, @@ -1064,7 +1064,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "d5a2a4c4-fc78-428d-ae85-08c942ed8371", "facets": { @@ -1382,7 +1382,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:48:12.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_6_MINUTES}}", "run": { "runId": "d5a2a4c4-fc78-428d-ae85-08c942ed8371" }, @@ -1394,7 +1394,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "bd41a42a-bf18-4b74-9bb7-cd62637823d8", "facets": { @@ -1655,7 +1655,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:58:02.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_6_MINUTES}}", "run": { "runId": "bd41a42a-bf18-4b74-9bb7-cd62637823d8" }, @@ -1667,7 +1667,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "adc8507c-595e-4d76-9dac-be2bf0ffe1ee", "facets": { @@ -1776,7 +1776,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:46:12.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_5_MINUTES}}", "run": { "runId": "adc8507c-595e-4d76-9dac-be2bf0ffe1ee" }, @@ -1788,7 +1788,7 @@ }, { "eventType": "START", - "eventTime": "2020-02-22T22:42:42.000Z", + "eventTime": "{{RUN_START_TIME}}", "run": { "runId": "3ab25429-cf9c-4d1d-9166-1e1946f9d3c3", "facets": { @@ -1830,7 +1830,7 @@ }, { "eventType": "COMPLETE", - "eventTime": "2020-02-22T22:56:01.000Z", + "eventTime": "{{RUN_END_TIME_AFTER_6_MINUTES}}", "run": { "runId": "3ab25429-cf9c-4d1d-9166-1e1946f9d3c3" }, diff --git a/docker/seed.sh b/docker/seed.sh index bcdbe377af..6cba57085f 100755 --- a/docker/seed.sh +++ b/docker/seed.sh @@ -7,4 +7,24 @@ set -e +# As ISO-8601 format +NOW=$(date -u +"%Y-%m-%dT%H:%M:%S.000Z") + +RUN_END_TIME_AFTER_5_MINUTES=$(date -u -d "5 minutes" +"%Y-%m-%dT%H:%M:%S.000Z") 
+RUN_END_TIME_AFTER_6_MINUTES=$(date -u -d "6 minutes" +"%Y-%m-%dT%H:%M:%S.000Z") +RUN_END_TIME_AFTER_7_MINUTES=$(date -u -d "7 minutes" +"%Y-%m-%dT%H:%M:%S.000Z") +RUN_END_TIME_AFTER_8_MINUTES=$(date -u -d "8 minutes" +"%Y-%m-%dT%H:%M:%S.000Z") +RUN_END_TIME_AFTER_9_MINUTES=$(date -u -d "9 minutes" +"%Y-%m-%dT%H:%M:%S.000Z") +RUN_END_TIME_AFTER_10_MINUTES=$(date -u -d "10 minutes" +"%Y-%m-%dT%H:%M:%S.000Z") + +# Replace '{{RUN_START_TIME}}' and '{{RUN_END_TIME_AFTER_*_MINUTES}}'. +sed -e "s/{{RUN_START_TIME}}/$NOW/" \ + -e "s/{{RUN_END_TIME_AFTER_5_MINUTES}}/$RUN_END_TIME_AFTER_5_MINUTES/" \ + -e "s/{{RUN_END_TIME_AFTER_6_MINUTES}}/$RUN_END_TIME_AFTER_6_MINUTES/" \ + -e "s/{{RUN_END_TIME_AFTER_7_MINUTES}}/$RUN_END_TIME_AFTER_7_MINUTES/" \ + -e "s/{{RUN_END_TIME_AFTER_8_MINUTES}}/$RUN_END_TIME_AFTER_8_MINUTES/" \ + -e "s/{{RUN_END_TIME_AFTER_9_MINUTES}}/$RUN_END_TIME_AFTER_9_MINUTES/" \ + -e "s/{{RUN_END_TIME_AFTER_10_MINUTES}}/$RUN_END_TIME_AFTER_10_MINUTES/" \ + metadata.template.json > metadata.json + java -jar marquez-api-*.jar seed --url "${MARQUEZ_URL:-http://localhost:5000}" --metadata metadata.json diff --git a/docker/up.sh b/docker/up.sh index 590a4e3a4c..56878e19da 100755 --- a/docker/up.sh +++ b/docker/up.sh @@ -8,9 +8,9 @@ set -e # Version of Marquez -readonly VERSION=0.49.0 +readonly VERSION=0.50.0 # Build version of Marquez -readonly BUILD_VERSION=0.49.0 +readonly BUILD_VERSION=0.50.0 title() { echo -e "\033[1m${1}\033[0m" @@ -157,7 +157,7 @@ fi # Create docker volumes for Marquez if [[ "${NO_VOLUMES}" = "false" ]]; then - ./docker/volumes.sh marquez + ./docker/volumes.sh $(basename "$project_root") fi # Enable search in UI an API if search container is enabled diff --git a/docs/openapi.html b/docs/openapi.html index 9a4b99add6..475b50216c 100644 --- a/docs/openapi.html +++ b/docs/openapi.html @@ -12,2196 +12,377 @@ margin: 0; } - -

Marquez (0.49.0)

Download OpenAPI specification:Download

License: Apache 2.0

Marquez is an open source metadata service for the collection, aggregation, and visualization of a data ecosystem's metadata.

-

Namespaces

Create a namespace

Creates a new namespace object. A namespace enables the contextual grouping of related jobs and datasets. Namespaces must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), dashes (-), colons (:), slashes (/), or dots (.). A namespace is case-insensitive with a maximum length of 1024 characters. Note jobs and datasets will be unique within a namespace, but not across namespaces.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
Request Body schema: application/json
ownerName
required
string

The owner of the namespace.

-
description
string

The description of the namespace.

-

Responses

Request samples

Content type
application/json
{
  • "ownerName": "me",
  • "description": "My first namespace!"
}

Response samples

Content type
application/json
{
  • "name": "my-namespace",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "ownerName": "me",
  • "description": "My first namespace!"
}

Retrieve a namespace

Returns a namespace.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-

Responses

Response samples

Content type
application/json
{
  • "name": "my-namespace",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "ownerName": "me",
  • "description": "My first namespace!"
}

Deletes a namespace

Soft deletes a namespace, along with every job and dataset inside it. On the next event containing this namespace, the namespace will be undeleted.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-

Responses

Response samples

Content type
application/json
{
  • "name": "my-namespace",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "ownerName": "me",
  • "description": "My first namespace!"
}

List all namespaces

Returns a list of namespaces.

-
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

-
offset
integer
Default: 0

The initial position from which to return results.

-

Responses

Response samples

Content type
application/json
{
  • "namespaces": [
    ]
}

Events

List all received OpenLineage events.

Returns a list of OpenLineage events, sorted in the direction given by the sortDirection parameter. The default direction is desc.

-
query Parameters
sortDirection
string
Example: sortDirection=desc

Sorts the results of your query by indicated direction asc or desc.

-
before
string <date-time>
Example: before=2022-09-15T07:47:19Z

Returns events before passed date.

-
after
string <date-time>
Example: after=2022-09-15T07:47:19Z

Returns events after passed date.

-
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

-
offset
integer
Default: 0

The initial position from which to return results.

-

Responses

Response samples

Content type
application/json
{}

Sources

Create a source Deprecated

Creates a new source object. A source is the physical location of a dataset such as a table in PostgreSQL, or topic in Kafka. A source enables the grouping of physical datasets to their physical source.

-
path Parameters
source
required
string <= 1024 characters
Example: my-source

The name of the source.

-
Request Body schema: application/json
type
required
string

The type of the source.

-
connectionUrl
required
string <URL>

The URL to the location of the source.

-
description
string

The description of the source.

-

Responses

Request samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

Response samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "name": "my-source",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

Retrieve a source

Returns a source.

-
path Parameters
source
required
string <= 1024 characters
Example: my-source

The name of the source.

-

Responses

Response samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "name": "my-source",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

List all sources

Returns a list of sources.

-
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

-
offset
integer
Default: 0

The initial position from which to return results.

-

Responses

Response samples

Content type
application/json
{
  • "sources": [
    ]
}

Datasets

Create a dataset Deprecated

Creates a new dataset.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

-
Request Body schema: application/json
Any of
type
required
string
Value: "DB_TABLE"

The type of the dataset.

-
physicalName
required
string

The physical name of the table.

-
sourceName
required
string

The name of the source associated with the table.

-
required
Array of objects

The fields of the table.

-
tags
Array of strings

List of tags.

-
description
string

The description of the table.

-
runId
string

The ID associated with the run modifying the table.

-

Responses

Request samples

Content type
application/json
Example
{
  • "type": "DB_TABLE",
  • "physicalName": "public.mytable",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "description": "My first dataset!"
}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a dataset

Returns a dataset.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

-

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Soft deletes dataset.

Soft deletes a dataset. It will be un-deleted if a new OpenLineage event containing this dataset arrives.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

-

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a version for a dataset

Returns a version for a dataset.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

-
version
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the job or dataset version.

-

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "version": "d224dac0-35d7-4d9b-bbbe-6fff1a8485ad",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "description": "My first dataset!",
  • "createdByRun": {
    }
}

List all versions for a dataset

Returns a list of versions for a dataset.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

-
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

-
offset
integer
Default: 0

The initial position from which to return results.

-

Responses

Response samples

Content type
application/json
{
  • "versions": [
    ]
}

List all datasets

Returns a list of datasets.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

-
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

-
offset
integer
Default: 0

The initial position from which to return results.

-

Responses

Response samples

Content type
application/json
{
  • "datasets": [
    ],
  • "totalCount": 0
}

Tag a dataset

Tag an existing dataset.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

-
tag
required
string
Example: SENSITIVE

The name of the tag.

-

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Tag a field

Tag an existing field of a dataset.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

-
field
required
string
Example: my_field

The name of the field.

-
tag
required
string
Example: SENSITIVE

The name of the tag.

-

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Jobs

Create a job Deprecated

Creates a new job object. All job objects are immutable and are uniquely identified by a generated ID. Marquez will create a version of a job each time the contents of the object are modified. For example, the location of a job may change over time resulting in new versions. The accumulated versions can be listed, used to rerun a specific job version or possibly help debug a failed job run.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
job
required
string <= 1024 characters
Example: my-job

The name of the job.

-
Request Body schema: application/json
object

The ID of the job.

-
type
required
string (JobType)
Enum: "BATCH" "STREAM" "SERVICE"

The type of the job.

-
required
Array of objects (DatasetId) unique

The set of input datasets.

-
required
Array of objects (DatasetId) unique

The set of output datasets.

-
location
string <URL>

The URL of the job source code or artifact.

-
context
object
Deprecated

A key/value pair that must be of type string. A context can be used for getting additional details about the job.

-
description
string

The description of the job.

-
runId
string

An optional run ID used to associate a job version to an existing job run.

-

Responses

Request samples

Content type
application/json
{}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a job

Retrieve a job.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
job
required
string <= 1024 characters
Example: my-job

The name of the job.

-

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Soft deletes job.

Soft deletes a job. It will be un-deleted if a new OpenLineage event containing this job arrives.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
job
required
string <= 1024 characters
Example: my-job

The name of the job.

-

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

List all jobs

Returns a list of jobs.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

-
offset
integer
Default: 0

The initial position from which to return results.

-

Responses

Response samples

Content type
application/json
{
  • "jobs": [
    ],
  • "totalCount": 0
}

Retrieve a version for a job

Returns a version for a job.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
job
required
string <= 1024 characters
Example: my-job

The name of the job.

-
version
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the job or dataset version.

-

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "version": "56472c57-a2ef-4218-b7b7-d2af02a343fd",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "facets": { }
}

List all versions for a job

Returns a list of versions for a job.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
job
required
string <= 1024 characters
Example: my-job

The name of the job.

-

Responses

Response samples

Content type
application/json
{
  • "versions": [
    ]
}

Create a run Deprecated

Creates a new run object for a job.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
job
required
string <= 1024 characters
Example: my-job

The name of the job.

-
Request Body schema: application/json
id
string <uuid>

An optional user-provided unique ID of the run. A run ID must be a UUID. If an ID for the run is not provided, a random UUID will be generated for the given run.

-
nominalStartTime
string <date-time>

An ISO-8601 timestamp representing the nominal start time of the run.

-
nominalEndTime
string <date-time>

An ISO-8601 timestamp representing the nominal end time of the run.

-
args
object

The arguments of the run.

-

Responses

Request samples

Content type
application/json
{
  • "args": {
    }
}

Response samples

Content type
application/json
Example
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

List all runs

Returns a list of runs for a job.

-
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

-
job
required
string <= 1024 characters
Example: my-job

The name of the job.

-
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

-
offset
integer
Default: 0

The initial position from which to return results.

-

Responses

Response samples

Content type
application/json
{
  • "runs": [
    ]
}

Retrieve a run

Retrieve a run.

-
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

-

Responses

Response samples

Content type
application/json
Example
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Retrieve run or job facets for a run.

Retrieve run or job facets for a run.

-
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

-
query Parameters
type
required
string
Enum: "run" "job"

Indicates whether job or run facets should be returned.

-

Responses

Response samples

Content type
application/json

Start a run Deprecated

Marks the run as RUNNING.

-
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

-
query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

-

Responses

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Complete a run Deprecated

Marks the run as COMPLETED.

-
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

-
query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

-

Responses

Response samples

Content type
application/json
Example
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Fail a run Deprecated

Marks the run as FAILED.

-
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

-
query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

-

Responses

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Abort a run Deprecated

Marks the run as ABORTED.

-
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

-
query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

-

Responses

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Lineage

Record a single lineage event

Receive, process, and store lineage metadata using the OpenLineage standard.

-
Request Body schema: application/json
any (LineageEvent)

Responses

Request samples

Content type
application/json
{}

Get a lineage graph

query Parameters
nodeId
required
string
Example: nodeId=dataset:food_delivery:public.delivery_7_days

The ID of the node. A node can either be a dataset node, a dataset field node or a job node. The format of nodeId for dataset is dataset:<namespace_of_dataset>:<name_of_the_dataset>, for dataset field is datasetField:<namespace_of_dataset>:<name_of_the_dataset>:<name_of_field>, and for job is job:<namespace_of_the_job>:<name_of_the_job>.

-
depth
integer
Default: 20

Depth of lineage graph to create.

-

Responses

Response samples

Content type
application/json
{
  • "graph": [
    ]
}

Get the upstream lineage for a given run

Responses

Response samples

Content type
application/json
{
  • "runs": [
    ]
}

Column lineage

Get a column lineage graph

query Parameters
nodeId
required
string
Example: nodeId=dataset:food_delivery:public.delivery_7_days

The ID of the node. A node can either be a dataset node, a dataset field node or a job node. The format of nodeId for dataset is dataset:<namespace_of_dataset>:<name_of_the_dataset>, for dataset field is datasetField:<namespace_of_dataset>:<name_of_the_dataset>:<name_of_field>, and for job is job:<namespace_of_the_job>:<name_of_the_job>.

-
depth
integer
Default: 20

Depth of lineage graph to create.

-
withDownstream
boolean
Default: false

Determines if downstream lineage should be returned.

-

Responses

Response samples

Content type
application/json
{
  • "graph": [
    ]
}

Tags

Create a tag

Creates a new tag object.

-
path Parameters
tag
required
string
Example: SENSITIVE

The name of the tag.

-
Request Body schema: application/json
description
string

The description of the tag.

-

Responses

Request samples

Content type
application/json
{
  • "description": "My first tag!"
}

Response samples

Content type
application/json
{
  • "tags": [
    ]
}

List all tags

Returns a list of tags.

-
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

-
offset
integer
Default: 0

The initial position from which to return results.

-

Responses

Response samples

Content type
application/json
{
  • "tags": [
    ]
}

Search

Query all datasets and jobs

Returns one or more datasets and jobs matching your query.

-
query Parameters
q
required
string
Example: q=my-dataset

Query containing pattern to match; datasets and jobs pattern matching is string based and case-insensitive. Use percent sign (%) to match any string of zero or more characters (my-job%), or an underscore (_) to match a single character (_job_).

-
filter
string
Example: filter=dataset

Filters the results of your query by dataset or job.

-
sort
string
Example: sort=name

Sorts the results of your query by name or updated_at.

-
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

-
namespace
string <= 1024 characters
Example: namespace=my-namespace

Match jobs or datasets within the given namespace.

-
before
string <YYYY-MM-DD>
Example: before=2022-09-15

Match jobs or datasets before YYYY-MM-DD.

-
after
string <YYYY-MM-DD>
Example: after=2022-09-15

Match jobs or datasets after YYYY-MM-DD.

-

Responses

Response samples

Content type
application/json
{
  • "totalCount": 1,
  • "results": [
    ]
}
- + " fill="currentColor">

Marquez (0.50.0)

Download OpenAPI specification:Download

License: Apache 2.0

Marquez is an open source metadata service for the collection, aggregation, and visualization of a data ecosystem's metadata.

+

Namespaces

Create a namespace

Creates a new namespace object. A namespace enables the contextual grouping of related jobs and datasets. Namespaces must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), dashes (-), colons (:), slashes (/), or dots (.). A namespace is case-insensitive with a maximum length of 1024 characters. Note jobs and datasets will be unique within a namespace, but not across namespaces.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
Request Body schema: application/json
ownerName
required
string

The owner of the namespace.

+
description
string

The description of the namespace.

+

Responses

Request samples

Content type
application/json
{
  • "ownerName": "me",
  • "description": "My first namespace!"
}

Response samples

Content type
application/json
{
  • "name": "my-namespace",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "ownerName": "me",
  • "description": "My first namespace!"
}

Retrieve a namespace

Returns a namespace.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+

Responses

Response samples

Content type
application/json
{
  • "name": "my-namespace",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "ownerName": "me",
  • "description": "My first namespace!"
}

Deletes a namespace

Soft deletes a namespace, along with every job and dataset inside it. The namespace will be undeleted when the next event containing it is received.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+

Responses

Response samples

Content type
application/json
{
  • "name": "my-namespace",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "ownerName": "me",
  • "description": "My first namespace!"
}

List all namespaces

Returns a list of namespaces.

+
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

+
offset
integer
Default: 0

The initial position from which to return results.

+

Responses

Response samples

Content type
application/json
{
  • "namespaces": [
    ]
}

Events

List all received OpenLineage events.

Returns a list of OpenLineage events, sorted in the direction given by the sortDirection parameter. The default direction is desc.

+
query Parameters
sortDirection
string
Example: sortDirection=desc

Sorts the results of your query in the indicated direction: asc or desc.

+
before
string <date-time>
Example: before=2022-09-15T07:47:19Z

Returns events before passed date.

+
after
string <date-time>
Example: after=2022-09-15T07:47:19Z

Returns events after passed date.

+
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

+
offset
integer
Default: 0

The initial position from which to return results.

+

Responses

Response samples

Content type
application/json
{}

Sources

Create a source Deprecated

Creates a new source object. A source is the physical location of a dataset such as a table in PostgreSQL, or topic in Kafka. A source enables the grouping of physical datasets to their physical source.

+
path Parameters
source
required
string <= 1024 characters
Example: my-source

The name of the source.

+
Request Body schema: application/json
type
required
string

The type of the source.

+
connectionUrl
required
string <URL>

The URL to the location of the source.

+
description
string

The description of the source.

+

Responses

Request samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

Response samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "name": "my-source",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

Retrieve a source

Returns a source.

+
path Parameters
source
required
string <= 1024 characters
Example: my-source

The name of the source.

+

Responses

Response samples

Content type
application/json
{
  • "type": "POSTGRESQL",
  • "name": "my-source",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "connectionUrl": "jdbc:postgresql://db.example.com/mydb",
  • "description": "My first source!"
}

List all sources

Returns a list of sources.

+
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

+
offset
integer
Default: 0

The initial position from which to return results.

+

Responses

Response samples

Content type
application/json
{
  • "sources": [
    ]
}

Datasets

Create a dataset Deprecated

Creates a new dataset.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

+
Request Body schema: application/json
Any of
type
required
string
Value: "DB_TABLE"

The type of the dataset.

+
physicalName
required
string

The physical name of the table.

+
sourceName
required
string

The name of the source associated with the table.

+
required
Array of objects

The fields of the table.

+
tags
Array of strings

List of tags.

+
description
string

The description of the table.

+
runId
string

The ID associated with the run modifying the table.

+

Responses

Request samples

Content type
application/json
Example
{
  • "type": "DB_TABLE",
  • "physicalName": "public.mytable",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "description": "My first dataset!"
}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a dataset

Returns a dataset.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

+

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Soft deletes dataset.

Soft deletes a dataset. It will be un-deleted if a new OpenLineage event containing this dataset is received.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

+

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a version for a dataset

Returns a version for a dataset.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

+
version
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the job or dataset version.

+

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "version": "d224dac0-35d7-4d9b-bbbe-6fff1a8485ad",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "description": "My first dataset!",
  • "createdByRun": {
    }
}

List all versions for a dataset

Returns a list of versions for a dataset.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

+
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

+
offset
integer
Default: 0

The initial position from which to return results.

+

Responses

Response samples

Content type
application/json
{
  • "versions": [
    ]
}

List all datasets

Returns a list of datasets.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

+
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

+
offset
integer
Default: 0

The initial position from which to return results.

+

Responses

Response samples

Content type
application/json
{
  • "datasets": [
    ],
  • "totalCount": 0
}

Tag a dataset

Tag an existing dataset.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

+
tag
required
string
Example: SENSITIVE

The name of the tag.

+

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Tag a field

Tag an existing field of a dataset.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
dataset
required
string <= 1024 characters
Example: my-dataset

The name of the dataset.

+
field
required
string
Example: my_field

The name of the field.

+
tag
required
string
Example: SENSITIVE

The name of the tag.

+

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "DB_TABLE",
  • "name": "my-dataset",
  • "physicalName": "public.mytable",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "sourceName": "my-source",
  • "fields": [
    ],
  • "tags": [ ],
  • "lastModifiedAt": null,
  • "description": "My first dataset!",
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Jobs

Create a job Deprecated

Creates a new job object. All job objects are immutable and are uniquely identified by a generated ID. Marquez will create a version of a job each time the contents of the object are modified. For example, the location of a job may change over time resulting in new versions. The accumulated versions can be listed, used to rerun a specific job version or possibly help debug a failed job run.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
job
required
string <= 1024 characters
Example: my-job

The name of the job.

+
Request Body schema: application/json
object

The ID of the job.

+
type
required
string (JobType)
Enum: "BATCH" "STREAM" "SERVICE"

The type of the job.

+
required
Array of objects (DatasetId) unique

The set of input datasets.

+
required
Array of objects (DatasetId) unique

The set of output datasets.

+
location
string <URL>

The URL of the job source code or artifact.

+
context
object
Deprecated

A key/value pair that must be of type string. A context can be used for getting additional details about the job.

+
description
string

The description of the job.

+
runId
string

An optional run ID used to associate a job version to an existing job run.

+

Responses

Request samples

Content type
application/json
{}

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "latestRuns": [ ],
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Retrieve a job

Retrieve a job.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
job
required
string <= 1024 characters
Example: my-job

The name of the job.

+

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "latestRuns": [ ],
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

Soft deletes job.

Soft deletes a job. It will be un-deleted if a new OpenLineage event containing this job is received.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
job
required
string <= 1024 characters
Example: my-job

The name of the job.

+

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "latestRun": null,
  • "latestRuns": [ ],
  • "facets": { },
  • "currentVersion": "b1d626a2-6d3a-475e-9ecf-943176d4a8c6"
}

List all jobs

Returns a list of jobs.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

+
offset
integer
Default: 0

The initial position from which to return results.

+

Responses

Response samples

Content type
application/json
{
  • "jobs": [
    ],
  • "totalCount": 0
}

Retrieve a version for a job

Returns a version for a job.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
job
required
string <= 1024 characters
Example: my-job

The name of the job.

+
version
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the job or dataset version.

+

Responses

Response samples

Content type
application/json
{
  • "id": {
    },
  • "type": "BATCH",
  • "name": "my-job",
  • "version": "56472c57-a2ef-4218-b7b7-d2af02a343fd",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "namespace": "my-namespace",
  • "inputs": [
    ],
  • "outputs": [ ],
  • "context": {
    },
  • "description": "My first job!",
  • "facets": { }
}

List all versions for a job

Returns a list of versions for a job.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
job
required
string <= 1024 characters
Example: my-job

The name of the job.

+

Responses

Response samples

Content type
application/json
{
  • "versions": [
    ]
}

Create a run Deprecated

Creates a new run object for a job.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
job
required
string <= 1024 characters
Example: my-job

The name of the job.

+
Request Body schema: application/json
id
string <uuid>

An optional user-provided unique ID of the run. A run ID must be a UUID. If an ID for the run is not provided, a random UUID will be generated for the given run.

+
nominalStartTime
string <date-time>

An ISO-8601 timestamp representing the nominal start time of the run.

+
nominalEndTime
string <date-time>

An ISO-8601 timestamp representing the nominal end time of the run.

+
args
object

The arguments of the run.

+

Responses

Request samples

Content type
application/json
{
  • "args": {
    }
}

Response samples

Content type
application/json
Example
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

List all runs

Returns a list of runs for a job.

+
path Parameters
namespace
required
string <= 1024 characters
Example: my-namespace

The name of the namespace.

+
job
required
string <= 1024 characters
Example: my-job

The name of the job.

+
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

+
offset
integer
Default: 0

The initial position from which to return results.

+

Responses

Response samples

Content type
application/json
{
  • "runs": [
    ]
}

Retrieve a run

Retrieve a run.

+
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

+

Responses

Response samples

Content type
application/json
Example
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Retrieve run or job facets for a run.

Retrieve run or job facets for a run.

+
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

+
query Parameters
type
required
string
Enum: "run" "job"

Indicates whether job or run facets should be returned.

+

Responses

Response samples

Content type
application/json

Start a run Deprecated

Marks the run as RUNNING.

+
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

+
query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

+

Responses

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Complete a run Deprecated

Marks the run as COMPLETED.

+
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

+
query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

+

Responses

Response samples

Content type
application/json
Example
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Fail a run Deprecated

Marks the run as FAILED.

+
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

+
query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

+

Responses

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Abort a run Deprecated

Marks the run as ABORTED.

+
path Parameters
id
required
string <uuid>
Example: ea9badc5-7cb2-49af-9a9f-155771d3a797

The ID of the run.

+
query Parameters
at
string <date-time>

An ISO-8601 timestamp representing the time when the run transitioned.

+

Responses

Response samples

Content type
application/json
{
  • "id": "870492da-ecfb-4be0-91b9-9a89ddd3db90",
  • "createdAt": "2019-05-09T19:49:24.201361Z",
  • "updatedAt": "2019-05-09T19:49:24.201361Z",
  • "nominalStartTime": null,
  • "nominalEndTime": null,
  • "state": "RUNNING",
  • "startedAt": "2019-05-09T15:17:32.690346",
  • "endedAt": null,
  • "durationMs": null,
  • "args": {
    },
  • "facets": { }
}

Lineage

Record a single lineage event

Receive, process, and store lineage metadata using the OpenLineage standard.

+
Request Body schema: application/json
any (LineageEvent)

Responses

Request samples

Content type
application/json
{}

Get a lineage graph

query Parameters
nodeId
required
string
Example: nodeId=dataset:food_delivery:public.delivery_7_days

The ID of the node. A node can either be a dataset node, a dataset field node or a job node. The format of nodeId for dataset is dataset:<namespace_of_dataset>:<name_of_the_dataset>, for dataset field is datasetField:<namespace_of_dataset>:<name_of_the_dataset>:<name_of_field>, and for job is job:<namespace_of_the_job>:<name_of_the_job>.

+
depth
integer
Default: 20

Depth of lineage graph to create.

+

Responses

Response samples

Content type
application/json
{
  • "graph": [
    ]
}

Get the upstream lineage for a given run

Responses

Response samples

Content type
application/json
{
  • "runs": [
    ]
}

Column lineage

Get a column lineage graph

query Parameters
nodeId
required
string
Example: nodeId=dataset:food_delivery:public.delivery_7_days

The ID of the node. A node can either be a dataset node, a dataset field node or a job node. The format of nodeId for dataset is dataset:<namespace_of_dataset>:<name_of_the_dataset>, for dataset field is datasetField:<namespace_of_dataset>:<name_of_the_dataset>:<name_of_field>, and for job is job:<namespace_of_the_job>:<name_of_the_job>.

+
depth
integer
Default: 20

Depth of lineage graph to create.

+
withDownstream
boolean
Default: false

Determines if downstream lineage should be returned.

+

Responses

Response samples

Content type
application/json
{
  • "graph": [
    ]
}

Tags

Create a tag

Creates a new tag object.

+
path Parameters
tag
required
string
Example: SENSITIVE

The name of the tag.

+
Request Body schema: application/json
description
string

The description of the tag.

+

Responses

Request samples

Content type
application/json
{
  • "description": "My first tag!"
}

Response samples

Content type
application/json
{
  • "tags": [
    ]
}

List all tags

Returns a list of tags.

+
query Parameters
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

+
offset
integer
Default: 0

The initial position from which to return results.

+

Responses

Response samples

Content type
application/json
{
  • "tags": [
    ]
}

Search

Query all datasets and jobs

Returns one or more datasets and jobs matching your query.

+
query Parameters
q
required
string
Example: q=my-dataset

Query containing pattern to match; datasets and jobs pattern matching is string based and case-insensitive. Use percent sign (%) to match any string of zero or more characters (my-job%), or an underscore (_) to match a single character (_job_).

+
filter
string
Example: filter=dataset

Filters the results of your query by dataset or job.

+
sort
string
Example: sort=name

Sorts the results of your query by name or updated_at.

+
limit
integer
Default: 100
Example: limit=25

The number of results to return from offset.

+
namespace
string <= 1024 characters
Example: namespace=my-namespace

Match jobs or datasets within the given namespace.

+
before
string <YYYY-MM-DD>
Example: before=2022-09-15

Match jobs or datasets before YYYY-MM-DD.

+
after
string <YYYY-MM-DD>
Example: after=2022-09-15

Match jobs or datasets after YYYY-MM-DD.

+

Responses

Response samples

Content type
application/json
{
  • "totalCount": 1,
  • "results": [
    ]
}
+ - \ No newline at end of file + diff --git a/gradle.properties b/gradle.properties index 9da2e3b805..d3b908d4bc 100644 --- a/gradle.properties +++ b/gradle.properties @@ -7,4 +7,4 @@ org.gradle.jvmargs=--add-exports jdk.compiler/com.sun.tools.javac.api=ALL-UNNAME --add-exports jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED \ --add-exports jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED -version=0.50.0-SNAPSHOT +version=0.51.0-SNAPSHOT diff --git a/new-version.sh b/new-version.sh index a07b9e1cff..f348742a45 100755 --- a/new-version.sh +++ b/new-version.sh @@ -6,7 +6,7 @@ # Requirements: # * You're on the 'main' branch # * You've installed 'bump2version' -# * You've installed 'redoc-cli' +# * You've installed 'redocly' # # Usage: $ ./new-version.sh --release-version RELEASE_VERSION --next-version NEXT_VERSION @@ -59,9 +59,9 @@ if [[ ! $(type -P bump2version) ]]; then exit 1; fi -# Verify redoc-cli is installed -if [[ ! $(type -P redoc-cli) ]]; then - echo "redoc-cli not installed! Please see https://redoc.ly/docs/redoc/quickstart/cli" +# Verify redocly is installed +if [[ ! $(type -P redocly) ]]; then + echo "redocly not installed! 
Please see https://redoc.ly/docs/redoc/quickstart/cli" exit 1; fi @@ -140,7 +140,7 @@ sed -i "" "s/.*/${RELEASE_VERSION}<\/version>/g" ./clients/jav sed -i "" "s/marquez-java:.*/marquez-java:${RELEASE_VERSION}/g" ./clients/java/README.md # (5) Bundle openAPI docs -redoc-cli bundle spec/openapi.yml --output docs/openapi.html --title "Marquez API Reference" +redocly build-docs spec/openapi.yml --output docs/openapi.html --title "Marquez API Reference" # (6) Prepare release commit git commit -sam "Prepare for release ${RELEASE_VERSION}" --no-verify diff --git a/renovate.json b/renovate.json index e23197964f..52a431426a 100644 --- a/renovate.json +++ b/renovate.json @@ -23,5 +23,5 @@ "addLabels": ["dependencies", "renovate"] } ], - "ignorePaths": ["**/chart/**", "**/dev/**"] + "ignorePaths": ["**/chart/**", "**/dev/**", "**/stats/**"] } diff --git a/spec/openapi.yml b/spec/openapi.yml index ae2a62ca4b..0622e86e8e 100644 --- a/spec/openapi.yml +++ b/spec/openapi.yml @@ -3,7 +3,7 @@ openapi: 3.0.2 info: title: Marquez - version: 0.50.0-SNAPSHOT + version: 0.51.0-SNAPSHOT description: Marquez is an open source **metadata service** for the **collection**, **aggregation**, and **visualization** of a data ecosystem's metadata. 
license: diff --git a/web/docs/demo.gif b/web/docs/demo.gif index e95144e345..19e0fe9a61 100644 Binary files a/web/docs/demo.gif and b/web/docs/demo.gif differ diff --git a/web/setupProxy.js b/web/setupProxy.js index ecd677b607..d8db40b2ba 100644 --- a/web/setupProxy.js +++ b/web/setupProxy.js @@ -3,13 +3,23 @@ const { createProxyMiddleware } = require('http-proxy-middleware'); const express = require('express') const router = express.Router() +const environmentVariable = (variableName) => { + const value = process.env[variableName] + if (!value) { + console.error(`Error: ${variableName} environment variable is not defined.`) + console.error(`Please set ${variableName} and restart the application.`) + process.exit(1) + } + return value +} + const apiOptions = { - target: `http://${process.env.MARQUEZ_HOST}:${process.env.MARQUEZ_PORT}/` + target: `http://${(environmentVariable("MARQUEZ_HOST"))}:${environmentVariable("MARQUEZ_PORT")}/` } const app = express() const path = __dirname + '/dist' -const port = process.env.WEB_PORT +const port = environmentVariable("WEB_PORT") app.use('/', express.static(path)) app.use('/datasets', express.static(path)) diff --git a/web/src/components/core/copy/MqCopy.tsx b/web/src/components/core/copy/MqCopy.tsx index ce62eecf31..670cdfe629 100644 --- a/web/src/components/core/copy/MqCopy.tsx +++ b/web/src/components/core/copy/MqCopy.tsx @@ -1,6 +1,7 @@ // Copyright 2018-2023 contributors to the Marquez project // SPDX-License-Identifier: Apache-2.0 +import { Check } from '@mui/icons-material' import { Snackbar } from '@mui/material' import ContentCopyIcon from '@mui/icons-material/ContentCopy' import IconButton from '@mui/material/IconButton' @@ -13,6 +14,7 @@ interface MqCopyProps { const MqEmpty: React.FC = ({ string }) => { const [open, setOpen] = React.useState(false) + const [hasCopied, setHasCopied] = React.useState(false) const handleClose = (event: React.SyntheticEvent | Event, reason?: string) => { if (reason === 'clickaway') { 
return @@ -28,12 +30,16 @@ const MqEmpty: React.FC = ({ string }) => { event.stopPropagation() navigator.clipboard.writeText(string) setOpen(true) + setHasCopied(true) + setTimeout(() => { + setHasCopied(false) + }, 3000) }} aria-label='copy' size={'small'} - color={'primary'} + color={'secondary'} > - + {hasCopied ? : } void + onRefresh?: () => void } -export default function SplitButton({ options, onClick }: Props) { +export default function SplitButton({ options, onClick, onRefresh }: Props) { const [open, setOpen] = React.useState(false) const anchorRef = React.useRef(null) const [selectedIndex, setSelectedIndex] = React.useState(0) @@ -66,6 +69,11 @@ export default function SplitButton({ options, onClick }: Props) { > + + + = (props) => { const { datasets, dataset, + isDatasetLoading, + display, fetchDataset, resetDataset, resetDatasetVersions, - fetchInitialDatasetVersions, - initVersions, - initVersionsLoading, + deleteDataset, + dialogToggle, lineageDataset, tabIndex, setTabIndex, @@ -101,9 +99,8 @@ const DatasetDetailPage: FunctionComponent = (props) => { // might need to map first version to its own state useEffect(() => { - fetchInitialDatasetVersions(lineageDataset.namespace, lineageDataset.name) fetchDataset(lineageDataset.namespace, lineageDataset.name) - }, [lineageDataset.name, showTags]) + }, [lineageDataset.name]) // if the dataset is deleted then redirect to datasets end point useEffect(() => { @@ -116,7 +113,7 @@ const DatasetDetailPage: FunctionComponent = (props) => { setTabIndex(newValue) } - if (initVersionsLoading && initVersions.length === 0) { + if (!dataset || isDatasetLoading) { return ( @@ -124,15 +121,9 @@ const DatasetDetailPage: FunctionComponent = (props) => { ) } - if (initVersions.length === 0) { - return null - } - - const firstVersion = initVersions[0] - const { name, tags, description } = firstVersion - const facetsStatus = datasetFacetsStatus(firstVersion.facets) - - const assertions = 
datasetFacetsQualityAssertions(firstVersion.facets) + const { name, tags, description } = dataset + const facetsStatus = datasetFacetsStatus(dataset.facets) + const assertions = datasetFacetsQualityAssertions(dataset.facets) return ( @@ -186,21 +177,21 @@ const DatasetDetailPage: FunctionComponent = (props) => { } label={'Updated at'.toUpperCase()} - value={formatUpdatedAt(firstVersion.createdAt)} + value={formatUpdatedAt(dataset.createdAt)} /> } label={'Dataset Type'.toUpperCase()} - value={{firstVersion.type}} + value={{dataset.type}} /> } label={'Fields'.toUpperCase()} - value={`${firstVersion.fields.length} columns`} + value={`${dataset.fields.length} columns`} /> @@ -284,7 +275,7 @@ const DatasetDetailPage: FunctionComponent = (props) => { checked={showTags} onChange={() => setShowTags(!showTags)} inputProps={{ 'aria-label': 'toggle show tags' }} - disabled={initVersionsLoading} + disabled={isDatasetLoading} /> } label={i18next.t('datasets.show_field_tags')} @@ -295,9 +286,8 @@ const DatasetDetailPage: FunctionComponent = (props) => { {tabIndex === 0 && ( @@ -310,16 +300,14 @@ const DatasetDetailPage: FunctionComponent = (props) => { const mapStateToProps = (state: IState) => ({ datasets: state.datasets, dataset: state.dataset.result, + isDatasetLoading: state.dataset.isLoading, display: state.display, - initVersions: state.datasetVersions.initDsVersion.versions, - initVersionsLoading: state.datasetVersions.isInitDsVerLoading, tabIndex: state.lineage.tabIndex, }) const mapDispatchToProps = (dispatch: Redux.Dispatch) => bindActionCreators( { - fetchInitialDatasetVersions: fetchInitialDatasetVersions, fetchDataset: fetchDataset, resetDatasetVersions: resetDatasetVersions, resetDataset: resetDataset, diff --git a/web/src/components/datasets/DatasetInfo.tsx b/web/src/components/datasets/DatasetInfo.tsx index 6511af7ac1..1ddf50d893 100644 --- a/web/src/components/datasets/DatasetInfo.tsx +++ b/web/src/components/datasets/DatasetInfo.tsx @@ -1,33 +1,19 @@ // 
Copyright 2018-2024 contributors to the Marquez project // SPDX-License-Identifier: Apache-2.0 -import * as Redux from 'redux' import { Box, Chip, Table, TableBody, TableCell, TableHead, TableRow } from '@mui/material' -import { Dataset, Field, Run } from '../../types/api' -import { IState } from '../../store/reducers' +import { Dataset, Field } from '../../types/api' import { Link } from 'react-router-dom' -import { connect, useSelector } from 'react-redux' import { encodeQueryString } from '../../routes/column-level/ColumnLineageColumnNode' -import { fetchJobFacets, resetFacets } from '../../store/actionCreators' import DatasetTags from './DatasetTags' import IconButton from '@mui/material/IconButton' import MQTooltip from '../core/tooltip/MQTooltip' import MqEmpty from '../core/empty/MqEmpty' import MqJsonView from '../core/json-view/MqJsonView' import MqText from '../core/text/MqText' -import React, { FunctionComponent, useEffect } from 'react' +import React, { FunctionComponent } from 'react' import SplitscreenIcon from '@mui/icons-material/Splitscreen' -export interface DispatchProps { - fetchJobFacets: typeof fetchJobFacets - resetFacets: typeof resetFacets -} - -interface JobFacets { - [key: string]: object -} - export interface JobFacetsProps { - jobFacets: JobFacets isCurrentVersion?: boolean dataset: Dataset } @@ -35,31 +21,12 @@ export interface JobFacetsProps { type DatasetInfoProps = { datasetFields: Field[] facets?: object - run?: Run showTags?: boolean -} & JobFacetsProps & - DispatchProps +} & JobFacetsProps const DatasetInfo: FunctionComponent = (props) => { - const { datasetFields, facets, run, dataset, fetchJobFacets, resetFacets, showTags } = props + const { datasetFields, facets, dataset, showTags } = props const i18next = require('i18next') - const dsNamespace = useSelector( - (state: IState) => state.datasetVersions.initDsVersion.versions[0].namespace - ) - const dsName = useSelector( - (state: IState) => 
state.datasetVersions.initDsVersion.versions[0].name - ) - - useEffect(() => { - run && fetchJobFacets(run.id) - }, [run]) - - useEffect( - () => () => { - resetFacets() - }, - [] - ) return ( @@ -159,8 +126,8 @@ const DatasetInfo: FunctionComponent = (props) => { {showTags && ( @@ -186,17 +153,4 @@ const DatasetInfo: FunctionComponent = (props) => { ) } -const mapStateToProps = (state: IState) => ({ - jobFacets: state.facets.result, -}) - -const mapDispatchToProps = (dispatch: Redux.Dispatch) => - Redux.bindActionCreators( - { - fetchJobFacets: fetchJobFacets, - resetFacets: resetFacets, - }, - dispatch - ) - -export default connect(mapStateToProps, mapDispatchToProps)(DatasetInfo) +export default DatasetInfo diff --git a/web/src/components/datasets/DatasetVersions.tsx b/web/src/components/datasets/DatasetVersions.tsx index a158f013a2..046e1076e3 100644 --- a/web/src/components/datasets/DatasetVersions.tsx +++ b/web/src/components/datasets/DatasetVersions.tsx @@ -23,6 +23,7 @@ import { formatUpdatedAt } from '../../helpers' import { useTheme } from '@emotion/react' import DatasetInfo from './DatasetInfo' import IconButton from '@mui/material/IconButton' +import MQTooltip from '../core/tooltip/MQTooltip' import MqCopy from '../core/copy/MqCopy' import MqPaging from '../paging/MqPaging' import MqText from '../core/text/MqText' @@ -92,12 +93,7 @@ const DatasetVersions: FunctionComponent = - + ) } @@ -149,7 +145,9 @@ const DatasetVersions: FunctionComponent = > - {version.version.substring(0, 8)}... + + {version.version.substring(0, 8)}... + @@ -159,7 +157,7 @@ const DatasetVersions: FunctionComponent = {version.createdByRun ? ( <> - {version.createdByRun.id.substring(0, 8)}... + {version.createdByRun.id.substring(0, 8)}... 
) : ( diff --git a/web/src/components/jobs/Runs.tsx b/web/src/components/jobs/Runs.tsx index 07ecffa2a5..847e8267cf 100644 --- a/web/src/components/jobs/Runs.tsx +++ b/web/src/components/jobs/Runs.tsx @@ -24,6 +24,7 @@ import { formatUpdatedAt } from '../../helpers' import { runStateColor } from '../../helpers/nodes' import { stopWatchDuration } from '../../helpers/time' import { useTheme } from '@emotion/react' +import MQTooltip from '../core/tooltip/MQTooltip' import MqCode from '../core/code/MqCode' import MqCopy from '../core/copy/MqCopy' import MqEmpty from '../core/empty/MqEmpty' @@ -156,7 +157,11 @@ const Runs: FunctionComponent = (props) => { > - {run.id.substring(0, 8)}... + + + {run.id.substring(0, 8)}... + + diff --git a/web/src/helpers/text.ts b/web/src/helpers/text.ts index d6e46a2823..d6a885c993 100644 --- a/web/src/helpers/text.ts +++ b/web/src/helpers/text.ts @@ -5,6 +5,13 @@ export const truncateText = (text: string, maxLength: number) => { return text } +export const truncateTextFront = (text: string, maxLength: number) => { + if (text.length > maxLength) { + return `...${text.substring(text.length - maxLength)}` + } + return text +} + export const pluralize = (count: number, singular: string, plural: string) => { const noun = count === 1 ? 
singular : plural return `${count} ${noun}` diff --git a/web/src/i18n/config.ts b/web/src/i18n/config.ts index 23699082a9..493bd7b932 100644 --- a/web/src/i18n/config.ts +++ b/web/src/i18n/config.ts @@ -151,7 +151,7 @@ i18next empty_body: 'Try changing dates or consulting our documentation to add events.', }, events_columns: { - id: 'ID', + id: 'RUN_ID', state: 'STATE', name: 'NAME', namespace: 'NAMESPACE', diff --git a/web/src/routes/dashboard/Dashboard.tsx b/web/src/routes/dashboard/Dashboard.tsx index 6b7816d388..8daff43aff 100644 --- a/web/src/routes/dashboard/Dashboard.tsx +++ b/web/src/routes/dashboard/Dashboard.tsx @@ -150,6 +150,15 @@ const Dashboard: React.FC = ({ { failed: 0, started: 0, completed: 0, aborted: 0 } ) + const refresh = () => { + const currentSearchParams = searchParams.get('timeframe') + fetchJobs(null, JOB_RUN_LIMIT, 0) + fetchLineageMetrics(currentSearchParams === 'week' ? 'week' : 'day') + fetchJobMetrics(currentSearchParams === 'week' ? 'week' : 'day') + fetchDatasetMetrics(currentSearchParams === 'week' ? 'week' : 'day') + fetchSourceMetrics(currentSearchParams === 'week' ? 'week' : 'day') + } + const { failed, started, completed, aborted } = metrics return ( @@ -195,6 +204,7 @@ const Dashboard: React.FC = ({ REFRESH refresh()} onClick={(option) => { setIntervalKey(option as RefreshInterval) }} @@ -351,15 +361,6 @@ const Dashboard: React.FC = ({ 'Try changing namespaces, run state, or consulting our documentation to add jobs.' 
} - )} diff --git a/web/src/routes/dashboard/JobRunItem.tsx b/web/src/routes/dashboard/JobRunItem.tsx index 43e74308eb..8533305aa3 100644 --- a/web/src/routes/dashboard/JobRunItem.tsx +++ b/web/src/routes/dashboard/JobRunItem.tsx @@ -45,9 +45,13 @@ const JobRunItem: React.FC = ({ job }) => { > - - {truncateText(job.name, 40)} - + + + + {truncateText(job.name, 75)} + + + {job.tags.slice(0, 3).map((tag, index) => ( ))} diff --git a/web/src/routes/events/Events.tsx b/web/src/routes/events/Events.tsx index 34d0dd8de1..f11e6247ce 100644 --- a/web/src/routes/events/Events.tsx +++ b/web/src/routes/events/Events.tsx @@ -66,7 +66,7 @@ type EventsProps = StateProps & DispatchProps const EVENTS_COLUMNS = ['ID', 'STATE', 'NAME', 'NAMESPACE', 'TIME'] -const PAGE_SIZE = 20 +const PAGE_SIZE = 50 const EVENTS_HEADER_HEIGHT = 64 const Events: React.FC = ({ @@ -296,7 +296,7 @@ const Events: React.FC = ({ > - {event.run.runId.substring(0, 8)}... + {event.run.runId} @@ -306,9 +306,15 @@ const Events: React.FC = ({ label={event.eventType} /> - {truncateText(event.job.name, 40)} - {truncateText(event.job.namespace, 40)} + + {truncateText(event.job.name, 40)} + + + + + {truncateText(event.job.namespace, 40)} + {formatUpdatedAt(event.eventTime)} diff --git a/web/src/routes/table-level/TableLineageDatasetNode.tsx b/web/src/routes/table-level/TableLineageDatasetNode.tsx index 30604f2d8d..6e02f15e0c 100644 --- a/web/src/routes/table-level/TableLineageDatasetNode.tsx +++ b/web/src/routes/table-level/TableLineageDatasetNode.tsx @@ -15,7 +15,7 @@ import { datasetFacetsQualityAssertions, datasetFacetsStatus } from '../../helpe import { faDatabase } from '@fortawesome/free-solid-svg-icons/faDatabase' import { fetchDataset, resetDataset } from '../../store/actionCreators' import { formatUpdatedAt } from '../../helpers' -import { truncateText } from '../../helpers/text' +import { truncateText, truncateTextFront } from '../../helpers/text' import { useNavigate, useParams, useSearchParams } from 
'react-router-dom' import Box from '@mui/system/Box' import IconButton from '@mui/material/IconButton' @@ -72,7 +72,7 @@ const TableLineageDatasetNode = ({ Namespace: - {truncateText(lineageDataset.namespace, 40)} + {truncateTextFront(lineageDataset.namespace, 40)} @@ -80,7 +80,7 @@ const TableLineageDatasetNode = ({ Name: - {truncateText(lineageDataset.name, 40)} + {truncateTextFront(lineageDataset.name, 40)} {lineageDataset.description && ( diff --git a/web/src/routes/table-level/TableLineageJobNode.tsx b/web/src/routes/table-level/TableLineageJobNode.tsx index c2e3734fa4..7e0f6e0ae2 100644 --- a/web/src/routes/table-level/TableLineageJobNode.tsx +++ b/web/src/routes/table-level/TableLineageJobNode.tsx @@ -10,7 +10,7 @@ import { faCog } from '@fortawesome/free-solid-svg-icons/faCog' import { formatUpdatedAt } from '../../helpers' import { runStateColor } from '../../helpers/nodes' import { theme } from '../../helpers/theme' -import { truncateText } from '../../helpers/text' +import { truncateText, truncateTextFront } from '../../helpers/text' import { useNavigate, useParams } from 'react-router-dom' import Box from '@mui/system/Box' import MQTooltip from '../../components/core/tooltip/MQTooltip' @@ -49,7 +49,7 @@ const TableLineageJobNode = ({ node }: TableLineageJobNodeProps & StateProps) => Namespace: - {truncateText(job.namespace, 40)} + {truncateTextFront(job.namespace, 40)} @@ -57,7 +57,7 @@ const TableLineageJobNode = ({ node }: TableLineageJobNodeProps & StateProps) => Name: - {truncateText(job.name, 40)} + {truncateTextFront(job.name, 40)} {job.description && (