Skip to content

Commit

Permalink
Runless events - consume job event
Browse files Browse the repository at this point in the history
Signed-off-by: Pawel Leszczynski <leszczynski.pawel@gmail.com>
  • Loading branch information
pawel-big-lebowski committed Oct 31, 2023
1 parent 3aacc39 commit bdcfec7
Show file tree
Hide file tree
Showing 16 changed files with 582 additions and 98 deletions.
2 changes: 1 addition & 1 deletion api/src/main/java/marquez/api/JobResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ public Response getJob(

final Job job =
jobService
.findWithRun(namespaceName.getValue(), jobName.getValue())
.findWithDatasetsAndRun(namespaceName.getValue(), jobName.getValue())
.orElseThrow(() -> new JobNotFoundException(jobName));
return Response.ok(job).build();
}
Expand Down
5 changes: 5 additions & 0 deletions api/src/main/java/marquez/api/OpenLineageResource.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import marquez.service.ServiceFactory;
import marquez.service.models.BaseEvent;
import marquez.service.models.DatasetEvent;
import marquez.service.models.JobEvent;
import marquez.service.models.LineageEvent;
import marquez.service.models.NodeId;

Expand Down Expand Up @@ -73,6 +74,10 @@ public void create(@Valid @NotNull BaseEvent event, @Suspended final AsyncRespon
openLineageService
.createAsync((DatasetEvent) event)
.whenComplete((result, err) -> onComplete(result, err, asyncResponse));
} else if (event instanceof JobEvent) {
openLineageService
.createAsync((JobEvent) event)
.whenComplete((result, err) -> onComplete(result, err, asyncResponse));
} else {
log.warn("Unsupported event type {}. Skipping without error", event.getClass().getName());

Expand Down
57 changes: 41 additions & 16 deletions api/src/main/java/marquez/db/JobDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
import marquez.common.models.JobName;
import marquez.common.models.JobType;
import marquez.common.models.NamespaceName;
import marquez.db.JobVersionDao.IoType;
import marquez.db.JobVersionDao.JobDataset;
import marquez.db.JobVersionDao.JobDatasetMapper;
import marquez.db.mappers.JobMapper;
import marquez.db.mappers.JobRowMapper;
import marquez.db.models.JobRow;
Expand All @@ -34,6 +37,7 @@

@RegisterRowMapper(JobRowMapper.class)
@RegisterRowMapper(JobMapper.class)
@RegisterRowMapper(JobDatasetMapper.class)
public interface JobDao extends BaseDao {

@SqlQuery(
Expand All @@ -56,21 +60,14 @@ SELECT EXISTS (

/**
 * Finds a job in {@code namespaceName} whose name (or any of its aliases) matches
 * {@code jobName}, returning the job row from {@code jobs_view} together with the
 * JSON-aggregated facets of its current version.
 *
 * <p>NOTE(review): the SQL text below appears to contain interleaved lines from two
 * revisions of this statement — there are two SELECT projections ({@code j.*, f.facets}
 * and {@code j.*, facets}) and two different facet joins (a run-keyed subquery and the
 * {@code job_versions_facets} CTE). This looks like an artifact of how the diff was
 * captured; confirm the intended query against the repository before relying on it.
 */
@SqlQuery(
"""
SELECT j.*, f.facets
WITH job_versions_facets AS (
SELECT job_version_uuid, JSON_AGG(facet) as facets
FROM job_facets
GROUP BY job_version_uuid
)
SELECT j.*, facets
FROM jobs_view j
LEFT OUTER JOIN job_versions AS jv ON jv.uuid = j.current_version_uuid
LEFT OUTER JOIN (
SELECT run_uuid, JSON_AGG(e.facet) AS facets
FROM (
SELECT jf.run_uuid, jf.facet
FROM job_facets_view AS jf
INNER JOIN job_versions jv2 ON jv2.latest_run_uuid=jf.run_uuid
INNER JOIN jobs_view j2 ON j2.current_version_uuid=jv2.uuid
WHERE j2.name=:jobName AND j2.namespace_name=:namespaceName
ORDER BY lineage_event_time ASC
) e
GROUP BY e.run_uuid
) f ON f.run_uuid=jv.latest_run_uuid
LEFT OUTER JOIN job_versions_facets f ON j.current_version_uuid = f.job_version_uuid
WHERE j.namespace_name=:namespaceName AND (j.name=:jobName OR :jobName = ANY(j.aliases))
""")
Optional<Job> findJobByName(String namespaceName, String jobName);
Expand All @@ -94,12 +91,18 @@ SELECT run_uuid, JSON_AGG(e.facet) AS facets
""")
void deleteByNamespaceName(String namespaceName);

default Optional<Job> findWithRun(String namespaceName, String jobName) {
/**
 * Looks up a job by name and enriches it with run and dataset information.
 *
 * <p>When the job has a latest run, the job's data is populated from that run; otherwise the
 * input/output datasets are resolved from the job's current version via
 * {@code findCurrentInputOutputDatasetsFor}.
 *
 * @param namespaceName namespace the job belongs to
 * @param jobName name of the job to look up
 * @return the enriched job, or {@code Optional.empty()} when no such job exists
 */
default Optional<Job> findWithDatasetsAndRun(String namespaceName, String jobName) {
  final Optional<Job> maybeJob = findJobByName(namespaceName, jobName);
  maybeJob.ifPresent(
      job -> {
        final Optional<Run> latestRun = createRunDao().findByLatestJob(namespaceName, jobName);
        if (latestRun.isPresent()) {
          setJobData(latestRun.get(), job);
        } else {
          setJobData(
              createJobVersionDao().findCurrentInputOutputDatasetsFor(namespaceName, jobName),
              job);
        }
      });
  return maybeJob;
}
Expand Down Expand Up @@ -200,6 +203,28 @@ default List<Job> findAllWithRun(String namespaceName, int limit, int offset) {
.collect(Collectors.toList());
}

/**
 * Sets the input and output dataset ids on {@code j} from the given job-version datasets.
 *
 * <p>Inputs (resp. outputs) are only overwritten when at least one dataset of that IO type is
 * present in {@code datasets}; an empty result leaves the job's existing value untouched.
 *
 * @param datasets datasets associated with the job's current version
 * @param j job to enrich with input/output dataset ids
 */
default void setJobData(List<JobDataset> datasets, Job j) {
  // The previous form wrapped each set in Optional.of(...).filter(!isEmpty).ifPresent(...),
  // using Optional purely as an "if non-empty" control-flow gate; a plain emptiness check
  // keeps the overwrite-only-when-present rule explicit.
  var inputs =
      datasets.stream()
          .filter(d -> d.ioType().equals(IoType.INPUT))
          .map(d -> new DatasetId(NamespaceName.of(d.namespace()), DatasetName.of(d.name())))
          .collect(Collectors.toSet());
  if (!inputs.isEmpty()) {
    j.setInputs(inputs);
  }

  var outputs =
      datasets.stream()
          .filter(d -> d.ioType().equals(IoType.OUTPUT))
          .map(d -> new DatasetId(NamespaceName.of(d.namespace()), DatasetName.of(d.name())))
          .collect(Collectors.toSet());
  if (!outputs.isEmpty()) {
    j.setOutputs(outputs);
  }
}

default void setJobData(Run run, Job j) {
j.setLatestRun(run);
DatasetVersionDao datasetVersionDao = createDatasetVersionDao();
Expand Down
53 changes: 51 additions & 2 deletions api/src/main/java/marquez/db/JobFacetsDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.util.Spliterators;
import java.util.UUID;
import java.util.stream.StreamSupport;
import javax.annotation.Nullable;
import lombok.NonNull;
import marquez.common.Utils;
import marquez.db.mappers.JobFacetsMapper;
Expand Down Expand Up @@ -55,6 +56,32 @@ void insertJobFacet(
String name,
PGobject facet);

/**
 * Inserts a single job facet row associated with a job version directly (no run association),
 * supporting runless job events.
 *
 * @param createdAt time the row is created
 * @param jobUuid unique id of the job the facet belongs to
 * @param jobVersionUuid unique id of the job version the facet is attached to
 * @param lineageEventTime event time of the OpenLineage event that carried the facet
 * @param name facet name (a single top-level JSON field name)
 * @param facet facet payload as a Postgres JSON object
 */
@SqlUpdate(
"""
INSERT INTO job_facets (
created_at,
job_uuid,
job_version_uuid,
lineage_event_time,
name,
facet
) VALUES (
:createdAt,
:jobUuid,
:jobVersionUuid,
:lineageEventTime,
:name,
:facet
)
""")
void insertJobFacet(
Instant createdAt,
UUID jobUuid,
UUID jobVersionUuid,
Instant lineageEventTime,
String name,
PGobject facet);

@SqlQuery(
"""
SELECT
Expand All @@ -72,9 +99,31 @@ void insertJobFacet(
/**
 * Splits {@code jobFacet} into its top-level JSON fields and, within a single transaction,
 * inserts one {@code job_facets} row per field for the given job version.
 *
 * <p>NOTE(review): {@code runUuid} is never referenced in the body below — it is likely a
 * leftover line from the old signature in this captured diff (an adjacent overload takes a
 * run uuid); confirm the intended parameter list against the repository.
 *
 * @param jobUuid unique id of the job the facets belong to
 * @param jobVersionUuid unique id of the job version the facets are attached to
 * @param lineageEventTime event time of the OpenLineage event that carried the facets
 * @param jobFacet facet container whose top-level fields become individual rows
 */
@Transaction
default void insertJobFacetsFor(
@NonNull UUID jobUuid,
@NonNull UUID runUuid,
@NonNull UUID jobVersionUuid,
@NonNull Instant lineageEventTime,
@NonNull LineageEvent.JobFacet jobFacet) {
final Instant now = Instant.now();

// Serialize the facet container to JSON and iterate its top-level field names,
// persisting each field as its own row keyed by the field name.
JsonNode jsonNode = Utils.getMapper().valueToTree(jobFacet);
StreamSupport.stream(
Spliterators.spliteratorUnknownSize(jsonNode.fieldNames(), Spliterator.DISTINCT), false)
.forEach(
fieldName ->
insertJobFacet(
now,
jobUuid,
jobVersionUuid,
lineageEventTime,
fieldName,
FacetUtils.toPgObject(fieldName, jsonNode.get(fieldName))));
}

@Transaction
default void insertJobFacetsFor(
@NonNull UUID jobUuid,
@Nullable UUID runUuid,
@NonNull Instant lineageEventTime,
@NonNull String lineageEventType,
@Nullable String lineageEventType,
@NonNull LineageEvent.JobFacet jobFacet) {
final Instant now = Instant.now();

Expand Down
Loading

0 comments on commit bdcfec7

Please sign in to comment.