Skip to content

Commit

Permalink
Merge pull request #136 from umccr/fix/filemanage-ingest-constraint
Browse files Browse the repository at this point in the history
fix(filemanager): ingest constraints
  • Loading branch information
mmalenic authored Mar 8, 2024
2 parents b0e5509 + 607787e commit 6d28e31
Show file tree
Hide file tree
Showing 7 changed files with 923 additions and 238 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ create table s3_object (
bucket text not null,
-- The key of the object.
key text not null,
-- The version id of the object. The string 'null' is used to indicate that there is no version id. This matches
-- the behaviour of AWS, which also returns 'null' strings. See https://docs.aws.amazon.com/AmazonS3/latest/userguide/versioning-workflows.html
version_id text not null default 'null',
-- When this object was created. A null value here means that a deleted event has occurred before a created event.
created_date timestamptz default null,
-- When this object was deleted, a null value means that the object has not yet been deleted.
Expand All @@ -41,8 +44,6 @@ create table s3_object (
e_tag text default null,
-- The S3 storage class of the object.
storage_class storage_class default null,
-- The version id of the object, if present.
version_id text default null,
-- A sequencer value for when the object was created. Used to synchronise out of order and duplicate events.
created_sequencer text default null,
-- A sequencer value for when the object was deleted. Used to synchronise out of order and duplicate events.
Expand All @@ -53,6 +54,6 @@ create table s3_object (
number_duplicate_events integer not null default 0,

-- Each sequencer should be unique together with the bucket, key, and version id, otherwise this is a duplicate event.
constraint created_sequencer_unique unique nulls not distinct (bucket, key, version_id, created_sequencer),
constraint deleted_sequencer_unique unique nulls not distinct (bucket, key, version_id, deleted_sequencer)
constraint created_sequencer_unique unique (bucket, key, version_id, created_sequencer),
constraint deleted_sequencer_unique unique (bucket, key, version_id, deleted_sequencer)
);
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ current_objects as (
join input on
input.bucket = s3_object.bucket and
input.key = s3_object.key and
input.version_id is not distinct from s3_object.version_id
input.version_id = s3_object.version_id
-- Lock this pre-emptively for the update.
for update
),
Expand All @@ -70,12 +70,17 @@ objects_to_update as (
-- If a sequencer already exists this event should be reprocessed because this
-- sequencer could belong to another object.
current_objects.created_sequencer < current_objects.input_created_sequencer
)
) and
-- And there should not be any objects with a created sequencer that is the same as the input created
-- sequencer because this is a duplicate event that would cause a constraint error in the update.
and current_objects.input_created_sequencer not in (
current_objects.input_created_sequencer not in (
select created_sequencer from current_objects where created_sequencer is not null
)
-- Only one event entry should be updated, and that entry must be the one with the
-- minimum deleted sequencer, i.e. the one closest to the created sequencer which
-- is going to be inserted.
order by current_objects.deleted_sequencer asc
limit 1
),
-- Finally, update the required objects.
update as (
Expand Down Expand Up @@ -107,7 +112,7 @@ select
last_modified_date,
e_tag,
storage_class as "storage_class?: StorageClass",
version_id,
version_id as "version_id!",
created_sequencer as sequencer,
number_reordered,
number_duplicate_events,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ current_objects as (
join input on
input.bucket = s3_object.bucket and
input.key = s3_object.key and
input.version_id is not distinct from s3_object.version_id
input.version_id = s3_object.version_id
-- Lock this pre-emptively for the update.
for update
),
Expand All @@ -55,12 +55,17 @@ objects_to_update as (
-- If a sequencer already exists this event should be reprocessed because this
-- sequencer would belong to another object.
current_objects.deleted_sequencer > current_objects.input_deleted_sequencer
)
) and
-- And there should not be any objects with a deleted sequencer that is the same as the input deleted
-- sequencer because this is a duplicate event that would cause a constraint error in the update.
and current_objects.input_deleted_sequencer not in (
current_objects.input_deleted_sequencer not in (
select deleted_sequencer from current_objects where deleted_sequencer is not null
)
-- Only one event entry should be updated, and that entry must be the one with the
-- maximum created sequencer, i.e. the one closest to the deleted sequencer which
-- is going to be inserted.
order by current_objects.created_sequencer desc
limit 1
),
-- Finally, update the required objects.
update as (
Expand All @@ -82,7 +87,7 @@ select
last_modified_date,
e_tag,
storage_class as "storage_class?: StorageClass",
version_id,
version_id as "version_id!",
deleted_sequencer as sequencer,
number_reordered,
number_duplicate_events,
Expand Down
Loading

0 comments on commit 6d28e31

Please sign in to comment.