-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #88 from umccr/feat/event-order-macros
feat: de-duplicate events at the database level
- Loading branch information
Showing
15 changed files
with
641 additions
and
208 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
FROM postgres:16 | ||
FROM postgres:15 | ||
|
||
COPY migrations/ /docker-entrypoint-initdb.d/ |
19 changes: 3 additions & 16 deletions
19
lib/workload/stateful/filemanager/database/migrations/0001_add_object_table.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,9 @@ | ||
-- A general object table common across all storage types. | ||
create table object ( | ||
-- The unique id for this object. | ||
object_id uuid not null default gen_random_uuid() primary key, | ||
-- The bucket location. | ||
bucket varchar(255) not null, | ||
-- The name of the object. | ||
key varchar(1024) not null, | ||
object_id uuid not null primary key default gen_random_uuid(), | ||
-- The size of the object. | ||
size int default null, | ||
size integer default null, | ||
-- A unique identifier for the object, if it is present. | ||
hash varchar(255) default null, | ||
-- When this object was created. | ||
created_date timestamptz not null default now(), | ||
-- When this object was last modified. | ||
last_modified_date timestamptz not null default now(), | ||
-- When this object was deleted, a null value means that the object has not yet been deleted. | ||
deleted_date timestamptz default null, | ||
-- The date of the object and its id combined. | ||
portal_run_id varchar(255) not null | ||
-- provenance - history of all objects and how they move? | ||
checksum text default null | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
27 changes: 27 additions & 0 deletions
27
...workload/stateful/filemanager/database/queries/ingester/aws/insert_s3_created_objects.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
-- Bulk insert of s3 objects: one row is produced per element of the array parameters. | ||
-- A row whose insert violates the created_sequencer_unique constraint is not inserted; | ||
-- the existing row's number_duplicate_events is incremented instead. | ||
-- Returns object_id and number_duplicate_events for each inserted or updated row. | ||
-- NOTE(review): PostgreSQL rejects an ON CONFLICT DO UPDATE that would touch the same | ||
-- row twice in one statement, so a batch must not contain two entries that conflict on | ||
-- this constraint - confirm that the caller de-duplicates within a batch. | ||
insert into s3_object ( | ||
s3_object_id, | ||
object_id, | ||
bucket, | ||
key, | ||
created_date, | ||
last_modified_date, | ||
e_tag, | ||
storage_class, | ||
version_id, | ||
created_sequencer | ||
) | ||
-- Parameters $1..$10 are arrays, bound in the same order as the column list above. | ||
-- NOTE(review): assumes all arrays have the same length - confirm against the caller. | ||
values ( | ||
unnest($1::uuid[]), | ||
unnest($2::uuid[]), | ||
unnest($3::text[]), | ||
unnest($4::text[]), | ||
unnest($5::timestamptz[]), | ||
unnest($6::timestamptz[]), | ||
unnest($7::text[]), | ||
unnest($8::storage_class[]), | ||
unnest($9::text[]), | ||
unnest($10::text[]) | ||
-- On a duplicate, count the event on the existing row rather than failing the batch. | ||
) on conflict on constraint created_sequencer_unique do update | ||
set number_duplicate_events = s3_object.number_duplicate_events + 1 | ||
returning object_id, number_duplicate_events; |
31 changes: 31 additions & 0 deletions
31
...workload/stateful/filemanager/database/queries/ingester/aws/insert_s3_deleted_objects.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
-- Bulk insert of s3 objects for deleted events: one row is produced per element of the | ||
-- array parameters, with deleted_date and deleted_sequencer populated. | ||
-- A row whose insert violates the deleted_sequencer_unique constraint is not inserted; | ||
-- the existing row's number_duplicate_events is incremented instead. | ||
-- Returns object_id and number_duplicate_events for each inserted or updated row. | ||
-- NOTE(review): PostgreSQL rejects an ON CONFLICT DO UPDATE that would touch the same | ||
-- row twice in one statement, so a batch must not contain two entries that conflict on | ||
-- this constraint - confirm that the caller de-duplicates within a batch. | ||
insert into s3_object ( | ||
s3_object_id, | ||
object_id, | ||
bucket, | ||
key, | ||
-- We default the created date to a value even if this is a deleted event, | ||
-- as we are expecting this to get updated. | ||
created_date, | ||
deleted_date, | ||
last_modified_date, | ||
e_tag, | ||
storage_class, | ||
version_id, | ||
deleted_sequencer | ||
) | ||
-- Parameters $1..$11 are arrays, bound in the same order as the column list above. | ||
-- NOTE(review): assumes all arrays have the same length - confirm against the caller. | ||
values ( | ||
unnest($1::uuid[]), | ||
unnest($2::uuid[]), | ||
unnest($3::text[]), | ||
unnest($4::text[]), | ||
unnest($5::timestamptz[]), | ||
unnest($6::timestamptz[]), | ||
unnest($7::timestamptz[]), | ||
unnest($8::text[]), | ||
unnest($9::storage_class[]), | ||
unnest($10::text[]), | ||
unnest($11::text[]) | ||
-- On a duplicate, count the event on the existing row rather than failing the batch. | ||
) on conflict on constraint deleted_sequencer_unique do update | ||
set number_duplicate_events = s3_object.number_duplicate_events + 1 | ||
returning object_id, number_duplicate_events; |
6 changes: 0 additions & 6 deletions
6
lib/workload/stateful/filemanager/database/queries/ingester/aws/insert_s3_objects.sql
This file was deleted.
Oops, something went wrong.
6 changes: 3 additions & 3 deletions
6
...abase/queries/ingester/update_deleted.sql → ...e/queries/ingester/aws/update_deleted.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,9 @@ | ||
-- Update the deleted time of s3 objects. | ||
update object | ||
-- Update the deleted time of objects. | ||
update s3_object | ||
set deleted_date = data.deleted_time | ||
from (select | ||
unnest($1::varchar[]) as key, | ||
unnest($2::varchar[]) as bucket, | ||
unnest($3::timestamptz[]) as deleted_time | ||
) as data | ||
where object.key = data.key and object.bucket = data.bucket; | ||
where s3_object.key = data.key and s3_object.bucket = data.bucket; |
13 changes: 4 additions & 9 deletions
13
lib/workload/stateful/filemanager/database/queries/ingester/insert_objects.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,7 @@ | ||
-- Bulk insert of objects | ||
insert into object (object_id, bucket, key, size, hash, created_date, last_modified_date, portal_run_id) | ||
insert into object (object_id, size, checksum) | ||
values ( | ||
unnest($1::uuid[]), | ||
unnest($2::varchar[]), | ||
unnest($3::varchar[]), | ||
unnest($4::int[]), | ||
unnest($5::varchar[]), | ||
unnest($6::timestamptz[]), | ||
unnest($7::timestamptz[]), | ||
unnest($8::varchar[]) | ||
); | ||
unnest($2::int[]), | ||
unnest($3::text[]) | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.