-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #615 from umccr/feat/filemanager-partitions
feat: filemanager partitions
- Loading branch information
Showing
27 changed files
with
832 additions
and
352 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
31 changes: 31 additions & 0 deletions
31
lib/workload/stateless/stacks/filemanager/database/migrations/0003_s3_current_state.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
-- Adds the `is_current_state` column to `s3_object`, which separates records that describe
-- the current state of an object from historical records.

begin;

-- Start with a default of false so that every existing row is treated as historical until
-- the backfill below decides otherwise.
alter table s3_object
    add column is_current_state boolean not null default false;

-- Backfill existing data: pick the latest record for each object and flag it as current
-- when it represents a live (created, non delete marker) object.
with latest_event as (
    -- One row per (bucket, key, version_id): the row that sorts first by descending sequencer.
    -- NOTE(review): in PostgreSQL `desc` sorts nulls first; confirm how a null `sequencer`
    -- should rank if the column is nullable.
    select distinct on (bucket, key, version_id)
        s3_object_id,
        event_type,
        is_delete_marker
    from s3_object
    order by bucket, key, version_id, sequencer desc
),
current_object as (
    -- Only a `Created` event that is not a delete marker counts as current state.
    select s3_object_id
    from latest_event
    where event_type = 'Created' and is_delete_marker = false
)
update s3_object
set is_current_state = true
from current_object
where s3_object.s3_object_id = current_object.s3_object_id;

-- From here on, new rows default to true to match the new logic using `is_current_state`.
alter table s3_object
    alter column is_current_state set default true;

-- Create indexes for now, although partitioning will be required later.
create index is_current_state_index on s3_object (is_current_state);
-- This helps the query which resets the current state when ingesting objects.
create index reset_current_state_index
    on s3_object (bucket, key, version_id, sequencer, is_current_state);

commit;
59 changes: 59 additions & 0 deletions
59
lib/workload/stateless/stacks/filemanager/database/queries/api/reset_current_state.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
-- Resets `is_current_state` for a set of objects identified by `bucket`, `key`, `version_id`
-- and `sequencer`. For each input row, the matching record with the highest sequencer becomes
-- the current state only if it is a `Created` event that is not a delete marker; every other
-- matching record is set to non-current (historical).
--
-- Parameters (parallel arrays, unnested together into one row per position):
--   $1: text[] - bucket
--   $2: text[] - key
--   $3: text[] - version_id
--   $4: text[] - sequencer
--
-- Returns the `s3_object_id` and the resulting `is_current_state` of every updated row.

-- Unnest input.
with input as (
    select
        bucket,
        key,
        version_id,
        sequencer
    from unnest(
        $1::text[],
        $2::text[],
        $3::text[],
        $4::text[]
    ) as input (
        bucket,
        key,
        version_id,
        sequencer
    )
),
-- Select objects to update, together with the state they should be updated to.
to_update as (
    select
        s3_object.s3_object_id,
        s3_object.updated_state
    from input
    cross join lateral (
        select
            s3_object.s3_object_id,
            case
                -- Ordered by descending sequencer, the first row represents the most
                -- up-to-date state. It is current only for `Created` events that are not
                -- delete markers, which is the same rule the `0003_s3_current_state`
                -- migration uses to backfill existing rows. `is false` (rather than
                -- `= false`) avoids introducing a null into the result.
                when row_number() over (order by s3_object.sequencer desc) = 1 then
                    s3_object.event_type = 'Created' and s3_object.is_delete_marker is false
                -- All other rows are now historical data.
                else
                    false
            end as updated_state
        from s3_object
        where
            -- Only rows for the same object as the input row are considered.
            input.bucket = s3_object.bucket and
            input.key = s3_object.key and
            input.version_id = s3_object.version_id and
            -- This is an optimization which avoids touching every record of the object,
            -- and is intended to keep the ingestion cost small given suitable indexes.
            (
                -- Only need to update current objects
                s3_object.is_current_state = true or
                -- Or objects with a sequencer at or after the one being processed.
                -- This is required in case an out-of-order event is encountered, and it
                -- always includes the object being processed, which the row logic
                -- above relies on.
                s3_object.sequencer >= input.sequencer
            )
    ) as s3_object
)
update s3_object
set is_current_state = to_update.updated_state
from to_update
where s3_object.s3_object_id = to_update.s3_object_id
returning s3_object.s3_object_id, s3_object.is_current_state;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.