Skip to content

Commit

Permalink
bundle: convert head_commit_row and head_commit_field to mat-views
Browse files Browse the repository at this point in the history
Big speedups.  Working around what sure seems to be a PostgreSQL bug #285.  If
we keep this approach, would be nice to add write-blocking triggers on the
internal rowset* tables, as they should never be modified directly, #286.
  • Loading branch information
erichanson committed Nov 29, 2023
1 parent 92efb0e commit 6ddc9a1
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 25 deletions.
50 changes: 26 additions & 24 deletions extensions/bundle/000-data_model.sql
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ create table bundle (
unique(name)
);



-- TODO: Add triggers on these tables that block modification. They must never
-- be modified directly, as materialized views rely on them and must be kept in
-- sync.

create table rowset (
id uuid not null default public.uuid_generate_v4() primary key
);
Expand Down Expand Up @@ -138,37 +144,28 @@ create table merge_conflict (
------------------------------------------------------------------------------

-- head_commit_row: show the rows head commit
-- TODO: make these materialized, update on commit?
create view head_commit_row as
select bundle.id as bundle_id, c.id as commit_id, rr.row_id from bundle.bundle bundle
join bundle.commit c on bundle.head_commit_id=c.id
create materialized view head_commit_row as
select b.id as bundle_id, c.id as commit_id, rr.id as rowset_row_id, rr.row_id
from bundle.bundle b
join bundle.commit c on b.head_commit_id=c.id
join bundle.rowset r on r.id = c.rowset_id
join bundle.rowset_row rr on rr.rowset_id = r.id;


-- head_commit_row: show the fields in each head commit
-- TODO: select hcr.*, rrf.field_id from head_commit_row join rowset_row_field rrf on ...
-- TODO: make these materialized, update on commit?
create view head_commit_field as
select bundle.id as bundle_id, rr.row_id, f.field_id, f.value_hash from bundle.bundle bundle
join bundle.commit c on bundle.head_commit_id=c.id
join bundle.rowset r on r.id = c.rowset_id
join bundle.rowset_row rr on rr.rowset_id = r.id
join bundle.rowset_row_field f on f.rowset_row_id = rr.id;
create materialized view head_commit_field as
select hcr.*, rrf.field_id, rrf.value_hash
from bundle.head_commit_row hcr
join bundle.rowset_row_field rrf on rrf.rowset_row_id = hcr.rowset_row_id;


-- head_commit_row_with_exists: rows in the head commit, along with whether or
-- not that row actually exists in the database
-- TODO: select hcf.*, meta.row_exists(rr.row_id) as exists from head_commit_field hcf ...
-- can't be materialized because of call to row_exists()
-- TODO: can we optimize this query by calling something like meta.rows_exist(row_id[])?
create view head_commit_row_with_exists as
select bundle.id as bundle_id, c.id as commit_id, rr.row_id, meta.row_exists(rr.row_id) as exists
from bundle.bundle bundle
join bundle.commit c on bundle.head_commit_id=c.id
join bundle.rowset r on r.id = c.rowset_id
join bundle.rowset_row rr on rr.rowset_id = r.id;
-- order by meta.row_exists(rr.row_id), rr.row_id;
select hcf.*, meta.row_exists(hcf.row_id) as exists
from head_commit_field hcf;



Expand Down Expand Up @@ -296,6 +293,7 @@ create or replace view bundle.offstage_field_changed as
-- they have changed
and b.value != f.new_value;

/*
create or replace function bundle.offstage_field_changed(_bundle_id uuid) returns setof bundle.offstage_field_changed as $$
-- get literal_value (the expensive part) in a CTE
with f as (
Expand All @@ -317,13 +315,15 @@ create or replace view bundle.offstage_field_changed as
-- if the change is staged, skip it
left join bundle.stage_field_changed sfc on sfc.bundle_id = _bundle_id and f.field_id = sfc.field_id
where
/* sfc.bundle_id = _bundle_id
and */sfc.field_id is null
sfc.bundle_id = _bundle_id
and sfc.field_id is null
-- they have changed
and b.value != f.new_value
$$ language sql;
*/

create or replace function offstage_field_changed(_bundle_id uuid) returns void /* setof meta.row_id */ as $$
/*
create or replace function offstage_field_changed(_bundle_id uuid) returns void / * setof meta.row_id * / as $$
declare
rel record;
stmt text;
Expand Down Expand Up @@ -362,6 +362,8 @@ begin
end
$$ language plpgsql;
*/


/*
strategy for optimizing this:
Expand Down Expand Up @@ -674,8 +676,8 @@ from (
join bundle b on hcr.bundle_id=b.id
full outer join bundle.stage_row sr on hcr.row_id=sr.row_id
left join stage_field_changed sfc on (sfc.field_id)::meta.row_id=hcr.row_id
-- left join offstage_field_changed ofc on ofc.bundle_id = b.id and (ofc.field_id)::meta.row_id=hcr.row_id
left join offstage_field_changed(b.id) ofc on (ofc.field_id)::meta.row_id=hcr.row_id
left join offstage_field_changed ofc on ofc.bundle_id = b.id and (ofc.field_id)::meta.row_id=hcr.row_id
-- left join offstage_field_changed(b.id) ofc on (ofc.field_id)::meta.row_id=hcr.row_id
-- where b.checkout_commit_id is not null -- TODO I added this for a reason but now I can't remember why and it is breaking stuff
group by hcr.bundle_id, hcr.commit_id, hcr.row_id, sr.bundle_id, sr.row_id, (sfc.field_id)::meta.row_id, (ofc.field_id)::meta.row_id

Expand Down
12 changes: 12 additions & 0 deletions extensions/bundle/001-functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ create function bundle.bundle_copy(_bundle_id uuid, new_name text) returns uuid
insert into bundle.rowset select * from bundle.rowset r join commit c on c.rowset_id=r.id where c.bundle_id = _bundle_id;
insert into bundle.commit select * from bundle.commit where bundle_id = _bundle_id;
refresh materialized view bundle.head_commit_row;
refresh materialized view bundle.head_commit_field;
$$ language sql;
*/

Expand Down Expand Up @@ -188,6 +190,10 @@ create or replace function commit_delete(in _commit_id uuid) returns void as $$
-- TODO: delete commits in order?
delete from bundle.rowset r where r.id in (select c.rowset_id from bundle.commit c where c.id = _commit_id);
delete from bundle.commit c where c.id = _commit_id;

refresh materialized view bundle.head_commit_row;
refresh materialized view bundle.head_commit_field;

$$ language sql;


Expand Down Expand Up @@ -286,6 +292,12 @@ create or replace function commit (bundle_name text, message text) returns void
delete from bundle.stage_row_deleted where bundle_id=_bundle_id;
delete from bundle.stage_field_changed where bundle_id=_bundle_id;

-- update head_commit_* materialized views
execute format ('refresh materialized view bundle.head_commit_row');
execute format ('refresh materialized view bundle.head_commit_field');

return;

end

$$ language plpgsql;
Expand Down
9 changes: 8 additions & 1 deletion extensions/bundle/002-utils.sql
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ $$;
-- import
-- import a bundle from a csv export (created by above).

create or replace function bundle.bundle_import_csv(directory text)
create or replace function bundle.bundle_import_csv(directory text, refresh_materialized_views boolean default true)
returns uuid
language plpgsql
as $$
Expand Down Expand Up @@ -98,9 +98,16 @@ begin
execute format('insert into bundle.bundle_csv(directory, bundle_id) select %L, id from origin_temp', directory);

-- make sure that checkout_commit_is null
-- FIXME: when you import multiple bundles in a transaction, this origin_temp table name collides...
select name from origin_temp into bundle_name;
update bundle.bundle set checkout_commit_id = NULL where name = bundle_name;

-- refresh materialized views
if refresh_materialized_views then
execute format('refresh materialized view bundle.head_commit_row');
execute format('refresh materialized view bundle.head_commit_field');
end if;

-- return bundle.id
select id from origin_temp into bundle_id;
return bundle_id;
Expand Down

0 comments on commit 6ddc9a1

Please sign in to comment.