diff --git a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_raw_users/01_create_view.jinja2.sql b/lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_raw_users/01_create_view.jinja2.sql new file mode 100644 index 0000000000..881de80e99 --- /dev/null +++ b/lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_raw_users/01_create_view.jinja2.sql @@ -0,0 +1,15 @@ +DROP MATERIALIZED VIEW IF EXISTS report.raw_users CASCADE; + +-- The building blocks of a user. We won't export this. See +-- `*_entities_decorated/*_users` for the version for external consumers. +CREATE MATERIALIZED VIEW report.raw_users AS ( + SELECT + id, + username, + registered_date::date + FROM h.user AS users + WHERE + users.authority = '{{ region.authority }}' + -- users.authority = 'lms.hypothes.is' + ORDER BY registered_date +) WITH NO DATA; diff --git a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_raw_users/02_initial_fill.sql b/lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_raw_users/02_initial_fill.sql new file mode 100644 index 0000000000..ddf1c5c3d2 --- /dev/null +++ b/lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_raw_users/02_initial_fill.sql @@ -0,0 +1,8 @@ +DROP INDEX IF EXISTS report.raw_users_id_idx; + +REFRESH MATERIALIZED VIEW report.raw_users; + +ANALYSE report.raw_users; + +-- A unique index is mandatory for concurrent updates used in the refresh +CREATE UNIQUE INDEX raw_users_id_idx ON report.raw_users (id); diff --git a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_users/01_create_view.jinja2.sql b/lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_users/01_create_view.jinja2.sql deleted file mode 100644 index 98885ce0bb..0000000000 --- a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_users/01_create_view.jinja2.sql +++ /dev/null @@ -1,18 +0,0 @@ -DROP MATERIALIZED VIEW IF EXISTS report.users CASCADE; - --- I'm not sure this is completely required right now, but it makes things --- simple to think about, as it represents our concept of a unique user in --- the reporting space. That happens to basically be an H user at the moment, --- but we are likely to tack on extra details in future, like email address --- etc. -CREATE MATERIALIZED VIEW report.users AS ( - SELECT - id, - username, - registered_date::date - FROM h.user AS users - WHERE - users.authority = '{{ region.authority }}' - -- users.authority = 'lms.hypothes.is' - ORDER BY registered_date -) WITH NO DATA; diff --git a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/02_user_map/01_create_view.sql b/lms/data_tasks/report/create_from_scratch/02_entities/01_users/02_user_map/01_create_view.sql index ecfaf82533..ea1e36678f 100644 --- a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/02_user_map/01_create_view.sql +++ b/lms/data_tasks/report/create_from_scratch/02_entities/01_users/02_user_map/01_create_view.sql @@ -4,11 +4,11 @@ DROP MATERIALIZED VIEW IF EXISTS report.user_map CASCADE; CREATE MATERIALIZED VIEW report.user_map AS ( SELECT lms_users.id AS lms_user_id, - users.id AS user_id, + raw_users.id AS user_id, application_instances.organization_id FROM "user" AS lms_users - JOIN report.users ON - users.username = SUBSTRING(SPLIT_PART(lms_users.h_userid, '@', 1), 6) + JOIN report.raw_users ON + raw_users.username = SUBSTRING(SPLIT_PART(lms_users.h_userid, '@', 1), 6) JOIN application_instances ON lms_users.application_instance_id = application_instances.id ORDER BY lms_user_id, user_id diff --git a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/03_users_sensitive/01_create_view.sql b/lms/data_tasks/report/create_from_scratch/02_entities/01_users/03_users_sensitive/01_create_view.sql deleted file mode 100644 index 70aa2087dc..0000000000 --- a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/03_users_sensitive/01_create_view.sql +++ /dev/null @@ -1,30 +0,0 @@ -DROP MATERIALIZED VIEW IF EXISTS report.users_sensitive CASCADE; - --- Create a separate table for sensitive user information, which should not be --- stored out of region. -CREATE MATERIALIZED VIEW report.users_sensitive AS ( - SELECT DISTINCT - user_map.user_id AS id, - - -- The order here should ensure empty strings and nulls are sorted to - -- the back, so the "first" value should be non-null if possible - FIRST_VALUE(users.email) OVER ( - PARTITION BY user_map.user_id - ORDER BY users.email - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING - ) AS email, - - FIRST_VALUE(users.display_name) OVER ( - PARTITION BY user_map.user_id - ORDER BY users.display_name - RANGE BETWEEN - UNBOUNDED PRECEDING AND - UNBOUNDED FOLLOWING - ) AS display_name - FROM report.user_map - JOIN "user" AS users ON - users.id = user_map.lms_user_id - ORDER BY user_map.user_id -); diff --git a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/03_users_sensitive/02_initial_fill.sql b/lms/data_tasks/report/create_from_scratch/02_entities/01_users/03_users_sensitive/02_initial_fill.sql deleted file mode 100644 index a1443def95..0000000000 --- a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/03_users_sensitive/02_initial_fill.sql +++ /dev/null @@ -1,8 +0,0 @@ -DROP INDEX IF EXISTS report.users_sensitive_id_idx; - -REFRESH MATERIALIZED VIEW report.users_sensitive; - -ANALYSE report.users_sensitive; - --- A unique index is mandatory for concurrent updates used in the refresh -CREATE UNIQUE INDEX users_sensitive_id_idx ON report.users_sensitive (id); diff --git a/lms/data_tasks/report/create_from_scratch/05_entities_decorated/01_users/01_create_view.jinja2.sql b/lms/data_tasks/report/create_from_scratch/05_entities_decorated/01_users/01_create_view.jinja2.sql new file mode 100644 index 0000000000..5cd8e66a98 --- /dev/null +++ b/lms/data_tasks/report/create_from_scratch/05_entities_decorated/01_users/01_create_view.jinja2.sql @@ -0,0 +1,58 @@ +DROP MATERIALIZED VIEW IF EXISTS report.users CASCADE; + +CREATE MATERIALIZED VIEW report.users AS ( + WITH + user_details AS ( + SELECT DISTINCT + user_map.user_id, + + -- The order here should ensure empty strings and nulls are + -- sorted to the back, so the "first" value should be non-null + -- if possible + FIRST_VALUE(lms_user.display_name) OVER ( + PARTITION BY user_map.user_id + ORDER BY lms_user.display_name + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING + ) AS display_name, + + FIRST_VALUE(lms_user.email) OVER ( + PARTITION BY user_map.user_id + ORDER BY lms_user.email + RANGE BETWEEN + UNBOUNDED PRECEDING AND + UNBOUNDED FOLLOWING + ) AS email, + + true AS is_teacher + FROM report.user_map + JOIN "user" AS lms_user ON + lms_user.id = user_map.lms_user_id + WHERE + -- Ensure we are only providing contact details for users who + -- are a teacher in at least one context + user_map.user_id IN ( + SELECT DISTINCT(user_id) + FROM report.organization_roles + WHERE role = 'teacher' + ) + ) + + SELECT + raw_users.id, + user_details.display_name, + user_details.email, + raw_users.username, + CASE + WHEN user_details.is_teacher IS true THEN true ELSE false + END AS is_teacher, + -- Add a column which indicates that a user is a teacher in at least + -- one context. This helps us notice / demonstrate we are only keeping + -- teacher contact info. Keeping student contact info out of region is + -- not permitted. + raw_users.registered_date + FROM report.raw_users + LEFT OUTER JOIN user_details ON + user_details.user_id = raw_users.id +) WITH NO DATA; diff --git a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_users/02_initial_fill.sql b/lms/data_tasks/report/create_from_scratch/05_entities_decorated/01_users/02_initial_fill.sql similarity index 62% rename from lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_users/02_initial_fill.sql rename to lms/data_tasks/report/create_from_scratch/05_entities_decorated/01_users/02_initial_fill.sql index 9951dcd8cf..a0b3fc1508 100644 --- a/lms/data_tasks/report/create_from_scratch/02_entities/01_users/01_users/02_initial_fill.sql +++ b/lms/data_tasks/report/create_from_scratch/05_entities_decorated/01_users/02_initial_fill.sql @@ -1,5 +1,4 @@ DROP INDEX IF EXISTS report.users_id_idx; -DROP INDEX IF EXISTS report.users_registered_date_idx; REFRESH MATERIALIZED VIEW report.users; @@ -7,4 +6,3 @@ ANALYSE report.users; -- A unique index is mandatory for concurrent updates used in the refresh CREATE UNIQUE INDEX users_id_idx ON report.users (id); -CREATE INDEX users_registered_date_idx ON report.users USING BRIN (registered_date); diff --git a/lms/data_tasks/report/create_from_scratch/05_entities_decorated/02_users_sensitive/01_create_view.sql b/lms/data_tasks/report/create_from_scratch/05_entities_decorated/02_users_sensitive/01_create_view.sql new file mode 100644 index 0000000000..657214385b --- /dev/null +++ b/lms/data_tasks/report/create_from_scratch/05_entities_decorated/02_users_sensitive/01_create_view.sql @@ -0,0 +1,8 @@ +DROP VIEW IF EXISTS report.users_sensitive CASCADE; + +-- Create a plain view to act as a shim while we replace `users_sensitive` +CREATE VIEW report.users_sensitive AS ( + SELECT + id, email, display_name + FROM report.users +); diff --git a/lms/data_tasks/report/create_from_scratch/05_grant_permissions/01_grant_schema.jinja2.sql b/lms/data_tasks/report/create_from_scratch/99_grant_permissions/01_grant_schema.jinja2.sql similarity index 100% rename from lms/data_tasks/report/create_from_scratch/05_grant_permissions/01_grant_schema.jinja2.sql rename to lms/data_tasks/report/create_from_scratch/99_grant_permissions/01_grant_schema.jinja2.sql diff --git a/lms/data_tasks/report/refresh/01_entities_refresh.sql b/lms/data_tasks/report/refresh/01_entities_refresh.sql index c5b29c99c8..c5d72fe721 100644 --- a/lms/data_tasks/report/refresh/01_entities_refresh.sql +++ b/lms/data_tasks/report/refresh/01_entities_refresh.sql @@ -1,5 +1,5 @@ -REFRESH MATERIALIZED VIEW CONCURRENTLY report.users; -ANALYSE report.users; +REFRESH MATERIALIZED VIEW CONCURRENTLY report.raw_users; +ANALYSE report.raw_users; REFRESH MATERIALIZED VIEW CONCURRENTLY report.user_map; ANALYSE report.user_map; diff --git a/lms/data_tasks/report/refresh/04_entities_decorated.sql b/lms/data_tasks/report/refresh/04_entities_decorated.sql new file mode 100644 index 0000000000..80d10743b0 --- /dev/null +++ b/lms/data_tasks/report/refresh/04_entities_decorated.sql @@ -0,0 +1,2 @@ +REFRESH MATERIALIZED VIEW CONCURRENTLY report.users; +ANALYSE report.users;