From 7489e5a7e11955dd4687848e3121fd4679f8cb1a Mon Sep 17 00:00:00 2001 From: Marcos Prieto Date: Thu, 29 Aug 2024 10:12:50 +0200 Subject: [PATCH] Fix the LMSCourse backfill from grouping migration The existing LMSCourse has two assumptions, with one unique index for each: - The same course (same authority_provided_id) can belong to multiple applications instances but we'll keep only one row in LMSCourse for it (unique h_authority_provided_id) - authority_provided_id is generated from guid, lti_context_id, then, those two columns should also be unique together. There's however a case when this is not true: - Applications instance with a null GUID launches and creates a course (None, CONTEXT_ID). This generates a hashed authority_provided_id. - The same application instance (ie, same row in ApplicationInstance) starts sending a GUID. We calculate a new (GUID, CONTEXT_ID) hash creating a new course, with a different authority_provided_id. When we join with ApplicationInstance for the backfill we only have the most recent GUID and we end up creating (GUID, CONTEXT_ID) duplicates. The solution is to exclude this type of duplicate, which is safe because this only happens on: - GUID changes from null to not null. We don't allow other value changes for GUID. - We only deduplicate courses when we have a newer version of the course, the older one can no longer be launched, the ApplicationInstance now has a GUID and it will sent along with every launch. --- .../f61cb94edfc8_backfill_lms_course.py | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/lms/migrations/versions/f61cb94edfc8_backfill_lms_course.py b/lms/migrations/versions/f61cb94edfc8_backfill_lms_course.py index 20c60f44fd..be2f0b64c0 100644 --- a/lms/migrations/versions/f61cb94edfc8_backfill_lms_course.py +++ b/lms/migrations/versions/f61cb94edfc8_backfill_lms_course.py @@ -13,21 +13,26 @@ def upgrade() -> None: sa.text( """ WITH backfill as ( - -- Deduplicate "grouping" courses on authority_provided_id - SELECT DISTINCT ON (authority_provided_id) - "grouping".created, - "grouping".updated, - tool_consumer_instance_guid, - authority_provided_id, - lms_name, - lms_id - FROM "grouping" - -- join on application_instances to get the GUID - JOIN application_instances on application_instances.id = "grouping".application_instance_id - -- Pick only courses, not sections or groups - WHERE grouping.type ='course' - -- Pick the most recent "grouping" there are duplicates - ORDER BY authority_provided_id, "grouping".updated desc + -- Deduplicate on guid, lms_id + SELECT DISTINCT on (tool_consumer_instance_guid, lms_id) * + FROM ( + -- Deduplicate "grouping" courses on authority_provided_id + SELECT DISTINCT ON (authority_provided_id) + "grouping".created, + "grouping".updated, + tool_consumer_instance_guid, + authority_provided_id, + lms_name, + lms_id + FROM "grouping" + -- join on application_instances to get the GUID + JOIN application_instances on application_instances.id = "grouping".application_instance_id + -- Pick only courses, not sections or groups + WHERE grouping.type ='course' + -- Pick the most recent "grouping" there are duplicates + ORDER BY authority_provided_id, "grouping".updated desc + ) unique_by_authority_provided_id + ORDER BY tool_consumer_instance_guid, lms_id, unique_by_authority_provided_id.updated desc ) INSERT INTO lms_course ( created,