diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py index 0e5d7e9a15af..3e0fadd717a9 100644 --- a/openedx/core/djangoapps/content/search/api.py +++ b/openedx/core/djangoapps/content/search/api.py @@ -15,7 +15,7 @@ from django.core.cache import cache from django.core.paginator import Paginator from meilisearch import Client as MeilisearchClient -from meilisearch.errors import MeilisearchError +from meilisearch.errors import MeilisearchApiError, MeilisearchError from meilisearch.models.task import TaskInfo from opaque_keys.edx.keys import UsageKey from opaque_keys.edx.locator import LibraryLocatorV2, LibraryCollectionLocator @@ -575,6 +575,24 @@ def upsert_library_block_index_doc(usage_key: UsageKey) -> None: _update_index_docs(docs) +def _get_document_from_index(document_id: str) -> dict: + """ + Returns the Document identified by the given ID, from the given index. + + Returns None if the document or index do not exist. + """ + client = _get_meilisearch_client() + document = None + try: + index = client.get_index(STUDIO_INDEX_NAME) + return index.get_document(id) + except (MeilisearchError, MeilisearchApiError) as err: + # The index or documennt doesn't exist + log.exception(err) + + return None + + def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collection_key: str) -> None: """ Creates, updates, or deletes the document for the given Library Collection in the search index. @@ -582,21 +600,69 @@ def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collectio If the Collection is not found or disabled (i.e. soft-deleted), then delete it from the search index. """ doc = searchable_doc_for_collection(library_key, collection_key) + update_components = False # Soft-deleted/disabled collections are removed from the index + # and their components updated. if doc.get('_disabled'): _delete_index_doc(doc[Fields.id]) - # Hard-deleted collections are also deleted from the index + update_components = True + + # Hard-deleted collections are also deleted from the index, + # but their components are automatically updated as part of the deletion process, so we don't have to. elif not doc.get(Fields.type): _delete_index_doc(doc[Fields.id]) # Otherwise, upsert the collection. + # Newly-added/restored collection get their components updated too. else: + already_indexed = _get_document_from_index(doc[Fields.id]) + if not already_indexed: + update_components = True + _update_index_docs([doc]) + # Asynchronously update the collection's components "collections" field + if update_components: + from .tasks import update_library_components_collections as update_task + + update_task.delay(str(library_key), collection_key) + + +def update_library_components_collections( + library_key: LibraryLocatorV2, + collection_key: str, + batch_size: int = 1000, +) -> None: + """ + Updates the "collections" field for all components associated with a given Library Collection. + + Because there may be a lot of components, we send these updates to Meilisearch in batches. + """ + library = lib_api.get_library(library_key) + components = authoring_api.get_collection_components(library.learning_package.id, collection_key) + + paginator = Paginator(components, batch_size) + for page in paginator.page_range: + docs = [] + + for component in paginator.page(page).object_list: + usage_key = lib_api.library_component_usage_key( + library_key, + component, + ) + doc = searchable_doc_collections(usage_key) + docs.append(doc) + + log.info( + f"Updating document.collections for library {library_key} components" + f" page {page} / {paginator.num_pages}" + ) + _update_index_docs(docs) + def upsert_content_library_index_docs(library_key: LibraryLocatorV2) -> None: """ diff --git a/openedx/core/djangoapps/content/search/tasks.py b/openedx/core/djangoapps/content/search/tasks.py index d9dad834db29..98390a12f3b3 100644 --- a/openedx/core/djangoapps/content/search/tasks.py +++ b/openedx/core/djangoapps/content/search/tasks.py @@ -90,10 +90,23 @@ def update_content_library_index_docs(library_key_str: str) -> None: @set_code_owner_attribute def update_library_collection_index_doc(library_key_str: str, collection_key: str) -> None: """ - Celery task to update the content index documents for a library collection + Celery task to update the content index document for a library collection """ library_key = LibraryLocatorV2.from_string(library_key_str) log.info("Updating content index documents for collection %s in library%s", collection_key, library_key) api.upsert_library_collection_index_doc(library_key, collection_key) + + +@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError)) +@set_code_owner_attribute +def update_library_components_collections(library_key_str: str, collection_key: str) -> None: + """ + Celery task to update the "collections" field for components in the given content library collection. + """ + library_key = LibraryLocatorV2.from_string(library_key_str) + + log.info("Updating document.collections for library %s collection %s components", library_key, collection_key) + + api.update_library_components_collections(library_key, collection_key) diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py index 49304af3e9fc..2f43896c197e 100644 --- a/openedx/core/djangoapps/content/search/tests/test_api.py +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -669,12 +669,16 @@ def test_delete_collection(self, mock_meilisearch): mock_meilisearch.return_value.index.return_value.delete_document.assert_called_once_with( self.collection_dict["id"], ) - ## TODO: ...and update the component's "collections" field - # mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([ - # doc_problem_without_collection, - # ]) + # ...and update the component's "collections" field + mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([ + doc_problem_without_collection, + ]) mock_meilisearch.return_value.index.reset_mock() + # We need to mock get_document here so that when we restore the collection below, meilisearch knows the + # collection is being re-added, so it will update its components too. + mock_meilisearch.return_value.get_index.return_value.get_document.return_value = None + # Restore the collection restored_date = datetime(2023, 8, 9, 10, 11, 12, tzinfo=timezone.utc) with freeze_time(restored_date): @@ -687,12 +691,12 @@ def test_delete_collection(self, mock_meilisearch): doc_collection["num_children"] = 1 doc_collection["modified"] = restored_date.timestamp() - # Should update the collection TODO and its component's "collections" field - assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 1 # TODO 2 + # Should update the collection and its component's "collections" field + assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2 mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls( [ call([doc_collection]), - # TODO call([doc_problem_with_collection]), + call([doc_problem_with_collection]), ], any_order=True, ) @@ -709,7 +713,7 @@ def test_delete_collection(self, mock_meilisearch): mock_meilisearch.return_value.index.return_value.delete_document.assert_called_once_with( self.collection_dict["id"], ) - ## TODO ...and cascade delete updates the "collections" field for the associated components - # mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([ - # doc_problem_without_collection, - # ]) + # ...and cascade delete updates the "collections" field for the associated components + mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([ + doc_problem_without_collection, + ]) diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py index 67123767c106..a9601a4e70a7 100644 --- a/openedx/core/djangoapps/content_libraries/api.py +++ b/openedx/core/djangoapps/content_libraries/api.py @@ -79,11 +79,9 @@ from opaque_keys import InvalidKeyError from openedx_events.content_authoring.data import ( ContentLibraryData, - ContentObjectChangedData, LibraryBlockData, ) from openedx_events.content_authoring.signals import ( - CONTENT_OBJECT_ASSOCIATIONS_CHANGED, CONTENT_LIBRARY_CREATED, CONTENT_LIBRARY_DELETED, CONTENT_LIBRARY_UPDATED, @@ -1235,15 +1233,6 @@ def update_library_collection_components( created_by=created_by, ) - # Emit a CONTENT_OBJECT_ASSOCIATIONS_CHANGED event for each of the objects added/removed - for usage_key in usage_keys: - CONTENT_OBJECT_ASSOCIATIONS_CHANGED.send_event( - content_object=ContentObjectChangedData( - object_id=str(usage_key), - changes=["collections"], - ), - ) - return collection diff --git a/openedx/core/djangoapps/content_libraries/signal_handlers.py b/openedx/core/djangoapps/content_libraries/signal_handlers.py index 8ea997234c22..fedee045a9f6 100644 --- a/openedx/core/djangoapps/content_libraries/signal_handlers.py +++ b/openedx/core/djangoapps/content_libraries/signal_handlers.py @@ -5,20 +5,23 @@ import logging from django.conf import settings -from django.db.models.signals import post_save, post_delete +from django.db.models.signals import post_save, post_delete, m2m_changed from django.dispatch import receiver -from opaque_keys import InvalidKeyError # lint-amnesty, pylint: disable=wrong-import-order -from opaque_keys.edx.locator import LibraryUsageLocatorV2 # lint-amnesty, pylint: disable=wrong-import-order +from opaque_keys import InvalidKeyError +from opaque_keys.edx.locator import LibraryLocatorV2, LibraryUsageLocatorV2 from openedx_events.content_authoring.data import ( + ContentObjectChangedData, LibraryCollectionData, ) from openedx_events.content_authoring.signals import ( + CONTENT_OBJECT_ASSOCIATIONS_CHANGED, LIBRARY_COLLECTION_CREATED, LIBRARY_COLLECTION_DELETED, LIBRARY_COLLECTION_UPDATED, ) -from openedx_learning.api.authoring_models import Collection +from openedx_learning.api.authoring import get_collection_components, get_component, get_components +from openedx_learning.api.authoring_models import Collection, CollectionPublishableEntity, Component from lms.djangoapps.grades.api import signals as grades_signals @@ -114,3 +117,92 @@ def library_collection_deleted(sender, instance, **kwargs): collection_key=instance.key, ) ) + + +def _library_collection_component_changed( + component: Component, + library_key: LibraryLocatorV2 | None = None, +) -> None: + """ + Sends a CONTENT_OBJECT_ASSOCIATIONS_CHANGED event for the component. + """ + if not library_key: + try: + library = ContentLibrary.objects.get( + learning_package_id=component.learning_package_id, + ) + except ContentLibrary.DoesNotExist: + log.error("{component} is not associated with a content library.") + return + + library_key = library.library_key + + assert library_key + + usage_key = library_component_usage_key( + library_key, + component, + ) + CONTENT_OBJECT_ASSOCIATIONS_CHANGED.send_event( + content_object=ContentObjectChangedData( + object_id=str(usage_key), + changes=["collections"], + ), + ) + + +@receiver(post_save, sender=CollectionPublishableEntity, dispatch_uid="library_collection_entity_saved") +def library_collection_entity_saved(sender, instance, created, **kwargs): + """ + Sends a CONTENT_OBJECT_ASSOCIATIONS_CHANGED event for components added to a collection. + """ + if created: + # Component.pk matches its entity.pk + component = get_component(instance.entity_id) + _library_collection_component_changed(component) + + +@receiver(post_delete, sender=CollectionPublishableEntity, dispatch_uid="library_collection_entity_deleted") +def library_collection_entity_deleted(sender, instance, **kwargs): + """ + Sends a CONTENT_OBJECT_ASSOCIATIONS_CHANGED event for components removed from a collection. + """ + # Component.pk matches its entity.pk + component = get_component(instance.entity_id) + _library_collection_component_changed(component) + + +@receiver(m2m_changed, sender=CollectionPublishableEntity, dispatch_uid="library_collection_entities_changed") +def library_collection_entities_changed(sender, instance, action, pk_set, **kwargs): + """ + Sends a CONTENT_OBJECT_ASSOCIATIONS_CHANGED event for components added/removed/cleared from a collection. + """ + if not isinstance(instance, Collection): + return + + if action not in ["post_add", "post_remove", "post_clear"]: + return + + try: + library = ContentLibrary.objects.get( + learning_package_id=instance.learning_package_id, + ) + except ContentLibrary.DoesNotExist: + log.error("{instance} is not associated with a content library.") + return + + if pk_set: + components = get_collection_components( + instance.learning_package_id, + instance.key, + ).filter(pk__in=pk_set) + else: + # When action=="post_clear", pk_set==None + # Since the collection instance now has an empty entities set, + # we don't know which ones were removed, so we need to update associations for all library components. + components = get_components( + instance.learning_package_id, + ) + + for component in components.all(): + _library_collection_component_changed(component, library.library_key) diff --git a/openedx/core/djangoapps/content_libraries/tests/test_api.py b/openedx/core/djangoapps/content_libraries/tests/test_api.py index 84805451f6bc..8041c508dc31 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_api.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_api.py @@ -455,11 +455,11 @@ def test_update_library_collection_components_event(self): assert event_receiver.call_count == 3 self.assertDictContainsSubset( { - "signal": LIBRARY_COLLECTION_UPDATED, + "signal": CONTENT_OBJECT_ASSOCIATIONS_CHANGED, "sender": None, - "library_collection": LibraryCollectionData( - self.lib1.library_key, - collection_key="COL1", + "content_object": ContentObjectChangedData( + object_id=self.lib1_problem_block["id"], + changes=["collections"], ), }, event_receiver.call_args_list[0].kwargs, @@ -469,7 +469,7 @@ def test_update_library_collection_components_event(self): "signal": CONTENT_OBJECT_ASSOCIATIONS_CHANGED, "sender": None, "content_object": ContentObjectChangedData( - object_id=self.lib1_problem_block["id"], + object_id=self.lib1_html_block["id"], changes=["collections"], ), }, @@ -477,11 +477,11 @@ def test_update_library_collection_components_event(self): ) self.assertDictContainsSubset( { - "signal": CONTENT_OBJECT_ASSOCIATIONS_CHANGED, + "signal": LIBRARY_COLLECTION_UPDATED, "sender": None, - "content_object": ContentObjectChangedData( - object_id=self.lib1_html_block["id"], - changes=["collections"], + "library_collection": LibraryCollectionData( + self.lib1.library_key, + collection_key="COL1", ), }, event_receiver.call_args_list[2].kwargs,