diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py index 4a775a710da6..71d09590d003 100644 --- a/openedx/core/djangoapps/content/search/api.py +++ b/openedx/core/djangoapps/content/search/api.py @@ -18,7 +18,7 @@ from meilisearch.errors import MeilisearchError from meilisearch.models.task import TaskInfo from opaque_keys.edx.keys import UsageKey -from opaque_keys.edx.locator import LibraryLocatorV2 +from opaque_keys.edx.locator import LibraryLocatorV2, LibraryCollectionLocator from openedx_learning.api import authoring as authoring_api from common.djangoapps.student.roles import GlobalStaff from rest_framework.request import Request @@ -36,6 +36,7 @@ searchable_doc_for_library_block, searchable_doc_collections, searchable_doc_tags, + searchable_doc_tags_for_collection, ) log = logging.getLogger(__name__) @@ -395,13 +396,12 @@ def index_library(lib_key: str) -> list: return docs ############## Collections ############## - def index_collection_batch(batch, num_done) -> int: + def index_collection_batch(batch, num_done, library_key) -> int: docs = [] for collection in batch: try: doc = searchable_doc_for_collection(collection) - # Uncomment below line once collections are tagged. - # doc.update(searchable_doc_tags(collection.id)) + doc.update(searchable_doc_tags_for_collection(library_key, collection)) docs.append(doc) except Exception as err: # pylint: disable=broad-except status_cb(f"Error indexing collection {collection}: {err}") @@ -428,7 +428,11 @@ def index_collection_batch(batch, num_done) -> int: status_cb(f"{num_collections_done + 1}/{num_collections}. Now indexing collections in library {lib_key}") paginator = Paginator(collections, 100) for p in paginator.page_range: - num_collections_done = index_collection_batch(paginator.page(p).object_list, num_collections_done) + num_collections_done = index_collection_batch( + paginator.page(p).object_list, + num_collections_done, + lib_key, + ) status_cb(f"{num_collections_done}/{num_collections} collections indexed for library {lib_key}") num_contexts_done += 1 @@ -604,6 +608,17 @@ def upsert_block_collections_index_docs(usage_key: UsageKey): _update_index_docs([doc]) +def upsert_collection_tags_index_docs(collection_usage_key: LibraryCollectionLocator): + """ + Updates the tags data in documents for the given library collection + """ + collection = lib_api.get_library_collection_from_usage_key(collection_usage_key) + + doc = {Fields.id: collection.id} + doc.update(searchable_doc_tags_for_collection(collection_usage_key.library_key, collection)) + _update_index_docs([doc]) + + def _get_user_orgs(request: Request) -> list[str]: """ Get the org.short_names for the organizations that the requesting user has OrgStaffRole or OrgInstructorRole. diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py index 6f19b610fe86..f9041468c296 100644 --- a/openedx/core/djangoapps/content/search/documents.py +++ b/openedx/core/djangoapps/content/search/documents.py @@ -10,6 +10,7 @@ from django.core.exceptions import ObjectDoesNotExist from opaque_keys.edx.keys import LearningContextKey, UsageKey from openedx_learning.api import authoring as authoring_api +from opaque_keys.edx.locator import LibraryLocatorV2 from openedx.core.djangoapps.content.search.models import SearchAccess from openedx.core.djangoapps.content_libraries import api as lib_api @@ -339,6 +340,28 @@ def searchable_doc_collections(usage_key: UsageKey) -> dict: return doc +def searchable_doc_tags_for_collection( + library_key: LibraryLocatorV2, + collection, +) -> dict: + """ + Generate a dictionary document suitable for ingestion into a search engine + like Meilisearch or Elasticsearch, with the tags data for the given library collection. + """ + doc = { + Fields.id: collection.id, + } + + collection_usage_key = lib_api.get_library_collection_usage_key( + library_key, + collection.key, + ) + + doc.update(_tags_for_content_object(collection_usage_key)) + + return doc + + def searchable_doc_for_course_block(block) -> dict: """ Generate a dictionary document suitable for ingestion into a search engine @@ -382,6 +405,7 @@ def searchable_doc_for_collection(collection) -> dict: doc.update({ Fields.context_key: str(context_key), Fields.org: org, + Fields.usage_key: str(lib_api.get_library_collection_usage_key(context_key, collection.key)), }) except LearningPackage.contentlibrary.RelatedObjectDoesNotExist: log.warning(f"Related library not found for {collection}") diff --git a/openedx/core/djangoapps/content/search/handlers.py b/openedx/core/djangoapps/content/search/handlers.py index 6a341c92ed2b..1605f8ebfd58 100644 --- a/openedx/core/djangoapps/content/search/handlers.py +++ b/openedx/core/djangoapps/content/search/handlers.py @@ -8,6 +8,7 @@ from django.dispatch import receiver from opaque_keys import InvalidKeyError from opaque_keys.edx.keys import UsageKey +from opaque_keys.edx.locator import LibraryCollectionLocator from openedx_events.content_authoring.data import ( ContentLibraryData, ContentObjectChangedData, @@ -32,7 +33,12 @@ from openedx.core.djangoapps.content.course_overviews.models import CourseOverview from openedx.core.djangoapps.content.search.models import SearchAccess -from .api import only_if_meilisearch_enabled, upsert_block_collections_index_docs, upsert_block_tags_index_docs +from .api import ( + only_if_meilisearch_enabled, + upsert_block_collections_index_docs, + upsert_block_tags_index_docs, + upsert_collection_tags_index_docs, +) from .tasks import ( delete_library_block_index_doc, delete_xblock_index_doc, @@ -191,12 +197,19 @@ def content_object_associations_changed_handler(**kwargs) -> None: # Check if valid if course or library block usage_key = UsageKey.from_string(str(content_object.object_id)) except InvalidKeyError: - log.error("Received invalid content object id") - return + try: + # Check if valid if library collection + usage_key = LibraryCollectionLocator.from_string(str(content_object.object_id)) + except InvalidKeyError: + log.error("Received invalid content object id") + return # This event's changes may contain both "tags" and "collections", but this will happen rarely, if ever. # So we allow a potential double "upsert" here. if not content_object.changes or "tags" in content_object.changes: - upsert_block_tags_index_docs(usage_key) + if isinstance(usage_key, LibraryCollectionLocator): + upsert_collection_tags_index_docs(usage_key) + else: + upsert_block_tags_index_docs(usage_key) if not content_object.changes or "collections" in content_object.changes: upsert_block_collections_index_docs(usage_key) diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py index 023265f4d0f5..4aa41a156dab 100644 --- a/openedx/core/djangoapps/content/search/tests/test_api.py +++ b/openedx/core/djangoapps/content/search/tests/test_api.py @@ -185,9 +185,11 @@ def setUp(self): created_by=None, description="my collection description" ) + self.collection_usage_key = "lib-collection:org1:lib:MYCOL" self.collection_dict = { "id": self.collection.id, "block_id": self.collection.key, + "usage_key": self.collection_usage_key, "type": "collection", "display_name": "my_collection", "description": "my collection description", @@ -221,6 +223,8 @@ def test_reindex_meilisearch(self, mock_meilisearch): doc_problem2 = copy.deepcopy(self.doc_problem2) doc_problem2["tags"] = {} doc_problem2["collections"] = {} + doc_collection = copy.deepcopy(self.collection_dict) + doc_collection["tags"] = {} api.rebuild_index() assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 3 @@ -228,7 +232,7 @@ def test_reindex_meilisearch(self, mock_meilisearch): [ call([doc_sequential, doc_vertical]), call([doc_problem1, doc_problem2]), - call([self.collection_dict]), + call([doc_collection]), ], any_order=True, ) @@ -459,6 +463,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): doc_collection1_created = { "id": collection1.id, "block_id": collection1.key, + "usage_key": f"lib-collection:org1:lib:{collection1.key}", "type": "collection", "display_name": "Collection 1", "description": "First Collection", @@ -473,6 +478,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): doc_collection2_created = { "id": collection2.id, "block_id": collection2.key, + "usage_key": f"lib-collection:org1:lib:{collection2.key}", "type": "collection", "display_name": "Collection 2", "description": "Second Collection", @@ -487,6 +493,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): doc_collection2_updated = { "id": collection2.id, "block_id": collection2.key, + "usage_key": f"lib-collection:org1:lib:{collection2.key}", "type": "collection", "display_name": "Collection 2", "description": "Second Collection", @@ -501,6 +508,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch): doc_collection1_updated = { "id": collection1.id, "block_id": collection1.key, + "usage_key": f"lib-collection:org1:lib:{collection1.key}", "type": "collection", "display_name": "Collection 1", "description": "First Collection", @@ -576,3 +584,34 @@ def test_delete_all_drafts(self, mock_meilisearch): mock_meilisearch.return_value.index.return_value.delete_documents.assert_called_once_with( filter=delete_filter ) + + @override_settings(MEILISEARCH_ENABLED=True) + def test_index_tags_in_collections(self, mock_meilisearch): + # Tag collection + tagging_api.tag_object(self.collection_usage_key, self.taxonomyA, ["one", "two"]) + tagging_api.tag_object(self.collection_usage_key, self.taxonomyB, ["three", "four"]) + + # Build expected docs with tags at each stage + doc_collection_with_tags1 = { + "id": self.collection.id, + "tags": { + 'taxonomy': ['A'], + 'level0': ['A > one', 'A > two'] + } + } + doc_collection_with_tags2 = { + "id": self.collection.id, + "tags": { + 'taxonomy': ['A', 'B'], + 'level0': ['A > one', 'A > two', 'B > four', 'B > three'] + } + } + + assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2 + mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls( + [ + call([doc_collection_with_tags1]), + call([doc_collection_with_tags2]), + ], + any_order=True, + ) diff --git a/openedx/core/djangoapps/content/search/tests/test_documents.py b/openedx/core/djangoapps/content/search/tests/test_documents.py index 7ff330c0b491..9d51bd127bb4 100644 --- a/openedx/core/djangoapps/content/search/tests/test_documents.py +++ b/openedx/core/djangoapps/content/search/tests/test_documents.py @@ -19,6 +19,7 @@ from ..documents import ( searchable_doc_for_course_block, searchable_doc_tags, + searchable_doc_tags_for_collection, searchable_doc_collections, searchable_doc_for_collection, searchable_doc_for_library_block, @@ -27,6 +28,7 @@ except RuntimeError: searchable_doc_for_course_block = lambda x: x searchable_doc_tags = lambda x: x + searchable_doc_tags_for_collection = lambda x: x searchable_doc_for_collection = lambda x: x searchable_doc_for_library_block = lambda x: x SearchAccess = {} @@ -76,6 +78,7 @@ def setUpClass(cls): created_by=None, description="my toy collection description" ) + cls.collection_usage_key = "lib-collection:edX:2012_Fall:TOY_COLLECTION" cls.library_block = library_api.create_library_block( cls.library.key, "html", @@ -109,6 +112,7 @@ def setUpClass(cls): tagging_api.tag_object(str(cls.html_block_key), cls.subject_tags, tags=["Chinese", "Jump Links"]) tagging_api.tag_object(str(cls.html_block_key), cls.difficulty_tags, tags=["Normal"]) tagging_api.tag_object(str(cls.library_block.usage_key), cls.difficulty_tags, tags=["Normal"]) + tagging_api.tag_object(cls.collection_usage_key, cls.difficulty_tags, tags=["Normal"]) @property def toy_course_access_id(self): @@ -296,9 +300,12 @@ def test_html_library_block(self): def test_collection_with_library(self): doc = searchable_doc_for_collection(self.collection) + doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection)) + assert doc == { "id": self.collection.id, "block_id": self.collection.key, + "usage_key": self.collection_usage_key, "type": "collection", "org": "edX", "display_name": "Toy Collection", @@ -309,6 +316,10 @@ def test_collection_with_library(self): "breadcrumbs": [{"display_name": "some content_library"}], "created": 1680674828.0, "modified": 1680674828.0, + 'tags': { + 'taxonomy': ['Difficulty'], + 'level0': ['Difficulty > Normal'] + } } def test_collection_with_no_library(self): diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py index 816bd8c0099b..3dc33aec9616 100644 --- a/openedx/core/djangoapps/content_libraries/api.py +++ b/openedx/core/djangoapps/content_libraries/api.py @@ -73,7 +73,8 @@ from opaque_keys.edx.locator import ( LibraryLocatorV2, LibraryUsageLocatorV2, - LibraryLocator as LibraryLocatorV1 + LibraryLocator as LibraryLocatorV1, + LibraryCollectionLocator, ) from opaque_keys import InvalidKeyError from openedx_events.content_authoring.data import ( @@ -1262,6 +1263,43 @@ def update_library_collection_components( return collection +def get_library_collection_usage_key( + library_key: LibraryLocatorV2, + collection_key: str, + # As an optimization, callers may pass in a pre-fetched ContentLibrary instance + content_library: ContentLibrary | None = None, +) -> LibraryCollectionLocator: + """ + Returns the LibraryCollectionLocator associated to a collection + """ + if not content_library: + content_library = ContentLibrary.objects.get_by_key(library_key) # type: ignore[attr-defined] + assert content_library + assert content_library.learning_package_id + assert content_library.library_key == library_key + + return LibraryCollectionLocator(library_key, collection_key) + + +def get_library_collection_from_usage_key( + collection_usage_key: LibraryCollectionLocator, +) -> Collection: + """ + Return a Collection using the LibraryCollectionLocator + """ + + library_key = collection_usage_key.library_key + collection_key = collection_usage_key.collection_id + content_library = ContentLibrary.objects.get_by_key(library_key) # type: ignore[attr-defined] + try: + return authoring_api.get_collection( + content_library.learning_package_id, + collection_key, + ) + except Collection.DoesNotExist as exc: + raise ContentLibraryCollectionNotFound from exc + + # V1/V2 Compatibility Helpers # (Should be removed as part of # https://github.com/openedx/edx-platform/issues/32457)