feat: Add collection tags to index

openedx · Sep 13, 2024 · 5a34dbd · 5a34dbd
1 parent dd59dc6
commit 5a34dbd
Show file tree

Hide file tree

Showing 4 changed files with 55 additions and 6 deletions.
diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
@@ -36,6 +36,7 @@
     searchable_doc_for_library_block,
     searchable_doc_collections,
     searchable_doc_tags,
+    searchable_doc_tags_for_collection,
 )
 
 log = logging.getLogger(__name__)
@@ -395,13 +396,12 @@ def index_library(lib_key: str) -> list:
             return docs
 
         ############## Collections ##############
-        def index_collection_batch(batch, num_done) -> int:
+        def index_collection_batch(batch, num_done, library_key) -> int:
             docs = []
             for collection in batch:
                 try:
                     doc = searchable_doc_for_collection(collection)
-                    # Uncomment below line once collections are tagged.
-                    # doc.update(searchable_doc_tags(collection.id))
+                    doc.update(searchable_doc_tags_for_collection(library_key, collection))
                     docs.append(doc)
                 except Exception as err:  # pylint: disable=broad-except
                     status_cb(f"Error indexing collection {collection}: {err}")
@@ -428,7 +428,11 @@ def index_collection_batch(batch, num_done) -> int:
             status_cb(f"{num_collections_done + 1}/{num_collections}. Now indexing collections in library {lib_key}")
             paginator = Paginator(collections, 100)
             for p in paginator.page_range:
-                num_collections_done = index_collection_batch(paginator.page(p).object_list, num_collections_done)
+                num_collections_done = index_collection_batch(
+                    paginator.page(p).object_list,
+                    num_collections_done,
+                    lib_key,
+                )
             status_cb(f"{num_collections_done}/{num_collections} collections indexed for library {lib_key}")
 
             num_contexts_done += 1

diff --git a/openedx/core/djangoapps/content/search/documents.py b/openedx/core/djangoapps/content/search/documents.py
@@ -10,6 +10,7 @@
 from django.core.exceptions import ObjectDoesNotExist
 from opaque_keys.edx.keys import LearningContextKey, UsageKey
 from openedx_learning.api import authoring as authoring_api
+from opaque_keys.edx.locator import LibraryLocatorV2
 
 from openedx.core.djangoapps.content.search.models import SearchAccess
 from openedx.core.djangoapps.content_libraries import api as lib_api
@@ -339,6 +340,28 @@ def searchable_doc_collections(usage_key: UsageKey) -> dict:
     return doc
 
 
+def searchable_doc_tags_for_collection(
+    library_key: LibraryLocatorV2,
+    collection,
+) -> dict:
+    """
+    Generate a search-index document (e.g. for Meilisearch or Elasticsearch) holding the collection's
+    id plus the tag fields that _tags_for_content_object() builds from the collection's locator key.
+    """
+    doc = {
+        Fields.id: collection.id,  # presumably the same id used by searchable_doc_for_collection() - confirm
+    }
+
+    collection_usage_key = lib_api.get_library_collection_usage_key(
+        library_key,
+        collection.key,
+    )
+
+    doc.update(_tags_for_content_object(collection_usage_key))
+
+    return doc
+
+
 def searchable_doc_for_course_block(block) -> dict:
     """
     Generate a dictionary document suitable for ingestion into a search engine

diff --git a/openedx/core/djangoapps/content/search/tests/test_api.py b/openedx/core/djangoapps/content/search/tests/test_api.py
@@ -221,14 +221,17 @@ def test_reindex_meilisearch(self, mock_meilisearch):
         doc_problem2 = copy.deepcopy(self.doc_problem2)
         doc_problem2["tags"] = {}
         doc_problem2["collections"] = {}
+        doc_collection = copy.deepcopy(self.collection_dict)
+        doc_collection["tags"] = {}
+        doc_collection["collections"] = {}
 
         api.rebuild_index()
         assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 3
         mock_meilisearch.return_value.index.return_value.add_documents.assert_has_calls(
             [
                 call([doc_sequential, doc_vertical]),
                 call([doc_problem1, doc_problem2]),
-                call([self.collection_dict]),
+                call([doc_collection]),
             ],
             any_order=True,
         )

diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py
@@ -73,7 +73,8 @@
 from opaque_keys.edx.locator import (
     LibraryLocatorV2,
     LibraryUsageLocatorV2,
-    LibraryLocator as LibraryLocatorV1
+    LibraryLocator as LibraryLocatorV1,
+    LibraryCollectionLocator,
 )
 from opaque_keys import InvalidKeyError
 from openedx_events.content_authoring.data import (
@@ -1247,6 +1248,24 @@ def update_library_collection_components(
     return collection
 
 
+def get_library_collection_usage_key(
+    library_key: LibraryLocatorV2,
+    collection_key: str,
+    # As an optimization, callers may pass in a pre-fetched ContentLibrary instance
+    content_library: ContentLibrary | None = None,
+) -> LibraryCollectionLocator:
+    """
+    Return the LibraryCollectionLocator for collection_key within library_key, after sanity-checking the library.
+    """
+    if not content_library:
+        content_library = ContentLibrary.objects.get_by_key(library_key)  # type: ignore[attr-defined]
+    assert content_library  # NOTE(review): assert-based checks are skipped under python -O
+    assert content_library.learning_package_id  # the library must have a learning package
+    assert content_library.library_key == library_key  # a passed-in library must match library_key
+
+    return LibraryCollectionLocator(library_key, collection_key)
+
+
 # V1/V2 Compatibility Helpers
 # (Should be removed as part of
 #  https://github.com/openedx/edx-platform/issues/32457)