Skip to content

Commit

Permalink
feat: Add collection tags to index [FC-0062] (#35483)
Browse files Browse the repository at this point in the history
* feat: Add collection tags to index

* feat: Add api functions to update tags in collections

* feat: Update tags on index when tag_object
  • Loading branch information
ChrisChV authored Sep 17, 2024
1 parent a94b5af commit 5927be7
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 11 deletions.
25 changes: 20 additions & 5 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from meilisearch.errors import MeilisearchError
from meilisearch.models.task import TaskInfo
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import LibraryLocatorV2
from opaque_keys.edx.locator import LibraryLocatorV2, LibraryCollectionLocator
from openedx_learning.api import authoring as authoring_api
from common.djangoapps.student.roles import GlobalStaff
from rest_framework.request import Request
Expand All @@ -36,6 +36,7 @@
searchable_doc_for_library_block,
searchable_doc_collections,
searchable_doc_tags,
searchable_doc_tags_for_collection,
)

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -395,13 +396,12 @@ def index_library(lib_key: str) -> list:
return docs

############## Collections ##############
def index_collection_batch(batch, num_done) -> int:
def index_collection_batch(batch, num_done, library_key) -> int:
docs = []
for collection in batch:
try:
doc = searchable_doc_for_collection(collection)
# Uncomment below line once collections are tagged.
# doc.update(searchable_doc_tags(collection.id))
doc.update(searchable_doc_tags_for_collection(library_key, collection))
docs.append(doc)
except Exception as err: # pylint: disable=broad-except
status_cb(f"Error indexing collection {collection}: {err}")
Expand All @@ -428,7 +428,11 @@ def index_collection_batch(batch, num_done) -> int:
status_cb(f"{num_collections_done + 1}/{num_collections}. Now indexing collections in library {lib_key}")
paginator = Paginator(collections, 100)
for p in paginator.page_range:
num_collections_done = index_collection_batch(paginator.page(p).object_list, num_collections_done)
num_collections_done = index_collection_batch(
paginator.page(p).object_list,
num_collections_done,
lib_key,
)
status_cb(f"{num_collections_done}/{num_collections} collections indexed for library {lib_key}")

num_contexts_done += 1
Expand Down Expand Up @@ -604,6 +608,17 @@ def upsert_block_collections_index_docs(usage_key: UsageKey):
_update_index_docs([doc])


def upsert_collection_tags_index_docs(collection_usage_key: LibraryCollectionLocator):
"""
Updates the tags data in documents for the given library collection
"""
collection = lib_api.get_library_collection_from_usage_key(collection_usage_key)

doc = {Fields.id: collection.id}
doc.update(searchable_doc_tags_for_collection(collection_usage_key.library_key, collection))
_update_index_docs([doc])


def _get_user_orgs(request: Request) -> list[str]:
"""
Get the org.short_names for the organizations that the requesting user has OrgStaffRole or OrgInstructorRole.
Expand Down
24 changes: 24 additions & 0 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from django.core.exceptions import ObjectDoesNotExist
from opaque_keys.edx.keys import LearningContextKey, UsageKey
from openedx_learning.api import authoring as authoring_api
from opaque_keys.edx.locator import LibraryLocatorV2

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content_libraries import api as lib_api
Expand Down Expand Up @@ -339,6 +340,28 @@ def searchable_doc_collections(usage_key: UsageKey) -> dict:
return doc


def searchable_doc_tags_for_collection(
library_key: LibraryLocatorV2,
collection,
) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the tags data for the given library collection.
"""
doc = {
Fields.id: collection.id,
}

collection_usage_key = lib_api.get_library_collection_usage_key(
library_key,
collection.key,
)

doc.update(_tags_for_content_object(collection_usage_key))

return doc


def searchable_doc_for_course_block(block) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
Expand Down Expand Up @@ -382,6 +405,7 @@ def searchable_doc_for_collection(collection) -> dict:
doc.update({
Fields.context_key: str(context_key),
Fields.org: org,
Fields.usage_key: str(lib_api.get_library_collection_usage_key(context_key, collection.key)),
})
except LearningPackage.contentlibrary.RelatedObjectDoesNotExist:
log.warning(f"Related library not found for {collection}")
Expand Down
21 changes: 17 additions & 4 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.dispatch import receiver
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import LibraryCollectionLocator
from openedx_events.content_authoring.data import (
ContentLibraryData,
ContentObjectChangedData,
Expand All @@ -32,7 +33,12 @@
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.search.models import SearchAccess

from .api import only_if_meilisearch_enabled, upsert_block_collections_index_docs, upsert_block_tags_index_docs
from .api import (
only_if_meilisearch_enabled,
upsert_block_collections_index_docs,
upsert_block_tags_index_docs,
upsert_collection_tags_index_docs,
)
from .tasks import (
delete_library_block_index_doc,
delete_xblock_index_doc,
Expand Down Expand Up @@ -191,12 +197,19 @@ def content_object_associations_changed_handler(**kwargs) -> None:
# Check if valid if course or library block
usage_key = UsageKey.from_string(str(content_object.object_id))
except InvalidKeyError:
log.error("Received invalid content object id")
return
try:
# Check if valid if library collection
usage_key = LibraryCollectionLocator.from_string(str(content_object.object_id))
except InvalidKeyError:
log.error("Received invalid content object id")
return

# This event's changes may contain both "tags" and "collections", but this will happen rarely, if ever.
# So we allow a potential double "upsert" here.
if not content_object.changes or "tags" in content_object.changes:
upsert_block_tags_index_docs(usage_key)
if isinstance(usage_key, LibraryCollectionLocator):
upsert_collection_tags_index_docs(usage_key)
else:
upsert_block_tags_index_docs(usage_key)
if not content_object.changes or "collections" in content_object.changes:
upsert_block_collections_index_docs(usage_key)
41 changes: 40 additions & 1 deletion openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,11 @@ def setUp(self):
created_by=None,
description="my collection description"
)
self.collection_usage_key = "lib-collection:org1:lib:MYCOL"
self.collection_dict = {
"id": self.collection.id,
"block_id": self.collection.key,
"usage_key": self.collection_usage_key,
"type": "collection",
"display_name": "my_collection",
"description": "my collection description",
Expand Down Expand Up @@ -221,14 +223,16 @@ def test_reindex_meilisearch(self, mock_meilisearch):
doc_problem2 = copy.deepcopy(self.doc_problem2)
doc_problem2["tags"] = {}
doc_problem2["collections"] = {}
doc_collection = copy.deepcopy(self.collection_dict)
doc_collection["tags"] = {}

api.rebuild_index()
assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 3
mock_meilisearch.return_value.index.return_value.add_documents.assert_has_calls(
[
call([doc_sequential, doc_vertical]),
call([doc_problem1, doc_problem2]),
call([self.collection_dict]),
call([doc_collection]),
],
any_order=True,
)
Expand Down Expand Up @@ -459,6 +463,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
doc_collection1_created = {
"id": collection1.id,
"block_id": collection1.key,
"usage_key": f"lib-collection:org1:lib:{collection1.key}",
"type": "collection",
"display_name": "Collection 1",
"description": "First Collection",
Expand All @@ -473,6 +478,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
doc_collection2_created = {
"id": collection2.id,
"block_id": collection2.key,
"usage_key": f"lib-collection:org1:lib:{collection2.key}",
"type": "collection",
"display_name": "Collection 2",
"description": "Second Collection",
Expand All @@ -487,6 +493,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
doc_collection2_updated = {
"id": collection2.id,
"block_id": collection2.key,
"usage_key": f"lib-collection:org1:lib:{collection2.key}",
"type": "collection",
"display_name": "Collection 2",
"description": "Second Collection",
Expand All @@ -501,6 +508,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
doc_collection1_updated = {
"id": collection1.id,
"block_id": collection1.key,
"usage_key": f"lib-collection:org1:lib:{collection1.key}",
"type": "collection",
"display_name": "Collection 1",
"description": "First Collection",
Expand Down Expand Up @@ -576,3 +584,34 @@ def test_delete_all_drafts(self, mock_meilisearch):
mock_meilisearch.return_value.index.return_value.delete_documents.assert_called_once_with(
filter=delete_filter
)

@override_settings(MEILISEARCH_ENABLED=True)
def test_index_tags_in_collections(self, mock_meilisearch):
# Tag collection
tagging_api.tag_object(self.collection_usage_key, self.taxonomyA, ["one", "two"])
tagging_api.tag_object(self.collection_usage_key, self.taxonomyB, ["three", "four"])

# Build expected docs with tags at each stage
doc_collection_with_tags1 = {
"id": self.collection.id,
"tags": {
'taxonomy': ['A'],
'level0': ['A > one', 'A > two']
}
}
doc_collection_with_tags2 = {
"id": self.collection.id,
"tags": {
'taxonomy': ['A', 'B'],
'level0': ['A > one', 'A > two', 'B > four', 'B > three']
}
}

assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
[
call([doc_collection_with_tags1]),
call([doc_collection_with_tags2]),
],
any_order=True,
)
11 changes: 11 additions & 0 deletions openedx/core/djangoapps/content/search/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ..documents import (
searchable_doc_for_course_block,
searchable_doc_tags,
searchable_doc_tags_for_collection,
searchable_doc_collections,
searchable_doc_for_collection,
searchable_doc_for_library_block,
Expand All @@ -27,6 +28,7 @@
except RuntimeError:
searchable_doc_for_course_block = lambda x: x
searchable_doc_tags = lambda x: x
searchable_doc_tags_for_collection = lambda x: x
searchable_doc_for_collection = lambda x: x
searchable_doc_for_library_block = lambda x: x
SearchAccess = {}
Expand Down Expand Up @@ -76,6 +78,7 @@ def setUpClass(cls):
created_by=None,
description="my toy collection description"
)
cls.collection_usage_key = "lib-collection:edX:2012_Fall:TOY_COLLECTION"
cls.library_block = library_api.create_library_block(
cls.library.key,
"html",
Expand Down Expand Up @@ -109,6 +112,7 @@ def setUpClass(cls):
tagging_api.tag_object(str(cls.html_block_key), cls.subject_tags, tags=["Chinese", "Jump Links"])
tagging_api.tag_object(str(cls.html_block_key), cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(str(cls.library_block.usage_key), cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(cls.collection_usage_key, cls.difficulty_tags, tags=["Normal"])

@property
def toy_course_access_id(self):
Expand Down Expand Up @@ -296,9 +300,12 @@ def test_html_library_block(self):

def test_collection_with_library(self):
doc = searchable_doc_for_collection(self.collection)
doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection))

assert doc == {
"id": self.collection.id,
"block_id": self.collection.key,
"usage_key": self.collection_usage_key,
"type": "collection",
"org": "edX",
"display_name": "Toy Collection",
Expand All @@ -309,6 +316,10 @@ def test_collection_with_library(self):
"breadcrumbs": [{"display_name": "some content_library"}],
"created": 1680674828.0,
"modified": 1680674828.0,
'tags': {
'taxonomy': ['Difficulty'],
'level0': ['Difficulty > Normal']
}
}

def test_collection_with_no_library(self):
Expand Down
40 changes: 39 additions & 1 deletion openedx/core/djangoapps/content_libraries/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@
from opaque_keys.edx.locator import (
LibraryLocatorV2,
LibraryUsageLocatorV2,
LibraryLocator as LibraryLocatorV1
LibraryLocator as LibraryLocatorV1,
LibraryCollectionLocator,
)
from opaque_keys import InvalidKeyError
from openedx_events.content_authoring.data import (
Expand Down Expand Up @@ -1262,6 +1263,43 @@ def update_library_collection_components(
return collection


def get_library_collection_usage_key(
library_key: LibraryLocatorV2,
collection_key: str,
# As an optimization, callers may pass in a pre-fetched ContentLibrary instance
content_library: ContentLibrary | None = None,
) -> LibraryCollectionLocator:
"""
Returns the LibraryCollectionLocator associated to a collection
"""
if not content_library:
content_library = ContentLibrary.objects.get_by_key(library_key) # type: ignore[attr-defined]
assert content_library
assert content_library.learning_package_id
assert content_library.library_key == library_key

return LibraryCollectionLocator(library_key, collection_key)


def get_library_collection_from_usage_key(
collection_usage_key: LibraryCollectionLocator,
) -> Collection:
"""
Return a Collection using the LibraryCollectionLocator
"""

library_key = collection_usage_key.library_key
collection_key = collection_usage_key.collection_id
content_library = ContentLibrary.objects.get_by_key(library_key) # type: ignore[attr-defined]
try:
return authoring_api.get_collection(
content_library.learning_package_id,
collection_key,
)
except Collection.DoesNotExist as exc:
raise ContentLibraryCollectionNotFound from exc


# V1/V2 Compatibility Helpers
# (Should be removed as part of
# https://github.com/openedx/edx-platform/issues/32457)
Expand Down

0 comments on commit 5927be7

Please sign in to comment.