Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add collection tags to index [FC-0062] #35483

Merged
25 changes: 20 additions & 5 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from meilisearch.errors import MeilisearchError
from meilisearch.models.task import TaskInfo
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import LibraryLocatorV2
from opaque_keys.edx.locator import LibraryLocatorV2, LibraryCollectionLocator
from openedx_learning.api import authoring as authoring_api
from common.djangoapps.student.roles import GlobalStaff
from rest_framework.request import Request
Expand All @@ -36,6 +36,7 @@
searchable_doc_for_library_block,
searchable_doc_collections,
searchable_doc_tags,
searchable_doc_tags_for_collection,
)

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -395,13 +396,12 @@ def index_library(lib_key: str) -> list:
return docs

############## Collections ##############
def index_collection_batch(batch, num_done) -> int:
def index_collection_batch(batch, num_done, library_key) -> int:
docs = []
for collection in batch:
try:
doc = searchable_doc_for_collection(collection)
# Uncomment below line once collections are tagged.
# doc.update(searchable_doc_tags(collection.id))
doc.update(searchable_doc_tags_for_collection(library_key, collection))
docs.append(doc)
except Exception as err: # pylint: disable=broad-except
status_cb(f"Error indexing collection {collection}: {err}")
Expand All @@ -428,7 +428,11 @@ def index_collection_batch(batch, num_done) -> int:
status_cb(f"{num_collections_done + 1}/{num_collections}. Now indexing collections in library {lib_key}")
paginator = Paginator(collections, 100)
for p in paginator.page_range:
num_collections_done = index_collection_batch(paginator.page(p).object_list, num_collections_done)
num_collections_done = index_collection_batch(
paginator.page(p).object_list,
num_collections_done,
lib_key,
)
status_cb(f"{num_collections_done}/{num_collections} collections indexed for library {lib_key}")

num_contexts_done += 1
Expand Down Expand Up @@ -604,6 +608,17 @@ def upsert_block_collections_index_docs(usage_key: UsageKey):
_update_index_docs([doc])


def upsert_collection_tags_index_docs(collection_usage_key: LibraryCollectionLocator):
"""
Updates the tags data in documents for the given library collection
"""
collection = lib_api.get_library_collection_from_usage_key(collection_usage_key)

doc = {Fields.id: collection.id}
doc.update(searchable_doc_tags_for_collection(collection_usage_key.library_key, collection))
_update_index_docs([doc])


def _get_user_orgs(request: Request) -> list[str]:
"""
Get the org.short_names for the organizations that the requesting user has OrgStaffRole or OrgInstructorRole.
Expand Down
24 changes: 24 additions & 0 deletions openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from django.core.exceptions import ObjectDoesNotExist
from opaque_keys.edx.keys import LearningContextKey, UsageKey
from openedx_learning.api import authoring as authoring_api
from opaque_keys.edx.locator import LibraryLocatorV2

from openedx.core.djangoapps.content.search.models import SearchAccess
from openedx.core.djangoapps.content_libraries import api as lib_api
Expand Down Expand Up @@ -339,6 +340,28 @@ def searchable_doc_collections(usage_key: UsageKey) -> dict:
return doc


def searchable_doc_tags_for_collection(
library_key: LibraryLocatorV2,
collection,
) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the tags data for the given library collection.
"""
doc = {
Fields.id: collection.id,
}

collection_usage_key = lib_api.get_library_collection_usage_key(
library_key,
collection.key,
)

doc.update(_tags_for_content_object(collection_usage_key))

return doc


def searchable_doc_for_course_block(block) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
Expand Down Expand Up @@ -382,6 +405,7 @@ def searchable_doc_for_collection(collection) -> dict:
doc.update({
Fields.context_key: str(context_key),
Fields.org: org,
Fields.usage_key: str(lib_api.get_library_collection_usage_key(context_key, collection.key)),
})
except LearningPackage.contentlibrary.RelatedObjectDoesNotExist:
log.warning(f"Related library not found for {collection}")
Expand Down
21 changes: 17 additions & 4 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.dispatch import receiver
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import LibraryCollectionLocator
from openedx_events.content_authoring.data import (
ContentLibraryData,
ContentObjectChangedData,
Expand All @@ -32,7 +33,12 @@
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from openedx.core.djangoapps.content.search.models import SearchAccess

from .api import only_if_meilisearch_enabled, upsert_block_collections_index_docs, upsert_block_tags_index_docs
from .api import (
only_if_meilisearch_enabled,
upsert_block_collections_index_docs,
upsert_block_tags_index_docs,
upsert_collection_tags_index_docs,
)
from .tasks import (
delete_library_block_index_doc,
delete_xblock_index_doc,
Expand Down Expand Up @@ -191,12 +197,19 @@ def content_object_associations_changed_handler(**kwargs) -> None:
# Check if valid if course or library block
usage_key = UsageKey.from_string(str(content_object.object_id))
except InvalidKeyError:
log.error("Received invalid content object id")
return
try:
# Check if valid if library collection
usage_key = LibraryCollectionLocator.from_string(str(content_object.object_id))
except InvalidKeyError:
log.error("Received invalid content object id")
return

# This event's changes may contain both "tags" and "collections", but this will happen rarely, if ever.
# So we allow a potential double "upsert" here.
if not content_object.changes or "tags" in content_object.changes:
upsert_block_tags_index_docs(usage_key)
if isinstance(usage_key, LibraryCollectionLocator):
upsert_collection_tags_index_docs(usage_key)
else:
upsert_block_tags_index_docs(usage_key)
if not content_object.changes or "collections" in content_object.changes:
upsert_block_collections_index_docs(usage_key)
41 changes: 40 additions & 1 deletion openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,11 @@ def setUp(self):
created_by=None,
description="my collection description"
)
self.collection_usage_key = "lib-collection:org1:lib:MYCOL"
self.collection_dict = {
"id": self.collection.id,
"block_id": self.collection.key,
"usage_key": self.collection_usage_key,
"type": "collection",
"display_name": "my_collection",
"description": "my collection description",
Expand Down Expand Up @@ -221,14 +223,16 @@ def test_reindex_meilisearch(self, mock_meilisearch):
doc_problem2 = copy.deepcopy(self.doc_problem2)
doc_problem2["tags"] = {}
doc_problem2["collections"] = {}
doc_collection = copy.deepcopy(self.collection_dict)
doc_collection["tags"] = {}

api.rebuild_index()
assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 3
mock_meilisearch.return_value.index.return_value.add_documents.assert_has_calls(
[
call([doc_sequential, doc_vertical]),
call([doc_problem1, doc_problem2]),
call([self.collection_dict]),
call([doc_collection]),
],
any_order=True,
)
Expand Down Expand Up @@ -459,6 +463,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
doc_collection1_created = {
"id": collection1.id,
"block_id": collection1.key,
"usage_key": f"lib-collection:org1:lib:{collection1.key}",
"type": "collection",
"display_name": "Collection 1",
"description": "First Collection",
Expand All @@ -473,6 +478,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
doc_collection2_created = {
"id": collection2.id,
"block_id": collection2.key,
"usage_key": f"lib-collection:org1:lib:{collection2.key}",
"type": "collection",
"display_name": "Collection 2",
"description": "Second Collection",
Expand All @@ -487,6 +493,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
doc_collection2_updated = {
"id": collection2.id,
"block_id": collection2.key,
"usage_key": f"lib-collection:org1:lib:{collection2.key}",
"type": "collection",
"display_name": "Collection 2",
"description": "Second Collection",
Expand All @@ -501,6 +508,7 @@ def test_index_library_block_and_collections(self, mock_meilisearch):
doc_collection1_updated = {
"id": collection1.id,
"block_id": collection1.key,
"usage_key": f"lib-collection:org1:lib:{collection1.key}",
"type": "collection",
"display_name": "Collection 1",
"description": "First Collection",
Expand Down Expand Up @@ -576,3 +584,34 @@ def test_delete_all_drafts(self, mock_meilisearch):
mock_meilisearch.return_value.index.return_value.delete_documents.assert_called_once_with(
filter=delete_filter
)

@override_settings(MEILISEARCH_ENABLED=True)
def test_index_tags_in_collections(self, mock_meilisearch):
# Tag collection
tagging_api.tag_object(self.collection_usage_key, self.taxonomyA, ["one", "two"])
tagging_api.tag_object(self.collection_usage_key, self.taxonomyB, ["three", "four"])

# Build expected docs with tags at each stage
doc_collection_with_tags1 = {
"id": self.collection.id,
"tags": {
'taxonomy': ['A'],
'level0': ['A > one', 'A > two']
}
}
doc_collection_with_tags2 = {
"id": self.collection.id,
"tags": {
'taxonomy': ['A', 'B'],
'level0': ['A > one', 'A > two', 'B > four', 'B > three']
}
}

assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
[
call([doc_collection_with_tags1]),
call([doc_collection_with_tags2]),
],
any_order=True,
)
11 changes: 11 additions & 0 deletions openedx/core/djangoapps/content/search/tests/test_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ..documents import (
searchable_doc_for_course_block,
searchable_doc_tags,
searchable_doc_tags_for_collection,
searchable_doc_collections,
searchable_doc_for_collection,
searchable_doc_for_library_block,
Expand All @@ -27,6 +28,7 @@
except RuntimeError:
searchable_doc_for_course_block = lambda x: x
searchable_doc_tags = lambda x: x
searchable_doc_tags_for_collection = lambda x: x
searchable_doc_for_collection = lambda x: x
searchable_doc_for_library_block = lambda x: x
SearchAccess = {}
Expand Down Expand Up @@ -76,6 +78,7 @@ def setUpClass(cls):
created_by=None,
description="my toy collection description"
)
cls.collection_usage_key = "lib-collection:edX:2012_Fall:TOY_COLLECTION"
cls.library_block = library_api.create_library_block(
cls.library.key,
"html",
Expand Down Expand Up @@ -109,6 +112,7 @@ def setUpClass(cls):
tagging_api.tag_object(str(cls.html_block_key), cls.subject_tags, tags=["Chinese", "Jump Links"])
tagging_api.tag_object(str(cls.html_block_key), cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(str(cls.library_block.usage_key), cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(cls.collection_usage_key, cls.difficulty_tags, tags=["Normal"])

@property
def toy_course_access_id(self):
Expand Down Expand Up @@ -296,9 +300,12 @@ def test_html_library_block(self):

def test_collection_with_library(self):
doc = searchable_doc_for_collection(self.collection)
doc.update(searchable_doc_tags_for_collection(self.library.key, self.collection))

assert doc == {
"id": self.collection.id,
"block_id": self.collection.key,
"usage_key": self.collection_usage_key,
"type": "collection",
"org": "edX",
"display_name": "Toy Collection",
Expand All @@ -309,6 +316,10 @@ def test_collection_with_library(self):
"breadcrumbs": [{"display_name": "some content_library"}],
"created": 1680674828.0,
"modified": 1680674828.0,
'tags': {
'taxonomy': ['Difficulty'],
'level0': ['Difficulty > Normal']
}
}

def test_collection_with_no_library(self):
Expand Down
40 changes: 39 additions & 1 deletion openedx/core/djangoapps/content_libraries/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@
from opaque_keys.edx.locator import (
LibraryLocatorV2,
LibraryUsageLocatorV2,
LibraryLocator as LibraryLocatorV1
LibraryLocator as LibraryLocatorV1,
LibraryCollectionLocator,
)
from opaque_keys import InvalidKeyError
from openedx_events.content_authoring.data import (
Expand Down Expand Up @@ -1262,6 +1263,43 @@ def update_library_collection_components(
return collection


def get_library_collection_usage_key(
library_key: LibraryLocatorV2,
collection_key: str,
# As an optimization, callers may pass in a pre-fetched ContentLibrary instance
content_library: ContentLibrary | None = None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great! 👍

) -> LibraryCollectionLocator:
"""
Returns the LibraryCollectionLocator associated to a collection
"""
if not content_library:
content_library = ContentLibrary.objects.get_by_key(library_key) # type: ignore[attr-defined]
assert content_library
assert content_library.learning_package_id
assert content_library.library_key == library_key

return LibraryCollectionLocator(library_key, collection_key)


def get_library_collection_from_usage_key(
collection_usage_key: LibraryCollectionLocator,
) -> Collection:
"""
Return a Collection using the LibraryCollectionLocator
"""

library_key = collection_usage_key.library_key
collection_key = collection_usage_key.collection_id
content_library = ContentLibrary.objects.get_by_key(library_key) # type: ignore[attr-defined]
try:
return authoring_api.get_collection(
content_library.learning_package_id,
collection_key,
)
except Collection.DoesNotExist as exc:
raise ContentLibraryCollectionNotFound from exc


# V1/V2 Compatibility Helpers
# (Should be removed as part of
# https://github.com/openedx/edx-platform/issues/32457)
Expand Down
Loading