Skip to content

Commit

Permalink
Merge pull request #684 from open-craft/jill/reindex-collection
Browse files Browse the repository at this point in the history
Store Collection metadata + component count in meilisearch
  • Loading branch information
rpenido authored Sep 12, 2024
2 parents 360ec35 + 2c6c8cf commit 79f83d7
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 39 deletions.
16 changes: 16 additions & 0 deletions openedx/core/djangoapps/content/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,22 @@ def upsert_library_block_index_doc(usage_key: UsageKey) -> None:
_update_index_docs(docs)


def upsert_library_collection_index_doc(library_key: LibraryLocatorV2, collection_key: str) -> None:
"""
Creates or updates the document for the given Library Collection in the search index
"""
content_library = lib_api.ContentLibrary.objects.get_by_key(library_key)
collection = authoring_api.get_collection(
learning_package_id=content_library.learning_package_id,
collection_key=collection_key,
)
docs = [
searchable_doc_for_collection(collection)
]

_update_index_docs(docs)


def upsert_content_library_index_docs(library_key: LibraryLocatorV2) -> None:
"""
Creates or updates the documents for the given Content Library in the search index
Expand Down
12 changes: 11 additions & 1 deletion openedx/core/djangoapps/content/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ class Fields:
id = "id"
usage_key = "usage_key"
type = "type" # DocType.course_block or DocType.library_block (see below)
block_id = "block_id" # The block_id part of the usage key. Sometimes human-readable, sometimes a random hex ID
# The block_id part of the usage key for course or library blocks.
# If it's a collection, the collection.key is stored here.
# Sometimes human-readable, sometimes a random hex ID
# Is only unique within the given context_key.
block_id = "block_id"
display_name = "display_name"
description = "description"
modified = "modified"
Expand Down Expand Up @@ -65,6 +69,10 @@ class Fields:
# Text (html) blocks have an "html_content" key in here, capa has "capa_content" and "problem_types", and so on.
content = "content"

# Collections use this field to communicate how many entities/components they contain.
# Structural XBlocks may use this one day to indicate how many child blocks they ocntain.
num_children = "num_children"

# Note: new fields or values can be added at any time, but if they need to be indexed for filtering or keyword
# search, the index configuration will need to be changed, which is only done as part of the 'reindex_studio'
# command (changing those settings on an large active index is not recommended).
Expand Down Expand Up @@ -355,6 +363,7 @@ def searchable_doc_for_collection(collection) -> dict:
"""
doc = {
Fields.id: collection.id,
Fields.block_id: collection.key,
Fields.type: DocType.collection,
Fields.display_name: collection.title,
Fields.description: collection.description,
Expand All @@ -364,6 +373,7 @@ def searchable_doc_for_collection(collection) -> dict:
# If related contentlibrary is found, it will override this value below.
# Mostly contentlibrary.library_key == learning_package.key
Fields.context_key: collection.learning_package.key,
Fields.num_children: collection.entities.count(),
}
# Just in case learning_package is not related to a library
try:
Expand Down
29 changes: 27 additions & 2 deletions openedx/core/djangoapps/content/search/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,23 @@

from django.db.models.signals import post_delete
from django.dispatch import receiver
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from openedx_events.content_authoring.data import (
ContentLibraryData,
ContentObjectChangedData,
LibraryBlockData,
LibraryCollectionData,
XBlockData,
)
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from openedx_events.content_authoring.signals import (
CONTENT_LIBRARY_DELETED,
CONTENT_LIBRARY_UPDATED,
LIBRARY_BLOCK_CREATED,
LIBRARY_BLOCK_DELETED,
LIBRARY_BLOCK_UPDATED,
LIBRARY_COLLECTION_CREATED,
LIBRARY_COLLECTION_UPDATED,
XBLOCK_CREATED,
XBLOCK_DELETED,
XBLOCK_UPDATED,
Expand All @@ -34,6 +37,7 @@
delete_library_block_index_doc,
delete_xblock_index_doc,
update_content_library_index_docs,
update_library_collection_index_doc,
upsert_library_block_index_doc,
upsert_xblock_index_doc,
)
Expand Down Expand Up @@ -151,6 +155,27 @@ def content_library_updated_handler(**kwargs) -> None:
update_content_library_index_docs.apply(args=[str(content_library_data.library_key)])


@receiver(LIBRARY_COLLECTION_CREATED)
@receiver(LIBRARY_COLLECTION_UPDATED)
@only_if_meilisearch_enabled
def library_collection_updated_handler(**kwargs) -> None:
"""
Create or update the index for the content library collection
"""
library_collection = kwargs.get("library_collection", None)
if not library_collection or not isinstance(library_collection, LibraryCollectionData): # pragma: no cover
log.error("Received null or incorrect data for event")
return

# Update collection index synchronously to make sure that search index is updated before
# the frontend invalidates/refetches index.
# See content_library_updated_handler for more details.
update_library_collection_index_doc.apply(args=[
str(library_collection.library_key),
library_collection.collection_key,
])


@receiver(CONTENT_OBJECT_ASSOCIATIONS_CHANGED)
@only_if_meilisearch_enabled
def content_object_associations_changed_handler(**kwargs) -> None:
Expand Down
13 changes: 13 additions & 0 deletions openedx/core/djangoapps/content/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,16 @@ def update_content_library_index_docs(library_key_str: str) -> None:
# Delete all documents in this library that were not published by above function
# as this task is also triggered on discard event.
api.delete_all_draft_docs_for_library(library_key)


@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
@set_code_owner_attribute
def update_library_collection_index_doc(library_key_str: str, collection_key: str) -> None:
"""
Celery task to update the content index documents for a library collection
"""
library_key = LibraryLocatorV2.from_string(library_key_str)

log.info("Updating content index documents for collection %s in library%s", collection_key, library_key)

api.upsert_library_collection_index_doc(library_key, collection_key)
141 changes: 105 additions & 36 deletions openedx/core/djangoapps/content/search/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,16 +186,18 @@ def setUp(self):
description="my collection description"
)
self.collection_dict = {
'id': self.collection.id,
'type': 'collection',
'display_name': 'my_collection',
'description': 'my collection description',
'context_key': 'lib:org1:lib',
'org': 'org1',
'created': created_date.timestamp(),
'modified': created_date.timestamp(),
"id": self.collection.id,
"block_id": self.collection.key,
"type": "collection",
"display_name": "my_collection",
"description": "my collection description",
"num_children": 0,
"context_key": "lib:org1:lib",
"org": "org1",
"created": created_date.timestamp(),
"modified": created_date.timestamp(),
"access_id": lib_access.id,
'breadcrumbs': [{'display_name': 'Library'}]
"breadcrumbs": [{"display_name": "Library"}],
}

@override_settings(MEILISEARCH_ENABLED=False)
Expand Down Expand Up @@ -415,38 +417,101 @@ def test_index_library_block_tags(self, mock_meilisearch):
)

@override_settings(MEILISEARCH_ENABLED=True)
def test_index_library_block_collections(self, mock_meilisearch):
def test_index_library_block_and_collections(self, mock_meilisearch):
"""
Test indexing an Library Block that is in two collections.
Test indexing an Library Block and the Collections it's in.
"""
collection1 = authoring_api.create_collection(
learning_package_id=self.library.learning_package.id,
key="COL1",
title="Collection 1",
created_by=None,
description="First Collection",
)

collection2 = authoring_api.create_collection(
learning_package_id=self.library.learning_package.id,
key="COL2",
title="Collection 2",
created_by=None,
description="Second Collection",
)
# Create collections (these internally call `upsert_library_collection_index_doc`)
created_date = datetime(2023, 5, 6, 7, 8, 9, tzinfo=timezone.utc)
with freeze_time(created_date):
collection1 = library_api.create_library_collection(
self.library.key,
collection_key="COL1",
title="Collection 1",
created_by=None,
description="First Collection",
)

# Add Problem1 to both Collections (these internally call `upsert_block_collections_index_docs`)
# (adding in reverse order to test sorting of collection tag))
for collection in (collection2, collection1):
library_api.update_library_collection_components(
collection2 = library_api.create_library_collection(
self.library.key,
collection_key=collection.key,
usage_keys=[
self.problem1.usage_key,
],
collection_key="COL2",
title="Collection 2",
created_by=None,
description="Second Collection",
)

# Build expected docs with collections at each stage
# Add Problem1 to both Collections (these internally call `upsert_block_collections_index_docs` and
# `upsert_library_collection_index_doc`)
# (adding in reverse order to test sorting of collection tag)
updated_date = datetime(2023, 6, 7, 8, 9, 10, tzinfo=timezone.utc)
with freeze_time(updated_date):
for collection in (collection2, collection1):
library_api.update_library_collection_components(
self.library.key,
collection_key=collection.key,
usage_keys=[
self.problem1.usage_key,
],
)

# Build expected docs at each stage
lib_access, _ = SearchAccess.objects.get_or_create(context_key=self.library.key)
doc_collection1_created = {
"id": collection1.id,
"block_id": collection1.key,
"type": "collection",
"display_name": "Collection 1",
"description": "First Collection",
"num_children": 0,
"context_key": "lib:org1:lib",
"org": "org1",
"created": created_date.timestamp(),
"modified": created_date.timestamp(),
"access_id": lib_access.id,
"breadcrumbs": [{"display_name": "Library"}],
}
doc_collection2_created = {
"id": collection2.id,
"block_id": collection2.key,
"type": "collection",
"display_name": "Collection 2",
"description": "Second Collection",
"num_children": 0,
"context_key": "lib:org1:lib",
"org": "org1",
"created": created_date.timestamp(),
"modified": created_date.timestamp(),
"access_id": lib_access.id,
"breadcrumbs": [{"display_name": "Library"}],
}
doc_collection2_updated = {
"id": collection2.id,
"block_id": collection2.key,
"type": "collection",
"display_name": "Collection 2",
"description": "Second Collection",
"num_children": 1,
"context_key": "lib:org1:lib",
"org": "org1",
"created": created_date.timestamp(),
"modified": updated_date.timestamp(),
"access_id": lib_access.id,
"breadcrumbs": [{"display_name": "Library"}],
}
doc_collection1_updated = {
"id": collection1.id,
"block_id": collection1.key,
"type": "collection",
"display_name": "Collection 1",
"description": "First Collection",
"num_children": 1,
"context_key": "lib:org1:lib",
"org": "org1",
"created": created_date.timestamp(),
"modified": updated_date.timestamp(),
"access_id": lib_access.id,
"breadcrumbs": [{"display_name": "Library"}],
}
doc_problem_with_collection1 = {
"id": self.doc_problem1["id"],
"collections": {
Expand All @@ -462,9 +527,13 @@ def test_index_library_block_collections(self, mock_meilisearch):
},
}

assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 6
mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
[
call([doc_collection1_created]),
call([doc_collection2_created]),
call([doc_collection2_updated]),
call([doc_collection1_updated]),
call([doc_problem_with_collection1]),
call([doc_problem_with_collection2]),
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,12 @@ def test_collection_with_library(self):
doc = searchable_doc_for_collection(self.collection)
assert doc == {
"id": self.collection.id,
"block_id": self.collection.key,
"type": "collection",
"org": "edX",
"display_name": "Toy Collection",
"description": "my toy collection description",
"num_children": 1,
"context_key": "lib:edX:2012_Fall",
"access_id": self.library_access_id,
"breadcrumbs": [{"display_name": "some content_library"}],
Expand All @@ -327,9 +329,11 @@ def test_collection_with_no_library(self):
doc = searchable_doc_for_collection(collection)
assert doc == {
"id": collection.id,
"block_id": collection.key,
"type": "collection",
"display_name": "my_collection",
"description": "my collection description",
"num_children": 0,
"context_key": learning_package.key,
"access_id": self.toy_course_access_id,
"breadcrumbs": [{"display_name": "some learning_package"}],
Expand Down

0 comments on commit 79f83d7

Please sign in to comment.