From 08146b66c1a9df3531261d0d3c0dca73e9f0c88d Mon Sep 17 00:00:00 2001 From: Matt Bertrand Date: Tue, 5 Mar 2024 16:50:28 -0500 Subject: [PATCH] some tweaking and unit tests --- learning_resources/admin.py | 1 + learning_resources/etl/loaders.py | 4 +- learning_resources/etl/loaders_test.py | 31 ++++++++-- learning_resources/etl/utils.py | 2 +- learning_resources/etl/utils_test.py | 29 ++++++++- learning_resources/models.py | 5 +- learning_resources/utils_test.py | 12 ++++ learning_resources_search/api.py | 2 +- learning_resources_search/api_test.py | 73 ++++++++++++++++++++++- learning_resources_search/plugins_test.py | 23 +++++++ 10 files changed, 167 insertions(+), 15 deletions(-) diff --git a/learning_resources/admin.py b/learning_resources/admin.py index dcfbe52be0..367b2f5502 100644 --- a/learning_resources/admin.py +++ b/learning_resources/admin.py @@ -105,6 +105,7 @@ class VideoPlaylistInline(TabularInline): model = models.VideoPlaylist extra = 0 show_change_link = True + fields = ("channel",) class ProgramInline(TabularInline): diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py index 0cb7888916..27d302f2cf 100644 --- a/learning_resources/etl/loaders.py +++ b/learning_resources/etl/loaders.py @@ -861,9 +861,10 @@ def load_video_channels(video_channels_data: iter) -> list[VideoChannel]: list of VideoChannel: the loaded video channels """ video_channels = [] - + channel_ids = [] for video_channel_data in video_channels_data: channel_id = video_channel_data["channel_id"] + channel_ids.append(channel_id) try: video_channel = load_video_channel(video_channel_data) except ExtractException: @@ -879,7 +880,6 @@ def load_video_channels(video_channels_data: iter) -> list[VideoChannel]: else: video_channels.append(video_channel) - channel_ids = [video_channel.channel_id for video_channel in video_channels] VideoChannel.objects.exclude(channel_id__in=channel_ids).update(published=False) # Unpublish any video playlists not included in published channels diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py index 9fbe91a38d..6dac1e8fa3 100644 --- a/learning_resources/etl/loaders_test.py +++ b/learning_resources/etl/loaders_test.py @@ -936,6 +936,11 @@ def test_load_video(mocker, mock_upsert_tasks, video_exists, is_published, pass_ VideoFactory.create() if video_exists else VideoFactory.build() ).learning_resource offered_by = LearningResourceOfferorFactory.create() + expected_topics = [{"name": "Biology"}, {"name": "Chemistry"}] + mock_similar_topics_action = mocker.patch( + "learning_resources.etl.loaders.similar_topics_action", + return_value=expected_topics, + ) assert Video.objects.count() == (1 if video_exists else 0) @@ -954,6 +959,8 @@ def test_load_video(mocker, mock_upsert_tasks, video_exists, is_published, pass_ "published": is_published, "video": {"duration": video_resource.video.duration}, } + if pass_topics: + props["topics"] = expected_topics result = load_video(props) assert Video.objects.count() == 1 @@ -962,6 +969,11 @@ def test_load_video(mocker, mock_upsert_tasks, video_exists, is_published, pass_ assert isinstance(result, LearningResource) assert result.published == is_published + assert mock_similar_topics_action.call_count == (0 if pass_topics else 1) + assert list(result.topics.values_list("name", flat=True).order_by("name")) == [ + topic["name"] for topic in expected_topics + ] + for key, value in props.items(): assert getattr(result, key) == value, f"Property {key} should equal {value}" @@ -986,22 +998,25 @@ def test_load_videos(): assert Video.objects.count() == len(video_resources) -def test_load_playlist(): +def test_load_playlist(mocker): """Test load_playlist""" + expected_topics = [{"name": "Biology"}, {"name": "Physics"}] + mock_most_common_topics = mocker.patch( + "learning_resources.etl.loaders.most_common_topics", + return_value=expected_topics, + ) channel = VideoChannelFactory.create(playlists=None) playlist = VideoPlaylistFactory.build().learning_resource assert VideoPlaylist.objects.count() == 0 assert Video.objects.count() == 0 - videos_resources = [ - video.learning_resource for video in VideoFactory.build_batch(5) - ] + video_resources = [video.learning_resource for video in VideoFactory.build_batch(5)] videos_data = [ { **model_to_dict(video, exclude=non_transformable_attributes), "platform": PlatformType.youtube.name, "offered_by": {"code": LearningResourceOfferorFactory.create().code}, } - for video in videos_resources + for video in video_resources ] props = { @@ -1015,9 +1030,13 @@ def test_load_playlist(): result = load_playlist(channel, props) assert isinstance(result, LearningResource) + mock_most_common_topics.assert_called_once() - assert result.resources.count() == len(videos_resources) + assert result.resources.count() == len(video_resources) assert result.video_playlist.channel == channel + assert list(result.topics.values_list("name", flat=True).order_by("name")) == [ + topic["name"] for topic in expected_topics + ] def test_load_playlists_unpublish(mocker): diff --git a/learning_resources/etl/utils.py b/learning_resources/etl/utils.py index 8e6dea4cea..2b7cf06ecd 100644 --- a/learning_resources/etl/utils.py +++ b/learning_resources/etl/utils.py @@ -645,7 +645,7 @@ def most_common_topics( list: The most common topic names as a dict """ counter = Counter( - list({topic.name for resource in resources for topic in resource.topics.all()}) + [topic.name for resource in resources for topic in resource.topics.all()] ) common_topics = dict(counter.most_common(max_topics)).keys() return [{"name": topic} for topic in common_topics] diff --git a/learning_resources/etl/utils_test.py b/learning_resources/etl/utils_test.py index 742e7efaf8..2c5676c253 100644 --- a/learning_resources/etl/utils_test.py +++ b/learning_resources/etl/utils_test.py @@ -2,6 +2,7 @@ import datetime import pathlib +from random import randrange from subprocess import check_call from tempfile import TemporaryDirectory from unittest.mock import ANY @@ -16,7 +17,12 @@ PlatformType, ) from learning_resources.etl import utils -from learning_resources.factories import ContentFileFactory, LearningResourceRunFactory +from learning_resources.factories import ( + ContentFileFactory, + LearningResourceFactory, + LearningResourceRunFactory, + LearningResourceTopicFactory, +) pytestmark = pytest.mark.django_db @@ -326,3 +332,24 @@ def test_extract_valid_department_from_id(readable_id, is_ocw, dept_ids): assert ( utils.extract_valid_department_from_id(readable_id, is_ocw=is_ocw) == dept_ids ) + + +def test_most_common_topics(): + """Test that most_common_topics returns the correct topics""" + max_topics = 4 + common_topics = LearningResourceTopicFactory.create_batch(max_topics) + uncommon_topics = LearningResourceTopicFactory.create_batch(3) + resources = [] + for topic in common_topics: + resources.extend( + LearningResourceFactory.create_batch(randrange(2, 4), topics=[topic]) # noqa: S311 + ) + resources.extend( + [LearningResourceFactory.create(topics=[topic]) for topic in uncommon_topics] + ) + assert sorted( + [ + topic["name"] + for topic in utils.most_common_topics(resources, max_topics=max_topics) + ] + ) == [topic.name for topic in common_topics] diff --git a/learning_resources/models.py b/learning_resources/models.py index d512753e06..de39aac41a 100644 --- a/learning_resources/models.py +++ b/learning_resources/models.py @@ -111,16 +111,15 @@ class LearningResource(TimestampedModel): "runs__image", "children__child", "children__child__runs", + "children__child__parents", "children__child__runs__instructors", - "children__child__course", - "children__child__program", - "children__child__learning_path", "children__child__departments", "children__child__platform", "children__child__topics", "children__child__image", "children__child__offered_by", "children__child__content_tags", + *[f"children__child__{item.name}" for item in LearningResourceType], ] related_selects = [ diff --git a/learning_resources/utils_test.py b/learning_resources/utils_test.py index 9f59700faa..38720c839c 100644 --- a/learning_resources/utils_test.py +++ b/learning_resources/utils_test.py @@ -212,6 +212,18 @@ def test_resource_upserted_actions(mock_plugin_manager, fixture_resource): ) +def test_similar_topics_action(mock_plugin_manager, fixture_resource) -> dict: + """ + similar_topics_action should trigger plugin hook's resource_similar_topics function + """ + mock_topics = [{"name": "Biology"}, {"name": "Chemistry"}] + mock_plugin_manager.hook.resource_similar_topics.return_value = [mock_topics] + assert utils.similar_topics_action(fixture_resource) == mock_topics + mock_plugin_manager.hook.resource_similar_topics.assert_called_once_with( + resource=fixture_resource + ) + + def test_resource_unpublished_actions(mock_plugin_manager, fixture_resource): """ resource_unpublished_actions function should trigger plugin hook's resource_unpublished function diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py index 52660b02c8..4b433871f1 100644 --- a/learning_resources_search/api.py +++ b/learning_resources_search/api.py @@ -553,4 +553,4 @@ def get_similar_topics( topics = [topic.to_dict()["name"] for hit in response.hits for topic in hit.topics] counter = Counter(topics) - return dict(counter.most_common(num_topics)).keys() + return list(dict(counter.most_common(num_topics)).keys()) diff --git a/learning_resources_search/api_test.py b/learning_resources_search/api_test.py index fbc4793e88..9906d29b39 100644 --- a/learning_resources_search/api_test.py +++ b/learning_resources_search/api_test.py @@ -1,4 +1,5 @@ """Search API function tests""" +from unittest.mock import Mock import pytest @@ -12,9 +13,18 @@ generate_learning_resources_text_clause, generate_sort_clause, generate_suggest_clause, + get_similar_topics, relevant_indexes, ) -from learning_resources_search.constants import SOURCE_EXCLUDED_FIELDS +from learning_resources_search.constants import COURSE_TYPE, SOURCE_EXCLUDED_FIELDS + + +def os_topic(topic_name) -> Mock: + """ + Given a topic name, return a mock object emulating an + OpenSearch topic AttrDict object + """ + return Mock(to_dict=Mock(return_value={"name": topic_name})) @pytest.mark.parametrize( @@ -1382,3 +1392,64 @@ def test_execute_learn_search(opensearch): body=query, index=["testindex_course_default"], ) + + +def test_get_similar_topics(settings, opensearch): + """Test get_similar_topics makes a query for similar document topics""" + input_doc = {"title": "title text", "description": "description text"} + + # topic d is least popular and should not show up, order does not matter + opensearch.conn.search.return_value = { + "hits": { + "hits": [ + { + "_source": { + "topics": [ + os_topic("topic a"), + os_topic("topic b"), + os_topic("topic d"), + ] + } + }, + {"_source": {"topics": [os_topic("topic a"), os_topic("topic c")]}}, + {"_source": {"topics": [os_topic("topic a"), os_topic("topic c")]}}, + {"_source": {"topics": [os_topic("topic a"), os_topic("topic c")]}}, + {"_source": {"topics": [os_topic("topic a"), os_topic("topic b")]}}, + ] + } + } + + # results should be top 3 in decreasing order of frequency + assert get_similar_topics(input_doc, 3, 1, 15) == ["topic a", "topic c", "topic b"] + + opensearch.conn.search.assert_called_once_with( + body={ + "_source": {"includes": "topics"}, + "query": { + "bool": { + "filter": [{"term": {"resource_type": "course"}}], + "must": [ + { + "more_like_this": { + "like": [ + { + "doc": input_doc, + "fields": ["title", "description"], + } + ], + "fields": [ + "course.course_numbers.value", + "title", + "description", + "full_description", + ], + "min_term_freq": 1, + "min_doc_freq": 15, + } + } + ], + } + }, + }, + index=[f"{settings.OPENSEARCH_INDEX}_{COURSE_TYPE}_default"], + ) diff --git a/learning_resources_search/plugins_test.py b/learning_resources_search/plugins_test.py index 5295c2257e..42af17e07a 100644 --- a/learning_resources_search/plugins_test.py +++ b/learning_resources_search/plugins_test.py @@ -117,3 +117,26 @@ def test_search_index_plugin_resource_run_delete(mock_search_index_helpers): False, # noqa: FBT003 ) assert LearningResourceRun.objects.filter(id=run_id).exists() is False + + +@pytest.mark.django_db() +def test_resource_similar_topics(mocker, settings): + """The plugin function should return expected topics for a resource""" + expected_topics = ["topic1", "topic2"] + mock_similar_topics = mocker.patch( + "learning_resources_search.plugins.get_similar_topics", + return_value=expected_topics, + ) + resource = LearningResourceFactory.create() + topics = SearchIndexPlugin().resource_similar_topics(resource) + assert topics == [{"name": topic} for topic in expected_topics] + mock_similar_topics.assert_called_once_with( + { + "title": resource.title, + "description": resource.description, + "full_description": resource.full_description, + }, + settings.OPEN_VIDEO_MAX_TOPICS, + settings.OPEN_VIDEO_MIN_TERM_FREQ, + settings.OPEN_VIDEO_MIN_DOC_FREQ, + )