From 08146b66c1a9df3531261d0d3c0dca73e9f0c88d Mon Sep 17 00:00:00 2001
From: Matt Bertrand <mrbertrand@gmail.com>
Date: Tue, 5 Mar 2024 16:50:28 -0500
Subject: [PATCH] some tweaking and unit tests

---
 learning_resources/admin.py               |  1 +
 learning_resources/etl/loaders.py         |  4 +-
 learning_resources/etl/loaders_test.py    | 31 ++++++++--
 learning_resources/etl/utils.py           |  2 +-
 learning_resources/etl/utils_test.py      | 29 ++++++++-
 learning_resources/models.py              |  5 +-
 learning_resources/utils_test.py          | 12 ++++
 learning_resources_search/api.py          |  2 +-
 learning_resources_search/api_test.py     | 73 ++++++++++++++++++++++-
 learning_resources_search/plugins_test.py | 23 +++++++
 10 files changed, 167 insertions(+), 15 deletions(-)

diff --git a/learning_resources/admin.py b/learning_resources/admin.py
index dcfbe52be0..367b2f5502 100644
--- a/learning_resources/admin.py
+++ b/learning_resources/admin.py
@@ -105,6 +105,7 @@ class VideoPlaylistInline(TabularInline):
     model = models.VideoPlaylist
     extra = 0
     show_change_link = True
+    fields = ("channel",)
 
 
 class ProgramInline(TabularInline):
diff --git a/learning_resources/etl/loaders.py b/learning_resources/etl/loaders.py
index 0cb7888916..27d302f2cf 100644
--- a/learning_resources/etl/loaders.py
+++ b/learning_resources/etl/loaders.py
@@ -861,9 +861,10 @@ def load_video_channels(video_channels_data: iter) -> list[VideoChannel]:
         list of VideoChannel: the loaded video channels
     """
     video_channels = []
-
+    channel_ids = []
     for video_channel_data in video_channels_data:
         channel_id = video_channel_data["channel_id"]
+        channel_ids.append(channel_id)
         try:
             video_channel = load_video_channel(video_channel_data)
         except ExtractException:
@@ -879,7 +880,6 @@ def load_video_channels(video_channels_data: iter) -> list[VideoChannel]:
         else:
             video_channels.append(video_channel)
 
-    channel_ids = [video_channel.channel_id for video_channel in video_channels]
     VideoChannel.objects.exclude(channel_id__in=channel_ids).update(published=False)
 
     # Unpublish any video playlists not included in published channels
diff --git a/learning_resources/etl/loaders_test.py b/learning_resources/etl/loaders_test.py
index 9fbe91a38d..6dac1e8fa3 100644
--- a/learning_resources/etl/loaders_test.py
+++ b/learning_resources/etl/loaders_test.py
@@ -936,6 +936,11 @@ def test_load_video(mocker, mock_upsert_tasks, video_exists, is_published, pass_
         VideoFactory.create() if video_exists else VideoFactory.build()
     ).learning_resource
     offered_by = LearningResourceOfferorFactory.create()
+    expected_topics = [{"name": "Biology"}, {"name": "Chemistry"}]
+    mock_similar_topics_action = mocker.patch(
+        "learning_resources.etl.loaders.similar_topics_action",
+        return_value=expected_topics,
+    )
 
     assert Video.objects.count() == (1 if video_exists else 0)
 
@@ -954,6 +959,8 @@ def test_load_video(mocker, mock_upsert_tasks, video_exists, is_published, pass_
         "published": is_published,
         "video": {"duration": video_resource.video.duration},
     }
+    if pass_topics:
+        props["topics"] = expected_topics
 
     result = load_video(props)
     assert Video.objects.count() == 1
@@ -962,6 +969,11 @@ def test_load_video(mocker, mock_upsert_tasks, video_exists, is_published, pass_
     assert isinstance(result, LearningResource)
     assert result.published == is_published
 
+    assert mock_similar_topics_action.call_count == (0 if pass_topics else 1)
+    assert list(result.topics.values_list("name", flat=True).order_by("name")) == [
+        topic["name"] for topic in expected_topics
+    ]
+
     for key, value in props.items():
         assert getattr(result, key) == value, f"Property {key} should equal {value}"
 
@@ -986,22 +998,25 @@ def test_load_videos():
     assert Video.objects.count() == len(video_resources)
 
 
-def test_load_playlist():
+def test_load_playlist(mocker):
     """Test load_playlist"""
+    expected_topics = [{"name": "Biology"}, {"name": "Physics"}]
+    mock_most_common_topics = mocker.patch(
+        "learning_resources.etl.loaders.most_common_topics",
+        return_value=expected_topics,
+    )
     channel = VideoChannelFactory.create(playlists=None)
     playlist = VideoPlaylistFactory.build().learning_resource
     assert VideoPlaylist.objects.count() == 0
     assert Video.objects.count() == 0
-    videos_resources = [
-        video.learning_resource for video in VideoFactory.build_batch(5)
-    ]
+    video_resources = [video.learning_resource for video in VideoFactory.build_batch(5)]
     videos_data = [
         {
             **model_to_dict(video, exclude=non_transformable_attributes),
             "platform": PlatformType.youtube.name,
             "offered_by": {"code": LearningResourceOfferorFactory.create().code},
         }
-        for video in videos_resources
+        for video in video_resources
     ]
 
     props = {
@@ -1015,9 +1030,13 @@ def test_load_playlist():
     result = load_playlist(channel, props)
 
     assert isinstance(result, LearningResource)
+    mock_most_common_topics.assert_called_once()
 
-    assert result.resources.count() == len(videos_resources)
+    assert result.resources.count() == len(video_resources)
     assert result.video_playlist.channel == channel
+    assert list(result.topics.values_list("name", flat=True).order_by("name")) == [
+        topic["name"] for topic in expected_topics
+    ]
 
 
 def test_load_playlists_unpublish(mocker):
diff --git a/learning_resources/etl/utils.py b/learning_resources/etl/utils.py
index 8e6dea4cea..2b7cf06ecd 100644
--- a/learning_resources/etl/utils.py
+++ b/learning_resources/etl/utils.py
@@ -645,7 +645,7 @@ def most_common_topics(
         list: The most common topic names as a dict
     """
     counter = Counter(
-        list({topic.name for resource in resources for topic in resource.topics.all()})
+        [topic.name for resource in resources for topic in resource.topics.all()]
     )
     common_topics = dict(counter.most_common(max_topics)).keys()
     return [{"name": topic} for topic in common_topics]
diff --git a/learning_resources/etl/utils_test.py b/learning_resources/etl/utils_test.py
index 742e7efaf8..2c5676c253 100644
--- a/learning_resources/etl/utils_test.py
+++ b/learning_resources/etl/utils_test.py
@@ -2,6 +2,7 @@
 
 import datetime
 import pathlib
+from random import randrange
 from subprocess import check_call
 from tempfile import TemporaryDirectory
 from unittest.mock import ANY
@@ -16,7 +17,12 @@
     PlatformType,
 )
 from learning_resources.etl import utils
-from learning_resources.factories import ContentFileFactory, LearningResourceRunFactory
+from learning_resources.factories import (
+    ContentFileFactory,
+    LearningResourceFactory,
+    LearningResourceRunFactory,
+    LearningResourceTopicFactory,
+)
 
 pytestmark = pytest.mark.django_db
 
@@ -326,3 +332,24 @@ def test_extract_valid_department_from_id(readable_id, is_ocw, dept_ids):
     assert (
         utils.extract_valid_department_from_id(readable_id, is_ocw=is_ocw) == dept_ids
     )
+
+
+def test_most_common_topics():
+    """Test that most_common_topics returns the correct topics"""
+    max_topics = 4
+    common_topics = LearningResourceTopicFactory.create_batch(max_topics)
+    uncommon_topics = LearningResourceTopicFactory.create_batch(3)
+    resources = []
+    for topic in common_topics:
+        resources.extend(
+            LearningResourceFactory.create_batch(randrange(2, 4), topics=[topic])  # noqa: S311
+        )
+    resources.extend(
+        [LearningResourceFactory.create(topics=[topic]) for topic in uncommon_topics]
+    )
+    assert sorted(
+        [
+            topic["name"]
+            for topic in utils.most_common_topics(resources, max_topics=max_topics)
+        ]
+    ) == [topic.name for topic in common_topics]
diff --git a/learning_resources/models.py b/learning_resources/models.py
index d512753e06..de39aac41a 100644
--- a/learning_resources/models.py
+++ b/learning_resources/models.py
@@ -111,16 +111,15 @@ class LearningResource(TimestampedModel):
         "runs__image",
         "children__child",
         "children__child__runs",
+        "children__child__parents",
         "children__child__runs__instructors",
-        "children__child__course",
-        "children__child__program",
-        "children__child__learning_path",
         "children__child__departments",
         "children__child__platform",
         "children__child__topics",
         "children__child__image",
         "children__child__offered_by",
         "children__child__content_tags",
+        *[f"children__child__{item.name}" for item in LearningResourceType],
     ]
 
     related_selects = [
diff --git a/learning_resources/utils_test.py b/learning_resources/utils_test.py
index 9f59700faa..38720c839c 100644
--- a/learning_resources/utils_test.py
+++ b/learning_resources/utils_test.py
@@ -212,6 +212,18 @@ def test_resource_upserted_actions(mock_plugin_manager, fixture_resource):
     )
 
 
+def test_similar_topics_action(mock_plugin_manager, fixture_resource) -> dict:
+    """
+    similar_topics_action should trigger plugin hook's resource_similar_topics function
+    """
+    mock_topics = [{"name": "Biology"}, {"name": "Chemistry"}]
+    mock_plugin_manager.hook.resource_similar_topics.return_value = [mock_topics]
+    assert utils.similar_topics_action(fixture_resource) == mock_topics
+    mock_plugin_manager.hook.resource_similar_topics.assert_called_once_with(
+        resource=fixture_resource
+    )
+
+
 def test_resource_unpublished_actions(mock_plugin_manager, fixture_resource):
     """
     resource_unpublished_actions function should trigger plugin hook's resource_unpublished function
diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py
index 52660b02c8..4b433871f1 100644
--- a/learning_resources_search/api.py
+++ b/learning_resources_search/api.py
@@ -553,4 +553,4 @@ def get_similar_topics(
     topics = [topic.to_dict()["name"] for hit in response.hits for topic in hit.topics]
 
     counter = Counter(topics)
-    return dict(counter.most_common(num_topics)).keys()
+    return list(dict(counter.most_common(num_topics)).keys())
diff --git a/learning_resources_search/api_test.py b/learning_resources_search/api_test.py
index fbc4793e88..9906d29b39 100644
--- a/learning_resources_search/api_test.py
+++ b/learning_resources_search/api_test.py
@@ -1,4 +1,5 @@
 """Search API function tests"""
+from unittest.mock import Mock
 
 import pytest
 
@@ -12,9 +13,18 @@
     generate_learning_resources_text_clause,
     generate_sort_clause,
     generate_suggest_clause,
+    get_similar_topics,
     relevant_indexes,
 )
-from learning_resources_search.constants import SOURCE_EXCLUDED_FIELDS
+from learning_resources_search.constants import COURSE_TYPE, SOURCE_EXCLUDED_FIELDS
+
+
+def os_topic(topic_name) -> Mock:
+    """
+    Given a topic name, return a mock object emulating an
+    OpenSearch topic AttrDict object
+    """
+    return Mock(to_dict=Mock(return_value={"name": topic_name}))
 
 
 @pytest.mark.parametrize(
@@ -1382,3 +1392,64 @@ def test_execute_learn_search(opensearch):
         body=query,
         index=["testindex_course_default"],
     )
+
+
+def test_get_similar_topics(settings, opensearch):
+    """Test get_similar_topics makes a query for similar document topics"""
+    input_doc = {"title": "title text", "description": "description text"}
+
+    # topic d is least popular and should not show up, order does not matter
+    opensearch.conn.search.return_value = {
+        "hits": {
+            "hits": [
+                {
+                    "_source": {
+                        "topics": [
+                            os_topic("topic a"),
+                            os_topic("topic b"),
+                            os_topic("topic d"),
+                        ]
+                    }
+                },
+                {"_source": {"topics": [os_topic("topic a"), os_topic("topic c")]}},
+                {"_source": {"topics": [os_topic("topic a"), os_topic("topic c")]}},
+                {"_source": {"topics": [os_topic("topic a"), os_topic("topic c")]}},
+                {"_source": {"topics": [os_topic("topic a"), os_topic("topic b")]}},
+            ]
+        }
+    }
+
+    # results should be top 3 in decreasing order of frequency
+    assert get_similar_topics(input_doc, 3, 1, 15) == ["topic a", "topic c", "topic b"]
+
+    opensearch.conn.search.assert_called_once_with(
+        body={
+            "_source": {"includes": "topics"},
+            "query": {
+                "bool": {
+                    "filter": [{"term": {"resource_type": "course"}}],
+                    "must": [
+                        {
+                            "more_like_this": {
+                                "like": [
+                                    {
+                                        "doc": input_doc,
+                                        "fields": ["title", "description"],
+                                    }
+                                ],
+                                "fields": [
+                                    "course.course_numbers.value",
+                                    "title",
+                                    "description",
+                                    "full_description",
+                                ],
+                                "min_term_freq": 1,
+                                "min_doc_freq": 15,
+                            }
+                        }
+                    ],
+                }
+            },
+        },
+        index=[f"{settings.OPENSEARCH_INDEX}_{COURSE_TYPE}_default"],
+    )
diff --git a/learning_resources_search/plugins_test.py b/learning_resources_search/plugins_test.py
index 5295c2257e..42af17e07a 100644
--- a/learning_resources_search/plugins_test.py
+++ b/learning_resources_search/plugins_test.py
@@ -117,3 +117,26 @@ def test_search_index_plugin_resource_run_delete(mock_search_index_helpers):
         False,  # noqa: FBT003
     )
     assert LearningResourceRun.objects.filter(id=run_id).exists() is False
+
+
+@pytest.mark.django_db()
+def test_resource_similar_topics(mocker, settings):
+    """The plugin function should return expected topics for a resource"""
+    expected_topics = ["topic1", "topic2"]
+    mock_similar_topics = mocker.patch(
+        "learning_resources_search.plugins.get_similar_topics",
+        return_value=expected_topics,
+    )
+    resource = LearningResourceFactory.create()
+    topics = SearchIndexPlugin().resource_similar_topics(resource)
+    assert topics == [{"name": topic} for topic in expected_topics]
+    mock_similar_topics.assert_called_once_with(
+        {
+            "title": resource.title,
+            "description": resource.description,
+            "full_description": resource.full_description,
+        },
+        settings.OPEN_VIDEO_MAX_TOPICS,
+        settings.OPEN_VIDEO_MIN_TERM_FREQ,
+        settings.OPEN_VIDEO_MIN_DOC_FREQ,
+    )