Skip to content

Commit

Permalink
some tweaking and unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mbertrand committed Mar 5, 2024
1 parent 504087e commit 08146b6
Show file tree
Hide file tree
Showing 10 changed files with 167 additions and 15 deletions.
1 change: 1 addition & 0 deletions learning_resources/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ class VideoPlaylistInline(TabularInline):
model = models.VideoPlaylist
extra = 0
show_change_link = True
fields = ("channel",)


class ProgramInline(TabularInline):
Expand Down
4 changes: 2 additions & 2 deletions learning_resources/etl/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,9 +861,10 @@ def load_video_channels(video_channels_data: iter) -> list[VideoChannel]:
list of VideoChannel: the loaded video channels
"""
video_channels = []

channel_ids = []
for video_channel_data in video_channels_data:
channel_id = video_channel_data["channel_id"]
channel_ids.append(channel_id)
try:
video_channel = load_video_channel(video_channel_data)
except ExtractException:
Expand All @@ -879,7 +880,6 @@ def load_video_channels(video_channels_data: iter) -> list[VideoChannel]:
else:
video_channels.append(video_channel)

channel_ids = [video_channel.channel_id for video_channel in video_channels]
VideoChannel.objects.exclude(channel_id__in=channel_ids).update(published=False)

# Unpublish any video playlists not included in published channels
Expand Down
31 changes: 25 additions & 6 deletions learning_resources/etl/loaders_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,11 @@ def test_load_video(mocker, mock_upsert_tasks, video_exists, is_published, pass_
VideoFactory.create() if video_exists else VideoFactory.build()
).learning_resource
offered_by = LearningResourceOfferorFactory.create()
expected_topics = [{"name": "Biology"}, {"name": "Chemistry"}]
mock_similar_topics_action = mocker.patch(
"learning_resources.etl.loaders.similar_topics_action",
return_value=expected_topics,
)

assert Video.objects.count() == (1 if video_exists else 0)

Expand All @@ -954,6 +959,8 @@ def test_load_video(mocker, mock_upsert_tasks, video_exists, is_published, pass_
"published": is_published,
"video": {"duration": video_resource.video.duration},
}
if pass_topics:
props["topics"] = expected_topics

result = load_video(props)
assert Video.objects.count() == 1
Expand All @@ -962,6 +969,11 @@ def test_load_video(mocker, mock_upsert_tasks, video_exists, is_published, pass_
assert isinstance(result, LearningResource)
assert result.published == is_published

assert mock_similar_topics_action.call_count == (0 if pass_topics else 1)
assert list(result.topics.values_list("name", flat=True).order_by("name")) == [
topic["name"] for topic in expected_topics
]

for key, value in props.items():
assert getattr(result, key) == value, f"Property {key} should equal {value}"

Expand All @@ -986,22 +998,25 @@ def test_load_videos():
assert Video.objects.count() == len(video_resources)


def test_load_playlist():
def test_load_playlist(mocker):
"""Test load_playlist"""
expected_topics = [{"name": "Biology"}, {"name": "Physics"}]
mock_most_common_topics = mocker.patch(
"learning_resources.etl.loaders.most_common_topics",
return_value=expected_topics,
)
channel = VideoChannelFactory.create(playlists=None)
playlist = VideoPlaylistFactory.build().learning_resource
assert VideoPlaylist.objects.count() == 0
assert Video.objects.count() == 0
videos_resources = [
video.learning_resource for video in VideoFactory.build_batch(5)
]
video_resources = [video.learning_resource for video in VideoFactory.build_batch(5)]
videos_data = [
{
**model_to_dict(video, exclude=non_transformable_attributes),
"platform": PlatformType.youtube.name,
"offered_by": {"code": LearningResourceOfferorFactory.create().code},
}
for video in videos_resources
for video in video_resources
]

props = {
Expand All @@ -1015,9 +1030,13 @@ def test_load_playlist():
result = load_playlist(channel, props)

assert isinstance(result, LearningResource)
mock_most_common_topics.assert_called_once()

assert result.resources.count() == len(videos_resources)
assert result.resources.count() == len(video_resources)
assert result.video_playlist.channel == channel
assert list(result.topics.values_list("name", flat=True).order_by("name")) == [
topic["name"] for topic in expected_topics
]


def test_load_playlists_unpublish(mocker):
Expand Down
2 changes: 1 addition & 1 deletion learning_resources/etl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ def most_common_topics(
list: The most common topics, each as a {"name": <topic name>} dict
"""
counter = Counter(
list({topic.name for resource in resources for topic in resource.topics.all()})
[topic.name for resource in resources for topic in resource.topics.all()]
)
common_topics = dict(counter.most_common(max_topics)).keys()
return [{"name": topic} for topic in common_topics]
29 changes: 28 additions & 1 deletion learning_resources/etl/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datetime
import pathlib
from random import randrange
from subprocess import check_call
from tempfile import TemporaryDirectory
from unittest.mock import ANY
Expand All @@ -16,7 +17,12 @@
PlatformType,
)
from learning_resources.etl import utils
from learning_resources.factories import ContentFileFactory, LearningResourceRunFactory
from learning_resources.factories import (
ContentFileFactory,
LearningResourceFactory,
LearningResourceRunFactory,
LearningResourceTopicFactory,
)

pytestmark = pytest.mark.django_db

Expand Down Expand Up @@ -326,3 +332,24 @@ def test_extract_valid_department_from_id(readable_id, is_ocw, dept_ids):
assert (
utils.extract_valid_department_from_id(readable_id, is_ocw=is_ocw) == dept_ids
)


def test_most_common_topics():
    """
    Test that most_common_topics returns only the most frequently used topics.

    Each "common" topic is attached to 2 or 3 resources and each "uncommon"
    topic to exactly one resource, so with max_topics equal to the number of
    common topics only the common topic names should be returned.
    """
    max_topics = 4
    common_topics = LearningResourceTopicFactory.create_batch(max_topics)
    uncommon_topics = LearningResourceTopicFactory.create_batch(3)
    resources = []
    for topic in common_topics:
        # randrange(2, 4) yields 2 or 3, always more than the single use of
        # every uncommon topic below.
        resources.extend(
            LearningResourceFactory.create_batch(randrange(2, 4), topics=[topic])  # noqa: S311
        )
    resources.extend(
        [LearningResourceFactory.create(topics=[topic]) for topic in uncommon_topics]
    )
    returned_names = [
        topic["name"]
        for topic in utils.most_common_topics(resources, max_topics=max_topics)
    ]
    # Sort both sides: the ranking among the common topics depends on the random
    # batch sizes, and the factory is not guaranteed here to generate names in
    # alphabetical order, so only set-equality of the names is being checked.
    assert sorted(returned_names) == sorted(topic.name for topic in common_topics)
5 changes: 2 additions & 3 deletions learning_resources/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,16 +111,15 @@ class LearningResource(TimestampedModel):
"runs__image",
"children__child",
"children__child__runs",
"children__child__parents",
"children__child__runs__instructors",
"children__child__course",
"children__child__program",
"children__child__learning_path",
"children__child__departments",
"children__child__platform",
"children__child__topics",
"children__child__image",
"children__child__offered_by",
"children__child__content_tags",
*[f"children__child__{item.name}" for item in LearningResourceType],
]

related_selects = [
Expand Down
12 changes: 12 additions & 0 deletions learning_resources/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,18 @@ def test_resource_upserted_actions(mock_plugin_manager, fixture_resource):
)


def test_similar_topics_action(mock_plugin_manager, fixture_resource) -> None:
    """
    similar_topics_action should trigger plugin hook's resource_similar_topics function
    and return the topics from the hook's first result
    """
    mock_topics = [{"name": "Biology"}, {"name": "Chemistry"}]
    # The hook result is mocked as a list with one entry (the topic list);
    # the function under test is expected to hand back that entry.
    mock_plugin_manager.hook.resource_similar_topics.return_value = [mock_topics]
    assert utils.similar_topics_action(fixture_resource) == mock_topics
    mock_plugin_manager.hook.resource_similar_topics.assert_called_once_with(
        resource=fixture_resource
    )


def test_resource_unpublished_actions(mock_plugin_manager, fixture_resource):
"""
resource_unpublished_actions function should trigger plugin hook's resource_unpublished function
Expand Down
2 changes: 1 addition & 1 deletion learning_resources_search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,4 +553,4 @@ def get_similar_topics(
topics = [topic.to_dict()["name"] for hit in response.hits for topic in hit.topics]

counter = Counter(topics)
return dict(counter.most_common(num_topics)).keys()
return list(dict(counter.most_common(num_topics)).keys())
73 changes: 72 additions & 1 deletion learning_resources_search/api_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Search API function tests"""
from unittest.mock import Mock

import pytest

Expand All @@ -12,9 +13,18 @@
generate_learning_resources_text_clause,
generate_sort_clause,
generate_suggest_clause,
get_similar_topics,
relevant_indexes,
)
from learning_resources_search.constants import SOURCE_EXCLUDED_FIELDS
from learning_resources_search.constants import COURSE_TYPE, SOURCE_EXCLUDED_FIELDS


def os_topic(topic_name) -> Mock:
    """
    Build a stand-in for an OpenSearch topic AttrDict object.

    The returned mock exposes a to_dict() method that yields
    {"name": topic_name}.
    """
    to_dict_stub = Mock(return_value={"name": topic_name})
    return Mock(to_dict=to_dict_stub)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -1382,3 +1392,64 @@ def test_execute_learn_search(opensearch):
body=query,
index=["testindex_course_default"],
)


def test_get_similar_topics(settings, opensearch):
    """get_similar_topics should query for similar documents and rank their topics"""
    input_doc = {"title": "title text", "description": "description text"}

    # Topic counts across the mocked hits: a=5, c=3, b=2, d=1.
    # "topic d" is the least frequent and must be dropped from the top 3.
    topic_names_per_hit = [
        ("topic a", "topic b", "topic d"),
        ("topic a", "topic c"),
        ("topic a", "topic c"),
        ("topic a", "topic c"),
        ("topic a", "topic b"),
    ]
    opensearch.conn.search.return_value = {
        "hits": {
            "hits": [
                {"_source": {"topics": [os_topic(name) for name in names]}}
                for names in topic_names_per_hit
            ]
        }
    }

    # The result is compared as an ordered list: top 3 topics, most frequent first
    similar_topics = get_similar_topics(input_doc, 3, 1, 15)
    assert similar_topics == ["topic a", "topic c", "topic b"]

    more_like_this_clause = {
        "like": [
            {
                "doc": input_doc,
                "fields": ["title", "description"],
            }
        ],
        "fields": [
            "course.course_numbers.value",
            "title",
            "description",
            "full_description",
        ],
        "min_term_freq": 1,
        "min_doc_freq": 15,
    }
    expected_body = {
        "_source": {"includes": "topics"},
        "query": {
            "bool": {
                "filter": [{"term": {"resource_type": "course"}}],
                "must": [{"more_like_this": more_like_this_clause}],
            }
        },
    }
    expected_index = f"{settings.OPENSEARCH_INDEX}_{COURSE_TYPE}_default"
    opensearch.conn.search.assert_called_once_with(
        body=expected_body,
        index=[expected_index],
    )
23 changes: 23 additions & 0 deletions learning_resources_search/plugins_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,26 @@ def test_search_index_plugin_resource_run_delete(mock_search_index_helpers):
False, # noqa: FBT003
)
assert LearningResourceRun.objects.filter(id=run_id).exists() is False


@pytest.mark.django_db()
def test_resource_similar_topics(mocker, settings):
    """resource_similar_topics should return each similar topic name as a {"name": ...} dict"""
    topic_names = ["topic1", "topic2"]
    get_similar_topics_mock = mocker.patch(
        "learning_resources_search.plugins.get_similar_topics",
        return_value=topic_names,
    )
    resource = LearningResourceFactory.create()

    result = SearchIndexPlugin().resource_similar_topics(resource)

    assert result == [{"name": name} for name in topic_names]

    # The plugin should pass the resource's text fields plus the
    # OPEN_VIDEO_* settings on to get_similar_topics.
    expected_doc = {
        "title": resource.title,
        "description": resource.description,
        "full_description": resource.full_description,
    }
    get_similar_topics_mock.assert_called_once_with(
        expected_doc,
        settings.OPEN_VIDEO_MAX_TOPICS,
        settings.OPEN_VIDEO_MIN_TERM_FREQ,
        settings.OPEN_VIDEO_MIN_DOC_FREQ,
    )

0 comments on commit 08146b6

Please sign in to comment.