Skip to content

Commit

Permalink
Use FM exported keywords
Browse files Browse the repository at this point in the history
  • Loading branch information
ibrahimjaved12 committed Jan 22, 2024
1 parent 0a3fab8 commit 059b141
Show file tree
Hide file tree
Showing 5 changed files with 6,218 additions and 10 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ repos:
- id: check-merge-conflict
- id: check-yaml
- id: check-added-large-files
args: ['--maxkb=5120'] # 5mb
- id: debug-statements
- repo: https://github.com/adrienverge/yamllint.git
rev: v1.33.0
Expand Down
57 changes: 48 additions & 9 deletions ocw_oer_export/create_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,37 @@
import csv
import os
import logging
import re


from .client import extract_data_from_api
from .data_handler import extract_data_from_json
from .constants import API_URL
from .utilities import text_cleanup
from .utilities import normalize_course_url, text_cleanup


def create_fm_ocw_course_url_to_keywords_mapping(path=None, file_name=None):
    """
    Build a lookup from OCW course URL to its keywords, based on the FM export CSV.

    Args:
        path: Directory containing the export file. Defaults to the directory
            of this module.
        file_name: Path of the CSV file relative to ``path``. Defaults to
            "mapping_files/fm_keywords_export.csv".

    Returns:
        A dict whose keys are course URLs passed through ``normalize_course_url``
        and whose values are the raw ``zzd_keywords`` cell of the same row.
        Rows with an empty ``zze_courseURL`` cell are skipped; if a URL occurs
        more than once, the last row wins.
    """
    base_dir = os.path.dirname(__file__) if path is None else path
    relative_name = (
        "mapping_files/fm_keywords_export.csv" if file_name is None else file_name
    )
    export_path = os.path.join(base_dir, relative_name)

    # newline="" lets the csv module handle line breaks inside quoted cells.
    with open(export_path, newline="", encoding="utf-8") as export_file:
        return {
            normalize_course_url(record["zze_courseURL"]): record["zzd_keywords"]
            for record in csv.DictReader(export_file)
            if record["zze_courseURL"]
        }


def create_ocw_topic_to_oer_subject_mapping(path=None, file_name=None):
Expand Down Expand Up @@ -52,8 +77,17 @@ def get_cr_subjects(ocw_topics_mapping, ocw_course_topics):
return "|".join(sorted_unique_oer_subjects)


def get_cr_keywords(list_of_topics_objs):
"""Get OER formatted Course Resource Keywords from a list of OCW topic objects."""
def get_cr_keywords(fm_ocw_keywords_mapping, list_of_topics_objs, course_url):
    """
    Get OER formatted Course Resource keywords for a given OCW course.

    Keywords found in the FM export mapping take precedence. When the course has
    no usable keywords there, the names of the course's OCW topics are used.

    Args:
        fm_ocw_keywords_mapping: Dict mapping a course URL to a raw keyword
            string in which keywords are separated by semicolons, commas or
            line breaks.
        list_of_topics_objs: Iterable of topic dicts, each with a "name" key.
        course_url: Key to look up in ``fm_ocw_keywords_mapping``. It is looked
            up verbatim, so it must be in the same form as the mapping's keys.

    Returns:
        The keywords joined with "|", without surrounding whitespace and
        without empty entries.
    """
    raw_keywords = fm_ocw_keywords_mapping.get(course_url)
    if raw_keywords:
        # Split on every supported separator, then trim each piece and drop
        # blanks so doubled/trailing separators and stray spaces or carriage
        # returns never leak into the output as empty or padded keywords.
        pieces = (piece.strip() for piece in re.split(r"[;,\r\n]", raw_keywords))
        keywords = [piece for piece in pieces if piece]
        if keywords:
            return "|".join(keywords)
    # No FM keywords (missing, empty, or separators only): fall back to topics.
    return "|".join(topic["name"] for topic in list_of_topics_objs)


Expand Down Expand Up @@ -110,7 +144,7 @@ def get_description_in_plain_text(description):
return cleaned_description


def transform_single_course(course, ocw_topics_mapping):
def transform_single_course(course, ocw_topics_mapping, fm_ocw_keywords_mapping):
"""Transform a single course according to OER template."""
return {
"CR_TITLE": course["title"],
Expand All @@ -123,7 +157,9 @@ def transform_single_course(course, ocw_topics_mapping):
"CR_COU_TITLE": "Creative Commons Attribution Non Commercial Share Alike 4.0",
"CR_PRIMARY_USER": "student|teacher",
"CR_SUBJECT": get_cr_subjects(ocw_topics_mapping, course["topics"]),
"CR_KEYWORDS": get_cr_keywords(course["topics"]),
"CR_KEYWORDS": get_cr_keywords(
fm_ocw_keywords_mapping, course["topics"], course["runs"][0]["url"]
),
"CR_AUTHOR_NAME": get_cr_authors(course["runs"][0]["instructors"]),
"CR_PROVIDER": "MIT",
"CR_PROVIDER_SET": "MIT OpenCourseWare",
Expand All @@ -134,12 +170,16 @@ def transform_single_course(course, ocw_topics_mapping):
}


def transform_data(data, ocw_topics_mapping):
def transform_data(data):
    """
    Transform all courses into OER template.

    Loads the FM keywords mapping and the OCW-topic-to-OER-subject mapping once,
    transforms every course in ``data`` with them, and returns the results as a
    list, leaving out any course whose transformation is None.
    """
    fm_ocw_keywords_mapping = create_fm_ocw_course_url_to_keywords_mapping()
    ocw_topics_mapping = create_ocw_topic_to_oer_subject_mapping()

    transformed_courses = []
    for raw_course in data:
        transformed = transform_single_course(
            raw_course, ocw_topics_mapping, fm_ocw_keywords_mapping
        )
        if transformed is not None:
            transformed_courses.append(transformed)
    return transformed_courses
Expand All @@ -165,8 +205,7 @@ def create_csv(
else:
raise ValueError("Invalid source. Use 'api' or 'json'.")

ocw_topics_mapping = create_ocw_topic_to_oer_subject_mapping()
transformed_data = transform_data(api_data_json, ocw_topics_mapping)
transformed_data = transform_data(api_data_json)
fieldnames = [
"CR_TITLE",
"CR_URL",
Expand Down
Loading

0 comments on commit 059b141

Please sign in to comment.