Skip to content

Commit

Permalink
Use OCW FM exported keywords for CR_KEYWORDS (#9)
Browse files Browse the repository at this point in the history
* Use FM exported keywords

* Handle edge cases of keywords

* Update unittest and readme

* Use normalize_keywords function and update unittest

* Refactor runs

* added another input example for normalize keywords

* Remove to be supplied keyword from mapping file

* Remove requirements.txt; Using poetry now

* Use titlecase

* Fix ODEs typo, use upper() check for acronyms
  • Loading branch information
ibrahimjaved12 committed Jan 29, 2024
1 parent 69525d5 commit 632c5dc
Show file tree
Hide file tree
Showing 12 changed files with 6,343 additions and 45 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ repos:
- id: check-merge-conflict
- id: check-yaml
- id: check-added-large-files
args: ['--maxkb=5120'] # 5mb
- id: debug-statements
- repo: https://github.com/adrienverge/yamllint.git
rev: v1.33.0
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ For successful execution and correct output, ensure the [MIT Open's API](https:/

`title`, `url`, `description`, `topics`, `course_feature`, `runs: instructors`

Additionally, the `mapping_files` should be up-to-date. If new topics are added in OCW without corresponding mappings in `ocw_oer_export/mapping_files/ocw_topic_to_oer_subject.csv`, this will lead to `null` entries for those topics in the CSV (`CR_SUBJECT`).
Additionally, the `mapping_files` should be up-to-date. If new topics are added in OCW without corresponding mappings in `ocw_oer_export/mapping_files/ocw_topic_to_oer_subject.csv`, this will lead to `null` entries for those topics in the CSV (`CR_SUBJECT`). Also make sure `ocw_oer_export/mapping_files/fm_keywords_export.csv` is present; it supplies the keywords used for `CR_KEYWORDS`.

## Tests

Expand Down
62 changes: 50 additions & 12 deletions ocw_oer_export/create_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,31 @@
from .client import extract_data_from_api
from .data_handler import extract_data_from_json
from .constants import API_URL
from .utilities import text_cleanup
from .utilities import normalize_course_url, normalize_keywords, text_cleanup


def create_fm_ocw_course_url_to_keywords_mapping(path=None, file_name=None):
    """
    Build a dict mapping OCW course URLs to their keywords from the FM export CSV.

    The CSV is read with csv.DictReader. Rows whose "zze_courseURL" cell is
    empty are skipped; for every other row the URL is passed through
    normalize_course_url() and used as the key, with the row's
    "zzd_keywords" cell as the value. If the same normalized URL appears
    more than once, the last row wins.

    Args:
        path: directory containing the export; defaults to this module's
            directory.
        file_name: file name relative to path; defaults to
            "mapping_files/fm_keywords_export.csv".

    Returns:
        dict of normalized course URL -> keywords string.
    """
    base_dir = os.path.dirname(__file__) if path is None else path
    relative_name = (
        "mapping_files/fm_keywords_export.csv" if file_name is None else file_name
    )
    export_path = os.path.join(base_dir, relative_name)

    with open(export_path, newline="", encoding="utf-8") as export_file:
        return {
            normalize_course_url(record["zze_courseURL"]): record["zzd_keywords"]
            for record in csv.DictReader(export_file)
            if record["zze_courseURL"]
        }


def create_ocw_topic_to_oer_subject_mapping(path=None, file_name=None):
Expand Down Expand Up @@ -52,8 +76,16 @@ def get_cr_subjects(ocw_topics_mapping, ocw_course_topics):
return "|".join(sorted_unique_oer_subjects)


def get_cr_keywords(list_of_topics_objs):
"""Get OER formatted Course Resource Keywords from a list of OCW topic objects."""
def get_cr_keywords(fm_ocw_keywords_mapping, list_of_topics_objs, course_url):
    """
    Get OER formatted Course Resource keywords for a given OCW course.

    The course's keywords are looked up in the FM export mapping
    (fm_ocw_keywords_mapping) by course_url. When the mapping has no entry
    for the course, or the entry is empty or only whitespace, the names of
    the course's OCW topics are used instead, joined with "|".

    Args:
        fm_ocw_keywords_mapping: dict of course URL -> raw keywords string.
        list_of_topics_objs: iterable of topic dicts, each with a "name" key.
        course_url: key to look up in fm_ocw_keywords_mapping.

    Returns:
        The result of normalize_keywords() on the FM keywords when present,
        otherwise the "|"-joined topic names.
    """
    # NOTE(review): create_fm_ocw_course_url_to_keywords_mapping() passes its
    # keys through normalize_course_url(); confirm course_url arrives here in
    # the same normalized form, otherwise the lookup silently misses and the
    # topics fallback is used.
    keywords = fm_ocw_keywords_mapping.get(course_url)

    # A blank cell in the export means "no keywords": treat it like a miss
    # so the course still gets its topics as keywords.
    if keywords and keywords.strip():
        return normalize_keywords(keywords)
    return "|".join(topic["name"] for topic in list_of_topics_objs)


Expand Down Expand Up @@ -110,21 +142,24 @@ def get_description_in_plain_text(description):
return cleaned_description


def transform_single_course(course, ocw_topics_mapping):
def transform_single_course(course, ocw_topics_mapping, fm_ocw_keywords_mapping):
"""Transform a single course according to OER template."""
course_runs = course["runs"][0]
return {
"CR_TITLE": course["title"],
"CR_URL": course["runs"][0]["url"],
"CR_URL": course_runs["url"],
"CR_MATERIAL_TYPE": "Full Course",
"CR_Media_Formats": "Text/HTML",
"CR_SUBLEVEL": "null",
"CR_ABSTRACT": get_description_in_plain_text(course["runs"][0]["description"]),
"CR_ABSTRACT": get_description_in_plain_text(course_runs["description"]),
"CR_LANGUAGE": "en",
"CR_COU_TITLE": "Creative Commons Attribution Non Commercial Share Alike 4.0",
"CR_PRIMARY_USER": "student|teacher",
"CR_SUBJECT": get_cr_subjects(ocw_topics_mapping, course["topics"]),
"CR_KEYWORDS": get_cr_keywords(course["topics"]),
"CR_AUTHOR_NAME": get_cr_authors(course["runs"][0]["instructors"]),
"CR_KEYWORDS": get_cr_keywords(
fm_ocw_keywords_mapping, course["topics"], course_runs["url"]
),
"CR_AUTHOR_NAME": get_cr_authors(course_runs["instructors"]),
"CR_PROVIDER": "MIT",
"CR_PROVIDER_SET": "MIT OpenCourseWare",
"CR_COU_URL": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
Expand All @@ -134,12 +169,16 @@ def transform_single_course(course, ocw_topics_mapping):
}


def transform_data(data, ocw_topics_mapping):
def transform_data(data):
"""Transform all courses into OER template."""
fm_ocw_keywords_mapping = create_fm_ocw_course_url_to_keywords_mapping()
ocw_topics_mapping = create_ocw_topic_to_oer_subject_mapping()

return [
course
for course in (
transform_single_course(course, ocw_topics_mapping) for course in data
transform_single_course(course, ocw_topics_mapping, fm_ocw_keywords_mapping)
for course in data
)
if course is not None
]
Expand All @@ -165,8 +204,7 @@ def create_csv(
else:
raise ValueError("Invalid source. Use 'api' or 'json'.")

ocw_topics_mapping = create_ocw_topic_to_oer_subject_mapping()
transformed_data = transform_data(api_data_json, ocw_topics_mapping)
transformed_data = transform_data(api_data_json)
fieldnames = [
"CR_TITLE",
"CR_URL",
Expand Down
Loading

0 comments on commit 632c5dc

Please sign in to comment.