-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add CR_CREATE_DATE and update CR_SUBLEVEL, CR_COU_COPYRIGHT_HOLDER #11
Changes from all commits
d2b6a58
eb93f47
119d3f7
f921df3
99bdf0f
905f4dd
1d71735
9b49ccc
edf906d
1e9464e
c963f06
418c4d8
2046e48
fbbfe67
bd9df46
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,8 @@ | ||
__all__ = ["create_json", "create_csv", "main"] | ||
__all__ = ["create_json", "create_csv"] | ||
|
||
import logging | ||
|
||
from .create_csv import create_csv | ||
from .create_json import create_json | ||
from .cli import main | ||
|
||
logging.root.setLevel(logging.INFO) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
""" | ||
Module for loading environment settings and setting API base URL based on the current environment. | ||
""" | ||
import os | ||
from dotenv import load_dotenv | ||
|
||
load_dotenv() | ||
|
||
# Base URL of the API. The API_BASE_URL environment variable overrides the
# default; an unset or empty value falls back to the default host.
# Trailing slashes are stripped so that API_URL never contains "//" between
# the host and the path.
API_BASE_URL = (os.getenv("API_BASE_URL") or "https://mitopen.odl.mit.edu").rstrip("/")
# Courses endpoint filtered to the "ocw" platform.
API_URL = f"{API_BASE_URL}/api/v1/courses/?platform=ocw"
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
|
||
from .client import extract_data_from_api | ||
from .data_handler import extract_data_from_json | ||
from .constants import API_URL | ||
from .config import API_URL | ||
from .utilities import normalize_course_url, normalize_keywords, text_cleanup | ||
|
||
|
||
|
@@ -56,6 +56,26 @@ def create_ocw_topic_to_oer_subject_mapping(path=None, file_name=None): | |
return {row["OCW Topic"]: row["OER Subject"] for row in reader} | ||
|
||
|
||
def get_cr_sublevel(levels):
    """Set the value(s) of CR_SUBLEVEL based on the course levels.

    Args:
        levels: Iterable of level objects, each indexed by ``"name"``.
            A falsy value (``None`` or an empty list) is treated as no levels.

    Returns:
        The distinct mapped sublevels, sorted and joined with ``|``.
        An empty string when no level maps to a sublevel.
    """
    level_mappings = {
        "Undergraduate": ["Community College/Lower Division", "College/Upper Division"],
        "Graduate": ["Graduate/Professional"],
        "High School": ["High School", "Community College/Lower Division"],
        "Non-Credit": ["Career/Technical Education"],
    }
    sublevels = {
        sublevel
        for level in levels or ()
        # An unmapped level name contributes nothing; without the default,
        # .get() returns None and iterating it raises TypeError.
        for sublevel in level_mappings.get(level["name"], ())
    }
    return "|".join(sorted(sublevels))
|
||
|
||
def get_description_in_plain_text(description):
    """Return the Course Resource description after passing it through ``text_cleanup``."""
    return text_cleanup(description)
|
||
|
||
Comment on lines
+73
to
+78
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I only moved this function a few lines up; this isn't a new function. |
||
def get_cr_subjects(ocw_topics_mapping, ocw_course_topics): | ||
""" | ||
Get OER formatted Course Resource Subjects list. | ||
|
@@ -89,6 +109,22 @@ def get_cr_keywords(fm_ocw_keywords_mapping, list_of_topics_objs, course_url): | |
return "|".join(topic["name"] for topic in list_of_topics_objs) | ||
|
||
|
||
def get_cr_create_date(semester, year):
    """Convert a semester and year into a ballpark start date.

    Returns ``"<year>-<MM-DD>"`` for a known semester, ``"<year>-01-01"`` when
    the semester is not recognised, and ``""`` when ``year`` is falsy.
    """
    # Review note from the PR discussion: some OCW courses do not have a year.
    # A bug currently prevents MIT Open from including those courses
    # (see mitodl/mit-learn#441), so the missing-year path is hard to test on
    # real data right now.
    month_day_by_semester = {
        "Fall": "09-01",
        "Spring": "02-01",
        "Summer": "06-01",
        "January IAP": "01-01",
    }
    # Unknown or missing semesters fall back to the first day of the year.
    month_day = month_day_by_semester.get(semester, "01-01")
    return f"{year}-{month_day}" if year else ""
|
||
|
||
def get_cr_authors(list_of_authors_objs): | ||
"""Get OER formatted Course Resource Authors list.""" | ||
return "|".join( | ||
|
@@ -136,21 +172,15 @@ def get_cr_accessibility(ocw_course_feature_tags): | |
return "|".join(tags) | ||
|
||
|
||
def get_description_in_plain_text(description):
    """Return the Course Resource description after passing it through ``text_cleanup``."""
    return text_cleanup(description)
|
||
|
||
def transform_single_course(course, ocw_topics_mapping, fm_ocw_keywords_mapping): | ||
"""Transform a single course according to OER template.""" | ||
course_runs = course["runs"][0] | ||
return { | ||
"CR_TITLE": course["title"], | ||
"CR_URL": course_runs["url"], | ||
"CR_MATERIAL_TYPE": "Full Course", | ||
"CR_Media_Formats": "Text/HTML", | ||
"CR_SUBLEVEL": "null", | ||
"CR_MEDIA_FORMATS": "Text/HTML", | ||
"CR_SUBLEVEL": get_cr_sublevel(course_runs["level"]), | ||
"CR_ABSTRACT": get_description_in_plain_text(course_runs["description"]), | ||
"CR_LANGUAGE": "en", | ||
"CR_COU_TITLE": "Creative Commons Attribution Non Commercial Share Alike 4.0", | ||
|
@@ -159,11 +189,14 @@ def transform_single_course(course, ocw_topics_mapping, fm_ocw_keywords_mapping) | |
"CR_KEYWORDS": get_cr_keywords( | ||
fm_ocw_keywords_mapping, course["topics"], course_runs["url"] | ||
), | ||
"CR_CREATE_DATE": get_cr_create_date( | ||
course_runs["semester"], course_runs["year"] | ||
), | ||
"CR_AUTHOR_NAME": get_cr_authors(course_runs["instructors"]), | ||
"CR_PROVIDER": "MIT", | ||
"CR_PROVIDER_SET": "MIT OpenCourseWare", | ||
"CR_COU_URL": "https://creativecommons.org/licenses/by-nc-sa/4.0/", | ||
"CR_COU_COPYRIGHT_HOLDER": "MIT", | ||
"CR_COU_COPYRIGHT_HOLDER": get_cr_authors(course_runs["instructors"]), | ||
"CR_EDUCATIONAL_USE": get_cr_educational_use(course["course_feature"]), | ||
"CR_ACCESSIBILITY": get_cr_accessibility(course["course_feature"]), | ||
} | ||
|
@@ -209,14 +242,15 @@ def create_csv( | |
"CR_TITLE", | ||
"CR_URL", | ||
"CR_MATERIAL_TYPE", | ||
"CR_Media_Formats", | ||
"CR_MEDIA_FORMATS", | ||
"CR_SUBLEVEL", | ||
"CR_ABSTRACT", | ||
"CR_LANGUAGE", | ||
"CR_COU_TITLE", | ||
"CR_PRIMARY_USER", | ||
"CR_SUBJECT", | ||
"CR_KEYWORDS", | ||
"CR_CREATE_DATE", | ||
"CR_AUTHOR_NAME", | ||
"CR_PROVIDER", | ||
"CR_PROVIDER_SET", | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To remove the warning:
<frozen runpy>:128: RuntimeWarning: 'ocw_oer_export.cli' found in sys.modules after import of package 'ocw_oer_export', but prior to execution of 'ocw_oer_export.cli'; this may result in unpredictable behaviour
I think this was also slowing down program execution.