-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_assessments.py
139 lines (120 loc) · 4.81 KB
/
get_assessments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
"""Get CPXXXX assessment items from StudyFinder websites."""
import html
from collections import OrderedDict

import openpyxl
import requests
# First year to fetch; main() requests this year and the one after it.
YEAR = 2020
# Marker that get_assessment_block() searches for to locate the assessment list.
ASSESSMENT_START_STRING = "<h3>Subject Assessment</h3>"
# Marker that get_prerequisite_block() searches for to locate the prerequisites cell.
PRE_REQ_START_STRING = "Prerequisites:</td>"
# HTML report written by main().
OUTPUT_HTML_FILENAME = "output/assessments.html"
# Workbook that write_spreadsheet() loads before filling in the scraped data.
OUTPUT_EXCEL_FILENAME = "output/2021-IT-Assessment-Mapping.xlsx"
# Opening of the HTML report: document head, table styling and page heading.
HTML_TOP = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>IT@JCU Assessments in CSDB/Studyfinder</title>
<style type="text/css">
table, td {
border: thin black solid;
padding: 0.5em;
border-collapse: collapse;
vertical-align: top;
}
</style>
</head>
<body>
<h1>IT@JCU Assessments in CSDB/Studyfinder</h1>
"""
# Closing tags of the HTML report, written after the table.
HTML_BOTTOM = """
</body>
</html>
"""
def main():
    """Fetch assessment details for every subject and write the HTML and Excel outputs.

    For each subject code from get_subjects(), the StudyFinder page is requested
    for YEAR and YEAR + 1. The raw assessment block of each page is written into
    one table cell of OUTPUT_HTML_FILENAME (one table row per subject), and the
    parsed (items, prerequisite) pair is collected per year and finally passed
    to write_spreadsheet().
    """
    all_subject_details = [OrderedDict(), OrderedDict()]  # assuming only 2 years
    subjects = get_subjects()
    print(subjects)
    # The page declares charset UTF-8, so write UTF-8 regardless of the platform
    # default; the context manager closes the file even if a step below raises.
    with open(OUTPUT_HTML_FILENAME, 'w', encoding='utf-8') as file_out:
        print(HTML_TOP, file=file_out)
        print("<table>", file=file_out)
        for subject in subjects:
            print("<tr>", file=file_out)
            for i, year_details in enumerate(all_subject_details):
                year_to_get = YEAR + i
                print(f"<td><h2>{subject} - {year_to_get}</h2>", file=file_out)
                url = f"https://secure.jcu.edu.au/app/studyfinder/index.cfm?subject={subject}&year={year_to_get}&transform=subjectwebview.xslt"
                # Without a timeout a stalled connection would hang the whole run.
                response = requests.get(url, timeout=30)
                text = response.text
                prerequisite = get_prerequisite_block(text)
                assessment_block = get_assessment_block(text)
                print(assessment_block, file=file_out)
                items = extract_items(assessment_block)
                year_details[subject] = items, prerequisite
                print("</td>", file=file_out)
            print("</tr>", file=file_out)
        print("</table>", file=file_out)
        print(HTML_BOTTOM, file=file_out)
    write_spreadsheet(all_subject_details)
def get_subjects():
    """Return the subject codes listed one per line in data/all_subjects.txt.

    Surrounding whitespace is removed from each line and blank lines are
    skipped, so a trailing empty line does not produce an empty subject code.
    """
    # The context manager closes the file even if reading fails part-way.
    with open("data/all_subjects.txt") as file_in:
        stripped_lines = (line.strip() for line in file_in)
        return [subject for subject in stripped_lines if subject]
def get_prerequisite_block(text):
    """Return the prerequisite text found in a subject page.

    The content between PRE_REQ_START_STRING and the next "</tr>" is taken,
    and one enclosing "<td>" ... "</td>" pair is removed from it. Returns the
    string "None" when the page has no prerequisites marker.
    """
    index_start = text.find(PRE_REQ_START_STRING)
    if index_start == -1:
        return "None"
    content_start = index_start + len(PRE_REQ_START_STRING)
    index_end = text.find("</tr>", content_start)
    if index_end == -1:
        # No closing row tag: use the rest of the text instead of letting a -1
        # slice end silently drop the last character.
        index_end = len(text)
    section = text[content_start:index_end].strip()
    # Remove the tags as whole prefixes/suffixes. str.strip("<td>") treats its
    # argument as a set of characters and would also eat leading or trailing
    # 't', 'd', '/' characters of the prerequisite text itself.
    if section.startswith("<td>"):
        section = section[len("<td>"):]
    if section.endswith("</td>"):
        section = section[:-len("</td>")]
    return section.strip()
def get_assessment_block(text):
    """Return the HTML that follows the assessment heading, up to and including "</ul>".

    Every '. ' sequence is removed from the block and the result is stripped of
    surrounding whitespace. Returns an empty string when the heading or the
    closing "</ul>" cannot be found.
    """
    index_heading = text.find(ASSESSMENT_START_STRING)
    if index_heading == -1:
        return ""
    content_start = index_heading + len(ASSESSMENT_START_STRING)
    index_end = text.find("</ul>", content_start)
    if index_end == -1:
        return ""
    # Slice exactly to the end of the closing tag; a fixed "+ 6" would include
    # one extra character from whatever follows the list.
    section = text[content_start:index_end + len("</ul>")]
    # NOTE(review): this removes every '. ' in the block - confirm it is intended.
    section = section.replace('. ', '')
    return section.strip()
def extract_items(block):
    """Extract assessment items from HTML block as list of tuples.

    Each line of block that starts with "<li>" is expected to look like
    "<li>name - (weight%) ...</li>". Returns a list of (name, weight) tuples
    where weight is the text of the second ' - ' separated field with its
    surrounding "(", "%" and ")" removed. Lines without a ' - ' separator are
    reported with print() and skipped.
    """
    items = []
    for line in block.split('\n'):
        line = line.strip()
        if not line.startswith('<li>'):
            continue
        # Remove the tags as whole prefixes/suffixes. str.strip('<li>') treats
        # its argument as a set of characters and would also eat leading or
        # trailing 'l', 'i', '/' characters of the item text itself.
        raw_item = line[len('<li>'):]
        if raw_item.endswith('</li>'):
            raw_item = raw_item[:-len('</li>')]
        fields = raw_item.split(' - ')
        if len(fields) < 2:
            print(f"ERROR with {raw_item}")
            continue
        # Decode HTML entities such as "&gt;" so names read as plain text.
        assessment = html.unescape(fields[0].strip())
        weight = fields[1].strip().strip('(').strip('%)')
        items.append((assessment, weight))
    return items
def write_spreadsheet(all_subject_details):
    """Write every subject's assessment items and prerequisite into the workbook.

    all_subject_details holds two dictionaries, one per year. Each value is a
    pair: (list of (name, weight) items, prerequisite string). The workbook at
    OUTPUT_EXCEL_FILENAME is loaded, its 'Assessment-Mapping' sheet is filled
    in, and the result is saved as output/temp.xlsx.
    """
    workbook = openpyxl.load_workbook(filename=OUTPUT_EXCEL_FILENAME)
    sheet = workbook['Assessment-Mapping']
    for year_index in range(2):  # both years/dictionaries
        # Items start at row 12; each later year starts 7 rows further down.
        first_item_row = 12 + 7 * year_index
        subject_column = 2  # every year starts again at the first subject column
        for details in all_subject_details[year_index].values():
            assessment_items = details[0]
            prerequisite = details[1]
            for target_row, (name, weight) in enumerate(assessment_items, start=first_item_row):
                try:
                    weight = int(weight)
                except ValueError:
                    pass  # non-numeric weights are written unchanged
                sheet.cell(row=target_row, column=subject_column, value=name)
                sheet.cell(row=target_row, column=subject_column + 3, value=weight)
            sheet.cell(row=24, column=subject_column, value=prerequisite)
            subject_column += 4  # distance to next subject (4 pieces of data per assessment)
    workbook.save(filename="output/temp.xlsx")
# Run the scraper. NOTE(review): there is no `if __name__ == "__main__":` guard,
# so importing this module also triggers the full run.
main()