-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathstudent.py
386 lines (309 loc) · 13.2 KB
/
student.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
"""
Contains the objects for table extraction
"""
from collections import Counter
from pdf_strings import (
desired_tables,
detail_string,
valid_exams,
ib_permutations,
)
from pandas import isna
from grade_entry import GradeEntry
class Student:
    """
    Class for a single pdf/student

    Stores the tables extracted from one PDF (completed qualifications,
    uncompleted/predicted qualifications and exam results) and converts their
    rows into ``GradeEntry`` objects, grouped in ``which_grades`` under the
    keys "results", "completed" and "predicted".

    NOTE(review): the positional boolean/None arguments passed to
    ``GradeEntry`` are reproduced as they were; their meaning is defined in
    ``grade_entry`` and is not visible from this file.
    """

    # Column header exactly as it appears in the extracted table
    # (it contains a carriage return).
    _PREDICTED_GRADE_COLUMN = "Predicted\rGrade"

    # Markers separating a module title from its grade. "Predicted Grade:"
    # must be tried before "Grade:" because the latter is a substring of it.
    _GRADE_MARKERS = ("Predicted Grade:", "Grade:", "Value:")

    def __init__(self, id_num, extracted_tables, table_headers):
        """
        Pick the desired tables out of the extracted ones and build entries.

        ``table_headers`` and ``extracted_tables`` are walked in lockstep; a
        table is kept when its header equals one of the first three values
        returned by ``desired_tables()`` (completed, uncompleted, exam
        results, in that order). Tables with other headers are ignored.
        """
        self.unique_id = id_num
        self.completed_qualifications = None
        self.uncompleted_qualifications = None
        self.exam_results = None

        target_tables = desired_tables()
        for header, tbl in zip(table_headers, extracted_tables):
            if header == target_tables[0]:
                self.completed_qualifications = tbl
            elif header == target_tables[1]:
                self.uncompleted_qualifications = tbl
            elif header == target_tables[2]:
                self.exam_results = tbl

        self.predicted_entries = []
        self.completed_entries = []
        self.results_entries = []

        self.predicted_grade_entries()
        self.examresult_entries()
        self.completed_grade_entries()

        self.which_grades = {
            "results": self.results_entries,
            "completed": self.completed_entries,
            "predicted": self.predicted_entries,
        }
        self.sanitise_ib_grades()

    def __repr__(self):
        return "{} \n {} \n {}".format(
            self.completed_qualifications,
            self.uncompleted_qualifications,
            self.exam_results,
        )

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _cell(table, column, row):
        """Return ``table[column][row]``, or None when ``row`` is not a label.

        Used for "previous row" lookups (``row - 1``), which would otherwise
        raise ``KeyError`` when the very first row is a detail row.
        """
        return table[column].get(row)

    @staticmethod
    def _year_from_date(date):
        """Return the text after the last "-" of ``date``, or None.

        Date cells are not always strings (``is_detailed_entry`` checks for
        that too), so a non-string date yields None instead of raising
        ``AttributeError``.
        """
        if not isinstance(date, str):
            return None
        return date.split("-")[-1]

    @classmethod
    def _split_module(cls, module_info):
        """Split one module description into ``(subject, grade)``.

        The text before the first grade marker is the subject and the text
        after it is the grade. Without any marker the whole text is the
        subject and the grade is None.
        """
        for marker in cls._GRADE_MARKERS:
            if marker in module_info:
                parts = module_info.split(marker)
                return parts[0], parts[1]
        return module_info, None

    def _module_entries(self, qualification, all_module_details):
        """Build one ``GradeEntry`` per "Title:" section of a detail cell."""
        if not isinstance(all_module_details, str):
            # Empty/NaN detail cell: nothing to parse.
            return []
        entries = []
        # The text before the first "Title:" is just the date, so skip it.
        for module in all_module_details.split("Title:")[1:]:
            module_info = module.split("Date:")[0]
            subject, grade = self._split_module(module_info)
            entries.append(
                GradeEntry(qualification, subject, grade, True, None, False)
            )
        return entries

    @staticmethod
    def _is_standard_level(entry):
        """True when the grade contains "s"/"S" or the subject says standard level."""
        subject = str(entry.subject).lower()
        return (
            "S" in str(entry.grade).upper()
            or "stand lvl" in subject
            or "standard lvl" in subject
        )

    # ------------------------------------------------------------------
    # Grade clean-up and queries
    # ------------------------------------------------------------------
    def sanitise_ib_grades(self):
        """
        For IB students, drop standard-level entries and strip the H marker.

        A student counts as IB when any of their qualifications is in
        ``ib_permutations()``. The filtered lists replace the values in
        ``which_grades`` only; the ``*_entries`` attributes keep referring to
        the unfiltered lists.
        """
        if set(self.get_all_qualifications()).isdisjoint(ib_permutations()):
            return

        # Snapshot the items because the mapping values are reassigned below.
        for key, entries in list(self.which_grades.items()):
            if not entries:
                continue
            kept = [entry for entry in entries if not self._is_standard_level(entry)]
            for entry in kept:
                grade = entry.grade if isinstance(entry.grade, str) else str(entry.grade)
                # Higher-level marker: upper case takes precedence, and only
                # one of the two cases is stripped.
                if "H" in grade:
                    stripped = grade.replace("H", "")
                elif "h" in grade:
                    stripped = grade.replace("h", "")
                else:
                    continue
                entry.grade = stripped
                entry.grade_info[0] = stripped
            self.which_grades[key] = kept

    def get_all_qualifications(self):
        """Return the qualification of every entry, in ``which_grades`` order."""
        return [
            item.qualification
            for grade_entries in self.which_grades.values()
            if grade_entries
            for item in grade_entries
        ]

    def unique_qualifications(self):
        """Return the set of distinct qualifications."""
        return set(self.get_all_qualifications())

    def get_main_qualification(self):
        """Return the most frequent qualification, or "" when there is none."""
        qualifications = self.get_all_qualifications()
        if not qualifications:
            return ""
        return Counter(qualifications).most_common(1)[0][0]

    def get_grade_for_qualification(self, target_qualification):
        """
        Yield the grade of every entry matching ``target_qualification``.

        An entry matches when ``target_qualification`` is a substring of its
        qualification and both its grade and its year are not None. Entries
        whose qualification is not a string (e.g. None, set by
        ``handle_detailed_entry``) are skipped rather than raising TypeError.
        """
        for entries in self.which_grades.values():
            if not entries:
                continue
            for item in entries:
                if not isinstance(item.qualification, str):
                    continue
                if (
                    target_qualification in item.qualification
                    and item.grade is not None
                    and item.year is not None
                ):
                    yield item.grade

    # ------------------------------------------------------------------
    # Detail ("module") rows
    # ------------------------------------------------------------------
    def is_detailed_entry(self, input_qualification, rowCounter):
        """True when the "Date" cell of the row is a string in ``detail_string()``."""
        target = input_qualification["Date"][rowCounter]
        return isinstance(target, str) and target in detail_string()

    def handle_detailed_entry(self, input_qualification, rowCounter):
        """
        Parse the module details held in the "Body" cell of a detail row.

        The qualification is taken from the previous row ("Exam" or
        "Exam Level" column); it is None when that cell is missing.

        Raises:
            NotImplementedError: the table has neither an "Exam" nor an
                "Exam Level" column.
        """
        columns = input_qualification.columns
        if "Exam" in columns:
            qualification_identifier = "Exam"
        elif "Exam Level" in columns:
            qualification_identifier = "Exam Level"
        else:
            raise NotImplementedError

        qualification = self._cell(
            input_qualification, qualification_identifier, rowCounter - 1
        )
        if isna(qualification):
            qualification = None

        # Subject/grade are split the same way as in predicted_grade_entries:
        # the text after the marker is the grade, the text before it the subject.
        return self._module_entries(
            qualification, input_qualification["Body"][rowCounter]
        )

    # ------------------------------------------------------------------
    # Completed qualifications
    # ------------------------------------------------------------------
    def completed_grade_entries(self):
        """
        Fill ``completed_entries`` from the completed-qualifications table.

        Returns None when there is no such table, otherwise the entry list.
        A detail row is only parsed when the row before it is a valid exam.
        """
        table = self.completed_qualifications
        if table is None:
            return None

        for row in table.index:
            if self.is_completed_qual_valid(row):
                self.completed_entries.append(
                    GradeEntry(
                        table["Exam"][row],
                        table["Subject"][row],
                        table["Grade"][row],
                        False,
                        self._year_from_date(table["Date"][row]),
                        False,
                    )
                )
            elif self.is_detailed_entry(table, row) and self.is_completed_qual_valid(
                row - 1
            ):
                self.completed_entries.extend(self.handle_detailed_entry(table, row))
        return self.completed_entries

    def is_completed_qual_valid(self, row):
        """True when the row's "Exam" cell is present and in ``valid_exams()``."""
        exam = self._cell(self.completed_qualifications, "Exam", row)
        if isna(exam):
            return False
        return exam in valid_exams()

    # ------------------------------------------------------------------
    # Exam results
    # ------------------------------------------------------------------
    def examresult_entries(self):
        """
        Fill ``results_entries`` from the exam-results table.

        Returns None when there is no such table, otherwise the entry list.
        """
        table = self.exam_results
        if table is None:
            return None

        for row in table.index:
            if self.is_examresult_valid(row):
                self.results_entries.append(
                    GradeEntry(
                        table["Exam Level"][row],
                        table["Subject"][row],
                        table["Grade"][row],
                        False,
                        self._year_from_date(table["Date"][row]),
                        True,
                    )
                )
            elif self.is_detailed_entry(table, row):
                self.results_entries.extend(self.handle_detailed_entry(table, row))
        return self.results_entries

    def is_examresult_valid(self, row):
        """True when the row's "Exam Level" cell is present and in ``valid_exams()``."""
        exam_level = self._cell(self.exam_results, "Exam Level", row)
        if isna(exam_level):
            return False
        return exam_level in valid_exams()

    # ------------------------------------------------------------------
    # Predicted / uncompleted qualifications
    # ------------------------------------------------------------------
    def predicted_grade_entries(self):
        """
        Fill ``predicted_entries`` from the uncompleted-qualifications table.

        Returns None when there is no such table, otherwise the entry list.

        Per row:
        * at least one of "Grade" / predicted grade present -> one entry;
        * both missing and the row is a detail row -> one entry per module,
          using the qualification of the previous row;
        * otherwise the row is skipped.
        """
        table = self.uncompleted_qualifications
        if table is None:
            return None

        for row in table.index:
            predicted = table[self._PREDICTED_GRADE_COLUMN][row]
            grade = table["Grade"][row]
            predicted_missing = isna(predicted)
            grade_missing = isna(grade)

            if predicted_missing and grade_missing:
                if self.is_detailed_entry(table, row):
                    qualification = self._cell(table, "Exam", row - 1)
                    if isna(qualification):
                        qualification = self._cell(table, "Body", row - 1)
                    self.predicted_entries.extend(
                        self._module_entries(qualification, table["Body"][row])
                    )
                continue

            # Use the predicted grade when it is the only one present, or when
            # both are present but "Grade" holds an "Unnamed" placeholder.
            if grade_missing or (not predicted_missing and "Unnamed" in str(grade)):
                valid_grade = predicted
            else:
                valid_grade = grade

            # Fall back to the "Body" column when no exam name is given.
            qualification = table["Exam"][row]
            if isna(qualification):
                qualification = table["Body"][row]

            self.predicted_entries.append(
                GradeEntry(
                    qualification,
                    table["Subject"][row],
                    valid_grade,
                    True,
                    self._year_from_date(table["Date"][row]),
                    False,
                )
            )
        return self.predicted_entries