generated from cis3296f22/project-template
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtemple_requests.py
368 lines (346 loc) · 18.8 KB
/
temple_requests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
import requests
from bs4 import BeautifulSoup
import re
from algo import Schedule
PAGE_MAX_SIZE = 50
def get_subj(degrs_html_str:str,str_to_search:str,start:int,offset_to_subj:int)->str:
    """
    Retrieves the subject of the degree program from the given html
    @param degrs_html_str : html with degree program information
    @param str_to_search : unique part of the html to search for to get close to the subject text
    @param start : starting index of degrs_html_str for find() to start looking for str_to_search
    @param offset_to_subj : offset from the start of str_to_search to the first character of the subject
    @return : str representing a subject for a degree program (i.e. Biology), or an empty string if str_to_search is not found
    """
    marker_ind = degrs_html_str.find(str_to_search, start)
    #without this guard a failed find() (-1) would silently read from an unrelated position
    if marker_ind == -1:
        return ''
    subj_start = marker_ind + offset_to_subj
    #the subject text runs up to the next opening angle bracket
    subj_end = degrs_html_str.find('<', subj_start)
    if subj_end == -1:
        #no tag follows the subject, so take everything that is left
        return degrs_html_str[subj_start:]
    return degrs_html_str[subj_start:subj_end]
def get_degr_urls_and_abbrvs(degrs_html_str:str,col_num:int,start:int):
    """
    Retrieves the url for a specific degree program and the abbreviation of the level (i.e. MS or BA)
    @param degrs_html_str : str with a portion of html to parse
    @param col_num : column number to indicate section of html to look at (1:undergraduate, 2:graduate, 3:professional)
    @param start : current index in degrs_html_str
    @return : array of tuples in the form (degr_url, abbrv) and the index just past the last abbreviation read if there is at least one link and abbreviation, otherwise an empty array and the parameter start are returned
    """
    urls_and_abbrvs_arr = []
    #the current column ends where the next column's marker begins; when there is no next marker
    #(find() gives -1) the column runs to the end of the html instead of being skipped entirely
    col_end = degrs_html_str.find('column'+str(col_num+1))
    if col_end == -1:
        col_end = len(degrs_html_str)
    next_search_start = start
    href_ind = degrs_html_str.find('href',start)
    #while there is a link to a degree program (which is in the href attribute) in the current column
    while 0 < href_ind < col_end:
        #6 == len('href="'), so this lands on the first character of the url
        url_start = href_ind+6
        url_end = degrs_html_str.find('"',url_start)
        #the abbreviation is the text between the end of the opening tag and the next tag
        abbrv_start = degrs_html_str.find('>',href_ind)+1
        abbrv_end = degrs_html_str.find('<',abbrv_start)
        #malformed/truncated markup: stop and keep what was parsed so far
        if url_end == -1 or abbrv_start == 0 or abbrv_end == -1:
            break
        urls_and_abbrvs_arr.append((degrs_html_str[url_start:url_end], degrs_html_str[abbrv_start:abbrv_end]))
        #returned so the caller can resume its search for the next 'column#' from here
        next_search_start = abbrv_end
        href_ind = degrs_html_str.find('href',abbrv_end)
    #no link/degree program for the current column indicated by col_num
    if not urls_and_abbrvs_arr:
        return [],start
    return urls_and_abbrvs_arr,next_search_start
def get_degr_progs()->dict:
    """
    Retrieves all degree programs at Temple University from its Academic Bulletin
    @return : a dictionary of degree program strings (subject followed by level abbreviation) mapped to their corresponding links on success, otherwise a dictionary with a single error message key mapped to an empty string
    """
    try:
        degr_program_to_url = dict()
        #timeout keeps the application from hanging forever on a stalled connection
        req = requests.get("https://bulletin.temple.edu/academic-programs/", timeout=30)
        soup = BeautifulSoup(req.content,'html.parser')
        degr_programs_htmls = soup.find('tbody', class_='fixedTH',id='degree_body')
        for html in degr_programs_htmls:
            degrs_html_str = str(html)
            #special case for first row where the style is being set (html has extra stuff)
            if 'style' in degrs_html_str:
                subj = get_subj(degrs_html_str,'>',degrs_html_str.find('column0'),1)
            elif not html.text.isspace():
                subj = get_subj(degrs_html_str,'column0',0,9)
            else:
                #whitespace-only node between rows, nothing to parse
                continue
            #walk columns 1-3, resuming each search where the previous column's parsing stopped
            next_col_str_search_start_ind = 0
            for col_num in range(1,4):
                col_start_ind = degrs_html_str.find('column' + str(col_num),next_col_str_search_start_ind)
                urls_and_abbrvs_arr, next_col_str_search_start_ind = get_degr_urls_and_abbrvs(degrs_html_str, col_num, col_start_ind)
                for degr_url, abbrv in urls_and_abbrvs_arr:
                    #skip entries whose link text says the program is not currently offered
                    if abbrv and 'not currently' not in abbrv:
                        degr_program_to_url[subj+' '+abbrv]=degr_url
        return degr_program_to_url
    except Exception as e:
        return {f"Try connecting to the internet and restarting the application. \nResulting error(s): {e}":""}
def get_curric(degr_prog_url:str)->list[str]:
    """
    Retrieves the curriculum for the specified degree program
    @param degr_prog_url : the portion of the url for the specific degree program
    @return : list of [SUBJ ####, Course_Name] pairs, each course listed once, for courses in the requirements section of the degree program link specified by degr_prog_url; an empty list if no requirements section is found; a single-element list holding an error message on failure
    """
    try:
        #timeout keeps the application from hanging forever on a stalled connection
        req = requests.get("https://bulletin.temple.edu/" + degr_prog_url + "#requirementstext", timeout=30)
        soup=BeautifulSoup(req.content,'html.parser')
        requirements_html = soup.find('div',id='requirementstextcontainer')
        if requirements_html is None:
            #fall back to the alternative container id
            requirements_html = soup.find('div', id='programrequirementstextcontainer')
        if requirements_html is None:
            return []
        courses_html = requirements_html.find_all('tr',class_=re.compile('(^.*even*$|^.*odd.*$)'))
        curric = []
        #tracks (subject+number, name) pairs already added; the entries themselves are lists,
        #so a tuple-vs-list membership test against curric would never match
        seen = set()
        for c in courses_html:
            subj_and_num_html = c.find('a',class_='bubblelink code')
            #rows without a course code link carry no course info
            if not subj_and_num_html:
                continue
            subj_and_num = subj_and_num_html.text
            td_htmls = c.find_all('td')
            course_name = td_htmls[1].text
            if (subj_and_num,course_name) not in seen:
                seen.add((subj_and_num,course_name))
                curric.append([subj_and_num,course_name])
        return curric
    except Exception as e:
        return [f"Try connecting to the internet and restarting the application. \nResulting error(s): {e}"]
def get_param_data_codes(endpoint:str)->dict:
    """
    Looks up the codes used in url queries for a kind of parameter data such as semester or campus
    Credit: Neil Conley (Github: gummyfrog)
    @param endpoint: str representing endpoint for specific parameter data (i.e. "getTerms" or "get_campus")
    @return : dictionary mapping each description to its code on success, otherwise a dictionary with a single error message key mapped to an empty string
    """
    query = {"offset": "1", "max": "10"}
    try:
        entries = requests.get("https://prd-xereg.temple.edu/StudentRegistrationSsb/ssb/classSearch/"+endpoint, query).json()
        #for the terms endpoint, entries whose description mentions Orientation are left out
        return {
            entry['description']: entry['code']
            for entry in entries
            if not (endpoint=="getTerms" and "Orientation" in entry['description'])
        }
    except Exception as e:
        return {f"Try connecting to the internet and restarting the application. \nResulting error(s): {e}":""}
#can retrieve other info such as "Would take again" and difficulty later on if it helps
def get_rmp_data(prof:str):
    """
    Retrieves information from ratemyprofessors.com related to the specified professor's ratings.
    @param prof : professor to retrieve information about on ratemyprofessors.com
    @return : array of non-zero rating and non-zero rating amount on success, array of 0.0 and 0.0 on failure or if no entry can be found for the professor
    """
    from urllib.parse import quote
    try:
        #quote() turns the spaces into %20 and also escapes characters such as & or # that would otherwise corrupt the query string
        prof_search_req = requests.get("https://www.ratemyprofessors.com/search/professors/999?q="+quote(' '.join(prof.split())), timeout=30)
    except Exception:
        print("Ignore: Professor rating data not available")
        return [0.0, 0.0]
    #credit to Nobelz in https://github.com/Nobelz/RateMyProfessorAPI for retrieval of RMP professor ids
    prof_ids = re.findall(r'"legacyId":(\d+)', prof_search_req.text)
    input_name = prof.strip().lower()
    #loops through the professor ids found based on search by professor name
    for prof_id in prof_ids:
        try:
            prof_rating_req = requests.get("https://www.ratemyprofessors.com/professor/" + prof_id, timeout=30)
            soup = BeautifulSoup(prof_rating_req.content, 'html.parser')
            #extract the professor's name from the page to verify the match
            prof_name_tag = soup.find("span", class_="NameTitle__Name-dowf0z-0")
            #plain substring test: the name is data, not a pattern, so it is not handed to re.search
            if prof_name_tag and input_name not in prof_name_tag.get_text().strip().lower():
                continue
            #rating retrieval: the text sitting right before the last tag of the element
            rating_html = str(soup.find("div", re.compile("^RatingValue__Numerator")))
            rating_end = rating_html.rfind('<')
            rating_start = rating_html.rfind('>', 0, rating_end) + 1
            rating = float(rating_html[rating_start:rating_end])
            #retrieval of number of ratings: the text right after the last '">' up to the next tag
            num_reviews_html = str(soup.find("div", re.compile("^RatingValue__NumRatings")))
            num_start = num_reviews_html.rfind('\">') + 2
            num_end = num_reviews_html.find('<', num_start)
            if num_end == -1:
                raise ValueError("number of ratings not found")
            num_ratings = float(num_reviews_html[num_start:num_end])
            #if there are no ratings, continue to the next professor ID
            if rating == 0.0 or num_ratings == 0.0:
                continue
            return [rating, num_ratings]
        except Exception:
            #best effort: a page that cannot be fetched or parsed just moves on to the next id
            print(f"Ignore: Professor rating not found for id {prof_id}")
    return [0.0, 0.0]
def get_weighted_rating(sect_info):
    """
    Builds the key used to sort the sections for a course by professor rating data
    @param sect_info : one course section's data
    @return : tuple of the section's 'profRating' and 'numReviews' values, in that order
    """
    rating = sect_info['profRating']
    review_count = sect_info['numReviews']
    return (rating, review_count)
def get_authenticated_session(search_args:dict):
    """
    Opens a session against TUPortal's class scheduling service and prepares the arguments for a results query
    @param search_args : search parameters; posted when selecting the term and copied into the results arguments
    @return : (session, results_args) on success, where results_args is search_args plus paging/sorting options; (error message, None) if either request raises
    """
    session = requests.Session()
    # results query = the search arguments plus paging and sorting options
    results_args = dict(search_args)
    results_args.update(
        pageOffset=0,
        pageMaxSize=PAGE_MAX_SIZE,
        sortColumn="subjectDescription",
        sortDirection="asc",
    )
    try:
        # first request establishes the session, second one selects the term
        session.post("https://prd-xereg.temple.edu/StudentRegistrationSsb/")
        session.post("https://prd-xereg.temple.edu/StudentRegistrationSsb/ssb/term/search?mode=search", search_args)
    except Exception as e:
        return f"Try connecting to the internet and restarting the application. \nResulting error(s): {e}", None
    return session, results_args
def fetch_course_data(session, search_args, results_args)->dict:
    """
    Runs one page of a class search against the TUPortal scheduling service
    @param session : session object used to post the requests
    @param search_args : arguments posted with the subject lookup
    @param results_args : arguments posted with the search itself; "pageOffset" selects the page
    @return : the decoded JSON response with its "ztcEncodedImage" entry set to an empty string
    """
    # page number is 1-based and derived from the current offset
    page_num = int(results_args["pageOffset"]/PAGE_MAX_SIZE)+1
    subject_url = "https://prd-xereg.temple.edu/StudentRegistrationSsb/ssb/classSearch/get_subject?offset=" + str(page_num) + "&max="+str(PAGE_MAX_SIZE)
    # start the class search for the chosen term and page
    session.post(subject_url, search_args)
    # drop results from any earlier search
    session.post("https://prd-xereg.temple.edu/StudentRegistrationSsb/ssb/classSearch/resetDataForm")
    # run the search itself
    search_response = session.post("https://prd-xereg.temple.edu/StudentRegistrationSsb/ssb/searchResults/searchResults?startDatepicker=&endDatepicker=", results_args)
    payload = search_response.json()
    payload["ztcEncodedImage"] = ""
    return payload
def get_courses_from_keyword_search(term_code:str,keywords:str)->set:
    """
    Collects the courses available during the specified term that a keyword search returns
    @param term_code : code for semester desired (i.e. Spring 2024)
    @param keywords : string to search for
    @return : set of (SUBJ ####, Title) tuples on success, otherwise a one-element list holding a (message, "") tuple when the session or a request fails or when nothing matches
    """
    SEARCH_REQ = {"txt_keywordall":keywords,"term": term_code, "txt_term": term_code}
    session, results_args = get_authenticated_session(SEARCH_REQ)
    # a str in place of the session is the error message from session setup
    if isinstance(session, str):
        return [(session,"")]
    found = set()
    while True:
        try:
            page = fetch_course_data(session,SEARCH_REQ,results_args)
            total = page['totalCount']
            # another page is needed when the total exceeds what has been requested so far
            has_more = total>results_args['pageOffset']+PAGE_MAX_SIZE
            if has_more:
                results_args['pageOffset']+=PAGE_MAX_SIZE
            if not total:
                return [("There are no courses that have the keyword(s) you entered.","")]
            found.update(
                (sect['subject'] + ' ' + sect['courseNumber'],sect['courseTitle'])
                for sect in page['data']
            )
        except Exception as e:
            return [(f"Try connecting to the internet and restarting the application. \nResulting error(s): {e}","")]
        if not has_more:
            return found
def get_course_sections_info(course_info : dict, term:str, term_code:str,subj:str="",course_num:str="",attr="", campus_code = "MN", prof_rating_cache = {}):
    """
    Retrieves info on the sections available during the specified term for the specified class
    @param course_info : dictionary the section information is stored in, nested as course_info[term][campus_code][course] -> list of section dicts, where course is "SUBJ ####" or, when attr is given, attr
    @param term : semester desired (i.e. Spring 2024)
    @param term_code : number representing the semester
    @param subj : abbreviation representing the subject of the course
    @param course_num : number of the course
    @param attr : 2 character string attribute of the course (i.e. GU for Gened United States or GY for Intellectual Heritage I)
    @param campus_code : code of the campus to search
    @param prof_rating_cache : stores previously retrieved professor ratings to reduce the number of requests made; the shared default dict is what lets the cache persist between calls that do not pass their own
    @return : None if the requested course/attribute is already in course_info, empty string after a successful fetch, error message on failure
    Credit: https://github.com/gummyfrog/TempleBulletinBot
    """
    #if course info for the desired semester is already in course_info, return
    if term in course_info and campus_code in course_info[term] and (subj + ' ' + course_num in course_info[term][campus_code] or attr in course_info[term][campus_code]):
        return
    campus_courses = course_info.setdefault(term, dict()).setdefault(campus_code, dict())
    # term and txt_term need to be the same
    SEARCH_REQ = {
        "term": term_code,
        "txt_term": term_code,
        "txt_subject": subj,
        "txt_courseNumber": course_num,
        "txt_attribute": attr,
        "txt_campus": campus_code
    }
    session, results_args = get_authenticated_session(SEARCH_REQ)
    #a str in place of the session is the error message from session setup; pass it on as is
    if isinstance(session, str):
        return session
    days_of_the_week = ['monday','tuesday','wednesday','thursday','friday','saturday','sunday']
    #keys of course_info[term][campus_code] that received sections during this call
    updated_courses = set()
    moreResults=True
    while moreResults:
        try:
            data = fetch_course_data(session,SEARCH_REQ,results_args)
            if data['totalCount']>results_args['pageOffset']+PAGE_MAX_SIZE:
                results_args['pageOffset']+=PAGE_MAX_SIZE
            else:
                moreResults=False
            if not data['totalCount']:
                return 'Invalid course or course not available'
            for section in data['data']:
                #NOTE(review): sections with no faculty listed are skipped; the flattened source
                #leaves the extent of this condition ambiguous - confirm against the repository
                if not section['faculty']:
                    continue
                professor = section['faculty'][0]['displayName']
                rmp_info = prof_rating_cache.get(professor)
                if not rmp_info:
                    rmp_info = get_rmp_data(professor)
                    prof_rating_cache[professor]=rmp_info
                sched = Schedule()
                for meeting_type in section['meetingsFaculty']:
                    meet_time_info = meeting_type['meetingTime']
                    for day in days_of_the_week:
                        if meet_time_info[day]:
                            sched.add_timeslot(day,int(meet_time_info['beginTime']),int(meet_time_info['endTime']),meet_time_info['meetingTypeDescription'])
                #partOfTerm included in case can schedule two courses with the same meeting times but in different parts of the semester
                sect_info = {'name':section['subject'] + ' ' + section['courseNumber'],'term':section['term'],'CRN':section['courseReferenceNumber'],
                             'partOfTerm':section['partOfTerm'],'seatsAvailable':section['seatsAvailable'],'maxEnrollment':section['maximumEnrollment'],
                             'creditHours':section['creditHourLow'] or section['creditHourHigh'],
                             'professor':professor,'profRating':rmp_info[0],'numReviews':rmp_info[1],'schedule':sched}
                course = attr if attr else section['subject'] + ' ' + section['courseNumber']
                campus_courses.setdefault(course, []).append(sect_info)
                updated_courses.add(course)
        except Exception as e:
            return f"Try connecting to the internet and restarting the application. \nResulting error(s): {e}"
    #nothing was stored, so there is nothing to sort or offer
    if not updated_courses:
        return 'Invalid course or course not available'
    #sort exactly the lists that were filled, so a missing key can never raise here
    for course in updated_courses:
        campus_courses[course].sort(reverse=True,key=get_weighted_rating)
    return ''
# Manual smoke checks left over from development. The triple-quoted blocks are bare
# string expressions and the remaining lines are comments, so nothing below makes
# any call when the module is imported.
"""degr_progs= get_degr_progs()
for dgpg in degr_progs:
get_curric(degr_progs[dgpg])"""
#print(get_param_data_codes('getTerms'))
#print(get_param_data_codes('get_campus'))
"""course_info = dict()
get_course_sections_info(course_info,"2023 Fall", "202336",attr="GA")
print(len(course_info["2023 Fall"]["MN"]["GA"]))
get_course_sections_info(course_info,"2024 Spring", "202403","CIS","2168",'')
print(course_info)"""
#print(get_rmp_data("Sarah Stapleton"))
#print(get_courses_from_keyword_search("202436","Data Structures"))