-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUCB_iSchool.py
38 lines (30 loc) · 1.44 KB
/
UCB_iSchool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
__author__ = 'Shrinivas'
from bs4 import BeautifulSoup
import csv
# Scrape the UC Berkeley iSchool faculty directory. For every faculty member
# linked from the listing page, print the profile page's title and each
# education entry, and record one CSV row per education entry in names.csv.
import urllib.request
from urllib.parse import urljoin

BASE_URL = 'http://www.ischool.berkeley.edu'
FACULTY_LIST_URL = BASE_URL + '/people/faculty'
FIELDNAMES = ['Full Name', 'University', 'Department', 'EducationSource']


def _fetch_soup(url: str):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree."""
    with urllib.request.urlopen(url) as response:
        html = response.read()
    # Name the parser explicitly: otherwise bs4 picks whichever parser happens
    # to be installed (and warns), so results can differ between machines.
    return BeautifulSoup(html, 'html.parser')


# The output file stays open for the whole crawl so rows can be written as
# they are found. newline='' is what the csv module requires of the file
# object; utf-8 keeps non-ASCII text from failing on platforms whose default
# encoding is narrower.
with open('names.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
    writer.writeheader()

    # The listing page holds one <td class="person-col-1"> per faculty member.
    soup = _fetch_soup(FACULTY_LIST_URL)
    for td in soup.find_all('td', attrs={'class': 'person-col-1'}):
        link = td.find('a')
        if link is None or not link.get('href'):
            # Cell without a usable profile link: nothing to follow.
            continue

        # urljoin copes with both site-relative and absolute hrefs.
        faculty_url = urljoin(BASE_URL, link['href'])
        faculty_soup = _fetch_soup(faculty_url)

        # NOTE(review): the page title presumably carries the person's name
        # together with the department (the original variable was called
        # nameAndDepartment); it is stored unsplit -- confirm the format.
        title_tag = faculty_soup.title
        name_and_department = title_tag.text if title_tag is not None else ''
        print(name_and_department)

        education = faculty_soup.find_all(
            'div', attrs={'class': 'field-field-person-education'})
        for res in education:
            print(res.text)
            writer.writerow({
                'Full Name': name_and_department.strip(),
                'University': 'UC Berkeley',
                'Department': 'iSchool',
                'EducationSource': res.text.strip(),
            })
            print("EducationSource:" + res.text)