-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcourse.rb
124 lines (105 loc) · 3.7 KB
/
course.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env ruby
require 'nokogiri'
require 'open-uri'
require 'pry'
# A single course scraped from an MIT catalog page: its number, its name and
# the course numbers listed as prerequisites.
class MITCourse
  # Splits an <h3> heading into the numeric course number (group 1) and the
  # course name (group 2); any word characters glued to the number are dropped.
  HEADING_PATTERN = /^([\d\.]+)\w* (.*)/.freeze

  # Recognises a fragment that contains a course number such as "18.02".
  COURSE_NUMBER_PATTERN = /\d+\.\d+/.freeze

  attr_accessor :name, :number, :prerequisites

  def initialize
    # Start with an empty list so a course without a "Prereq:" field can still
    # be iterated and counted by callers instead of yielding nil.
    @prerequisites = []
  end

  # Downloads a catalog page and returns the courses found on it.
  #
  # @param url [String] address of the catalog page
  # @return [Array<MITCourse>] courses in document order
  def self.get_courses(url)
    # Kernel#open stopped fetching URLs in Ruby 3.0; URI.open (from open-uri)
    # is the supported entry point. The block form closes the handle for us.
    doc = URI.open(url) { |io| Nokogiri::HTML(io) }
    # The page has no per-course wrapper element, so locate the parent that
    # holds the <h3> headings and walk its children one after another.
    container = doc.xpath('//h3[1]/..')
    parse_courses(container.children)
  end

  # Walks a flat list of sibling nodes and groups them into courses.
  #
  # Each node must respond to +name+, +inner_text+ and +get_attribute+.
  # An <h3> starts a new course, text containing "Prereq:" opens a prerequisite
  # field, and a <br> closes that field.
  #
  # @param elements [Enumerable] sibling nodes in document order
  # @return [Array<MITCourse>] every course found, including the last one
  def self.parse_courses(elements)
    courses = []
    current_course = nil
    current_prereq = nil

    # Hands the fragments collected so far to the course being built and
    # closes the prerequisite field.
    flush = lambda do
      if current_course && current_prereq
        current_course.prerequisites = sanitize_prereqs(current_prereq)
      end
      current_prereq = nil
    end

    elements.each do |element|
      case element.name
      when /h3/
        # A field still open here belongs to the previous course; close it so
        # it cannot leak into the course that starts now.
        flush.call
        courses.push(current_course) if current_course
        current_course = new
        heading = HEADING_PATTERN.match(element.inner_text.rstrip)
        current_course.number, current_course.name = heading ? heading.captures : [nil, nil]
      when /br/
        # <br> marks the end of the prerequisite field.
        flush.call
      when 'a'
        next unless current_course && current_prereq

        # Keep both the link text and its title attribute (when present);
        # sanitize_prereqs later keeps whichever parts hold course numbers.
        current_prereq.push(element.inner_text.strip)
        title = element.get_attribute('title')
        current_prereq.push(title.strip) if title
      else
        # Anything else is treated as plain text.
        text = element.inner_text.strip
        if text.match?(/Prereq:/)
          current_prereq = []
          text = text.gsub(/Prereq:/, '')
        end
        next unless current_course && current_prereq

        current_prereq.push(text.gsub(/Coreq:/, ''))
      end
    end

    # The input ends without a closing marker, so the course still being built
    # (and any field still open) has to be stored explicitly.
    flush.call
    courses.push(current_course) if current_course
    courses
  end

  # Reduces raw prerequisite fragments to the ones containing a course number.
  # Fragments may themselves hold comma-separated lists, so everything is
  # joined and re-split on commas before filtering.
  #
  # @param prereqs [Array<String>] raw fragments collected while parsing
  # @return [Array<String>] stripped fragments that contain a course number
  def self.sanitize_prereqs(prereqs)
    prereqs.join(',').split(',').map(&:strip).select { |item| item.match?(COURSE_NUMBER_PATTERN) }
  end
  # A bare `private` does not apply to `def self.` methods, so hide it explicitly.
  private_class_method :sanitize_prereqs
end
# Turns a course number into the node identifier used in the DOT output:
# every "." becomes "_" and the result is prefixed with "c".
def id(course)
  underscored = course.tr('.', '_')
  "c#{underscored}"
end
# Picks the hex colour (without the leading "#") for a given count.
# Counts 0 through 4 each have their own shade; everything else gets "8F0000".
def color_map(num)
  shades = %w[000000 00008F 008F8F 008F00 8F8F00]
  # Compare with Integer#== so numeric equality matches the way `case` would.
  slot = shades.each_index.find { |position| position == num }
  slot ? shades[slot] : "8F0000"
end
# Writes the prerequisite graph for the given courses to "math.dot" in the
# current working directory, replacing any existing file.
#
# Each course with a number and a non-empty name becomes one node, and each of
# its prerequisites becomes an edge pointing at it. The edge colour is chosen
# by color_map from the number of prerequisites the course has.
#
# @param courses [Enumerable] objects responding to number, name and prerequisites
# @return [nil]
def generate_dot(courses)
  # The block form closes the file even when writing raises part-way through.
  File.open("math.dot", "w") do |f|
    f.puts "digraph mit_math {"
    f.puts " graph[concentrate=true aspect=.25 rankdir=\"LR\"];"
    f.puts " node[shape=rect];\n"
    courses.each do |course|
      # Headings that could not be parsed leave number and name unset; skip them.
      next if course.number.nil? || course.name.nil? || course.name.empty?

      # A course without a prerequisite field may carry nil instead of a list.
      prereqs = Array(course.prerequisites)
      color = color_map(prereqs.length)
      f.puts " #{id(course.number)} [label=\"#{course.number}\\n#{course.name}\"];"
      prereqs.each do |prereq|
        f.puts " #{id(prereq)} -> #{id(course.number)} [color=\"\##{color}\"] ;"
      end
    end
    f.puts "}"
  end
end
# Script entry: scrape the two catalog pages, print the courses whose number
# matches 18.04, write math.dot, then render it to math.png with Graphviz `dot`.
catalog_pages = %w[
  http://student.mit.edu/catalog/m18a.html
  http://student.mit.edu/catalog/m18b.html
]
pagea, pageb = catalog_pages.map { |page_url| MITCourse.get_courses(page_url) }
# concat appends in place, so pagea and all_math are the same combined array.
all_math = pagea.concat(pageb)
pp(pagea.select { |course| course.number =~ /18\.04/ })
generate_dot(all_math)
%x(dot -Tpng math.dot -o math.png)
#binding.pry