-
Notifications
You must be signed in to change notification settings - Fork 3
/
class
executable file
·273 lines (235 loc) · 7.15 KB
/
class
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#!/usr/bin/perl -w
# Usage
# jwc2ical [class_num]
# Examples: jwc2ical 0903101
# jwc2ical 0903102 > 0903102.ics
use strict;
use LWP;
use HTML::Tree;
use HTML::TreeBuilder;
use Encode;
use utf8;
use encoding 'utf8';
# Class number to query; it is the only command-line argument.
my $class_num = $ARGV[0];
# Without a class number the request below would post an undefined value,
# so stop early with a usage hint instead.
die "Usage: $0 class_num\n" unless defined $class_num and length $class_num;
my $browser = LWP::UserAgent->new;
# Page that returns the timetable for the class posted in the "BH" field.
my $table_url = 'http://xscj.hit.edu.cn/Hitjwgl/XS/kfxqkb.asp';
# Chinese numerals one to twelve; the position in this list (1-based) is the
# number a numeral stands for.
my @num_name = qw{ 一 二 三 四 五 六 七 八 九 十 十一 十二};
# Field order used when a class record is printed.
my @class_parts = qw/ name teacher location week repeats interval date s_time t_time/;
# Field order used when an exam record is printed.
my @exam_parts = qw/ week date s_time t_time location name/;
my $table = $browser->post( $table_url, [ "BH" => $class_num]);
# Dies unless the request succeeded and returned text/html.
check( $table, $table_url);
# Period timetable read from the file "schedule" in the current directory.
# Each line describes one period as four numbers separated by whitespace,
# '|' or ':' (start hour, start minute, end hour, end minute), and is stored
# as [ [ start_hour, start_minute ], [ end_hour, end_minute ] ].
my @class_times;
open my $schedule_fh, '<', 'schedule' or die "No schedule! ($!)";
while ( my $line = <$schedule_fh>) {
    chomp $line;
    # Leading whitespace would produce an empty first field after split.
    $line =~ s/\A\s+//;
    # A blank line would be stored as a period and shift every later index.
    next if $line eq '';
    my @time = split /[\s|:]+/, $line;
    #print $time[0], "p", $time[1], "p", $time[2], "p", $time[3], "\n",;
    push @class_times, [ [ $time[0], $time[1]], [ $time[2], $time[3]]];
}
close $schedule_fh;
# The response body is decoded as GBK before the HTML tree is built.
my $page_text = decode( 'gbk', $table->content);
my $table_tree = HTML::TreeBuilder->new_from_content( $page_text);
# The timetable is taken to be the first <table> that has at least seven
# children; a <tbody> wrapper is not looked for.
my $real_table = $table_tree->look_down(
    '_tag', 'table',
    sub {
        my ( $node) = @_;
        my $child_count = $node->content_list;
        return $child_count >= 7;
    }
);
$real_table or die "Wrong class number $class_num!";
# Parsed timetable entries. Each element is a hash ref built by
# check_and_push, so a field is read like $classes[0]{teacher}.
my @classes;
# Course name => number of entries seen (maintained by check_and_push).
my %courses;
# Course name => index in order of first appearance (maintained by
# check_and_push).
my %courses_table;
# On the page a class is a run of rows: course name, "teacher location",
# and the week range. The day and the begin/end times are added by
# check_and_push from the column and row the entry was found in.
# There are six big lessons every day; lesson $i covers periods 2*$i-1 and
# 2*$i and sits in the row whose text contains "<first>-<last>节".
foreach my $i ( 1..6) {
    # Named periods instead of $a/$b, which would shadow the sort variables.
    my $last_period  = $i * 2;
    my $first_period = $last_period - 1;
    my $lesson = $real_table->look_down( '_tag', 'tr',
        sub { $_[0]->as_text =~ m/($first_period)-($last_period)节/ } );
    next unless $lesson;

    # Cell 0 is the "a-b节" label; cells 1..7 are the seven days of the week.
    my @cells = $lesson->content_list;
    foreach my $j ( 1..7) {
        my $data = $cells[$j];
        # A short row or a bare text node has nothing to look into.
        next unless ref $data;
        # Some cells contain only empty lines; a nested <tr> marks a real class.
        next unless $data->look_down( '_tag', 'tr');
        # The classes of one cell are themselves laid out in a nested table.
        my $inner_table = $data->look_down( '_tag', 'table');
        next unless $inner_table;

        my @class_info = ();
        foreach my $row ( $inner_table->content_list) {
            # Text nodes between rows cannot be searched; skip them.
            next unless ref $row;
            # Classes are separated by rows that contain an <hr>.
            if ( $row->look_down( '_tag', 'hr')) {
                check_and_push( \@class_info, \@classes, $j - 1, $i - 1)
                    if @class_info;
                next;
            }
            # There are two (or more) empty rows after each '周考试' entry;
            # ignore them.
            my $text = $row->as_text;
            next if $text eq "";
            if ( @class_info == 1) {
                # The row after the course name holds teacher and location,
                # such as "王忠杰 软件工程" or "吴 锐 嵌入式". The last token is
                # the location; everything before it is the teacher's name,
                # which may itself be split by spaces.
                # NOTE(review): a single-token row is stored as the location
                # with an empty teacher - confirm that matches the page.
                my @names = split /\s+/, $text;
                my $location = @names ? pop @names : '';
                my $teacher = join '', @names;
                push @class_info, $teacher, $location;
            }
            else {
                push @class_info, $text;
            }
        }
        # The last class of a cell is not followed by an <hr>.
        check_and_push( \@class_info, \@classes, $j - 1, $i - 1)
            if @class_info;
    }
}
# Exams are listed in a table nested inside the row whose text contains
# '考 试 安 排' (the characters are separated by whitespace on the page).
my $exams_table = $real_table->look_down( '_tag', 'tr', sub { $_[0]->as_text =~ m/考\s+试\s+安\s+排/} );
$exams_table = $exams_table->look_down( '_tag', 'table') if $exams_table;
# A page without an exam section simply yields no exams instead of crashing.
warn "No exam table found" unless $exams_table;
# Parsed exams; each element is a hash ref with the keys week, date, s_time,
# t_time, location and name.
my @exams;
my @exam_nodes = $exams_table ? $exams_table->look_down( '_tag', 'td') : ();
foreach my $exam_node ( @exam_nodes) {
    my $text = $exam_node->as_text;
    next if $text eq '';
    my @exam_info = split /\s+/, $text;
    # Leading whitespace produces an empty first field; drop it so the
    # positions below line up.
    shift @exam_info if @exam_info and $exam_info[0] eq '';
    next unless @exam_info;
    # Format is: week, date, time, location, subject.
    if ( @exam_info < 5) {
        warn "Not expected data @exam_info";
        next;
    }
    # The subject may contain spaces; fold the surplus trailing tokens into it.
    while ( @exam_info > 5) {
        $exam_info[-2] .= " " . $exam_info[-1];
        pop @exam_info;
    }
    $exam_info[0] =~ s/第(\d*)周/$1/ or warn "No accure week";
    # Sunday ('周日') is rewritten to '周七' so that it maps to 7 in @num_name.
    $exam_info[1] =~ s/周日/周七/;
    $exam_info[1] =~ s/周(\d*)/$1/ or warn "No accure word on date";
    $exam_info[1] = find( \@num_name, $exam_info[1]) or warn "No accure date";
    --$exam_info[1]; # Range 0-6
    my $time = $exam_info[2];
    # Start hours may have one or two digits (8:00 as well as 08:00). Both
    # the ASCII ':' and the full-width '：' are accepted as separator.
    # Captures are taken from the match result itself so that a failed match
    # cannot pick up values left over from an earlier regex.
    my @clock = $time =~ m/(\d?\d)[:：](\d\d)-(\d\d)[:：](\d\d)/;
    warn "Time format is not recognized: $time" unless @clock;
    $exam_info[4] .= "考试";
    my $exam = {
        week => $exam_info[0],
        date => $exam_info[1],
        s_time => [ $clock[0], $clock[1]],
        t_time => [ $clock[2], $clock[3]],
        location => $exam_info[3],
        name => $exam_info[4],
    };
    push @exams, $exam;
}
# Print one line per record: the label, then every field prefixed by a
# backtick. Fields whose name contains "time" are two-element array refs and
# are printed as "first:second".
my $emit_records = sub {
    my ( $label, $fields, $records) = @_;
    for my $record ( @$records) {
        print STDOUT $label . ":";
        for my $field ( @$fields) {
            if ( $field =~ m/time/) {
                print STDOUT "`", $record->{$field}[0] . ":" . $record->{$field}[1];
            }
            else {
                print STDOUT "`", $record->{$field};
            }
        }
        print STDOUT "\n";
    }
};

# Each section starts with a header line naming its fields in output order.
print "Class_parts: @class_parts\n";
$emit_records->( 'Class', \@class_parts, \@classes);
print "Exam_parts: @exam_parts\n";
$emit_records->( 'Exam', \@exam_parts, \@exams);
exit;
# Turn one collected timetable entry into a class record and append it.
#
# Arguments:
#   $_[0]  array ref with the collected fields: course name, teacher,
#          location and week range text. It is always emptied before
#          returning so the caller can start collecting the next entry.
#   $_[1]  array ref that receives the finished class hash ref.
#   $_[2]  day index, stored as the record's "date".
#   $_[3]  lesson index; lesson N uses periods 2N and 2N+1 of @class_times.
#
# Also updates the file-level %courses (entries per course name) and
# %courses_table (course name => index of first appearance).
# Returns nothing.
sub check_and_push {
    my ( $info, $classes, $day, $lesson) = @_;

    # Nothing was collected, e.g. after a trailing <hr>.
    return unless @$info;

    warn "Not expected data @$info"
        if @$info != 4 and not $info->[0] =~ m/周考试/;
    my ( $name, $teacher, $location, $weeks) = @$info;
    # Clear the buffer on every path. Leaving an exam entry in it would make
    # all later classes of the same cell look like part of that exam.
    @$info = ();

    # Examinations are ignored here; they are processed separately.
    return if $name =~ m/周考试/;

    # Week range looks like "3-16周", optionally followed by a parenthesised
    # 双 or 单. Both ASCII and full-width parentheses are accepted, with or
    # without whitespace in front. Captures come from the match result, so a
    # failed match cannot reuse values from an earlier regex.
    my ( $s, $t, $parity);
    if ( defined $weeks) {
        ( $s, $t, $parity) = $weeks =~ m/(\d+)-(\d+)周(?:\s*[(（](双|单)[)）])?/;
    }
    if ( not defined $s) {
        warn "Not recognized weeks at " . ( defined $weeks ? $weeks : '');
        return;
    }

    my ( $repeats, $interval);
    if ( defined $parity) {
        # Class is taken once every two weeks: 双 keeps the even weeks,
        # 单 the odd ones. Shrink the range so both ends have that parity.
        my $trail = $parity eq '双' ? 0 : 1;
        $s += 1 if $s % 2 != $trail;
        $t -= 1 if $t % 2 != $trail;
        warn "Wrong week of " . $name if $s > $t;
        $repeats  = int( ( $t - $s) / 2) + 1;
        $interval = 2;
    }
    else {
        $repeats  = $t - $s + 1;
        $interval = 1;
    }

    # Every big lesson spans two periods: it starts with the first period
    # and ends with the second.
    my $class = {
        name => $name,
        teacher => $teacher,
        location => $location,
        week => $s,
        repeats => $repeats,
        interval => $interval,
        date => $day,
        s_time => $class_times[ $lesson * 2][0],
        t_time => $class_times[ $lesson * 2 + 1][1],
    };
    push @$classes, $class;

    if ( not exists $courses_table{$name}) {
        # Count first, then insert, so the new course gets the next index.
        my $next_index = keys %courses_table;
        $courses_table{$name} = $next_index;
    }
    ++$courses{$name};
    return;
}
# Locate a string in a list.
#
# Arguments:
#   $_[0]  array ref to search.
#   $_[1]  string to look for (compared with eq).
#
# Returns the 1-based position of the first equal element, so a hit is
# always true. Returns nothing (undef in scalar context, the empty list in
# list context) when the string is undefined or not present.
sub find {
    my ( $list, $wanted) = @_;
    # An undefined needle must not match an empty-string element.
    return unless defined $wanted;
    foreach my $index ( 0 .. $#{$list}) {
        return $index + 1 if $list->[$index] eq $wanted;
    }
    return;
}
# Validate an HTTP response before its content is used.
#
# Arguments:
#   $_[0]  response object; is_success, status_line and content_type are
#          called on it.
#   $_[1]  URL the response came from, used only in error messages.
#
# Dies when the request was not successful or the content type is not
# text/html; otherwise returns the response unchanged.
sub check {
    my ( $response, $url) = @_;

    if ( !$response->is_success) {
        die "$url error: ", $response->status_line;
    }
    if ( $response->content_type ne 'text/html') {
        die "Weird content type at $url -- ", $response->content_type;
    }
    # print $response->content;
    return $response;
}