-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
executable file
·84 lines (65 loc) · 2.79 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/env ipython
from modules.convocation.convocation import Convocation
from modules.session.session import Session
from modules.session.exception.exception import ParseError
import os
import re
import time
def main():
    """
    Main function of the system.

    Looks through ``docs/stenograms_lists`` for the stenogram list file,
    creates a Convocation for it and then, for every session URL listed in
    the file, builds a Session, parses and formats it, creates/updates the
    politicians that took part and splits the session into phrases.

    Sessions whose HTML cannot be parsed (ParseError) are reported on stdout
    and skipped.

    :return: tuple ``(convocations, politicians)`` - the list of created
             Convocation objects and the set of all politicians returned by
             ``Session.create_politicians()``.
    """
    lists_dir = 'docs/stenograms_lists'
    convocations = []
    politicians = set()

    print('> start')  # to track progress
    for filename in os.listdir(lists_dir):
        # Only this one list is processed; the other lists in the directory
        # are deliberately ignored by this filter.
        if filename != "stenograms_skl8.list":
            continue

        path = os.path.relpath(os.path.join(lists_dir, filename))
        with open(path, 'r') as read_file:
            start = time.time()
            print('> parsing {}...'.format(filename))  # to track progress

            # The convocation number is the first run of digits in the file
            # name; `\d+` keeps multi-digit numbers intact.
            no = int(re.search(r'\d+', filename)[0])
            new_conv = Convocation(no=no)
            convocations.append(new_conv)

            # go through the urls, one per line
            for line in read_file:
                # strip() instead of line[:-1]: the last line may have no
                # trailing newline, in which case [:-1] would cut the URL.
                url = line.strip()
                if not url:
                    continue  # blank line, nothing to download

                new_session = Session(convocation=new_conv)
                new_session.url = url
                new_session.set_date()
                date = new_session.session_date

                print('> parsing {} session...'.format(date))  # to track progress
                try:
                    new_session.parse_html()
                except ParseError as err:
                    print(err)
                    continue
                new_session.set_announcer()

                print('> formatting {} session...'.format(date))  # to track progress
                new_session.format()

                print('> creating\\updating politicians...')  # to track progress
                pols = new_session.create_politicians()
                for pol in pols:
                    pol.ideas_rating()
                    pol.ideas_timeline()
                # in-place update: no need to rebuild the whole set per session
                politicians.update(pols)
                new_conv.politicians_list.extend(pols)

                print('> dividing into phrases and analysing...')  # to track progress
                new_session.to_phrases()

                # NOTE(review): nesting was reconstructed from a flattened
                # source - confirm the elapsed-time report belongs inside the
                # session loop (seconds since this list file was opened).
                print('> {}'.format(time.time() - start))

            new_conv.ideas_rating()

    return convocations, politicians
# print(time.time() - start)
# politicians = list(politicians)
# for pol in politicians:
# print(pol)
# for idea in pol.ideas:
# print(idea.name, '|', idea.session_date, '|', idea.context[:50])
# print('--------')