forked from AdamMeyers/The_Termolator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSection.py
executable file
·28 lines (27 loc) · 977 Bytes
/
Section.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import re, os
class Section:
    """A titled span of text taken from an input document."""

    def __init__(self, title, text):
        # Store the heading and the body exactly as supplied by the caller.
        self.title, self.text = title, text
def getSections(filename):
    """Input a filename, return a list of Section objects from the file.

    ``filename`` must name an existing file ending in '.txt', and a file
    with the same stem plus a '.fact' extension must exist alongside it.
    Each line of the .fact file that matches
    ``DOC_SEGMENT ID ... TITLE="..." ... START=<int> ... END=<int>``
    produces one Section whose title is the TITLE value and whose text is
    the slice of the .txt contents from START through END inclusive.
    Lines that do not match are ignored.

    Raises:
        OSError: with errno 2 when the .txt file is missing, the name does
            not end in '.txt', or the companion .fact file is missing.
    """
    if not os.path.exists(filename) or \
            not filename.endswith('.txt') or \
            not os.path.exists(filename[:-4] + '.fact'):
        raise OSError(2, 'Incorrect file format or no such (fact) file', filename)

    fact_filename = filename[:-4] + '.fact'

    # Context managers guarantee both handles are released, even if reading
    # fails part-way (the .fact handle was previously never closed).
    with open(filename) as text_file:
        fulltext = text_file.read()

    pattern = re.compile(r'.*DOC_SEGMENT ID.*TITLE=\"(.+?)\".*START=(\d+).*END=(\d+)')
    sections = []
    with open(fact_filename) as fact_file:
        for line in fact_file:
            m = pattern.match(line)
            if m:
                title, start, end = m.groups()
                # END is used as an inclusive offset, hence the +1 on the slice.
                sections.append(Section(title, fulltext[int(start):int(end) + 1]))
    return sections