-
Notifications
You must be signed in to change notification settings - Fork 5
/
sentence.py
74 lines (56 loc) · 1.88 KB
/
sentence.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -*- coding: utf-8 -*-
class Sentence(object):
    """A sentence assembled word by word between START and STOP markers.

    Two views of the sentence are maintained side by side:

    * ``_word_list`` -- every token in order, always beginning with
      ``Sentence.START`` and ending with ``Sentence.STOP`` once complete.
    * ``_sentence`` -- the rendered text, in which punctuation is glued to
      its neighbouring word instead of being separated by a space.
    """

    START = "<s>"
    STOP = "</s>"
    # Tokens starting with one of these attach to the text on their left,
    # i.e. no space is inserted in front of them.
    LEFT_SIDED_SYMBOLS = set('"\',.-/:;<>?!)]}$%')
    # When the rendered text ends with one of these, the next token attaches
    # directly to it, i.e. no space is inserted after them.
    RIGHT_SIDED_SYMBOLS = set('"\'-/<>([{')
    # Every character that is treated as a symbol on either side.
    SYMBOLS = LEFT_SIDED_SYMBOLS.union(RIGHT_SIDED_SYMBOLS)

    def __init__(self):
        """Constructs an empty Sentence holding only the START marker."""
        self._word_list = [Sentence.START]
        self._sentence = ""

    def __str__(self):
        """Returns the rendered text of the sentence (without markers)."""
        return self._sentence

    def __len__(self):
        """Returns the number of tokens in the sentence.

        The count includes the START marker, and the STOP marker once it
        has been added.
        """
        return len(self._word_list)

    def __iter__(self):
        """Iterates through the sentence token by token, markers included."""
        return iter(self._word_list)

    @property
    def complete(self):
        """Whether the sentence is complete, i.e. its last token is STOP."""
        return self._word_list[-1] == Sentence.STOP

    def add(self, word):
        """Adds a word to the sentence.

        The word is appended to the token list and, unless it is the STOP
        marker, to the rendered text. A separating space is inserted only
        when the word does not start with a left-sided symbol, the rendered
        text is non-empty, and that text does not end with a right-sided
        symbol.

        Empty words are ignored: they carry no text and have no first
        character to inspect for the spacing rule.

        Args:
            word: Word.
        """
        if not word:
            # Reject before mutating anything so the token list and the
            # rendered text can never get out of sync.
            return

        self._word_list.append(word)
        if word == Sentence.STOP:
            # The STOP marker is bookkeeping only; it is never rendered.
            return

        needs_space = (
            word[0] not in Sentence.LEFT_SIDED_SYMBOLS
            and self._sentence
            and self._sentence[-1] not in Sentence.RIGHT_SIDED_SYMBOLS
        )
        if needs_space:
            self._sentence += ' '
        self._sentence += word

    def get_last(self, n):
        """Returns the last n tokens of the sentence as a tuple.

        Args:
            n: Number of last words to get from the sentence.

        Returns:
            Tuple with at most n trailing tokens (markers included); an
            empty tuple when n is zero or negative.
        """
        if n <= 0:
            # A plain ``[-n:]`` slice would return the WHOLE list for n == 0,
            # because ``-0`` is just ``0``.
            return ()
        return tuple(self._word_list[-n:])

    @classmethod
    def from_line(cls, line):
        """Constructs a Sentence from a line of text.

        The line is split on single spaces; empty fragments produced by
        consecutive, leading or trailing spaces are not stored as words.
        The rendered text is the line exactly as given. No STOP marker is
        appended.

        Args:
            line: Line of text.

        Returns:
            Sentence (an instance of the class the method was called on).
        """
        sentence = cls()
        sentence._word_list.extend(
            token for token in line.split(' ') if token
        )
        sentence._sentence = line
        return sentence