markovstate.py (forked from barrucadu/markov)
import time
import itertools

import tokenise
import markov


class MarkovStateError(Exception):
    def __init__(self, value):
        self.value = value


class MarkovState:
    """Class to keep track of a markov generator in progress.
    """

    def __init__(self):
        self.markov = None
        self.generator = None

    def generate(self, chunks, seed=None, prob=0, offset=0,
                 startf=lambda t: True, endchunkf=lambda t: True,
                 kill=0, prefix=()):
        """Generate some output, starting anew, and save the state of the
        generator so it can be resumed later.

        :param chunks: The number of chunks to generate.
        :param seed: The random seed. If not given, use the system time.
        :param prob: The probability of random token substitution.
        :param offset: The number of tokens to discard from the start.
        :param startf: Only start outputting after a token for which this is
                       True is produced.
        :param endchunkf: End a chunk when a token for which this is True
                          is produced.
        :param kill: Drop this many tokens from the end of the output,
                     after finishing.
        :param prefix: Prefix to seed the Markov chain with.
        """

        if self.markov is None:
            raise MarkovStateError("No markov chain loaded!")

        if seed is None:
            seed = int(time.time())
            print("Warning: using seed {}".format(seed))

        if len(prefix) > self.markov.n:
            print("Warning: truncating prefix")
            prefix = prefix[self.markov.n - 1:]

        self.markov.reset(seed, prob, prefix)

        # Skip tokens until one satisfying startf is produced. dropwhile is
        # lazy, so its result has to be kept and consumed from below.
        tokens = itertools.dropwhile(lambda t: not startf(t), self.markov)

        # Discard the first 'offset' tokens.
        next(itertools.islice(tokens, offset, offset), None)

        def gen(n):
            # Emit tokens until n chunks have been closed by endchunkf,
            # then drop the last 'kill' tokens from the joined output.
            out = []
            while n > 0:
                tok = next(tokens)
                out.append(tok)
                if endchunkf(tok):
                    n -= 1
            return ' '.join(out if not kill else out[:-kill])

        self.generator = gen
        return self.generator(chunks)

    def more(self, chunks=1):
        """Generate more chunks of output, using the established generator.
        """
        if self.generator is None:
            raise MarkovStateError("No generator to resume!")

        return self.generator(chunks)

    def train(self, n, stream, noparagraphs=False):
        """Train a new markov chain, overwriting the existing one.
        """
        training_data = tokenise.Tokeniser(stream=stream,
                                           noparagraphs=noparagraphs)

        self.markov = markov.Markov(n)
        self.markov.train(training_data)
        self.generator = None

    def load(self, filename):
        """Load a markov chain from a file.
        """
        self.generator = None
        self.markov = markov.Markov()
        self.markov.load(filename)

    def dump(self, filename):
        """Dump a markov chain to a file.
        """
        if self.markov is None:
            raise MarkovStateError("No markov chain loaded!")

        self.markov.dump(filename)
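

# Example usage (illustrative sketch, not part of the original module): it
# exercises the MarkovState API defined above. The corpus path "corpus.txt",
# the chain order 3, and the dump filename are hypothetical; tokenise.Tokeniser
# and markov.Markov are assumed to behave as the calls in this module imply.
if __name__ == "__main__":
    state = MarkovState()

    # Train a 3-gram chain from a text file, then generate two chunks,
    # treating tokens that end in a full stop as chunk boundaries.
    with open("corpus.txt") as stream:
        state.train(3, stream)
    print(state.generate(chunks=2, endchunkf=lambda tok: tok.endswith(".")))

    # Resume the same generator for one more chunk, then save the chain.
    print(state.more())
    state.dump("corpus.chain")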