-
Notifications
You must be signed in to change notification settings - Fork 0
/
DFALib.py
460 lines (388 loc) · 17.1 KB
/
DFALib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
# code for working with DFAs
# by Daniel J. Fremont
from collections import defaultdict
import random
import itertools
import time
# abstract class for implicitly or explicitly-represented DFAs
# Convention: the object None must not be used as a state! anything else, such
# as (None, None), is fine; the only place this is used is in ExplicitDFA's
# union method
class DFA(object):
def getAlphabet(self):
raise Exception('should be overridden by subclass')
def isAcceptingState(self, state):
raise Exception('should be overridden by subclass')
def getInitialState(self):
raise Exception('should be overridden by subclass')
def hasTransition(self, state, symbol):
raise Exception('should be overridden by subclass')
# returns the state transitioned to with the given starting state and input symbol
def transition(self, state, symbol):
raise Exception('should be overridden by subclass')
# returns an iterator over (symbol, destination) pairs from the given state
def transitions(self, state):
for symbol in self.getAlphabet():
if self.hasTransition(state, symbol):
yield (symbol, self.transition(state, symbol))
def checkString(self, s, trace=False):
state = self.getInitialState()
for (index, symbol) in enumerate(s):
if trace:
print(str(index)+': '+str(state))
if not self.hasTransition(state, symbol):
return False
state = self.transition(state, symbol)
if trace:
print(state)
return self.isAcceptingState(state)
def complement(self):
return ImplicitComplement(self)
# computes the explicit form of the given DFA, pruned so that every state lies on a path
# from the initial state to an accepting state; returns the pruned automaton, whether it
# has a cycle, and a reverse topological sorting of its states if it is acyclic
@staticmethod
def explicitPruning(automaton):
goodStates = set() # reachable states from which an accepting state can be reached
orderedGoodStates = [] # good states in reverse topological order
loopyStates = set() # reachable states which lie on a cycle
prunedDelta = defaultdict(dict)
alphabet = automaton.getAlphabet()
initialState = automaton.getInitialState()
# do a DFS from the initial state, identifying all reachable and good states; we
# unfortunately can't use recursion since the automaton may have a large depth,
# so we use an explicit stack; each element of the stack stores:
# - the current state
# - the symbol causing the transition to it
# - the set of states known to be reachable from it
# - an iterator for its transitions which have not yet been processed
# - whether or not it is known to lie on an accepting path
stack = [(initialState, None, set(), automaton.transitions(initialState), automaton.isAcceptingState(initialState))]
# the "return value" is the stack tuple for the last processed state
returnValue = (None, None, set(), None, False)
visitedStates = set()
while len(stack) > 0:
# retrieve current state and "return value"
(state, symbol, reachableStates, transitionIterator, canAccept) = stack.pop()
(child, childSymbol, childReachable, childIterator, childCanAccept) = returnValue
visitedStates.add(state) # keep track of visited states
# update information about state
reachableStates |= childReachable
canAccept |= childCanAccept
returnValue = (state, symbol, reachableStates, transitionIterator, canAccept)
if childCanAccept: # add transition to the child in the pruned DFA
prunedDelta[state][childSymbol] = child
# process next transition, if there is one
try:
(childSymbol, child) = next(transitionIterator) # throws an exception if there are no transitions left
# there are more transitions; push updated information about state onto stack
stack.append(returnValue)
# if child is already visited, do not explore again, but set the "return value"
# as if we have just returned from it; otherwise "recurse" on child
if child in visitedStates:
returnValue = (child, childSymbol, {child}, None, automaton.isAcceptingState(child) or (child in goodStates))
else:
stack.append((child, childSymbol, set(), automaton.transitions(child), automaton.isAcceptingState(child)))
returnValue = (None, None, set(), None, False)
except StopIteration as e: # no more children; "return" to parent state
# check for cycles and goodness
if state in reachableStates: # cycle detected
loopyStates.add(state)
if canAccept: # state is good; add it to the pruned DFA
goodStates.add(state)
orderedGoodStates.append(state)
# check for cycles and if there are no accepting paths
if not goodStates.isdisjoint(loopyStates):
raise Exception('cyclic automata not yet supported')
if not canAccept:
return (None, False, None)
goodAcceptingStates = set(filter(automaton.isAcceptingState, goodStates))
prunedDFA = ExplicitDFA(alphabet, goodStates, goodAcceptingStates, initialState, prunedDelta)
return (prunedDFA, False, orderedGoodStates)
@staticmethod
def fullDFA(alphabet):
return ExplicitDFA(alphabet, {0}, {0}, 0, {0: dict(zip(alphabet, itertools.repeat(0)))})
@staticmethod
def emptyDFA(alphabet):
return ExplicitDFA(alphabet, {0}, set(), 0, {0: dict(zip(alphabet, itertools.repeat(0)))})
# a DFA with an explicit set of states
class ExplicitDFA(DFA):
def __init__(self, alphabet=None, states=None, acceptingStates=None, initialState=None, delta=None):
self.alphabet = alphabet if (alphabet != None) else set()
self.states = states if (states != None) else set()
self.acceptingStates = acceptingStates if (acceptingStates != None) else set()
self.initialState = initialState
self.delta = delta if (delta != None) else defaultdict(dict) # state -> symbol -> state
self.counts = None
def getAlphabet(self):
return self.alphabet
def isAcceptingState(self, state):
return state in self.acceptingStates
def getInitialState(self):
return self.initialState
def hasTransition(self, state, symbol):
return symbol in self.delta[state]
def transition(self, state, symbol):
return self.delta[state][symbol]
def transitions(self, state):
return self.delta[state].items()
def complement(self):
return ExplicitDFA(self.alphabet, self.states, self.states - self.acceptingStates, self.initialState, self.delta)
# explicit synchronous product of self with another explicit DFA
def intersection(self, edfa):
alphabet = self.alphabet & edfa.alphabet
newDelta = defaultdict(dict)
newStates = set()
newAcceptingStates = set()
for stateA in self.states:
for stateB in edfa.states:
state = (stateA, stateB)
newStates.add(state)
if (stateA in self.acceptingStates) and (stateB in edfa.acceptingStates):
newAcceptingStates.add(state)
for symbol in alphabet:
if (symbol in self.delta[stateA]) and (symbol in edfa.delta[stateB]):
newDelta[state][symbol] = (self.delta[stateA][symbol], edfa.delta[stateB][symbol])
return ExplicitDFA(alphabet, newStates, newAcceptingStates, (self.initialState, edfa.initialState), newDelta)
# explicit union of self with another explicit DFA
def union(self, edfa):
alphabet = self.alphabet | edfa.alphabet
newDelta = defaultdict(dict)
newStates = set()
newAcceptingStates = set()
for stateA in self.states | {None}:
for stateB in edfa.states | {None}:
state = (stateA, stateB)
newStates.add(state)
if (stateA in self.acceptingStates) or (stateB in edfa.acceptingStates):
newAcceptingStates.add(state)
for symbol in alphabet:
destA = self.delta[stateA][symbol] if (symbol in self.delta[stateA]) else None
destB = edfa.delta[stateB][symbol] if (symbol in edfa.delta[stateB]) else None
newDelta[state][symbol] = (destA, destB)
return ExplicitDFA(alphabet, newStates, newAcceptingStates, (self.initialState, edfa.initialState), newDelta)
# given a reverse topological ordering of the states, returns a function which
# uniformly samples the accepting paths
def uniformSampler(self, orderedStates):
self.computeCounts(orderedStates)
return (lambda : self.sample())
# given a reverse topological ordering of the states, computes for each one the
# number of paths from it to an accepting state
def computeCounts(self, orderedStates):
self.counts = {}
for state in orderedStates:
count = 1 if (state in self.acceptingStates) else 0
for symbol in self.delta[state]:
count = count + self.counts[self.delta[state][symbol]]
self.counts[state] = count
# return a dict mapping state S -> states reachable from S
def reachabilityMap(self):
def helper(state, visited):
for dest in self.delta[state].values():
if dest not in visited:
visited.add(dest)
if dest in reachable:
visited |= reachable[dest]
else:
helper(dest, visited)
reachable = {}
for state in self.states:
visited = set()
helper(state, visited)
reachable[state] = visited
return reachable
# uniformly samples from accepting paths, having run computeCounts
def sample(self):
if self.counts == None:
raise Exception('must run computeCounts before sampling')
state = self.initialState
stateCount = self.counts[state]
word = []
while True:
r = random.randint(0, stateCount-1)
if state in self.acceptingStates:
if r == 0:
return word
r = r - 1
for symbol in self.alphabet:
if symbol in self.delta[state]:
destination = self.delta[state][symbol]
destCount = self.counts[destination]
if r < destCount:
word.append(symbol)
state = destination
stateCount = destCount
break
else:
r = r - destCount
# return a minimal DFA accepting the same language;
# the input DFA must have transitions for every (state, symbol) pair!
def minimize(self):
partition = [frozenset(self.acceptingStates), frozenset(self.states - self.acceptingStates)]
todo = {frozenset(self.acceptingStates)}
while len(todo) > 0:
target = todo.pop()
for symbol in self.alphabet:
sources = set()
for state in self.states:
if (symbol in self.delta[state]) and (self.delta[state][symbol] in target):
sources.add(state)
sources = frozenset(sources)
newPartition = []
for component in partition:
intersection = component & sources
difference = component - sources
if len(intersection) > 0 and len(difference) > 0:
newPartition.append(intersection)
newPartition.append(difference)
if component in todo:
todo.remove(component)
todo.add(intersection)
todo.add(difference)
elif len(intersection) <= len(difference):
todo.add(intersection)
else:
todo.add(difference)
else:
newPartition.append(component)
partition = newPartition
newStates = set()
newDelta = defaultdict(dict)
newAcceptingStates = set()
newStateForState = {}
for (index, component) in enumerate(partition):
for state in component:
newStateForState[state] = index
for (index, component) in enumerate(partition):
newStates.add(index)
if self.initialState in component:
newInitialState = index
for state in component: # get an arbitrary state
break
for symbol in self.alphabet:
newDelta[index][symbol] = newStateForState[self.delta[state][symbol]]
if state in self.acceptingStates:
newAcceptingStates.add(index)
return ExplicitDFA(self.alphabet, newStates, newAcceptingStates, newInitialState, newDelta)
# computes an explicit DFA representing a transduction of the given explicit automata;
# automataAndTransductions should be a list of pairs (automaton, transduction), where
# automaton is an ExplicitDFA and transduction is a function with args symbol, state
# where symbol is from the given alphabet and state is the current state of the
# product automaton (tuple of the individual states of the automata); the return value
# of transduction is then used as the input to automaton; if transduction == None, the
# input symbol is passed to automaton without being transformed; as in the ordinary
# product, if an automaton is passed an input symbol it has no transition for, the
# transduction will stall (i.e. have no transition for its corresponding input)
#
# a concrete example to make what this function does clearer: say you want to drive an
# automaton A with the output from a Mealy machine B with a given alphabet; let
# outB((symbol, (stateA, stateB))) compute the output of B in state stateB on input
# symbol; then transduction([(A, outB), (B, None)], alphabet) is the desired automaton
@staticmethod
def transduction(automataAndTransductions, alphabet):
# replace any blank transductions with one which just forwards the input symbol
automataAndTransductions = map(
lambda aAndT: (aAndT[0], (aAndT[1] if (aAndT[1] != None) else (lambda symbol, state: symbol))),
automataAndTransductions)
(automata, transductions) = zip(*automataAndTransductions)
stateSets = [automaton.states for automaton in automata]
initialState = tuple(automaton.initialState for automaton in automata)
newDelta = defaultdict(dict)
newStates = set()
newAcceptingStates = set()
#statesDone = 0
for state in itertools.product(*stateSets):
#statesDone += 1
#if statesDone % 100000 == 0:
# print '%d states processed' % statesDone
newStates.add(state)
if all((subState in automaton.acceptingStates) for automaton, subState in zip(automata, state)):
newAcceptingStates.add(state)
for symbol in alphabet:
convertedSymbols = [transduction(symbol, state) for transduction in transductions]
if all(automaton.hasTransition(subState, subSymbol) for automaton, subState, subSymbol in zip(automata, state, convertedSymbols)):
newDelta[state][symbol] = tuple(automaton.transition(subState, subSymbol) for automaton, subState, subSymbol in zip(automata, state, convertedSymbols))
return ExplicitDFA(set(alphabet), newStates, newAcceptingStates, initialState, newDelta)
class ExplicitIntegerDFA(ExplicitDFA):
def __init__(self, alphabet=None, states=None, acceptingStates=None, initialState=None, delta=None):
self.alphabet = alphabet if (alphabet != None) else []
self.states = states if (states != None) else []
self.acceptingStates = acceptingStates if (acceptingStates != None) else set()
self.initialState = initialState
self.delta = delta
self.counts = None
def getAlphabet(self):
return set(self.alphabet)
def hasTransition(self, state, symbol):
return (self.delta[state, symbol] >= 0)
def transition(self, state, symbol):
return self.delta[state, symbol]
def complement(self):
return ExplicitIntegerDFA(self.alphabet, self.states, set(self.states) - self.acceptingStates, self.initialState, self.delta)
class ImplicitComplement(DFA):
def __init__(self, automaton):
self.dfa = automaton
def getAlphabet(self):
return self.dfa.getAlphabet()
def isAcceptingState(self, state):
return not self.dfa.isAcceptingState(state)
def getInitialState(self):
return self.dfa.getInitialState()
def hasTransition(self, state, symbol):
return self.dfa.hasTransition(state, symbol)
def transition(self, state, symbol):
return self.dfa.transition(state, symbol)
def transitions(self, state):
return self.dfa.transitions(state)
class ImplicitProduct(DFA):
def __init__(self, automata):
self.automata = list(automata)
self.alphabet = set.intersection(*(automaton.getAlphabet() for automaton in self.automata))
self.initialState = tuple(automaton.getInitialState() for automaton in self.automata)
def getAlphabet(self):
return self.alphabet
def isAcceptingState(self, state):
for subState, automaton in zip(state, self.automata):
if not automaton.isAcceptingState(subState):
return False
return True
def getInitialState(self):
return self.initialState
def hasTransition(self, state, symbol):
for subState, automaton in zip(state, self.automata):
if not automaton.hasTransition(subState, symbol):
return False
return True
def transition(self, state, symbol):
return tuple(automaton.transition(subState, symbol) for subState, automaton in zip(state, self.automata))
class ImplicitUnion(ImplicitProduct):
def __init__(self, automata):
self.automata = list(automata)
self.alphabet = set.union(*[automaton.getAlphabet() for automaton in self.automata])
self.initialState = tuple(automaton.getInitialState() for automaton in self.automata)
def isAcceptingState(self, state):
for subState, automaton in zip(state, self.automata):
if automaton.isAcceptingState(subState):
return True
return False
# generates a DFA accepting all words with length in the given range
def minMaxLengthDFA(minLength, maxLength, alphabet):
assert minLength <= maxLength
dfa = maxLengthDFA(maxLength, alphabet)
for l in range(minLength):
dfa.acceptingStates.remove(l)
return dfa
# generates a DFA accepting all words of at most the given length
def maxLengthDFA(maxLength, alphabet):
return minLengthDFA(maxLength+1, alphabet).complement()
# generates a DFA accepting all words of at least the given length
def minLengthDFA(minLength, alphabet):
states = set(range(minLength+1))
acceptingStates = set([minLength])
delta = defaultdict(dict)
for state in range(minLength):
for symbol in alphabet:
delta[state][symbol] = state+1
for symbol in alphabet:
delta[minLength][symbol] = minLength
return ExplicitDFA(alphabet, states, acceptingStates, 0, delta)