forked from WojciechMula/pyahocorasick
-
Notifications
You must be signed in to change notification settings - Fork 1
/
dump2dot.py
87 lines (61 loc) · 1.68 KB
/
dump2dot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""
Aho-Corasick string search algorithm.
Author : Wojciech Muła, [email protected]
WWW : http://0x80.pl
License : public domain
"""
import ahocorasick
import os
from ahocorasick import EMPTY, TRIE, AHOCORASICK;
def dump2dot(automaton, file):
def writeln(text=""):
file.write(text + "\n")
def nodename(nodeid):
return 'node%x' % (nodeid & 0xffffffff)
if automaton.kind == EMPTY:
writeln("digraph empty {}")
return
if automaton.kind == TRIE:
name = "trie"
else:
name = "ahocorasick"
writeln("digraph %s {" % name)
nodes, edges, fail = automaton.dump()
# nodes
for nodeid, end in nodes:
if end:
attr = '[shape=doublecircle, label=""]'
else:
attr = '[shape=circle, label=""]'
writeln("\t%s %s" % (nodename(nodeid), attr))
def format_label(label):
label = str(label, 'ascii')
label = label.replace('"', r'\"')
return '"%s"' % label
# trie edges
for nodeid, label, destid in edges:
writeln("\t%s -> %s [label=%s]" % (nodename(nodeid), nodename(destid), format_label(label)))
# fail links
for nodeid, failid in fail:
writeln("\t%s -> %s [color=blue]" % (nodename(nodeid), nodename(failid)))
writeln("}")
def show(automaton):
path = '/dev/shm/%s.dot' % os.getpid()
with open(path, 'wt') as f:
dump2dot(automaton, f)
os.system("xdot %s" % path)
#os.system("dotty %s" % path)
os.unlink(path)
if __name__ == '__main__':
A = ahocorasick.Automaton(ahocorasick.STORE_LENGTH)
A.add_word("he")
A.add_word("her")
A.add_word("hers")
A.add_word("she")
A.add_word("cat")
A.add_word("shield")
with open('trie.dot', 'wt') as f:
dump2dot(A, f)
A.make_automaton()
with open('ahocorasick.dot', 'wt') as f:
dump2dot(A, f)