-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcauses_challenge.py
117 lines (87 loc) · 3.29 KB
/
causes_challenge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/python
#By Steve Hanov, 2011. Released to the public domain
import time
import sys
DICTIONARY = "challenge_word_list.tmp.txt";
TARGET = "causes"
MAX_COST = 1
# Keep some interesting statistics
NodeCount = 0
WordCount = 0
# The Trie data structure keeps a set of words, organized with one node for
# each letter. Each node has a branch for each letter that may follow it in the
# set of words.
class TrieNode:
def __init__(self):
self.word = None
self.children = {}
global NodeCount
NodeCount += 1
def insert( self, word ):
node = self
for letter in word:
if letter not in node.children:
node.children[letter] = TrieNode()
node = node.children[letter]
node.word = word
# read dictionary file into a trie
trie = TrieNode()
for word in open(DICTIONARY, "rt").read().split():
WordCount += 1
trie.insert( word )
print "Read %d words into %d nodes" % (WordCount, NodeCount)
# The search function returns a list of all words that are less than the given
# maximum distance from the target word
def search( word, maxCost ):
# build first row
currentRow = range( len(word) + 1 )
results = []
# recursively search each branch of the trie
for letter in trie.children:
searchRecursive( trie.children[letter], letter, word, currentRow,
results, maxCost )
return results
# This recursive helper is used by the search function above. It assumes that
# the previousRow has been filled in already.
def searchRecursive( node, letter, word, previousRow, results, maxCost ):
columns = len( word ) + 1
currentRow = [ previousRow[0] + 1 ]
# Build one row for the letter, with a column for each letter in the target
# word, plus one for the empty string at column 0
for column in xrange( 1, columns ):
insertCost = currentRow[column - 1] + 1
deleteCost = previousRow[column] + 1
if word[column - 1] != letter:
replaceCost = previousRow[ column - 1 ] + 1
else:
replaceCost = previousRow[ column - 1 ]
currentRow.append( min( insertCost, deleteCost, replaceCost ) )
# if the last entry in the row indicates the optimal cost is less than the
# maximum cost, and there is a word in this trie node, then add it.
if currentRow[-1] <= maxCost and node.word != None:
results.append( (node.word, currentRow[-1] ) )
print node.word, letter, str(currentRow[-1])
# if any entries in the row are less than the maximum cost, then
# recursively search each branch of the trie
if min( currentRow ) <= maxCost:
for letter in node.children:
searchRecursive( node.children[letter], letter, word, currentRow,
results, maxCost )
causesNetwork = []
def network_of(word):
buff = []
buff.append(word)
while len(buff) > 0:
current = buff.pop()
for friend, score in search(current, 1):
if friend in causesNetwork:
continue
causesNetwork.append(friend)
buff.append(friend)
print "looking up network of %g" % len(causesNetwork)
start = time.time()
# network = search('causes',1)
network_of('causes')
end = time.time()
print "Network is %g" % len(network)
print "Search took %g s" % (end - start)