-
Notifications
You must be signed in to change notification settings - Fork 0
/
exam6.py
163 lines (131 loc) · 6.84 KB
/
exam6.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# Unit 6: Fun with Words
"""
A portmanteau word is a blend of two or more words, like 'mathelete',
which comes from 'math' and 'athelete'. You will write a function to
find the 'best' portmanteau word from a list of dictionary words.
Because 'portmanteau' is so easy to misspell, we will call our
function 'natalie' instead:
natalie(['word', ...]) == 'portmanteauword'
In this exercise the rules are: a portmanteau must be composed of
three non-empty pieces, start+mid+end, where both start+mid and
mid+end are among the list of words passed in. For example,
'adolescented' comes from 'adolescent' and 'scented', with
start+mid+end='adole'+'scent'+'ed'. A portmanteau must be composed
of two different words (not the same word twice).
That defines an allowable combination, but which is best? Intuitively,
a longer word is better, and a word is well-balanced if the mid is
about half the total length while start and end are about 1/4 each.
To make that specific, the score for a word w is the number of letters
in w minus the difference between the actual and ideal lengths of
start, mid, and end. (For the example word w='adole'+'scent'+'ed', the
start,mid,end lengths are 5,5,2 and the total length is 12. The ideal
start,mid,end lengths are 12/4,12/2,12/4 = 3,6,3. So the final score
is
12 - abs(5-3) - abs(5-6) - abs(2-3) = 8.
yielding a score of 12 - abs(5-(12/4)) - abs(5-(12/2)) -
abs(2-(12/4)) = 8.
The output of natalie(words) should be the best portmanteau, or None
if there is none.
Note (1): I got the idea for this question from
Darius Bacon. Note (2): In real life, many portmanteaux omit letters,
for example 'smoke' + 'fog' = 'smog'; we aren't considering those.
Note (3): The word 'portmanteau' is itself a portmanteau; it comes
from the French "porter" (to carry) + "manteau" (cloak), and in
English meant "suitcase" in 1871 when Lewis Carroll used it in
'Through the Looking Glass' to mean two words packed into one. Note
(4): the rules for 'best' are certainly subjective, and certainly
should depend on more things than just letter length. In addition to
programming the solution described here, you are welcome to explore
your own definition of best, and use your own word lists to come up
with interesting new results. Post your best ones in the discussion
forum. Note (5) The test examples will involve no more than a dozen or so
input words. But you could implement a method that is efficient with a
larger list of words.
"""
"""itertools.combination
find that have common parts
countteir score
"""
import itertools
def common_part(word1, word2):
def get_common_part(start_pos1, start_pos2, word1, word2):
debug = False
if word1 == "morass" and word2== "assassination":
debug = True
word1_remain_length = len(word1) - start_pos1
word2_remain_length = len(word2) - start_pos2
#end_same_pos1 = 0
end_same_pos2 = word1_remain_length
same_word_break = False
for add_pos in xrange(min([word1_remain_length, word2_remain_length])):
if word1[start_pos1+add_pos] == word2[start_pos2+add_pos]:
continue
else:
#end_same_pos1 = start_pos1+add_pos
end_same_pos2 = start_pos2+add_pos
break
seems_common_part = word2[:end_same_pos2] if end_same_pos2 != 0 else word2[:1]
res = seems_common_part if seems_common_part and seems_common_part in word1 and (seems_common_part[-1] == word1[-1] if len(seems_common_part) != 1 else seems_common_part == word1[-1]) and seems_common_part == word1[0-len(seems_common_part):] else ""
if debug:
print seems_common_part, word1, word2, start_pos1, start_pos2, res
return res
result = [get_common_part(word1.index(w, i1), word2.index(w2, i2), word1, word2) for i1, w in enumerate(word1) for i2, w2 in enumerate(word2) if w==w2]
result = list(set([r for r in result if r]))
print word1, word2, result
return result
def word_score(word1, word2):
start = len(word1)
#print word1, word2
mid_strs = common_part(word1, word2)
#print mid_strs
if not mid_strs: return 0
result = []
for mid_str in mid_strs:
try:
end_str = word2[len(mid_str):]
except IndexError:
return 0
start_str = word1[:len(word1)-len(mid_str)]
if not start_str:
return 0
print "-", start_str, mid_str, end_str
the_word = start_str + mid_str + end_str
ideal_start_pos = len(the_word) / 4
ideal_mid_pos = len(the_word) / 2
word_score = len(the_word) - abs(len(start_str) - ideal_start_pos) - abs(len(mid_str) - ideal_mid_pos) - abs(len(end_str) - ideal_start_pos)
result.append((word_score, the_word))
result.sort(key=lambda x:x)
print "result", result
return result
def natalie(words):
"Find the best Portmanteau word formed from any two of the list of words."
word_pairs = itertools.permutations(words, 2)
word_list = itertools.chain.from_iterable([word_score(word1, word2) for (word1, word2) in word_pairs if word_score(word1, word2)])
result = list(word_list)
highest_score = max(result)[0] if result else 0
print highest_score
r = [word for score, word in result if score == highest_score]
print "r",r
return r[0] if r else None
def test_natalie():
"Some test cases for natalie"
assert natalie(['adolescent', 'scented', 'centennial', 'always', 'ado']) in ('adolescented','adolescentennial')
assert natalie(['eskimo', 'escort', 'kimchee', 'kimono', 'cheese']) == 'eskimono'
assert natalie(['kimono', 'kimchee', 'cheese', 'serious', 'us', 'usage']) == 'kimcheese'
assert natalie(['circus', 'elephant', 'lion', 'opera', 'phantom']) == 'elephantom'
assert natalie(['programmer', 'coder', 'partying', 'merrymaking']) == 'programmerrymaking'
assert natalie(['int', 'intimate', 'hinter', 'hint', 'winter']) == 'hintimate'
assert natalie(['morass', 'moral', 'assassination']) == 'morassassination'
assert natalie(['entrepreneur', 'academic', 'doctor', 'neuropsychologist', 'neurotoxin', 'scientist', 'gist']) in ('entrepreneuropsychologist', 'entrepreneurotoxin')
assert natalie(['perspicacity', 'cityslicker', 'capability', 'capable']) == 'perspicacityslicker'
assert natalie(['backfire', 'fireproof', 'backflow', 'flowchart', 'background', 'groundhog']) == 'backgroundhog'
assert natalie(['streaker', 'nudist', 'hippie', 'protestor', 'disturbance', 'cops']) == 'nudisturbance'
assert natalie(['night', 'day']) == None
assert natalie(['dog', 'dogs']) == None
assert natalie(['test']) == None
assert natalie(['']) == None
assert natalie(['ABC', '123']) == None
assert natalie([]) == None
assert natalie(['freud', 'mormons', 'dianetics', 'shinrikyo', 'jonestown', 'moonies']) != None
return 'tests pass'
print test_natalie()