# shakespeare.py
verbose = False
if verbose: print("Initializing...")
import curses
from curses.ascii import isdigit
import json
import nltk
from nltk.corpus import cmudict
from nltk.tokenize import word_tokenize
import random
from random import randint
import math
import approxSyls
import pronouncing
import multiprocessing
import time
import cPickle
d = cmudict.dict()
PUNCTS = [',','.','?',u"'",':',';','--','!',"''"]
# Words and tokens excluded from the tag dictionary and from rhyme candidates
bannedList = [
    "a", "'t", "t", "au", "an", "niggard",
    "ai", "ais",
    "[", "]", "c", '"',
    "(paren", "th", "'", "car", '"quote', "'i",
    "videotex", ".dot",
]
tagDict = {}  # maps a POS tag to the list of words seen with that tag
sonnets = "rekt"  # placeholder; replaced by load_sonnets() below
"""
Load and parse sonnets from a file and return the structure
@param {string} file_name The file location (complete path)
@return {dictionary[]}
"""
def load_sonnets(file_name):
    sonnets = None
    with open(file_name, "r") as sonnetFile:
        sonnets = json.load(sonnetFile)

    def get_tags_from_sonnet(sonnet):
        return map(lambda line: nltk.pos_tag(word_tokenize(line)), sonnet)

    def add_tags_to_sonnet(sonnet_info):
        sonnet_info["tags"] = get_tags_from_sonnet(sonnet_info["sonnet"])
        return sonnet_info

    return map(add_tags_to_sonnet, sonnets)
if verbose: print("Analyzing Shakespeare's works...")
sonnets = load_sonnets("./sonnets.json")
"""
Builds the tag dictionary from multiple tag lists/lines given by load_sonnets
"""
def buildTagDict(sonnets):
    #gets a sonnet line
    for sonnet in sonnets:
        for taglist in sonnet["tags"]:
            addTagsToDict(taglist)
"""
Iterate through a single-line list of word/tag pairs and add them
to the tag dictionary
"""
def addTagsToDict(tagList):
    global tagDict
    for word, tag in tagList:
        word = word.lower()
        if word not in bannedList:
            if tag not in tagDict:
                tagDict[tag] = [word]
            elif word not in tagDict[tag]:
                tagDict[tag].append(word)
    return tagDict
if verbose: print "building tagdict"
#buildTagDict(sonnets)
if verbose: print "finished building"
if verbose: print tagDict
"""
Return the rank of a CMUdict word part.
Returns -1 if the word part does not have a rank
"""
def toRank(part):
    if part[-1].isdigit():
        return int(part[-1])
    return -1
"""
Convert a word into a list of syllable stress ranks
"""
def wordToSylRanks(word):
    try:
        return [toRank(part) for part in d[word.lower()][0] if toRank(part) != -1]
    except KeyError:
        # word not in CMUdict: assume one stressed syllable per approximated syllable
        return [1] * approxSyls.apSyls(word)
"""
Convert a list of words into a monolithic list of syllable stress ranks
"""
def stanzaToSylRanks(stanza):
    ranks = []
    for word in stanza:
        ranks = ranks + wordToSylRanks(word)
    return ranks
"""
Check to see if a list of words follows iambic pentameter and has a rhymable last word
"""
def isIP(stanza):
    ranks = stanzaToSylRanks(stanza)
    if len(ranks) != 10:
        return False
    for r in [ranks[i] for i in [1, 3, 5, 7, 9]]:
        if r <= 0:
            return False
    last = getLast(stanza)
    rhymes = [x for x in pronouncing.rhymes(last) if wordToSylRanks(x) == wordToSylRanks(last)]
    if len([rhyme for rhyme in rhymes if rhyme not in bannedList]) == 0:
        return False
    if stanza[0][0] == u"'":
        return False
    return True
"""
Takes a list of tags and searches the tag dictionary for appropriate replacements. Returns a new array of the same length containing the replaced sentence
"""
def replaceWordTags(tags):
    # Assuming each element of tags is an nltk POS tag string, e.g. ['NN','VB','CC']
    global tagDict
    newLine = []
    for tag in tags:
        if tag in tagDict:
            replacement = random.choice(tagDict[tag])
            newLine.append(replacement)
        else:
            newLine.append("NOTAG")
    return newLine
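"""
Pick a replacement for `word` that rhymes with `rhyme`, preferring a rhyme whose
syllable stress pattern matches `word`; falls back to a random allowed rhyme
"""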
def chooseRhyme(word, rhyme):
    rhymes = []
    #Hacky fix for library bug. Wait, what am I saying? This whole project is hacky.
    while rhymes == []:
        rhymes = [r for r in pronouncing.rhymes(rhyme) if r not in bannedList]
    syls = wordToSylRanks(word)
    for r in rhymes:
        if wordToSylRanks(r) == syls:
            return r
    return rhymes[randint(0, len(rhymes) - 1)]
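"""
Swap the last word of `line` (skipping trailing punctuation) for a word that rhymes
with `rhyme`; returns the line unchanged when `rhyme` is empty
"""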
def makeRhyme(line, rhyme):
    if rhyme == "":
        return line
    else:
        chosenRhyme = chooseRhyme(getLast(line), rhyme)
        if line[-1] in PUNCTS:
            line[-2] = chosenRhyme
        else:
            line[-1] = chosenRhyme
        return line
"""
Generate a line in the meter
"""
def getIPLine(tags, rhyme):
    newLine = replaceWordTags(tags)
    noPunc = [x for x in newLine if x not in PUNCTS]
    while not isIP(noPunc):
        newLine = replaceWordTags(tags)
        noPunc = [x for x in newLine if x not in PUNCTS]
    newLine = makeRhyme(newLine, rhyme)
    return newLine
"""
Convert a list of words to a list of tags
"""
def toTags(line):
    return [x[1] for x in nltk.pos_tag(line)]
"""
Grab a random sonnet's tag set from the .json
"""
def getRandSonnetTags():
    return sonnets[randint(0, len(sonnets) - 1)]["tags"]
"""
Grab a random line from a sonnet
"""
def getRandSonnetLine(sonnet):
    return sonnet[randint(0, len(sonnet) - 1)]
"""
Generate a random sonnet structure of word tags
"""
def makeRandomSonnetStructure():
    sonnetStruct = []
    # For each of the 14 lines, pick the correspondingly-indexed line from a random sonnet
    for i in range(0, 14):
        randSonnet = getRandSonnetTags()
        line = []
        try:
            line = randSonnet[i]
        except IndexError:
            pass
        sonnetStruct.append([x[1] for x in line])
    return sonnetStruct
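"""
Return the last word of a line, skipping a trailing punctuation token
"""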
def getLast(line):
    last = line[-1]
    if last in PUNCTS:
        last = line[-2]
    return last
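"""
Generate a 14-line proto-sonnet (a list of word lists) in iambic pentameter
following the Shakespearean rhyme scheme ABAB CDCD EFEF GG
"""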
def createProtoSonnet():
    global tagDict
    if tagDict == {} or tagDict is None:
        buildTagDict(sonnets)
    lines = makeRandomSonnetStructure()
    line0 = getIPLine(lines[0], "")                 #a
    line1 = getIPLine(lines[1], "")                 #b
    line2 = getIPLine(lines[2], getLast(line0))     #a
    line3 = getIPLine(lines[3], getLast(line1))     #b
    line4 = getIPLine(lines[4], "")                 #c
    line5 = getIPLine(lines[5], "")                 #d
    line6 = getIPLine(lines[6], getLast(line4))     #c
    line7 = getIPLine(lines[7], getLast(line5))     #d
    line8 = getIPLine(lines[8], "")                 #e
    line9 = getIPLine(lines[9], "")                 #f
    line10 = getIPLine(lines[10], getLast(line8))   #e
    line11 = getIPLine(lines[11], getLast(line9))   #f
    line12 = getIPLine(lines[12], "")               #g
    line13 = getIPLine(lines[13], getLast(line12))  #g
    return [line0, line1, line2, line3, line4, line5, line6,
            line7, line8, line9, line10, line11, line12, line13]
"""
Takes a list of words and punctuation and returns a nicely formatted English sentence
"""
def wordListToSentence(wordList):
    sentence = ""
    for i in range(0, len(wordList) - 1):
        sentence = sentence + wordList[i]
        if wordList[i + 1] not in PUNCTS:
            sentence = sentence + " "
    sentence = sentence + wordList[-1]
    return sentence
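"""
Turn a proto-sonnet into printable text: capitalize line starts and "i",
patch a few tokenizer artifacts, and join the 14 lines with newlines
"""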
def protoSonnetToSonnet(protoSonnet):
    for outer in protoSonnet:
        outer[0] = outer[0].capitalize()
        for i in range(len(outer)):
            if outer[i] == u"i":
                outer[i] = u"I"
    for line in protoSonnet:
        for i in range(0, len(line)):
            if line[i] == u"''tis":
                line[i] = u"'tis"
            if line[i] == u"'i":
                line[i] = u'in'
            if line[i-1] in [u'.', u'!', u'?']:
                line[i] = line[i].title()
    sonneto = []
    for line in protoSonnet:
        sonneto.append(wordListToSentence(line))
    pretty = ""
    for line in sonneto:
        pretty += line + '\n'
    c = 0
    while c < len(pretty):
        if pretty[c-1:c+1] == u" '":
            try:
                test = pretty[c:c+4]
                if u" " in test:
                    pretty = pretty[:c-1] + pretty[c:]
            except:
                pass
        c += 1
    return pretty
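"""
Generate a sonnet and print it
"""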
def generateSonnet():
    print(protoSonnetToSonnet(createProtoSonnet()))
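"""
Build the tag dictionary from sonnets.json and pickle it to pickleTagDict.pck
"""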
def createPickleTagDict():
    global tagDict, sonnets
    if verbose: print("Analyzing Shakespeare's works...")
    sonnets = load_sonnets("./sonnets.json")
    buildTagDict(sonnets)
    with open('pickleTagDict.pck', 'wb') as handle:
        cPickle.dump(tagDict, handle, protocol=cPickle.HIGHEST_PROTOCOL)
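"""
Load the pickled tag dictionary from pickleTagDict.pck
"""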
def readPickleTagDict():
    global tagDict
    with open('pickleTagDict.pck', 'rb') as handle:
        tagDict = cPickle.load(handle)
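"""
Run generateSonnet in a separate process with a 5-second timeout; if it stalls
on an unworkable structure, kill it and retry with a new one
"""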
def runGenerator():
    p = multiprocessing.Process(target=generateSonnet)
    p.start()
    p.join(5)
    if p.is_alive():
        if verbose: print("Trying a different sonnet structure...\n")
        p.terminate()
        p.join()
        runGenerator()
    return
if verbose: print("reading pickle dict")
readPickleTagDict()
#print tagDict
#print sonnets
if verbose: print("Generating sonnet...\n")
#while True:
runGenerator()