haiku.py
import torchvision.models as models
import torchvision.transforms as transforms
import json
from PIL import Image
import torch
import transformers
from transformers import GPT2Tokenizer, GPT2Model, GPT2LMHeadModel
# libraries for generating poems
from transformers import pipeline, set_seed
import nltk
nltk.download("stopwords")
nltk.download("wordnet")
set_seed(42)
import random
import re
import syllapy
from nltk.corpus import stopwords
from nltk.corpus import wordnet

# candidate first words for seeding a poem: English stop words minus pronouns,
# contractions, and other words that read poorly at the start of a line
first_words = set(stopwords.words('english'))
first_words = list(first_words - set(("ain","am","an","and","aren","aren't","at","be","been","being","between","both","by","couldn","couldn't","d","doesn","doesn't","doing","don","don't","down","during","further","hadn","hadn't","hasn","hasn't","haven","haven't",
    "he","her","here","hers","herself","him","himself","i","isn","isn't","it","it's","its","itself","ll","m","ma","me","mightn","mightn't","mustn","mustn't","myself","needn","needn't","not","o","of","off","on","once",
    "or","other","ours","ourselves","out","over","own","re","s","same","shan","shan't","she","she's","shouldn","shouldn't","t","than","that'll","theirs","them","themselves","there","these","they","those","through",
    "too","up","ve","very","was","wasn","wasn't","we","were","weren","weren't","who","whom","won","won't","wouldn","wouldn't","y","you","you'd","you'll","you're","you've","yours","yourself","yourselves","didn","didn't","did","should've")))

from random_word import RandomWords
r = RandomWords()

# libraries for using CLIP
import pandas as pd
import numpy as np
import clip
from IPython.display import Image as im
import os
from torchvision.datasets import CIFAR100
from sklearn.linear_model import LogisticRegression
from torch.utils.data import DataLoader
from tqdm import tqdm
def gen_story(beginning_line, sub_first=False):
    '''Given the first few words, generates a dramatic sentence. If sub_first is True,
    returns the sentence without the beginning_line that was fed in.'''
    story_gen = pipeline('text-generation', model="pranavpsv/gpt2-genre-story-generator")
    beginning_line = "<BOS> <drama>" + ' ' + beginning_line
    results = story_gen(beginning_line)
    for text in results:
        output = text['generated_text'].lower()
        if sub_first:
            output = re.sub(re.escape(beginning_line.lower()), '', output)
        output = re.sub("<bos> <drama>", '', output)
        output = re.sub('[\n]', '', output)        # removing newlines
        output = re.sub(r"[^\w\s']", ' ', output)  # removing other punctuation
        output = re.sub(r'[\d]', '', output)       # removing numbers
        output = re.sub(' +', ' ', output)         # removing double spaces
    return output
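# Illustrative sketch, not part of the original pipeline: gen_story takes a short
# seed phrase and returns a cleaned, lowercased continuation with the genre tag,
# punctuation, and digits stripped. The seed phrase below is hypothetical; the
# actual output depends on the GPT-2 story model and the fixed seed.
def _demo_gen_story():
    continuation = gen_story("about the cat", sub_first=False)
    print(continuation)  # e.g. a lowercased dramatic sentence starting with the seed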
def syllable_counter(sentence, max_syllables, last_line=False):
    '''Takes the original sentence and returns a line trimmed to at most the specified
    number of syllables, padding with extra words if it falls short.'''
    count = 0
    line = ''
    # add words to the line while the running syllable count stays within the limit
    for word in sentence.split():
        new_count = syllapy.count(word)
        if count + new_count <= max_syllables:
            count += new_count
            line = line + word + ' '
        else:
            break
    # set of all nouns known to WordNet, used to validate candidate filler words
    nouns = {x.name().split('.', 1)[0] for x in wordnet.all_synsets('n')}
    if not last_line:
        # pad with one-syllable filler words until the line reaches the target count
        additional_words = ['life', 'need', 'heart']
        while count < max_syllables:
            line += random.choice(additional_words) + ' '
            count += 1
    else:
        # for the last line, try to finish on a noun that exactly fills the
        # remaining syllables
        needed = max_syllables - count
        if needed in (1, 2, 3):
            # collect synonyms and antonyms of the word that pushed us over the limit
            synonyms = []
            antonyms = []
            for syn in wordnet.synsets(word):
                for l in syn.lemmas():
                    synonyms.append(l.name())
                    if l.antonyms():
                        antonyms.append(l.antonyms()[0].name())
            confirmed = 0
            while confirmed == 0:
                # first look for a synonym or antonym that is a noun with the
                # right syllable count
                for i in synonyms + antonyms:
                    if syllapy.count(i) == needed and i in nouns:
                        line += i
                        confirmed += 1
                        break
                if confirmed:
                    break
                # otherwise keep drawing random dictionary nouns until one fits
                b = r.get_random_word(hasDictionaryDef="true", includePartOfSpeech="noun")
                if b and syllapy.count(b) == needed and b in nouns:
                    line += b
                    confirmed += 1
    return line
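# Illustrative sketch, not part of the original pipeline: how syllapy's per-word
# estimates drive the truncation above. The sentence is hypothetical, and
# syllapy.count is only an estimate, so exact counts may vary.
def _demo_syllable_truncation():
    sentence = "the quiet cat watches the falling autumn rain"
    print([(w, syllapy.count(w)) for w in sentence.split()])  # per-word syllable counts
    print(syllable_counter(sentence, 5))  # keeps words until five syllables are reached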
def gen_poem2(label):
    '''Generates a 5-7-5 haiku seeded by the given label.'''
    label = re.sub('_', ' ', label)
    # seed the first line with a random stop word followed by the label
    first_word = random.choice(first_words) + ' ' + label
    sentence = gen_story(first_word, False)
    first_line = syllable_counter(sentence, 5)
    # continue the story from the first line to get the second line
    second_line = gen_story(first_line, True)
    second_line = syllable_counter(second_line, 7)
    # continue from the first two lines to get the last line, ending on a noun if short
    total = first_line + ' ' + second_line
    third_line = gen_story(total, True)
    third_line = syllable_counter(third_line, 5, last_line=True)
    haiku = first_line.strip() + '\n' + second_line.strip() + '\n' + third_line.strip()
    return haiku
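# Illustrative sketch, not part of the original pipeline: gen_poem2 returns three
# newline-separated lines aiming at a 5-7-5 syllable pattern, seeded by an
# ImageNet-style class name. "tabby_cat" here is a hypothetical label.
def _demo_gen_poem2():
    print(gen_poem2("tabby_cat"))  # underscores are replaced with spaces internally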
# classify the input image with VGG16 to get a seed label for the haiku
img = Image.open('imgs/cat.jpg').convert('RGB')  # input image
vgg16 = models.vgg16(pretrained=True)
vgg16.eval()
transform = transforms.Compose([transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor()])
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
img_2 = normalize(transform(img))
img_2 = img_2.unsqueeze(0)
prediction = vgg16(img_2)
predicted_idx = prediction.data.numpy().argmax()
# load the ImageNet class labels and look up the predicted class name
labels = json.load(open('imagenet_class_index.json'))
img_label = labels[str(predicted_idx)][1]
img_label = re.sub('_', ' ', img_label)
list_haikus = [gen_poem2(img_label)]
#for i in range(10):
#    haiku = gen_poem2(img_label)
#    list_haikus.append(haiku)
label = []
percent = []
# Load the CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('ViT-B/32', device)
# Prepare the inputs
image = img
image_input = preprocess(image).unsqueeze(0).to(device)
text_inputs = torch.cat([clip.tokenize(f"a photo of a {c}", truncate=True) for c in list_haikus]).to(device)
# Calculate features
with torch.no_grad():
    image_features = model.encode_image(image_input)
    text_features = model.encode_text(text_inputs)
# Pick the candidate haiku most similar to the image
image_features /= image_features.norm(dim=-1, keepdim=True)
text_features /= text_features.norm(dim=-1, keepdim=True)
similarity = (100.0 * image_features @ text_features.T).softmax(dim=-1)
values, indices = similarity[0].topk(1)
# Print the result
print("\nTop prediction:\n")
for value, index in zip(values, indices):
    print(f"{list_haikus[index]}: {100 * value.item():.2f}%")
    label.append(list_haikus[index])
    percent.append(value.item())
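# Illustrative sketch, not part of the original run: the commented-out loop above
# suggests generating several candidate haikus and letting CLIP pick the one most
# similar to the image. A helper along those lines might look like this; the
# candidate count and the use of the bare haiku text (without the "a photo of a"
# template) are choices made here, not taken from the original script.
def rank_haikus_with_clip(seed_label, pil_image, n_candidates=5):
    candidates = [gen_poem2(seed_label) for _ in range(n_candidates)]
    image_in = preprocess(pil_image).unsqueeze(0).to(device)
    text_in = torch.cat([clip.tokenize(h, truncate=True) for h in candidates]).to(device)
    with torch.no_grad():
        image_feat = model.encode_image(image_in)
        text_feat = model.encode_text(text_in)
    image_feat /= image_feat.norm(dim=-1, keepdim=True)
    text_feat /= text_feat.norm(dim=-1, keepdim=True)
    scores = (100.0 * image_feat @ text_feat.T).softmax(dim=-1)[0]
    best = scores.argmax().item()
    return candidates[best], scores[best].item()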