-
Notifications
You must be signed in to change notification settings - Fork 0
/
image_processing.py
68 lines (51 loc) · 1.64 KB
/
image_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File written by adc2181
import requests
import numpy as np
import cv2
from os import path
import os
from progressbar import ProgressBar
from multiprocessing import Process
# Directory that get_image() writes downloaded images into (trailing slash
# included because the file name is appended by string concatenation).
images_dir = './data/imagenet/images/'
def get_word(wnid, timeout=10):
    """Look up the word for a WordNet synset id via the ImageNet text API.

    Args:
        wnid: WordNet id string inserted into the ``wnid`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The last line of the stripped response body.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'http://www.image-net.org/api/text/wordnet.synset.getwords?wnid={}'
    result = requests.get(url.format(wnid), timeout=timeout)
    # Fail loudly instead of parsing an HTTP error page as if it were a word.
    result.raise_for_status()
    # Only take the last word
    word = result.text.strip().split('\n')[-1]
    return word
def get_image(url, word, timeout=10):
    """Download ``url`` and store it in ``images_dir`` as ``<word>.jpg``.

    When ``<word>.jpg`` is already taken, ``<word>_1.jpg``, ``<word>_2.jpg``,
    ... are tried in turn until an unused name is found. The download is best
    effort: on any failure nothing is written and the function just returns.

    Args:
        url: Address of the image to fetch.
        word: Label used as the stem of the file name.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None.
    """
    # Fetch first so that no file name is claimed for a failed download.
    try:
        response = requests.get(url, timeout=timeout)
        # Do not store an HTTP error page under a .jpg name.
        response.raise_for_status()
        image = response.content
    except Exception:
        # Best effort, as before: a failed download is skipped silently.
        return

    os.makedirs(images_dir, exist_ok=True)
    # A '/' in the label would point into a non-existent subdirectory.
    stem = word.replace('/', '_')

    suffix = ''
    i = 1
    while True:
        filename = path.join(images_dir, '{}{}.jpg'.format(stem, suffix))
        try:
            # 'x' creates the file only if it does not exist yet. This
            # replaces the old os.listdir() test, which compared a full path
            # against bare file names and therefore never matched, and it
            # stays correct when several download processes run at once.
            with open(filename, 'xb') as file:
                file.write(image)
            return
        except FileExistsError:
            suffix = '_' + str(i)
            i += 1
def downscale_image(filename, size=(28, 28), out_dir='./data/imagenet/scaled/'):
    """Write a resized copy of an image into ``out_dir``.

    The copy keeps the base name of ``filename`` and is resampled with
    bicubic interpolation (``cv2.INTER_CUBIC``).

    Args:
        filename: Path of the image to read.
        size: Target size passed to ``cv2.resize`` as ``dsize``
            (``(width, height)`` in pixels).
        out_dir: Directory that receives the scaled copy; created if missing.

    Raises:
        OSError: If the source cannot be read as an image or the scaled copy
            cannot be written.
    """
    new_filename = path.join(out_dir, path.basename(filename))
    old = cv2.imread(filename)
    if old is None:
        # cv2.imread reports an unreadable file by returning None; without
        # this check cv2.resize fails with an unrelated-looking error.
        raise OSError('could not read image: {}'.format(filename))
    res = cv2.resize(old, dsize=size, interpolation=cv2.INTER_CUBIC)
    os.makedirs(out_dir, exist_ok=True)
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(new_filename, res):
        raise OSError('could not write image: {}'.format(new_filename))
def download_imagenet(num=10000, max_procs=16):
    """Download a random sample of the images listed in the URL map file.

    Reads ``./data/imagenet/fall11_urls.txt`` (tab-separated image id and URL
    per line), shuffles the lines and, for the first ``num`` of them, resolves
    the label with ``get_word`` and fetches the image with ``get_image`` in a
    child process.

    Args:
        num: Number of lines to take from the shuffled list.
        max_procs: Upper bound on simultaneously running download processes
            (values below 1 are treated as 1).
    """
    map_file = './data/imagenet/fall11_urls.txt'
    with open(map_file, 'rb') as f:
        urls = f.readlines()
    np.random.shuffle(urls)

    max_procs = max(1, max_procs)
    bar = ProgressBar(term_width=50)
    running = []
    try:
        for line in bar(urls[:num]):
            # Undecodable bytes are replaced rather than aborting the run.
            fields = line.decode('UTF-8', errors='replace').split('\t')
            if len(fields) < 2:
                # Malformed line without a URL column.
                continue
            im_id = fields[0]
            url = fields[1].strip()
            wnid = im_id.split('_')[0]
            word = get_word(wnid)

            # Forget finished children, then wait for the oldest one if the
            # cap is reached so the number of processes stays bounded.
            running = [p for p in running if p.is_alive()]
            while len(running) >= max_procs:
                running.pop(0).join()

            # The image is fetched only in the child process; the former
            # additional direct get_image() call downloaded every file twice.
            p = Process(target=get_image, args=(url, word))
            p.start()
            running.append(p)
    finally:
        # Wait for outstanding downloads, also when the loop raised.
        for p in running:
            p.join()
# Start the download only when this file is executed as a script. The guard
# also matters for multiprocessing: a child process that re-imports this
# module (as the 'spawn' start method does) must not start the download again.
if __name__ == '__main__':
    download_imagenet()