-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlfw.py
104 lines (87 loc) · 2.59 KB
/
lfw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os
import glob
import shutil
import cv2
import dlib
import numpy as np
from tqdm import tqdm
import requests
import tarfile
# Module-level dlib frontal-face detector: built once at import time and
# reused by detect_face() for every image.
detector = dlib.get_frontal_face_detector()
def download():
    """Download the LFW archive to ``data/lfw.tgz`` and unpack it into ``data``.

    The ``data`` directory must already exist. The archive is streamed to
    disk in small chunks while a progress bar tracks the bytes written.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        OSError: if the archive cannot be written or unpacked.
        tarfile.TarError: if the downloaded file is not a valid archive.
    """
    print('... downloading')
    url = 'http://vis-www.cs.umass.edu/lfw/lfw.tgz'
    path = 'data/lfw.tgz'

    # stream=True keeps the archive out of memory; the context manager
    # releases the connection even when the transfer fails half-way.
    with requests.get(url, stream=True) as r:
        # Fail loudly instead of saving an HTML error page as lfw.tgz.
        r.raise_for_status()
        # Take the size from the response actually being read (a separate
        # HEAD request does not follow redirects by default). A missing
        # header yields total=None, i.e. a progress bar without a total.
        file_size = int(r.headers.get('content-length', 0)) or None
        with open(path, 'wb') as f, \
                tqdm(total=file_size, unit='b', unit_scale=True) as pbar:
            for chunk in r.iter_content(chunk_size=1024):
                f.write(chunk)
                pbar.update(len(chunk))

    # NOTE(review): the archive is fetched over plain HTTP and extracted
    # without validating member paths. On Python versions that support
    # tarfile extraction filters, consider passing filter='data'.
    with tarfile.open(path, 'r') as tar:
        tar.extractall('data')
def detect_face(img):
    """Return the single face found in ``img`` as a 96x96 crop, or ``None``.

    Args:
        img: Image array such as the one returned by ``cv2.imread``.
            ``None`` (what ``cv2.imread`` returns for an unreadable file)
            is accepted and treated as "no face".

    Returns:
        The detected face region resized to 96x96, or ``None`` when the
        image is missing, when the detector does not report exactly one
        face, or when the detected box reaches outside the image.
    """
    # cv2.imread reports an unreadable file by returning None, not by raising.
    if img is None:
        return None

    h, w = img.shape[:2]
    # The second argument is dlib's upsampling count (1 = upsample once).
    dets = detector(img, 1)
    # Ambiguous images (zero or several faces) are rejected outright.
    if dets is None or len(dets) != 1:
        return None

    d = dets[0]
    # Reject boxes that leave the frame: negative indices would wrap around
    # in the slice below and produce a wrong crop instead of an error.
    if d.left() < 0 or d.top() < 0 or d.right() > w or d.bottom() > h:
        return None

    face = img[d.top():d.bottom(), d.left():d.right()]
    return cv2.resize(face, (96, 96))
def _extract_faces(paths, out_dir):
    """Detect, save and collect the face crop of every image in ``paths``.

    Each accepted crop is written to ``out_dir`` as ``<index>.jpg``, where
    the zero-padded index is the image's position in ``paths``. Images that
    are skipped therefore leave gaps in the numbering.

    Returns:
        A list with the accepted face crops, in input order.
    """
    faces = []
    # Iterating through tqdm closes the progress bar even on an exception.
    for i, path in enumerate(tqdm(paths)):
        img = cv2.imread(path)
        # cv2.imread returns None for unreadable files; skip those here so
        # that detect_face is never handed a missing image.
        face = None if img is None else detect_face(img)
        if face is None:
            continue
        faces.append(face)
        cv2.imwrite(os.path.join(out_dir, '{0:05d}.jpg'.format(i)), face)
    return faces


def preprocess():
    """Crop the faces of the extracted LFW images and store them.

    Every file under ``data/lfw/<person>/`` is collected, the list is
    shuffled and split 95% / 5% into train and test. For each image with
    exactly one usable face the 96x96 crop is saved as a JPEG under
    ``data/raw/train`` or ``data/raw/test``; all crops of a split are also
    stacked into ``data/npy/x_train.npy`` and ``data/npy/x_test.npy`` as
    ``uint8`` arrays.

    Output directories are created when missing and reused when present.
    JPEG files left over from an earlier run are not removed.
    """
    print('... loading data')
    # makedirs also creates the intermediate 'data/raw' directory, and
    # exist_ok lets the step be re-run after a partial or earlier run.
    for directory in ('data/raw/train', 'data/raw/test', 'data/npy'):
        os.makedirs(directory, exist_ok=True)

    persons = glob.glob('data/lfw/*')
    paths = np.array([
        image_path
        for person in persons
        for image_path in glob.glob(os.path.join(person, '*'))
    ])
    np.random.shuffle(paths)

    # First 95% of the shuffled paths form the training split.
    r = int(len(paths) * 0.95)
    x_train = _extract_faces(paths[:r], 'data/raw/train')
    x_test = _extract_faces(paths[r:], 'data/raw/test')

    np.save('data/npy/x_train.npy', np.array(x_train, dtype=np.uint8))
    np.save('data/npy/x_test.npy', np.array(x_test, dtype=np.uint8))
def main():
    """Create the ``data`` directory, then download and preprocess LFW."""
    # exist_ok: a data/ directory that is already there (for example one
    # created by hand) must not abort the run before anything is downloaded.
    os.makedirs('data', exist_ok=True)
    download()
    preprocess()
# Run the pipeline only when this file is executed as a script, so that
# importing the module does not trigger main().
if __name__ == '__main__':
    main()