# datapreparation.py
# Copyright 2015, Gil Levi and Tal Hassner
#
# The SOFTWARE provided in this page is provided "as is", without any guarantee made as to its suitability or fitness for any particular use.
# It may contain bugs, so use of this tool is at your own risk. We take no responsibility for any damage of any sort that may unintentionally
# be caused through its use.
#
# The purpose of this repository is to assist readers in reproducing our results on age and gender classification for facial images as
# described in the following work:
#
# Gil Levi and Tal Hassner, Age and Gender Classification Using Convolutional Neural Networks, IEEE Workshop on Analysis and Modeling of
# Faces and Gestures (AMFG), at the IEEE Conf. on Computer Vision and Pattern Recognition (CVPR), Boston, June 2015
#
# Project page: http://www.openu.ac.il/home/hassner/projects/cnn_agegender/
# ==============================================================================
# MIT License
#
# Modifications copyright (c) 2018 Image & Vision Computing Lab, Institute of Information Science, Academia Sinica
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ==============================================================================
import os
import random
import sys
import argparse
# Age-group labels accepted by this script. The position of a label in the
# list is the integer age class written to the output files.
age_list = [
    '(0, 2)',      # class 0
    '(4, 6)',      # class 1
    '(8, 12)',     # class 2
    '(15, 20)',    # class 3
    '(25, 32)',    # class 4
    '(38, 43)',    # class 5
    '(48, 53)',    # class 6
    '(60, 100)',   # class 7
]

# Gender labels accepted by this script; the list position is the integer
# gender class written to the output files ('m' -> 0, 'f' -> 1).
gender_list = [
    'm',
    'f',
]
def _ensure_dir(path):
    """Create directory *path* (and any missing parents) if it is absent."""
    if not os.path.isdir(path):
        os.makedirs(path)


def _read_fold_records(fold_path):
    """Parse one raw fold description file into a list of per-image dicts.

    The first line of the file is treated as a header and skipped, as are
    blank lines. Every remaining line is split on tabs and must provide at
    least five columns; columns 0, 2, 3 and 4 are stored as 'subject_dir',
    'image_subject', 'image_age' and 'image_gender', and column 1 is used as
    the trailing part of the generated 'image_name'.

    Raises:
        IndexError: if a non-blank data line has fewer than five columns.
    """
    with open(fold_path) as fold_file:
        lines = fold_file.readlines()

    records = []
    # lines[1:] drops the header row and, unlike pop(0), tolerates an empty file.
    for line in lines[1:]:
        if not line.strip():
            continue
        # Strip the line terminator so that the last column is never polluted
        # by '\n' when the file has exactly five columns.
        fields = line.rstrip('\r\n').split('\t')
        image_subject = fields[2]
        image_age = fields[3]
        # NOTE(review): kept verbatim from the original script. The replacement
        # value has no comma, so it is not a member of age_list and such a
        # record is still filtered out when the label files are written.
        # Confirm whether '(25, 32)' was intended.
        if image_age == '(25 23)':
            image_age = '(25 32)'
        records.append({
            'subject_dir': fields[0],
            'image_name': 'landmark_aligned_face.{0}.{1}'.format(image_subject, fields[1]),
            'image_subject': image_subject,
            'image_age': image_age,
            'image_gender': fields[4],
        })
    return records


def _write_label_file(out_path, records, indices):
    """Write '<subject_dir>/<image_name> <age idx> <gender idx>' lines to *out_path*.

    Records are emitted in the order given by *indices*. A record is skipped
    when its age label is not in age_list or its gender label is not in
    gender_list. Any existing file at *out_path* is overwritten.
    """
    with open(out_path, 'w') as out_file:
        for ind in indices:
            record = records[ind]
            image_age = record['image_age']
            image_gender = record['image_gender']
            if image_age not in age_list or image_gender not in gender_list:
                continue
            out_file.write('{0}/{1} {2} {3}\n'.format(
                record['subject_dir'],
                record['image_name'],
                age_list.index(image_age),
                gender_list.index(image_gender)))


def main(args):
    """Generate shuffled test/val/train label files for each of the five folds.

    For every fold index k in 0..4 a directory 'test_fold_is_<k>' is created
    under args.outfilesdir and four files are written into it:

    * agegender_test.txt          - records of 'fold_<k>_data.txt'
    * agegender_val.txt           - first tenth of the shuffled other folds
    * agegender_train.txt         - the rest of the shuffled other folds
    * agegender_train_subset.txt  - a leading slice of the training indices

    Args:
        args: namespace providing 'rawfoldsdir' (directory holding the
            'fold_<i>_data.txt' files) and 'outfilesdir' (output root).
    """
    num_folds = 5

    # create output dir
    _ensure_dir(args.outfilesdir)

    for cur_test_fold_ind in range(num_folds):
        # make output dirs
        cur_fold_out_foldername = os.path.join(
            args.outfilesdir, 'test_fold_is_{0}'.format(cur_test_fold_ind))
        _ensure_dir(cur_fold_out_foldername)

        # for test files
        full_test_list = _read_fold_records(os.path.join(
            args.rawfoldsdir, 'fold_{0}_data.txt'.format(cur_test_fold_ind)))
        # Sampling every index without replacement yields a random permutation.
        # A range is used because random.sample() no longer accepts a set.
        test_indices = random.sample(range(len(full_test_list)), len(full_test_list))
        _write_label_file(
            os.path.join(cur_fold_out_foldername, 'agegender_test.txt'),
            full_test_list, test_indices)

        # for train, val files: pool every fold except the current test fold
        full_train_list = []
        for train_fold_ind in range(num_folds):
            if train_fold_ind == cur_test_fold_ind:
                continue
            full_train_list.extend(_read_fold_records(os.path.join(
                args.rawfoldsdir, 'fold_{0}_data.txt'.format(train_fold_ind))))

        images_num = len(full_train_list)
        indices = random.sample(range(images_num), images_num)
        # Floor division keeps the slice bounds integral on Python 3 as well.
        tenth = images_num // 10
        # NOTE(review): as in the original script, indices[tenth] belongs to
        # neither the validation nor the training split.
        splits = (
            ('val', indices[:tenth]),
            ('train', indices[tenth + 1:]),
            ('train_subset', indices[tenth + 1: 2 * tenth]),
        )
        for case, case_indices in splits:
            _write_label_file(
                os.path.join(cur_fold_out_foldername, 'agegender_{0}.txt'.format(case)),
                full_train_list, case_indices)
def parse_arguments(argv):
    """Parse the command-line options of this script.

    Args:
        argv: list of argument strings, without the program name.

    Returns:
        The argparse namespace with the string attributes 'inputdir',
        'rawfoldsdir' and 'outfilesdir'.
    """
    # (flag, default value, help text) for every supported option.
    option_table = (
        ('--inputdir',
         './adiencedb/aligned',
         'directory of adience dataset'),
        ('--rawfoldsdir',
         './DataPreparation/FiveFolds/original_txt_files',
         'directory of raw folds'),
        ('--outfilesdir',
         './DataPreparation/FiveFolds/train_val_test_per_fold_agegender',
         'directory stored the output files separate from raw data'),
    )

    arg_parser = argparse.ArgumentParser()
    for flag, default_value, help_text in option_table:
        arg_parser.add_argument(flag, type=str, default=default_value, help=help_text)
    return arg_parser.parse_args(argv)
# Script entry point: parse the options that follow the program name and
# generate the per-fold label files.
if __name__ == '__main__':
    main(parse_arguments(argv=sys.argv[1:]))