This repository has been archived by the owner on Oct 19, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 69
/
utils.py
64 lines (57 loc) · 1.99 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""Train List 训练列表
Format (one entry per line):
ImagePath Label
Example:
/data/WebFace/0124920/003.jpg 10572
/data/WebFace/0124920/012.jpg 10572
/data/WebFace/0124920/020.jpg 10572
"""
import os
import os.path as osp
from imutils import paths
def generate_list(images_directory, saved_name=None):
    """Build a training list with one "<path> <label>" line per file.

    Every sub-directory of ``images_directory`` is treated as one identity
    (the layout used by WebFace and LFW). Each entry found inside a
    sub-directory is listed together with that identity's integer label.

    Args:
        images_directory: Root directory holding one sub-directory per
            identity.
        saved_name: Optional path of a text file to write the list to
            (UTF-8). Nothing is written when it is ``None`` or empty.

    Returns:
        A list of newline-terminated strings of the form
        ``"<path> <label>"``. Labels are 0-based and follow the sorted order
        of the identity directory names.
    """
    # os.listdir() returns entries in arbitrary order, so sort to make the
    # identity -> label mapping reproducible across runs and machines.
    # Top-level regular files are not identities: skipping them keeps the
    # labels contiguous and avoids os.listdir() failing on a non-directory.
    identity_dirs = sorted(
        name for name in os.listdir(images_directory)
        if osp.isdir(osp.join(images_directory, name))
    )

    data_list = []
    for label, name in enumerate(identity_dirs):
        subdir = osp.join(images_directory, name)
        # The identity's index doubles as its face label.
        data_list.extend(
            f"{osp.join(subdir, filename)} {label}\n"
            for filename in sorted(os.listdir(subdir))
        )

    if saved_name:
        with open(saved_name, 'w', encoding='utf-8') as f:
            f.writelines(data_list)
    return data_list
def transform_clean_list(webface_directory, cleaned_list_path):
    """Convert WebFace's cleaned list into entries rooted at the dataset.

    Each non-blank line of the cleaned list has its backslashes replaced
    with forward slashes and is then joined onto ``webface_directory``.
    Whatever follows the path on a line (including the trailing newline) is
    kept unchanged.

    Args:
        webface_directory: WebFace data directory to prefix to each entry.
        cleaned_list_path: Path of ``cleaned_list.txt`` (read as UTF-8).

    Returns:
        The converted lines, in file order, with blank lines omitted.
    """
    with open(cleaned_list_path, encoding='utf-8') as f:
        return [
            osp.join(webface_directory, line.replace('\\', '/'))
            for line in f
            # A blank line carries no path; joining it would produce a bogus
            # entry consisting of just the dataset directory.
            if line.strip()
        ]
def remove_dirty_image(webface_directory, cleaned_list):
    """Delete images under ``webface_directory`` missing from ``cleaned_list``.

    Args:
        webface_directory: Directory passed to ``paths.list_images`` to
            enumerate the candidate images.
        cleaned_list: Iterable of ``"<path> <label>"`` entries naming the
            images to keep. Blank entries are ignored.

    Returns:
        None. Each deleted file is reported on stdout before it is removed.
    """
    keep = set()
    for entry in cleaned_list:
        # Split the label off from the right so that a path which itself
        # contains spaces is not truncated at its first space.
        fields = entry.rsplit(maxsplit=1)
        if fields:  # a blank entry names no file
            keep.add(osp.normpath(fields[0]))

    for p in paths.list_images(webface_directory):
        # Compare normalized forms so equivalent spellings of the same path
        # (doubled or trailing separators, "./" segments) do not cause a
        # clean image to be deleted.
        if osp.normpath(p) not in keep:
            print(f"remove {p}")
            os.remove(p)
if __name__ == '__main__':
    # Script entry point: rewrite the cleaned list so its entries point into
    # the dataset directory, then delete every image that list does not name.
    webface_root = '/data/CASIA-WebFace/'
    cleaned_list_file = '/data/cleaned_list.txt'
    remove_dirty_image(
        webface_root,
        transform_clean_list(webface_root, cleaned_list_file),
    )