-
Notifications
You must be signed in to change notification settings - Fork 0
/
delete_duplicates.py
45 lines (40 loc) · 1.11 KB
/
delete_duplicates.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import argparse
# Command-line interface.  Parsing happens at module level because the
# functions in this file read the resulting ``args`` namespace as a global.
parser = argparse.ArgumentParser(
    description='delete duplicate files (based MD5-hashdigest)'
)
# Without --delete the script only reports what it would remove (dry-run).
parser.add_argument(
    '--delete',
    action='store_true',
    help='set to delete actually. (default: dry-run)')
# The explicit default matches the help text.  Without it argparse stores
# None for an omitted option, and that None ends up formatted into file
# paths as the literal text "None".
parser.add_argument(
    '-d', '--path',
    action='store',
    dest='path',
    type=str,
    default='.',
    help='set dir path. (default: .)')
args = parser.parse_args()
def get_file_names(dir_='.'):
    """Return the sorted names of the files directly inside *dir_*.

    Only the directory itself is scanned (no recursion).  The result holds
    bare entry names, not full paths, for every entry that
    ``DirEntry.is_file()`` reports as a file.

    Args:
        dir_: Directory to scan; defaults to the current directory.

    Returns:
        A list of file names in ascending order.

    Raises:
        OSError: If *dir_* cannot be scanned (e.g. it does not exist).
    """
    from os import scandir

    # The context manager closes the directory handle deterministically,
    # even if an entry check raises part-way through the scan.
    with scandir(dir_) as entries:
        return sorted(entry.name for entry in entries if entry.is_file())
def remove_duplicate(filelist):
    """Delete (or, in dry-run mode, only report) files with repeated content.

    Files are visited in the order given by *filelist*.  The first file seen
    with a given MD5 digest is kept; every later file with the same digest
    is printed and, when ``args.delete`` is true, removed from disk.

    Reads the module-level ``args`` namespace: ``args.path`` is the directory
    the names are relative to, ``args.delete`` switches from dry-run to
    actual deletion.

    Args:
        filelist: Iterable of file names located in ``args.path``.

    Raises:
        OSError: If a file cannot be opened, read or removed.
    """
    from hashlib import md5
    from os import remove
    from os.path import join

    # ``args.path`` may be None (argparse's value for an omitted option that
    # has no default); fall back to the current directory instead of
    # building paths such as "None/<name>".
    base_dir = args.path or '.'
    # Hash in fixed-size pieces so large files are never loaded whole.
    chunk_size = 1024 * 1024

    seen_digests = set()
    for name in filelist:
        fpath = join(base_dir, name)
        hasher = md5()
        with open(fpath, 'rb') as fd:
            for chunk in iter(lambda: fd.read(chunk_size), b''):
                hasher.update(chunk)
        digest = hasher.hexdigest()

        if digest not in seen_digests:
            # First occurrence of this content: remember it and keep the file.
            seen_digests.add(digest)
            continue

        if args.delete:
            print(f'delete: {fpath}')
            remove(fpath)
        else:
            print(f'delete(dry-run): {fpath}')
def main():
    """List the files in ``args.path`` and hand them to ``remove_duplicate``."""
    remove_duplicate(get_file_names(args.path))
# Run the tool only when this file is executed as a script.
if __name__ == '__main__':
    main()