-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdpimport_hash.py
executable file
·80 lines (58 loc) · 2.08 KB
/
dpimport_hash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
from glob import glob
from hashlib import md5
from os.path import abspath, basename, dirname, isfile, join as pjoin
from time import sleep
from shutil import move
from glob import iglob
import sys
import numpy as np
from pickle import UnpicklingError
# Paths of the shared hash repository, which lives next to this script.
prefix = pjoin(abspath(dirname(__file__)), '.dpimport_hash')
hash_repo = prefix + '.npy'
# The locked name keeps the .npy suffix: np.save() appends ".npy" to any
# file name that does not already end with it, so a name ending in ".lock"
# would be written back under a different path than the one being renamed.
hash_repo_locked = prefix + '.lock.npy'

# One hash repository is used for all data types, i.e. all importers.
# While one importer is modifying it, the others must not use it, so for
# the duration of its use it is renamed to .dpimport_hash.lock.npy.
while True:
    try:
        # The rename is both the availability check and the lock.  Checking
        # with isfile() first and renaming afterwards would let two
        # importers pass the check together and crash the slower one.
        move(hash_repo, hash_repo_locked)
    except FileNotFoundError:
        # It is being used by another importer:
        # retry to take it after 5 minutes.
        print(hash_repo,'could not be found at this time')
        print('\tgoing to sleep for 5 minutes ...')
        sleep(300)
    else:
        break

# hash_record is a dictionary with key,value pairs
#   key=AB-AB12345-assessment
#   value=[hash,True/False]
#   True=modified since last read
try:
    hash_record = np.load(hash_repo_locked, allow_pickle=True).item()
except (UnpicklingError, EOFError):
    # Initial case: the repository exists but holds no saved dictionary yet.
    # NumPy reports an uninterpretable file as UnpicklingError and a file
    # with no data left to read (e.g. a zero-byte file) as EOFError.
    hash_record = {}
except Exception:
    # Any other failure: hand the repository back before re-raising, so the
    # other importers are not left waiting on a lock that nobody holds.
    move(hash_repo_locked, hash_repo)
    raise

print('hash_record found, calculating and comparing hashes ...')
# Hash every file matching the glob pattern given as the first command-line
# argument and fold the result into hash_record.  The whole step runs under
# try/finally so the repository is always renamed back to hash_repo; without
# that, any error here would leave it locked and every other importer would
# wait for it indefinitely.
try:
    for i, path in enumerate(iglob(sys.argv[1])):
        try:
            # The with-statement closes the file; no explicit close() needed.
            with open(path) as f:
                content = f.read().strip().encode('utf-8')
        except FileNotFoundError:
            # The file disappeared between globbing and opening; skip it.
            print(path,'could not be read\n')
            continue

        digest = md5(content).hexdigest()
        key = basename(path)
        if key not in hash_record:
            # First time this file is seen: store its hash, not flagged.
            hash_record[key] = [digest, False]
        elif hash_record[key][0] != digest:
            # Content changed since the stored hash: update it and flag it.
            hash_record[key][0] = digest
            hash_record[key][1] = True

        print(i, key, hash_record[key])

    print('hash calculation complete')

    # Write the record back only after every file was processed, so a
    # failure part-way through leaves the stored repository untouched.
    np.save(hash_repo_locked, hash_record)
finally:
    # Release the hash_repo for the other importers.
    move(hash_repo_locked, hash_repo)