-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhdf2zip_combine.py
96 lines (87 loc) · 3.39 KB
/
hdf2zip_combine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import sys
import h5py
import os
import numpy
import zipfile
import tempfile
import simplejson
from collections import OrderedDict
import copy
# Set to True to print each key (and, for arrays, the dtype kind) while
# the HDF5 tree is being walked by the functions below.
DEBUG = False
#file_in = sys.argv[1]
#file_out = file_in.replace('.nxs', '.nxz') + '.zip'
def make_dir(path):
    """Create the single directory *path* via ``os.mkdir``."""
    os.mkdir(path)
# Short alias for os.path.join, used by the functions below to build
# archive member names and HDF5-style paths.
join_path = os.path.join
def _attr_to_builtin(value):
    """Return *value* as a plain Python object when it is a numpy type."""
    if isinstance(value, numpy.generic):
        # ``.item()`` replaces ``numpy.asscalar``, which was deprecated in
        # NumPy 1.16 and removed in NumPy 1.23.
        return value.item()
    if isinstance(value, numpy.ndarray):
        # Array-valued attributes are not JSON serializable as-is.
        return value.tolist()
    return value


def make_metadata(obj, path=''):
    """Collect the attributes of every member of *obj* into a nested dict.

    Parameters:
        obj: mapping-like HDF5 container (file or group); each ``obj[key]``
            must expose an ``attrs`` mapping.
        path: path of *obj* relative to the file root; extended with each
            key and passed down when recursing into sub-groups.

    Returns:
        dict mapping each key of *obj* to an ``OrderedDict`` of its
        attributes, with numpy scalars and arrays converted to built-in
        Python values.  Entries for ``h5py.Group`` members additionally
        carry a ``'members'`` item holding the metadata of the sub-group.
    """
    metadata = {}
    for key in obj.keys():
        item = obj[key]
        # Build the converted mapping in one pass rather than reassigning
        # entries of a dict while iterating over it.
        newitem = OrderedDict(
            (name, _attr_to_builtin(value))
            for name, value in item.attrs.items()
        )
        if isinstance(item, h5py.Group):
            newitem['members'] = make_metadata(item, join_path(path, key))
        if DEBUG:
            print(key)
            # Serialize what has been gathered so far so that a value that
            # cannot be encoded fails close to the key that introduced it.
            _ = simplejson.dumps(metadata)
        metadata[key] = newitem
    return metadata
def to_zipfile(obj, zipfile, path=''):
    """Recursively export the datasets below *obj* into a zip archive.

    For every group visited, a ``fields.json`` member is written under that
    group's path.  It summarizes the group's datasets as follows:

    * a dataset whose ``target`` attribute differs from its own path
      (presumably a NeXus link -- confirm) is recorded as
      ``{'target': <target>}`` and its data is not written;
    * a dataset with at most one element is stored inline as its value;
    * any other dataset of string, float, signed or unsigned integer kind
      is written to ``<key>.dat`` (tab-delimited text for up to two
      dimensions, JSON for more) and recorded with its archive path and
      shape;
    * datasets of any other dtype kind are reported on stdout and skipped.

    Parameters:
        obj: HDF5 file or group to export.
        zipfile: open, writable archive object providing ``write`` and
            ``writestr``.  NOTE: this parameter shadows the ``zipfile``
            module inside the function; the name is kept for callers that
            pass it by keyword.
        path: archive path of *obj* relative to the archive root.
    """
    # savetxt format for each supported numpy dtype kind.
    formats = {
        'S': '%s',
        'f': '%.8g',
        'i': '%d',
        'u': '%d',
    }
    summary = OrderedDict()
    for key in obj.keys():
        val = obj[key]
        if isinstance(val, h5py.Group):
            to_zipfile(val, zipfile, join_path(path, key))
            continue

        fname = join_path(path, key + '.dat')
        own_path = join_path('/', path, key)
        if 'target' in val.attrs and val.attrs['target'] != own_path:
            print("%s %s" % (val.attrs['target'], own_path))
            summary[key] = OrderedDict([['target', val.attrs['target']]])
            continue

        # numpy.prod replaces the alias numpy.product (removed in NumPy 2.0).
        if numpy.prod(val.shape) <= 1:
            # ``val[()]`` replaces ``Dataset.value`` (removed in h5py 3.0).
            summary[key] = val[()].tolist()
            continue

        value = val[()]
        kind = value.dtype.kind
        if kind not in formats:
            print("unknown type of array:  %s %s" % (fname, value.dtype))
            continue

        fd, fn = tempfile.mkstemp()
        os.close(fd)  # to be opened by name
        try:
            if DEBUG:
                print("%s %s" % (fname, kind))
            if len(value.shape) > 2:
                # savetxt only handles 1-D and 2-D arrays.
                with open(fn, 'w') as f:
                    simplejson.dump(value.tolist(), f)
            else:
                numpy.savetxt(fn, value, delimiter='\t', fmt=formats[kind])
            zipfile.write(fn, fname)
        finally:
            # Never leave the temporary file behind, even if writing failed.
            os.remove(fn)
        summary[key] = OrderedDict(
            [['target', join_path('/', fname)], ['shape', val.shape]]
        )
    zipfile.writestr(
        join_path(path, 'fields.json'),
        simplejson.dumps(summary, indent=' '),
    )
def to_zip(hdfname, zipname='data.zip'):
    """Convert the HDF5 file *hdfname* into the zip archive *zipname*.

    The archive receives the exported datasets and per-group
    ``fields.json`` summaries produced by ``to_zipfile``, plus a
    ``.metadata`` member holding the JSON attribute tree produced by
    ``make_metadata``.

    Parameters:
        hdfname: path of the HDF5 file to read.
        zipname: path of the zip archive to create (overwritten if present).
    """
    # Open read-only explicitly: without a mode, h5py older than 3.0 opens
    # the file read/write.  Both context managers guarantee that the input
    # and the archive are closed even if the export fails part-way.
    with h5py.File(hdfname, 'r') as obj:
        with zipfile.ZipFile(zipname, 'w', compression=zipfile.ZIP_DEFLATED) as z:
            to_zipfile(obj, z)
            metadata = make_metadata(obj)
            # Write the JSON straight into the archive, as is done for
            # fields.json, instead of going through a temporary file.
            z.writestr('.metadata', simplejson.dumps(metadata, indent=' '))
if __name__ == '__main__':
    # Command-line entry point: convert the file named by the first
    # argument, deriving the output name by replacing '.nxs' with '.nxz'
    # and appending '.zip'.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: %s <file.nxs>" % sys.argv[0])
    file_in = sys.argv[1]
    file_out = file_in.replace('.nxs', '.nxz') + '.zip'
    to_zip(file_in, file_out)