-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathHDF5toJSON.py
133 lines (100 loc) · 3.57 KB
/
HDF5toJSON.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# -*- coding: utf-8 -*-
## HDF5 to JSON converter
## Author - Janu Verma
##############################################################################################
import json
import sys
from collections import defaultdict
import re
import io
# Third-party dependencies. Only a failed import is treated as "missing
# package"; in that case report it and stop with a non-zero status, because
# nothing below can run without these modules.
try:
    import numpy
except ImportError:
    print("Error : Requires numpy")
    sys.exit(1)
try:
    from tables import *
except ImportError:
    print("Error : Requires PyTables")
    sys.exit(1)

# The HDF5 file to convert is the first command-line argument.
if len(sys.argv) < 2:
    print("Usage: %s <input.h5>" % sys.argv[0])
    sys.exit(1)
h5file = open_file(sys.argv[1])
##########################################################################################
class converter:
    """
    Converts the contents of an HDF5 file into JSON.

    On construction the group hierarchy of ``input_file`` is walked once and
    cached in three attributes:

    * ``allGroups`` -- names of every group, in ``walk_groups()`` order.
    * ``groupParentDict`` -- parent group name -> list of child group names.
    * ``groupContentsDict`` -- group name -> dict holding that group's
      arrays, tables and (nested) sub-groups, so the contents of a group can
      be accessed directly without following the hierarchy.

    Groups are keyed by their ``_v_name`` only, so two groups with the same
    name under different parents end up sharing one entry.
    """

    def __init__(self, input_file):
        """
        Walk ``input_file`` and cache the contents of every group.

        input_file : an open PyTables file handle. Only ``walk_groups()``,
            ``list_nodes()`` and, when present, ``filename`` are used.
        """
        # Prefer the handle's own file name so the converter does not depend
        # on the command line; fall back to sys.argv[1] when it has none.
        source_name = getattr(input_file, 'filename', None)
        if source_name is None:
            source_name = sys.argv[1]
        self.file_name = re.sub(r'\.h5$', '', source_name)
        self.groupParentDict = defaultdict(list)
        self.groupContentsDict = {}
        self.file = input_file
        self.allGroups = []
        for group in input_file.walk_groups():
            name = group._v_name
            parent = group._v_parent._v_name
            self.allGroups.append(name)
            self.groupParentDict[parent].append(name)
            # setdefault (rather than a fresh {}) keeps the dict that a parent
            # may already have linked to for this group.
            contents = self.groupContentsDict.setdefault(name, {})
            for array in input_file.list_nodes(group, classname="Array"):
                contents[array._v_name] = array.read()
            for gp in input_file.list_nodes(group, classname="Group"):
                gp_name = gp._v_name
                # The child may not have been visited yet (PyTables documents
                # walk_groups() as listing a group before its children), so
                # create its dict on demand; it is filled in place later and
                # the parent sees the result through the shared reference.
                contents[gp_name] = self.groupContentsDict.setdefault(gp_name, {})
            for table in input_file.list_nodes(group, classname="Table"):
                contents[table._v_name] = table.read()

    def jsonOutput(self):
        """
        Write all the information read from the HDF5 file to a JSON file.

        The output file is named after the input file, with a trailing
        ``.h5`` removed and ``.json`` appended, and is UTF-8 encoded.
        When decoded the file contains a dictionary keyed by group name;
        each value is a nested dictionary of that group's contents.
        Returns None.
        """
        json_file_name = self.file_name + '.json'
        # Serialize before opening so a failed dump does not leave an empty
        # output file behind.
        document = json.dumps(self.groupContentsDict,
                              cls=NumpyAwareJSONEncoder, ensure_ascii=False)
        # io.open() in text mode only accepts text; json.dumps can hand back
        # a byte string on Python 2.
        if isinstance(document, bytes):
            document = document.decode('utf-8')
        with io.open(json_file_name, 'w', encoding='utf-8') as f:
            f.write(document)

    def Groups(self):
        """
        Returns a JSON list of all the groups in the HDF5 file.
        Helpful in exploring the file and getting an idea of the contents.
        """
        return json.dumps(self.allGroups, cls=NumpyAwareJSONEncoder)

    def subgroups(self, group):
        """
        Returns a JSON list of the subgroups of the group.
        An unknown group yields an empty list.
        """
        # .get() avoids inserting an empty entry into the defaultdict for
        # names that are only being queried.
        children = self.groupParentDict.get(group, [])
        return json.dumps(children, cls=NumpyAwareJSONEncoder)

    def groupContents(self, group):
        """
        Returns the contents of a group as JSON.
        You can access the contents of the group directly
        without following the hierarchy.
        Raises KeyError if the group is not in the file.
        """
        info = self.groupContentsDict[group]
        return json.dumps(info, cls=NumpyAwareJSONEncoder)
########################################################################################################
class NumpyAwareJSONEncoder(json.JSONEncoder):
    """
    This class facilitates the JSON encoding of NumPy objects.

    The standard json encoder rejects NumPy arrays and most NumPy scalar
    types; this subclass converts them to plain Python values first.
    """

    def default(self, obj):
        """
        Return a JSON-serializable stand-in for ``obj``.

        NumPy arrays become (nested) lists via ``tolist()`` and NumPy scalars
        become the matching Python scalar via ``item()``. Anything else is
        handed to the base class, which raises TypeError.
        """
        if isinstance(obj, numpy.ndarray):
            return obj.tolist()
        # Scalars such as numpy.int64 / numpy.float32 / numpy.bool_ are not
        # ndarrays, so they need their own conversion.
        if isinstance(obj, numpy.generic):
            return obj.item()
        return json.JSONEncoder.default(self, obj)
#######################################################################################################
# Convert the opened HDF5 file and write the JSON file next to it. The handle
# is closed in all cases so the file is not left open if conversion fails.
try:
    json_data = converter(h5file)
    contents = json_data.jsonOutput()
finally:
    h5file.close()