forked from johnbachman/depmap_analysis
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdb_nest_dict_to_jsons.py
88 lines (68 loc) · 2.51 KB
/
db_nest_dict_to_jsons.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import json
import logging
import pickle as pkl
import argparse as ap
from time import time
import depmap_network_functions as dnf
# Logger used by this script; records are emitted under the name 'jsonDump'.
logger = logging.getLogger('jsonDump')


def _dump_it_to_json(fname, pyobj):
    """Serialize ``pyobj`` as JSON into the file ``fname``.

    The file is opened in text write mode, so any existing file at that
    path is overwritten.

    Parameters
    ----------
    fname : str
        Path of the JSON file to write.
    pyobj : object
        Any object that ``json.dump`` can serialize.
    """
    with open(fname, mode='w') as handle:
        json.dump(obj=pyobj, fp=handle)
# Command line interface: the input pickle is mandatory, the output base
# name is optional.
parser = ap.ArgumentParser()
parser.add_argument('-p', '--pickle-file', required=True,
                    help='Pickle file containing a nested dict '
                         'd[subj][obj][[type, hash, belief score], ...]')
# The default must not end with '_': every file suffix appended to the base
# name already starts with one ('_subjects.json', '_<name>_is_subj.json',
# ...), so a trailing underscore here would give 'indra_db_<ts>__subjects.json'
# and contradict the default documented in the help text.
parser.add_argument('-o', '--output-name',
                    help='Output base name of json files. With no input, the '
                         'default is "./output/indra_db_<time stamp>".',
                    default='./output/indra_db_{}'.format(int(time())))
args = parser.parse_args()
# Accept either a bare base name or a full '<base>.json' file name.
if args.output_name.endswith('.json'):
    outbasename = args.output_name[:-5]  # Removes .json from basename
else:
    outbasename = args.output_name

# The base name is used as a file name prefix, so all output files land in
# the directory part of it. Make sure that directory exists before anything
# is written; this covers the default './output/...' location as well as any
# user supplied path. A base name without a directory part writes to the
# current working directory and needs nothing created.
out_dir = os.path.dirname(outbasename)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

logger.info('Using basename %s', outbasename)
# NOTE: unpickling can execute arbitrary code embedded in the file; only
# run this script on pickle files from a trusted source.
with open(args.pickle_file, 'rb') as pr:
    nest_dict = pkl.load(pr)

# Create nested dict
nest_dict_out = dnf.create_nested_dict()

# Convert hash to strings, so that they are written as JSON strings instead
# of JSON numbers. Each entry is (type, hash, belief score) or the shorter
# (type, hash); handling every entry on its own keeps any trailing fields
# as they are and also copes with lists that mix both forms. An entry with
# fewer than two fields still raises ValueError.
for s, inner_d in nest_dict.items():
    for o, type_hash_list in inner_d.items():
        nest_dict_out[s][o] = [
            (tp, str(hsh)) + tuple(rest)
            for tp, hsh, *rest in type_hash_list
        ]
# Output:
#   1. subj list
#   2. For each subj: dict[obj] -> [[type, hash], ...]
#   3. obj list
#   4. For each obj: (reverse lookup) dict[subj] -> [[type, hash], ...]
#
# NOTE: subject/object names are inserted into the file names as they are.

# 1. subj list. The keys of a mapping are already unique; listing them
# directly (instead of going through a set) keeps the mapping's own
# iteration order, so the file content does not change from run to run.
subj_list = list(nest_dict_out)
_dump_it_to_json(outbasename + '_subjects.json', subj_list)

# Reverse lookup, obj -> {subj: entries}. Its keys are exactly the objects
# seen, in first-seen order, so no separate set of objects is needed.
rev_dict = {}
for subj, d in nest_dict_out.items():
    # 2. dump each subj dict as json
    _dump_it_to_json(outbasename + '_%s_is_subj.json' % subj, d)

    # Build reverse dicts:
    for obj, entry in d.items():
        rev_dict.setdefault(obj, {})[subj] = entry

# 3. obj list
_dump_it_to_json(outbasename + '_objects.json', list(rev_dict))

# 4. dump the reverse/obj dicts
for obj, d in rev_dict.items():
    _dump_it_to_json(outbasename + '_%s_is_obj.json' % obj, d)