-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind_key.py
126 lines (115 loc) · 6.91 KB
/
find_key.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import json
import logging
# Sample message used as test input for this script: a JSON document, held as
# a string literal, describing one email-like record (attachments, sender
# list, recipients, subject/body paragraphs, scores).
str_email ='{"Attatchments":[{"ContentType":"Text","FileSize":"234","ID":"124234234234"}],"SearchContent0":[{"Type":"Email","Content":"Hi mom"}], "TextParseTime":0.100195,"cc":["1123","asd","123123"],"Receipient_Internal":0,"DateTime":"2015/07/21 11:54:47","Body":"","From":[{ "Category": ["Big Data","Cloud Cloud Cloud!"],"Name": "Brats","IntExt": "External","Email":"[email protected]"},{ "Category": ["Data Resevoirs", "Lakes Lakes Lakes"],"Name": "Brassington","IntExt": "External","Email":"[email protected]"}],"MSGID":"<[email protected]>","recipient":["sdfgfgs"],"Content":[{"Type": "Subject","Paragraphs":[{"Content": "OPEN POSITIONS 2855 DWT DPP 21TH JULY" ,"Type":["text","subject"]}]},{"Type": "Body","Paragraphs":[{"Content": "OPEN POSITIONS 2855 DWT DPP 21TH JULY" ,"Type":["text","default","text","0.11236187123"]},{"Content": "OPEN VESSEL DWT FLG YR PORT COMMENT FLEET" ,"Type":["text","default","text","0.13719117202061745"]}]}],"bcc":["asfdasdf","hardy","harr"],"Type":"Email","Sender_DoddFrank":0,"ExclusionScore":0.23414324}'
# str_email = '{"SearchContent0":[{Type:"Email":"Content":"Hi mom"}]}'
# Parsed form of the sample message; find_key() reads from this module-level
# object.
my_json = json.loads(str_email)
# my_keys = { "Category":"[]","From":"[]","Email":"","TextParseTime":"","Content":""}
# my_keys = {"Email":[],"Category":[],"cc":[],"MSGID":"","Content":[],"Name":[],"TextParseTime":"","SearchContent0":[]}
# Field names mapped to empty placeholders ("" for scalars, [] or [[],[]] for
# collections).  In the visible part of this file it is only referenced by the
# commented-out extraction code further down.
# NOTE(review): some names here do not match the sample message's keys
# ("MsgID" vs "MSGID", "Recipient_Internal" vs "Receipient_Internal") --
# confirm which spelling is intended before relying on this table.
my_keys = { "NumUsers":"","NumSenders":"","NumRecipients":"","Sender_Internal":"","Sender_External":"","Recipient_Internal":"","Recipient_External":"","Recipient_Private":"","Recipient_AntiTrust":"","Recipient_DoddFrank":"","NumFlaggedUsers":"","Type":"","MsgID":"","FileSize":"","DirName":"","Direction":"","ExclusionScore":"","Exclude":"","Exclude_S6":"","Exclude_Internal_S6":"","DateTime":"","DateTimeUTC":"","Date":"","Time":"","StartTime":"","StartTimeUTC":"","EndTime":"","EndTimeUTC":"","ChatDuration":"","Subject":"","Greeting":"","Content":[[],[]],"SearchContent0":[[],[]],"SearchContent1":"","RegEx_Entities":"","NE":[],"Attatchments":[],"RemovedAttatchments":"","NumAttatchmentsRemoved":"","NumAttatchments":"","Hits0":[],"Hits1":[],"NumHits0":"","NumHits1":""}
# print my_keys
# my_keys = {"Email":[],"Content":""}
# Accumulators for the commented-out go_deeper() traversal below; the active
# code in the visible part of this file does not read them.
arr = []
# Pair of parallel lists: [types, contents] for "SearchContent0" entries.
search_content = [[],[]]
# Pair of parallel lists: [types, contents] for "Content" entries.
email_content = [[],[]]
# def go_deeper(key,obj):
# if isinstance(obj,list):
# # print "about to iterate over {0}".format(obj)
# if key == "SearchContent0":
# for content in obj:
# search_content[0].append(content["Type"])
# search_content[1].append(content["Content"])
# print "search_content is now {0}".format(search_content)
# else:
# for item in obj:
# print item
# go_deeper(key,item)
# elif isinstance(obj,dict):
# for dict_key, value in obj.iteritems():
# print "Parent key is {0}".format(key)
# print "{0}:{1}".format(dict_key,value)
# if key == "Content":
# print "Content value is {0}".format(value)
# if dict_key == "Type":
# email_content[0].append(value)
# else:
# for content in value:
# print "Appending {0} to {1}".format(content,email_content)
# email_content[1].append(content["Content"])
# elif dict_key in my_keys:
# #in content block
# go_deeper(dict_key,value)
#
# else:
# if key in my_keys:
# print "appending {0} to {1}".format(obj,key)
# print my_keys[key]
# my_keys[key].append(obj)
# arr.append(obj)
# return arr
#
# for key, value in my_json.iteritems():
# # print key
# if key in my_keys and (isinstance(value,list) == False and isinstance(value,dict) == False):
# print "extracting top level values for {0}:{1}".format(key,value)
# print my_keys[key]
# # print "final value is {0}".format(tuple(go_deeper(value)))
# my_keys[key] = value
# arr = []
# else:
# # print "final value is {0}".format(go_deeper(value))
# print "extracting values for {0}:{1}".format(key,value)
# go_deeper(key,value)
# arr = []
# # print "Doing nothing with {0}".format(key)
# my_keys["Content"] = email_content
# my_keys["SearchContent0"] = search_content
# print(my_keys)
#"SearchContent0":[{"Type":"Email","Content":"Hi mom"}]
def find_key(my_key, data_type, data=None):
    """Look up values in a parsed message using a colon-separated path.

    The path is split on ":".  The first token names a top-level key of the
    document.  In "array" mode the first "[" token marks that key's value as
    a collection to iterate; what is collected depends on the rest of the
    path:

    * "key:["               -> every element of the collection, unchanged.
    * "key:[:{:field"       -> element[field] for every element.  The last
                               token is the field name; the "{" token is a
                               placeholder that is never inspected.
    * "Content:[:{:Type=X"  -> the "Content" text of every paragraph in the
                               first section whose "Type" equals X.

    Args:
        my_key: Path string such as "Attatchments:[:{:ID".
        data_type: "array" to walk a collection as described above; any
            other value returns the raw value stored under the first token.
        data: Mapping to search.  Defaults to the module-level ``my_json``
            so existing two-argument calls keep working.

    Returns:
        A list of collected values in "array" mode (empty when the path has
        no "[" token or nothing matches), otherwise ``data[first_token]``.

    Raises:
        KeyError: if the first token, or a requested field, is missing.
        IndexError: if a "Content" path has no ``Type=<name>`` token two
            positions after "[", or that token contains no "=".
    """
    if data is None:
        data = my_json
    log = logging.getLogger(__name__)

    tree = my_key.split(":")
    root = tree[0]
    log.debug("path tokens: %s", tree)

    if data_type != "array":
        return data[root]

    if "[" not in tree:
        # Nothing to iterate; return an empty list rather than falling off
        # the end of the function with an implicit None.
        return []

    # 1-based position of the first "[" token; only that one is significant.
    depth = tree.index("[") + 1
    items = data[root]

    if depth == len(tree):
        # Path ends at "[": hand back the elements themselves.
        return list(items)

    if root != "Content":
        # Generic collection of dicts: pull the field named by the last token.
        return [item[tree[-1]] for item in items]

    # "Content" sections are filtered by a "Type=<name>" token that sits two
    # positions after "[".  Only the part after "=" is used; it is always
    # compared against the section's "Type" field.
    results = []
    for section in items:
        if not (isinstance(section, dict) and "Type" in section):
            log.debug("skipping section without a Type: %s", section)
            continue
        # Parsed lazily so a path without a filter token only fails when
        # there is actually a section to test it against.
        wanted = tree[depth + 1].split("=")[1]
        log.debug("comparing wanted=%s to section type=%s",
                  wanted, section["Type"])
        if wanted == section["Type"]:
            results.extend(para["Content"] for para in section["Paragraphs"])
            # Stop at the first matching section.
            return results
    return results
#"Content":[{"Type": "Subject","Paragraphs":[{"Content": "OPEN POSITIONS 2855 DWT DPP 21TH JULY" ,"Type":["text","subject"]}]},{"Type": "Body","Paragraphs":[{"Content": "OPEN POSITIONS 2855 DWT DPP 21TH JULY" ,"Type":["text","default","text","0.11236187123"]},{"Content": "OPEN VESSEL DWT FLG YR PORT COMMENT FLEET" ,"Type":["text","default","text","0.13719117202061745"]}]}]
# Exercise find_key against the sample message: first the paragraph texts of
# the "Subject" content section, then the "ID" field of the attachment entries.
print(find_key("Content:[:{:Type=Subject", "array"))
# print find_key("SearchContent0:[:{:Content","array")
print(find_key("Attatchments:[:{:ID", "array"))
# print find_key("bcc:[","array")
# Bare lookup: the value is discarded, so this only confirms the path exists.
my_json["SearchContent0"][0]["Content"]