-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjsonStreamParser.py
executable file
·150 lines (121 loc) · 4.06 KB
/
jsonStreamParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/python
import sys
import re
class State:
    """Mutable parser state carried across the lines of a JSON-like stream.

    The parser flattens nested objects into dotted paths: ``F[d]`` holds the
    path of the field currently being filled at bracket depth ``d`` and
    ``rowDict`` maps each completed path to its (stripped) value text.
    """

    def __init__(self):
        self.brkts = 0              # current "{" nesting depth
        self.delim = ":"            # character separating a field from its value
        self.sep = "."              # joins nested field names into one path
        self.lastField = ""         # path prefix used when naming new fields
        self.lastVal = ""           # most recent field/value text seen
        self.F = [self.lastField]   # per-depth field paths; F[brkts] is current
        self.rows = 0               # completed records so far
        self.lines = 0              # input lines processed without error
        self.acceptField = True     # whether addField() may record a field
        self.dqc = 0                # 1 while inside a double-quoted span
        self.sqc = 0                # 1 while inside a single-quoted span
        self.rowDict = {}           # flattened path -> value for the current row

    @staticmethod
    def _emit(*parts):
        """Write *parts* to stdout separated by single spaces plus a newline.

        Reproduces the output of the Python 2 statement ``print a, b, c``
        while remaining valid syntax on Python 3.
        """
        sys.stdout.write(" ".join(str(p) for p in parts) + "\n")

    def clear(self, force=False):
        """Reset the per-record state (``rows`` and ``lines`` are kept).

        Without ``force`` the reset only happens when checkState() does not
        report an error and no quoted span is open.
        """
        if force or (self.checkState() >= 0 and self.checkQuotes()):
            self.brkts = 0
            self.dqc = 0
            self.sqc = 0
            self.lastField = ""
            # Previously assigned to a misspelled ``lastValue`` attribute,
            # which left ``lastVal`` stale after a reset.
            self.lastVal = ""
            self.F = [self.lastField]
            self.acceptField = True
            self.rowDict = {}

    def printState(self):
        """Print the counters and the field-path stack to stdout."""
        self._emit("State: rows: ", self.rows, " lines: ", self.lines,
                   " brkts: ", self.brkts, " fields: ", self.F)

    def printRow(self):
        """Print one ``rows : path : value`` line per entry of rowDict."""
        for k in self.rowDict:
            self._emit(self.rows, self.delim, k, self.delim, self.rowDict[k])

    def checkState(self):
        """Return -1 (after printing a diagnostic) if brackets went negative.

        Otherwise returns ``brkts * (len(F) - 1)``, which is zero once every
        opened bracket has been closed.
        """
        if self.brkts < 0:
            self._emit("State: Error ... extra closing brackets at: ")
            self.printState()
            return -1
        return self.brkts * (len(self.F) - 1)

    def addRow(self):
        """Count one more completed record."""
        self.rows = self.rows + 1

    def openBracket(self):
        """Enter a nested object: push a path slot and deepen by one."""
        self.F.append(self.lastField + self.sep)
        # The path at the current depth becomes the prefix for child fields.
        self.lastField = self.F[self.brkts]
        self.brkts = self.brkts + 1
        self.acceptNewField()

    def closeBracket(self):
        """Leave a nested object: pop its path slot and restore the prefix.

        An unmatched closing bracket empties ``F`` and the lookup below then
        raises IndexError; organize() catches that and resets the state.
        """
        self.brkts = self.brkts - 1
        self.F.pop()
        self.lastField = self.F[self.brkts - 1]

    def addField(self, field):
        """Record ``field`` (stripped) as the current path at this depth.

        Returns True when the field was accepted, False when acceptField
        was not set.
        """
        if self.acceptField:
            tfield = field.strip()
            self.F[self.brkts] = self.lastField + self.sep + tfield
            self.acceptField = False
            self.lastVal = tfield
            return True
        return False

    def addValue(self, val):
        """Store the stripped ``val`` under the current path if non-empty."""
        self.lastVal = val.strip()
        if len(self.lastVal) > 0:
            self.rowDict[self.F[self.brkts]] = self.lastVal
            self.lastVal = ""

    def acceptNewField(self):
        """Allow the next addField() call to record a field."""
        self.acceptField = True

    def updateQuotes(self, myChar):
        """Toggle quote tracking for a ``"`` or ``'`` character.

        Closing a double-quoted span also clears the single-quote flag, so
        an apostrophe inside double quotes does not leave a span open.
        """
        if "\"" == myChar:
            if self.dqc == 1:
                self.dqc = 0
                self.sqc = 0  # double quotes take precedence
            else:
                self.dqc = 1
        if "\'" == myChar:
            if self.sqc == 1:
                self.sqc = 0
            else:
                self.sqc = 1

    def checkQuotes(self):
        """Return True when no quoted span is currently open."""
        return self.dqc % 2 == 0 and self.sqc % 2 == 0
def _quote_is_escaped(line, pos):
    """Return True if line[pos] is preceded by an odd run of backslashes."""
    backslashes = 0
    j = pos - 1
    while j >= 0 and line[j] == "\\":
        backslashes += 1
        j -= 1
    return backslashes % 2 == 1


def organize(line, myState):
    """Feed one input line through the parser state, character by character.

    Outside quoted spans, ``{`` opens a nested object, the state's ``delim``
    ends a field name, ``,`` and ``}`` end a value, and a ``}`` that returns
    the depth to zero completes a row. Quote characters toggle the state's
    quote tracking unless they are backslash-escaped.

    ``myState.lines`` is incremented only when the whole line was processed
    without an exception. On any exception the state is printed and
    force-cleared, so one malformed line does not poison later lines.
    """
    try:
        pf = 0  # index where the pending field/value text starts
        for i, c in enumerate(line):
            if myState.checkQuotes():
                if "{" == c:
                    myState.openBracket()
                    pf = i + 1
                if myState.delim == c:
                    myState.addField(line[pf:i])
                    myState.acceptNewField()
                    pf = i + 1
                if "}" == c or "," == c:
                    myState.addValue(line[pf:i])
                    pf = i + 1
                    if "}" == c:
                        myState.closeBracket()
                        if myState.brkts == 0:
                            myState.addRow()
            # Count the preceding backslashes instead of peeking at
            # line[i-1]/line[i-2]: that wrapped around to the end of the line
            # for i < 2 and misread runs of three or more backslashes.
            if c in ("\"", "\'") and not _quote_is_escaped(line, i):
                myState.updateQuotes(c)
        myState.lines = myState.lines + 1
    except Exception:
        # Deliberate best-effort recovery; KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        myState.printState()
        myState.clear(True)
def lazyRead():
    """Stream stdin line by line, printing each record as it completes.

    A record is printed (and the per-record state cleared) whenever the
    number of completed rows has grown since the last emit. A summary with
    the line and record counts is written to stderr at the end.
    """
    myState = State()
    rows = 0  # number of rows already emitted
    for line in sys.stdin:
        organize(line, myState)
        if myState.rows > rows:
            myState.printRow()
            myState.clear()
            # Remember what has been emitted; without this the check above
            # stays true after the first record and every later line prints
            # and clears a half-built row.
            rows = myState.rows
    sys.stderr.write("Parsed " + repr(myState.lines) + " lines, created "
                     + repr(myState.rows) + " records\n")
# Script entry point: parse stdin only when executed directly, not on import.
if __name__ == '__main__':
    lazyRead()