-
Notifications
You must be signed in to change notification settings - Fork 3
/
memStreaming.py
184 lines (153 loc) · 4.98 KB
/
memStreaming.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# pylint: disable=
"""
File : memStreaming.py
Author : Valentin Kuznetsov <vkuznet AT gmail dot com>
Description:
"""
import argparse
import json
# system modules
import re
from traceback import print_exc
try:
import cStringIO as StringIO
except ImportError:
import io as StringIO # python3
except ImportError:
import StringIO
# psutil module
import psutil
import sys
from types import ModuleType, FunctionType
from gc import get_referents
def getSize(obj):
    """
    _getSize_

    Walk the object graph rooted at obj and add up the size, in bytes, of
    every object reachable from it.

    Each reachable object is counted once (tracked by its ``id``) and measured
    with ``sys.getsizeof``; references are followed with ``gc.get_referents``.
    Classes, modules and functions are neither counted nor followed.

    :param obj: a python object
    :return: an integer representing the total size of the object
    :raises TypeError: if obj itself is a class, module or function

    Code extracted from Stack Overflow:
    https://stackoverflow.com/questions/449560/how-do-i-determine-the-size-of-an-object-in-python
    """
    # Custom objects reference their class and functions reference their
    # module; following those would count far more than the object itself.
    excluded = (type, ModuleType, FunctionType)
    if isinstance(obj, excluded):
        raise TypeError('getSize() does not take argument of type: ' + str(type(obj)))

    visited = set()
    total = 0
    frontier = [obj]
    # Level-by-level walk: measure the current level, then fetch everything
    # the newly measured objects refer to.
    while frontier:
        fresh = []
        for item in frontier:
            if isinstance(item, excluded):
                continue
            ident = id(item)
            if ident in visited:
                continue
            visited.add(ident)
            total += sys.getsizeof(item)
            fresh.append(item)
        frontier = get_referents(*fresh)
    return total
# Plain decimal literal: either an optional minus sign with digits before the
# dot ("3.14", "-2."), or an unsigned value with digits after the dot (".5").
float_number_pattern = re.compile(r'(^[-]?\d+\.\d*$|^\d*\.{1,1}\d+$)')
# Integer literal with an optional leading minus sign. At least one digit is
# required, so a lone "-" (which float() cannot convert) is rejected.
int_number_pattern = re.compile(r'(^-?[0-9]+$)')
class OptionParser:
    """Holder of the argparse parser that defines the script options."""

    def __init__(self):
        """Build the parser and register the --fin (input file) option."""
        parser = argparse.ArgumentParser(prog='PROG')
        parser.add_argument(
            "--fin",
            dest="fin",
            action="store",
            default="",
            help="Input file",
        )
        self.parser = parser
def size_format(uinput, base=1000.):
    """
    Format file size utility, it converts file size into KB, MB, GB, TB, PB units

    :param uinput: size in bytes, given as a number or a numeric string; its
        string form must match float_number_pattern or int_number_pattern
    :param base: scaling factor between units; 1000. (default, CMS convention,
        power of 10) yields KB/MB/..., 1024. (power of 2) yields KiB/MiB/...;
        any other value is applied with the decimal suffixes
    :return: string such as "1.5MB" (no suffix below one unit of base), or
        'N/A' when the input is not a plain int/float literal
    """
    text = str(uinput)
    if not (float_number_pattern.match(text) or int_number_pattern.match(text)):
        return 'N/A'
    try:
        num = float(uinput)
    except (TypeError, ValueError, OverflowError):
        # print_exc() takes no exception argument; it reports the exception
        # currently being handled.
        print_exc()
        return "N/A"
    if base == 1024.:  # power of 2
        xlist = ['', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
    else:  # power of 10
        xlist = ['', 'KB', 'MB', 'GB', 'TB', 'PB']
    for xxx in xlist[:-1]:
        # compare the magnitude so negative values are scaled too
        if abs(num) < base:
            return "%3.1f%s" % (num, xxx)
        num /= base
    # Anything that is still >= base here is reported in the largest unit
    # instead of falling off the end of the loop and returning None.
    return "%3.1f%s" % (num, xlist[-1])
def load(fin):
    """
    Read a JSON document from a file.

    :param fin: path of the JSON file to read
    :return: the decoded python object
    """
    with open(fin) as source:
        return json.loads(source.read())
def create_stream(data):
    """
    Serialize records into an in-memory, line-oriented JSON array.

    The text starts with a '[' line and ends with a ']' line; every record is
    written on its own line with json.dumps, and consecutive records are
    separated by a line holding a single ','.

    :param data: iterable of JSON-serializable records
    :return: StringIO.StringIO object holding the serialized text
    """
    # join builds the text in one pass instead of repeated string +=
    body = '\n,\n'.join(json.dumps(rec) for rec in data)
    return StringIO.StringIO('[\n' + body + '\n]')
def parse_stream(stream):
    """
    Read records back from a line-oriented JSON array stream.

    Lines are read one at a time with stream.readline(). Every line that,
    once stripped, starts with '{' and ends with '}' is decoded with
    json.loads; all other lines are skipped. Reading stops at the first ']'
    line or at end of stream, whichever comes first.

    :param stream: object with a readline() method returning text lines
    :return: list of decoded records, in stream order
    """
    data = []
    while True:
        raw = stream.readline()
        if not raw:
            # End of stream without a closing ']': readline() keeps returning
            # an empty string, so stop here rather than loop forever.
            break
        line = raw.strip()
        if line == ']':
            break
        if line.startswith('{') and line.endswith('}'):
            data.append(json.loads(line))
    return data
def print_mem(obj, data, mem, memIni=None):
    """
    Print a memory report for one measurement step.

    :param obj: label shown in the report header
    :param data: python object whose size is reported through getSize
    :param mem: memory info object; its total, rss, pss and uss attributes
        are read, and 0 is used for any attribute that is missing
    :param memIni: earlier memory snapshot; accepted but not used, the report
        shows the absolute values of mem rather than differences
    """
    print("\nMemory usage for object: %s" % obj)
    layout = (
        ('Total memory: %s (%s)', 'total'),
        ('RSS memory: %s (%s)', 'rss'),
        ('PSS memory: %s (%s)', 'pss'),
        ('USS memory: %s (%s)', 'uss'),
    )
    # read every counter up front, before the object size is computed
    readings = [(template, getattr(mem, field, 0)) for template, field in layout]
    print('object memory: %s' % getSize(data))
    for template, value in readings:
        print(template % (value, size_format(value)))
def test(fin):
    """
    Perform main test with json data.

    Loads the JSON file, serializes it into an in-memory stream, parses the
    stream back into records, and prints a memory report after each step.

    :param fin: path of the input JSON file
    """
    data = None
    # Process() is called without a pid (the current process, per psutil docs)
    pid = psutil.Process()
    mem = pid.memory_full_info()
    # baseline report, taken before anything is loaded
    print_mem('nothing', data, mem)
    data = load(fin)
    mem = pid.memory_full_info()
    print_mem('json', data, mem)
    # snapshot taken right before the stream is built
    memIni = pid.memory_full_info()
    stream = create_stream(data)
    mem = pid.memory_full_info()
    print_mem('stream', stream, mem, memIni)
    # snapshot taken right before the stream is parsed back
    memIni = pid.memory_full_info()
    data1 = parse_stream(stream)
    mem = pid.memory_full_info()
    print_mem('json out of stream', data1, mem, memIni)
    # round-trip check: the parsed records should equal the loaded data
    # NOTE(review): indentation was lost in this copy of the file; the two
    # type prints are assumed to belong to the mismatch branch - confirm.
    if data != data1:
        print("data mismatch")
        print("data type: %s" % type(data))
        print("data1 type: %s" % type(data1))
def main():
    """Main function: parse the command-line options and run the test on --fin."""
    options = OptionParser().parser.parse_args()
    test(options.fin)


# run only when executed as a script, not when imported
if __name__ == '__main__':
    main()