-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_kona_faults.py
92 lines (77 loc) · 3.19 KB
/
parse_kona_faults.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import argparse
from enum import Enum
import os
import sys
import subprocess
import pandas as pd
# Name of the input CSV column compared against the --start/--end bounds.
TIMECOL = "time"
# Name of the input CSV column holding the numeric fault kind; main() rewrites
# it to the matching FaultKind value string.
KINDCOL = "kind"
class FaultKind(Enum):
    """Kinds of fault, each valued by its lower-case textual name."""

    READ = "read"
    WRITE = "write"
    WRPROTECT = "wrprotect"

    def __str__(self):
        # Format as the bare value ("read") instead of "FaultKind.READ".
        return str(self.value)
def kind_to_enum(kind):
    """Translate a numeric fault kind into its FaultKind member.

    0 maps to READ, 1 to WRITE and 3 to WRPROTECT; any other value raises
    Exception("unknown kind: ...").
    """
    known_codes = (
        (0, FaultKind.READ),
        (1, FaultKind.WRITE),
        (3, FaultKind.WRPROTECT),
    )
    # Compare with == (not a dict lookup) so any value equal to a code matches.
    for code, member in known_codes:
        if kind == code:
            return member
    raise Exception("unknown kind: {}".format(kind))
def _parse_args():
    """Build the command-line parser and return the parsed arguments."""
    # The text must be passed as ``description``: ArgumentParser's first
    # positional parameter is ``prog``, which would print it as the program
    # name in the usage line.
    parser = argparse.ArgumentParser(
        description="Process input and write csv-formatted data to stdout/output file")
    parser.add_argument('-i', '--input', action='store', help="path to the input/data file", required=True)
    parser.add_argument('-st', '--start', action='store', type=int, help='start (unix) time to filter data')
    parser.add_argument('-et', '--end', action='store', type=int, help='end (unix) time to filter data')
    parser.add_argument('-fk', '--kind', action='store', type=FaultKind, choices=list(FaultKind), help='filter for a specific kind of fault')
    parser.add_argument('-b', '--binary', action='store', help='path to the binary file to locate code location')
    parser.add_argument('-o', '--out', action='store', help="path to the output file")
    return parser.parse_args()


def _fail(message):
    """Write *message* to stderr and exit with status 1."""
    # stderr, not stdout: stdout may be carrying the CSV output.
    sys.stderr.write(message + "\n")
    sys.exit(1)


def _addr2line(binary, ips):
    """Resolve a "|"-separated address string to code locations via addr2line.

    Returns the lines printed by ``addr2line -e <binary> <addresses...>``
    joined with "<//>". The output is split on "\\n", so the empty piece after
    the final newline is kept and the result ends with the separator, as it
    always has. Returns "" when *ips* contains no addresses.

    Raises subprocess.CalledProcessError if addr2line exits non-zero.
    """
    addresses = [ip for ip in ips.split("|") if ip]
    if not addresses:
        # Without address arguments addr2line reads addresses from stdin,
        # which would make the script hang.
        return ""
    output = subprocess.check_output(['addr2line', '-e', binary] + addresses)
    return "<//>".join(output.decode('utf-8').split("\n"))


def main():
    """Summarise a fault CSV into per-location counts and write it as CSV.

    Steps: read the --input CSV, keep rows whose TIMECOL lies within
    --start/--end (when given), rewrite KINDCOL from its numeric code to the
    FaultKind value string, optionally keep a single --kind, group by
    ('btrace' if that column exists, otherwise 'ip') and kind, add 'count' and
    integer 'percent' columns, and, when --binary is given, add a 'code'
    column produced by addr2line. The result goes to --out or stdout;
    progress messages go to stderr.

    Exits with status 1 if the input file or the binary does not exist.
    """
    args = _parse_args()

    # Validate paths up front with real checks (an assert is stripped by -O).
    if not os.path.exists(args.input):
        _fail("can't locate input file: {}".format(args.input))
    if args.binary and not os.path.exists(args.binary):
        _fail("can't locate binary file: {}".format(args.binary))

    df = pd.read_csv(args.input, skipinitialspace=True)
    sys.stderr.write("total rows read: {}\n".format(len(df)))

    # filter ("is not None" so that an explicit bound of 0 is honoured)
    if args.start is not None:
        df = df[df[TIMECOL] >= args.start]
    if args.end is not None:
        df = df[df[TIMECOL] <= args.end]
    # Work on an independent frame so the column assignments below do not
    # write into a slice of the frame that was read.
    df = df.copy()
    if not df.empty:
        # Rebase times to the first remaining row; skipped when nothing is
        # left because .iloc[0] would raise IndexError.
        # NOTE(review): the grouping below discards this column, so the rebase
        # does not reach the output; kept to preserve the original flow.
        df[TIMECOL] = df[TIMECOL] - df[TIMECOL].iloc[0]

    # rewrite and filter by kind
    df[KINDCOL] = df[KINDCOL].map(lambda kind: kind_to_enum(kind).value)
    if args.kind is not None:
        df = df[df[KINDCOL] == args.kind.value]

    # group by ip or btrace
    group_col = 'btrace' if 'btrace' in df else 'ip'
    df = df.groupby([group_col, KINDCOL]).size().reset_index(name='count')
    df = df.rename(columns={group_col: "ips"})
    df = df.sort_values("count", ascending=False)
    df["percent"] = (df['count'] / df['count'].sum()) * 100
    df["percent"] = df["percent"].astype(int)

    if args.binary:
        sys.stderr.write("getting backtraces for {} ips\n".format(len(df)))
        codes = []
        # A local counter replaces the former module-level global, so main()
        # also works when this file is imported rather than run as a script.
        for processed, ips in enumerate(df['ips'], start=1):
            codes.append(_addr2line(args.binary, ips))
            if processed % 100 == 0:
                sys.stderr.write("processed {} entries\n".format(processed))
        df['code'] = codes

    # write out
    out = args.out if args.out else sys.stdout
    df.to_csv(out, index=False, header=True)


if __name__ == '__main__':
    main()