This repository has been archived by the owner on Feb 8, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathextract_structures_json.py
executable file
·218 lines (191 loc) · 6.63 KB
/
extract_structures_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
#!/usr/bin/python
# (C) W.J. van der Laan 2012
'''
Extract structures from DWARF data to JSON for the C structure
pretty-printer.
'''
from __future__ import print_function, division, unicode_literals
import argparse
import os, sys
from bintools.dwarf import DWARF
from bintools.dwarf.enums import DW_AT, DW_TAG, DW_LANG, DW_ATE, DW_FORM, DW_OP, DW_ATE
from dwarfhelpers import get_flag, get_str, get_int, get_ref, not_none, expect_str
'''
Output format JSON/Python:
- enums
- structures (name, field offsets)
- simple field types (size, type, structure, pointer to structure + offset)
'''
# Debug switch: when true, the visit_* functions and process_compile_unit
# print every visited type and extracted record with plain print(), i.e. to
# the same stdout stream that main() writes the JSON result to.
DEBUG=False
# Logging
def error(x):
    '''Write the string x to standard error, prefixed with "Error: ".'''
    message = 'Error: ' + x
    print(message, file=sys.stderr)
def warning(x):
    '''Write the string x to standard error, prefixed with "Warning: ".'''
    message = 'Warning: ' + x
    print(message, file=sys.stderr)
def progress(x):
    '''Write the progress string x to standard error, prefixed with "* ".'''
    message = '* ' + x
    print(message, file=sys.stderr)
def type_name(die):
    '''
    Return the name used to refer to the type described by a DIE.

    A missing DIE (None) is the predefined "nothing" type and is reported as
    'void'. When get_str finds no 'name' on the DIE, a name is made up from
    the DIE's tag name and its offset, as "<tag>_<offset>".
    '''
    if die is None:
        return 'void'
    declared_name = get_str(die, 'name')
    if declared_name is not None:
        return declared_name
    # Name not provided by DWARF: synthesize one.
    return '%s_%i' % (DW_TAG[die.tag], die.offset)
def parse_type(type, dies_dict):
    '''
    Strip modifiers from a type DIE and count pointer indirections.

    Starting at `type`, follows the 'type' reference (looked up in dies_dict)
    for as long as the current DIE is a const, volatile, typedef or pointer
    DIE. Every pointer DIE passed on the way adds one level of indirection.

    Returns a tuple (die, indirection), where die is the first DIE that is
    not one of those modifiers, or None if `type` was None or a reference
    could not be found in dies_dict.
    '''
    pointer_depth = 0
    current = type
    while current is not None:
        tag = current.tag
        if tag not in [DW_TAG.const_type, DW_TAG.volatile_type, DW_TAG.typedef, DW_TAG.pointer_type]:
            break
        if tag == DW_TAG.pointer_type:
            pointer_depth += 1
        # A dangling or absent reference ends the walk with None.
        current = dies_dict.get(get_ref(current, 'type'), None)
    return (current, pointer_depth)
def visit_base_type(die, dies_dict):
    '''
    Build the description record for a base type DIE.

    The record has the keys 'kind' (always 'base_type'), 'byte_size' (the
    DIE's 'byte_size' attribute) and 'encoding' (the DW_ATE name for the
    DIE's 'encoding' attribute). dies_dict is accepted for signature parity
    with the other visit_* functions and is not used.
    '''
    record = {}
    record['kind'] = 'base_type'
    record['byte_size'] = get_int(die, 'byte_size')
    record['encoding'] = DW_ATE[get_int(die, 'encoding')]
    if DEBUG:
        print(record)
    return record
def visit_enumeration_type(die, dies_dict):
    '''
    Build the description record for an enumeration type DIE.

    The record has the keys 'kind' (always 'enumeration_type'), 'byte_size'
    and 'enumerators', a list with one {'name', 'value'} dictionary per
    enumerator child, in child order. Children with any other tag are
    ignored. dies_dict is not used.
    '''
    record = {
        'kind': 'enumeration_type',
        'byte_size': get_int(die, 'byte_size'),
    }
    record['enumerators'] = [
        {
            'name': get_str(entry, 'name'),
            'value': get_int(entry, 'const_value'),
        }
        for entry in die.children
        if entry.tag == DW_TAG.enumerator
    ]
    if DEBUG:
        print(record)
    return record
def visit_array_type(die, dies_dict):
    '''
    Build the description record for an array type DIE.

    The record has the keys:
      'kind'        always 'array_type'
      'indirection' number of pointer levels between the array and its
                    element type, as counted by parse_type
      'type'        name of the element type (see type_name)
      'length'      upper bound + 1 of the subrange child, or None when no
                    subrange child carries an 'upper_bound'
                    (NOTE(review): this assumes a lower bound of 0 - confirm
                    for non-C producers)

    If several subrange children carry an upper bound (multi-dimensional
    arrays), the last one determines 'length'.

    The element type DIE is pushed onto the module-level worklist (created by
    process_compile_unit) so that its definition is extracted as well;
    otherwise a type that is only ever used as an array element would be
    referenced by name in the output without being defined there.
    '''
    element_type = dies_dict.get(get_ref(die, 'type'))
    (element_type, indirection) = parse_type(element_type, dies_dict)
    type_info = {
        'kind': 'array_type',
        'indirection': indirection,
        'type': type_name(element_type),
        'length': None
    }
    for child in die.children:
        if child.tag != DW_TAG.subrange_type:
            continue
        upper_bound = get_int(child, 'upper_bound')
        if upper_bound is not None:
            type_info['length'] = upper_bound + 1
    if DEBUG:
        print(type_info)
    # Queue the element type, mirroring what visit_structure_type does for
    # member types. None entries are skipped by the worklist consumer.
    worklist.append(element_type)
    return type_info
def _member_offset(member):
    '''
    Return the offset of a member DIE inside its enclosing structure.

    A member without a 'data_member_location' attribute gets offset 0, which
    handles a union as a "structure with all fields at offset 0".

    Raises AssertionError for a location expression that does not start with
    DW_OP.plus_uconst, or for an attribute form that is not handled.
    '''
    if 'data_member_location' not in member.attr_dict:
        return 0
    attr = member.attr_dict['data_member_location']
    if attr.form == 'expr':
        first = attr.value.instructions[0]
        # Raised explicitly instead of via `assert` so that the check is not
        # stripped under `python -O`; the exception type stays the same.
        if not (first.opcode == DW_OP.plus_uconst):
            raise AssertionError(
                'unsupported data_member_location expression (expected plus_uconst)')
        return first.operand_1
    if attr.form in ['data1', 'data2', 'data4', 'data']:
        return attr.value
    # Without this, an unknown form would silently become offset 0 under -O.
    raise AssertionError(
        'unhandled data_member_location form %r' % (attr.form,))


def visit_structure_type(die, dies_dict):
    '''
    Build the description record for a structure or union DIE.

    The record has the keys 'kind' (the DW_TAG name of the DIE), 'byte_size'
    and 'members'. 'members' holds one dictionary per child DIE, in child
    order, with the keys:
      'name'        the child's 'name' attribute
      'offset'      offset within the structure (0 when not specified)
      'indirection' number of pointer levels, as counted by parse_type
      'type'        name of the member's type with modifiers and pointers
                    stripped (see type_name)

    Each member's stripped type DIE is pushed onto the module-level worklist
    (created by process_compile_unit) so that it gets extracted too.

    Raises AssertionError when a member location cannot be decoded.
    '''
    type_info = {
        'kind': DW_TAG[die.tag],
        'byte_size': get_int(die, 'byte_size')
    }
    members = []
    for child in die.children:
        member_info = {
            'name': get_str(child, 'name')
        }
        member_info['offset'] = _member_offset(child)

        member_type = dies_dict.get(get_ref(child, 'type'))
        (member_type, indirection) = parse_type(member_type, dies_dict)
        member_info['indirection'] = indirection
        member_info['type'] = type_name(member_type)
        members.append(member_info)
        if DEBUG:
            print(member_info)
        # May be None (void); the worklist consumer skips None entries.
        worklist.append(member_type)
    type_info['members'] = members
    return type_info
def process_compile_unit(dwarf, cu, roots):
    '''
    Extract the requested root types, and every type reachable from them,
    from one compile unit.

    Parameters:
      dwarf  the DWARF object the unit belongs to (not used here)
      cu     the compile unit; its top-level DIE is cu.compile_unit and
             references are resolved through cu.dies_dict
      roots  names of the top-level DIEs to start from

    Returns a dictionary mapping type name (see type_name) to the record
    produced by the matching visit_* function, with a 'name' key added.
    DIEs flagged as declarations are skipped. A DIE with a tag that has no
    visit_* function triggers a warning and yields a record holding only
    'name'.

    Side effect: rebinds the module-level names `worklist` and `visited`;
    the visit_* functions push further DIEs onto `worklist` while this
    function is running.
    '''
    global worklist
    global visited
    types = {}
    worklist = []
    # Created once, before seeding, so it is bound even for a unit without
    # children and is not needlessly re-created for every child.
    visited = set()

    # Seed the worklist with every named top-level DIE that is a root.
    for child in cu.compile_unit.children:
        name = get_str(child, 'name')
        if name is not None and name in roots:
            worklist.append(child)

    while worklist:
        die = worklist.pop()
        # None stands for a missing/void type; offsets identify DIEs that
        # were already handled.
        if die is None or die.offset in visited:
            continue
        visited.add(die.offset)
        if get_flag(die, "declaration"):  # only predeclaration, skip
            continue
        if DEBUG:
            print("[%s]" % (type_name(die)))
        if die.tag in [DW_TAG.structure_type, DW_TAG.union_type]:
            type_info = visit_structure_type(die, cu.dies_dict)
        elif die.tag in [DW_TAG.base_type]:
            type_info = visit_base_type(die, cu.dies_dict)
        elif die.tag in [DW_TAG.array_type]:
            type_info = visit_array_type(die, cu.dies_dict)
        elif die.tag in [DW_TAG.enumeration_type]:
            type_info = visit_enumeration_type(die, cu.dies_dict)
        else:
            warning('%s not handled' % DW_TAG[die.tag])
            type_info = {}
        type_info['name'] = type_name(die)
        types[type_info['name']] = type_info
    return types
# Main conversion function
def parse_dwarf(infile, roots):
    '''
    Extract the root types from the DWARF data of a file.

    Compile units are processed in order; the result of the first unit in
    which every name in `roots` was found is returned. Returns None when no
    single compile unit contains all roots.

    If `infile` is not an existing file, an error is reported and the
    process exits with status 1 (SystemExit).
    '''
    if not os.path.isfile(infile):
        error("No such file %s" % infile)
        # sys.exit rather than the bare exit() helper: the latter is
        # installed by the site module and is not always available.
        sys.exit(1)
    dwarf = DWARF(infile)
    for cu in dwarf.info.cus:
        progress("Processing %s" % cu.name)
        types = process_compile_unit(dwarf, cu, roots)
        if all(x in types for x in roots):  # return if all roots found
            return types
    return None  # not found
def parse_arguments():
    '''
    Parse the command line.

    Returns the argparse namespace with `input` (the input file) and `roots`
    (a list of one or more root data structure names).
    '''
    cli = argparse.ArgumentParser(
        description='Extract structures from DWARF as parseable format')
    cli.add_argument(
        'input',
        metavar='INFILE',
        type=str,
        help='Input file (ELF)')
    cli.add_argument(
        'roots',
        metavar='ROOT',
        type=str,
        nargs='+',
        help='Root data structure name')
    namespace = cli.parse_args()
    return namespace
def main():
    '''
    Script entry point: parse the command line, extract the requested root
    types and write them to stdout as sorted, indented JSON followed by a
    newline.

    Exits with status 1 (SystemExit) when the roots were not all found in
    any compile unit.
    '''
    import json
    args = parse_arguments()
    types = parse_dwarf(args.input, args.roots)
    if types is None:
        error('Did not find all roots (%s) in any compile unit' % (args.roots,))
        # sys.exit rather than the site-provided exit() helper, which is not
        # guaranteed to exist.
        sys.exit(1)
    json.dump(types, sys.stdout,
              sort_keys=True, indent=4, separators=(',', ': '))
    print()
# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    main()