-
Notifications
You must be signed in to change notification settings - Fork 1
/
sample.py
107 lines (99 loc) · 3.39 KB
/
sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/python
"""
Collect samples pairing C source and generated assembly
"""
import sqlite3
class Sample():
    """
    A training set element pairing source code with machine architecture and disassembly

    Every instance appends itself to the class-level ``all_samples`` list when
    it is constructed, so the class also acts as an in-memory registry that
    the classmethods below search, report on, and export to sqlite3.
    """
    # Registry of every Sample constructed so far, in creation order.
    all_samples = []

    def __init__(self, name_space, arch, name):
        """
        Create a sample with no source or assembly yet and register it

        name_space, arch and name together are the key matched by find().
        """
        self.arch = arch
        self.name_space = name_space
        self.name = name
        self.source = None
        self.assembly = None
        self.metadata = {}
        Sample.all_samples.append(self)

    def __repr__(self):
        """
        Return the one-line label used by display() and check_all()
        """
        return f"Sample(namespace={self.name_space}, arch={self.arch}, function={self.name})"

    @classmethod
    def find(cls, name_space, arch, name):
        """
        Locate an existing sample with matching name, name_space and arch

        Returns the first registered match, or None when there is none.
        """
        # The registry is always Sample.all_samples (that is where __init__
        # appends), so it is referenced explicitly rather than through cls.
        return next(
            (
                s for s in Sample.all_samples
                if s.name == name and s.name_space == name_space and s.arch == arch
            ),
            None,
        )

    @classmethod
    def display_all(cls):
        """
        Display all current samples, preceded by a total count
        """
        print(f"Total samples = {len(Sample.all_samples)}")
        for s in Sample.all_samples:
            s.display()

    def set_source(self, source):
        """
        set the C source code
        """
        self.source = source

    def set_assembly(self, assembly):
        """
        Set the objdump'd disassembly for this sample
        """
        self.assembly = assembly

    @staticmethod
    def _report_missing(kind, missing):
        """
        Print the check_all() report lines for one kind ("source" or "assembly")
        """
        if not missing:
            print(f"\tNo functions are missing {kind} code")
            return
        print(f"Samples missing {kind} code:")
        for s in missing:
            print(f"\t{s!r}")

    @classmethod
    def check_all(cls):
        """
        Look for incomplete samples

        Prints the samples whose source is None, then those whose assembly
        is None. An empty string counts as present.
        """
        print("Sample:check_all")
        source_missing = [s for s in Sample.all_samples if s.source is None]
        assembly_missing = [s for s in Sample.all_samples if s.assembly is None]
        cls._report_missing("source", source_missing)
        cls._report_missing("assembly", assembly_missing)

    @staticmethod
    def _print_section(title, text, placeholder):
        """
        Print a titled, tab-indented listing of text, or placeholder if text is falsy
        """
        print(title)
        if text:
            for line in text.split("\n"):
                print("\t" + line)
        else:
            print(placeholder)

    def display(self):
        """
        Display a single sample

        Prints the sample label, then the source and the assembly, one
        tab-indented line per line of text. A missing or empty text is
        replaced by a "not found" marker.
        """
        print(repr(self))
        self._print_section("Source:", self.source, "\t *** No source found ***")
        self._print_section("Assembly code:", self.assembly, "\t *** No assembly found ***")

    @classmethod
    def build_database(cls, db_path="Samples.db"):
        """
        create an sqlite3 database of Samples

        db_path is the database file to write; it defaults to "Samples.db"
        in the current working directory. Any existing ``sample`` table is
        dropped and rebuilt from the in-memory registry. Samples with no
        source or assembly are stored with NULL in those columns.
        """
        rows = [
            (s.name_space, s.arch, s.name, s.source, s.assembly)
            for s in Sample.all_samples
        ]
        con = sqlite3.connect(db_path)
        try:
            # "with con" commits on success and rolls back on error, but it
            # does not close the connection; the finally clause does that.
            with con:
                con.execute("DROP TABLE IF EXISTS sample")
                con.execute("CREATE TABLE IF NOT EXISTS sample(id INTEGER PRIMARY KEY AUTOINCREMENT, namespace TEXT, arch TEXT, name TEXT, source TEXT, assembly TEXT)")
                con.executemany(
                    "INSERT INTO sample (namespace, arch, name, source, assembly) VALUES (?, ?, ?, ?, ?)",
                    rows,
                )
        finally:
            con.close()