-
Notifications
You must be signed in to change notification settings - Fork 68
/
sqlparse_v1.3.py
217 lines (165 loc) · 8.46 KB
/
sqlparse_v1.3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#sqlparse.py
#
#This program parses an SQLite3 database for deleted entries and
#places the output into either a TSV file or a text file
#
#The SQLite file format, offsets etc is described at
#sqlite.org/fileformat.html
#
#
# Copyright (C) 2015 Mari DeGrazia ([email protected])
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You can view the GNU General Public License at <http://www.gnu.org/licenses/>
#
# Version History:
# v1.1 2013-11-05
#
# v1.2 2015-06-20
#support added in to print out non b-tree pages
#
# v.1.3 2015-06-21
#minor changes / comments etc.
#
#
#Find a bug???? Please let me know and I'll try to fix it (if you ask nicely....)
#
import struct
from optparse import OptionParser
import sys
#function to remove the non-printable characters, tabs and white spaces
def remove_ascii_non_printable(chunk):
    """Collapse whitespace and keep only printable ASCII characters.

    Every run of whitespace (spaces, tabs, newlines, ...) is collapsed to a
    single space and leading/trailing whitespace is removed. After that,
    only characters in the printable ASCII range 0x20-0x7E are kept.

    chunk -- text to clean: a str, or a bytes object on Python 3.
    Returns the cleaned text as a str.
    """
    if isinstance(chunk, bytes) and not isinstance(chunk, str):
        # Python 3 bytes (on Python 2 bytes *is* str, so this branch is
        # skipped). Collapse whitespace at the byte level, then decode with
        # latin-1 so every byte maps 1:1 onto the code point ord() sees.
        chunk = b' '.join(chunk.split()).decode('latin-1')
    else:
        chunk = ' '.join(chunk.split())
    # 32..126 inclusive. The previous "< 126" test dropped '~' (0x7E), and
    # the "== 9" (tab) clause could never match because split() has already
    # replaced every tab with a single space.
    return ''.join([ch for ch in chunk if 32 <= ord(ch) <= 126])
# Command-line interface: -f (input database) and -o (report file) are
# mandatory; -r (raw text report) and -p (dump non-leaf pages) are optional.
usage = ("Parse deleted records from an SQLite file into a TSV File or text file \n"
         "Examples:\n"
         "-f /home/sanforensics/smsmms.db -o report.tsv\n"
         "-f /home/sanforensics/smssms.db -r -o report.txt \n")

parser = OptionParser(usage=usage)
parser.add_option("-f", "--file", dest = "infile", help = "sqlite database file", metavar = "smsmms.db")
parser.add_option("-o", "--output", dest = "outfile", help = "Output to a tsv file. Strips white space, tabs and non-printable characters from data field", metavar = "output.tsv")
parser.add_option("-r", "--raw", action ="store_true", dest = "raw", help = "Optional. Will out put data field in a raw format and text file.", metavar = "output.tsv")
parser.add_option("-p", "--printpages", action ="store_true", dest = "printpages", help = "Optional. Will print any printable non-whitespace chars from all non-leaf b-tree pages (in case page has been re-purposed). WARNING: May output a lot of string data.")

(options, args) = parser.parse_args()

# No arguments given by the user: show the help text and exit.
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(0)

# Input or output file missing: this is a usage error, so report it and
# exit with a non-zero status instead of signalling success.
if options.infile is None or options.outfile is None:
    parser.print_help()
    print("Filename or Output file not given")
    sys.exit(1)
# Open the input database and the report file, then confirm the input looks
# like an SQLite database.
import os

try:
    f = open(options.infile, "rb")
except IOError:
    # Catch only I/O failures; a bare except would also swallow
    # KeyboardInterrupt and SystemExit.
    print("File not Found")
    sys.exit(1)

try:
    output = open(options.outfile, 'w')
except IOError:
    print("Error opening output file")
    f.close()
    sys.exit(1)

# Write the column header unless the raw text report was requested.
if not options.raw:
    output.write("Type\tOffset\tLength\tData\n")

# File size, needed later to know when the last page has been processed.
# Taken from the file system so the whole file is never read into memory.
filesize = os.stat(options.infile).st_size

# Verify the file is an SQLite db: the first 16 bytes hold the header string.
# Nothing has been read yet, so the file position is already 0.
header = f.read(16)
if b"SQLite" not in header:
    print("File does not appear to be an SQLite File")
    f.close()
    output.close()
    sys.exit(1)
# The SQLite database is made up of fixed-size pages. The page size is the
# 2-byte big-endian integer stored at file offset 16.
f.seek(16)
raw_pagesize = f.read(2)
if len(raw_pagesize) < 2:
    print("File does not appear to be an SQLite File")
    sys.exit(1)
pagesize = struct.unpack('>H', raw_pagesize)[0]
if pagesize == 1:
    # The file format stores a page size of 65536 as the value 1.
    pagesize = 65536
if pagesize == 0:
    # A zero page size would never advance the offset below (endless loop).
    print("Invalid page size in SQLite header")
    sys.exit(1)

# According to sqlite.org/fileformat.html the record data lives in the
# table b-tree leaf pages, which are marked by a flag byte of 13.
# Walk every page, starting at offset 0 so that strings in the first page
# can be dumped by the -p option as well.
# NOTE: the flag is read from the very first byte of each page, so the first
# page (which begins with the file header string) never matches flag 13.
offset = 0
while offset < filesize:
    # Move to the beginning of the page and read the b-tree flag
    # (kept as a signed byte so reported type numbers are unchanged).
    f.seek(offset)
    flag = struct.unpack('>b', f.read(1))[0]

    if flag == 13:
        # Leaf table b-tree page. The remaining 7 bytes of the 8-byte header
        # are: first freeblock offset (2), number of cells (2), start of the
        # cell content area (2) and one more byte that is not needed here.
        page_header = f.read(7)
        if len(page_header) < 7:
            # Truncated final page: nothing more can be parsed.
            break
        # Unsigned fields: signed unpacking turned offsets >= 32768 negative.
        freeblock_offset, num_cells, cell_offset = struct.unpack('>HHH', page_header[:6])
        if cell_offset == 0:
            # A zero cell content offset is interpreted as 65536.
            cell_offset = 65536

        # Unallocated space sits after the header (8 bytes) and the cell
        # pointer array (2 bytes per cell), and before the first cell.
        start = 8 + (num_cells * 2)
        # Clamp at zero: a negative length (corrupt header) would make
        # f.read() return everything up to the end of the file.
        length = max(cell_offset - start, 0)

        f.seek(offset + start)
        unallocated = f.read(length)

        if options.raw:
            output.write("Unallocated, Offset " + str(offset + start) + " Length " + str(length) + "\n")
            output.write("Data:\n")
            output.write(unallocated)
            output.write("\n\n")
        else:
            # Reduce the data to its printable strings.
            unallocated = remove_ascii_non_printable(unallocated)
            if unallocated != "":
                output.write("Unallocated" + "\t" + str(offset + start) + "\t" + str(length) + "\t" + str(unallocated) + "\n")

        # Follow the freeblock chain. Remember visited offsets so a corrupt,
        # cyclic chain cannot loop forever.
        seen_freeblocks = set()
        while freeblock_offset != 0 and freeblock_offset not in seen_freeblocks:
            seen_freeblocks.add(freeblock_offset)

            # Each freeblock starts with the offset of the next freeblock
            # (2 bytes) followed by the size of this freeblock (2 bytes).
            f.seek(offset + freeblock_offset)
            fb_header = f.read(4)
            if len(fb_header) < 4:
                # Chain points past the end of the file.
                break
            next_fb_offset, free_block_size = struct.unpack('>HH', fb_header)

            # Re-read the whole freeblock, including its 4-byte header.
            f.seek(offset + freeblock_offset)
            free_block = f.read(free_block_size)

            if options.raw:
                output.write("Free Block, Offset " + str(offset + freeblock_offset) + ", Length " + str(free_block_size) + "\n")
                output.write("Data:\n")
                output.write(free_block)
                output.write("\n\n")
            else:
                free_block = remove_ascii_non_printable(free_block)
                # Test the free block itself; this previously tested
                # "unallocated", dropping or emitting rows incorrectly.
                if free_block != "":
                    output.write("Free Block" + "\t" + str(offset + freeblock_offset) + "\t" + str(free_block_size) + "\t" + str(free_block) + "\n")

            freeblock_offset = next_fb_offset

    # Cheeky's Change: extract strings from every page that is not a leaf
    # table b-tree, to handle re-purposed/re-used pages. Valid flag values
    # are 2, 5, 10 and 13, but pages holding string data have also been
    # observed with flag 0, so all pages with flag != 13 are printed.
    elif options.printpages:
        # Read the rest of the page (the flag byte is already consumed),
        # filter out the unprintable characters, then write it.
        pagestring = f.read(pagesize - 1)
        printable_pagestring = remove_ascii_non_printable(pagestring)

        if options.raw:
            output.write("Non-Leaf-Table-Btree-Type_" + str(flag) + ", Offset " + str(offset) + ", Length " + str(pagesize) + "\n")
            output.write("Data: (ONLY PRINTABLE STRINGS ARE SHOWN HERE. FOR RAW DATA, CHECK FILE IN HEX VIEWER AT ABOVE LISTED OFFSET):\n\n")
            output.write(printable_pagestring)
            output.write("\n\n")
        else:
            output.write("Non-Leaf-Table-Btree-Type_" + str(flag) + "\t" + str(offset) + "\t" + str(pagesize) + "\t" + printable_pagestring + "\n")

    # Advance to the next page.
    offset = offset + pagesize

output.close()
f.close()
#end