forked from HexHive/BOPC
-
Notifications
You must be signed in to change notification settings - Fork 0
/
simulate.py
executable file
·2658 lines (1911 loc) · 111 KB
/
simulate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python2
# -------------------------------------------------------------------------------------------------
#
# ,ggggggggggg, _,gggggg,_ ,ggggggggggg, ,gggg,
# dP"""88""""""Y8, ,d8P""d8P"Y8b, dP"""88""""""Y8, ,88"""Y8b,
# Yb, 88 `8b,d8' Y8 "8b,dPYb, 88 `8b d8" `Y8
# `" 88 ,8Pd8' `Ybaaad88P' `" 88 ,8Pd8' 8b d8
# 88aaaad8P" 8P `""""Y8 88aaaad8P",8I "Y88P'
# 88""""Y8ba 8b d8 88""""" I8'
# 88 `8bY8, ,8P 88 d8
# 88 ,8P`Y8, ,8P' 88 Y8,
# 88_____,d8' `Y8b,,__,,d8P' 88 `Yba,,_____,
# 88888888P" `"Y8888P"' 88 `"Y8888888
#
# The Block Oriented Programming (BOP) Compiler - v2.1
#
#
# Kyriakos Ispoglou (ispo) - [email protected]
# PURDUE University, Fall 2016-18
# -------------------------------------------------------------------------------------------------
#
#
# simulate.py:
#
# This module performs the concolic execution. That is it verifies a solution proposed by search
# module. For more details please refer to the paper.
#
#
# * * * ---===== TODO list =====--- * * *
#
# [1]. Consider overlapping cases. For instance, when we write e.g., 8 bytes at address X and
# then we write 4 bytes at address X+1, we may have issues
#
#
# -------------------------------------------------------------------------------------------------
from coreutils import *
import path
import angr
import archinfo
import struct
import signal
import copy
import time
# ------------------------------------------------------------------------------------------------
# Constant Definitions
# ------------------------------------------------------------------------------------------------
# WARNING: In case that relative addresses fail, adjust them.
# TODO: Add command line options for them.
MAX_MEM_UNIT_BYTES = 8 # max. memory unit size (for x64 is 8 bytes)
MAX_MEM_UNIT_BITS = MAX_MEM_UNIT_BYTES << 3 # max. memory unit size in bits
ALLOCATOR_BASE_ADDR = 0xd8000000 # the base address of the allocator
ALLOCATOR_GRANULARITY = 0x1000 # the allocation size
ALLOCATOR_CEIL_ADDR = 0xd9000000 # the upper bound of the allocator
ALLOCATOR_NAME = '$alloca'
POOLVAR_BASE_ADDR = 0xca000000 # the base address of the pool
POOLVAR_GRANULARITY = 0x1000 # (safe) offset between pools
POOLVAR_NAME = '$pool'
SIM_MODE_INVALID = 0xffff # invalid simulation mode
SIM_MODE_FUNCTIONAL = 0x0001 # simulation mode: Functional
SIM_MODE_DISPATCH = 0x0000 # simulation mode: Dispath
MAX_BOUND = 0x4000
# addresses that are not recognized as R/W but they are
_whitelist_ = [
0x2010028, # fs:0x28
0xc0000000, # __errno_location
0xc0000070 # fopen() internal
]
# ALLOCATOR_BASE_ADDR = 0x686180 # the base address of the allocator
# ALLOCATOR_CEIL_ADDR = 0x686180+0x10000 # the upper bound of the allocator
# POOLVAR_BASE_ADDR = 0x680040 # the base address of the pool
# MAX_BOUND = 0x400
EXTERNAL_UNINITIALIZED = -1
# -------------------------------------------------------------------------------------------------
# simulate: This class simulates the execution between a pair of accepted blocks
#
class simulate:
''' ======================================================================================= '''
''' INTERNAL FUNCTIONS - AUXILIARY '''
''' ======================================================================================= '''
# ---------------------------------------------------------------------------------------------
# __sig_handler(): Symbolic execution may take forever to complete. To deal with it, we set
# an alarm. When the alarm is triggered, this singal handler is invoked and throws an
# exception that causes the symbolic execution to halt.
#
# :Arg signum: Signal number
# :Arg frame: Current stack frame
# :Ret: None.
#
def __sig_handler( self, signum, frame ):
if signum == signal.SIGALRM: # we only care about SIGALRM
# angr may ignore the exception, so let's throw many of them :P
raise Exception("Alarm triggered after %d seconds" % SE_TRACE_TIMEOUT)
raise Exception("Alarm triggered after %d seconds" % SE_TRACE_TIMEOUT)
raise Exception("Alarm triggered after %d seconds" % SE_TRACE_TIMEOUT)
raise Exception("Alarm triggered after %d seconds" % SE_TRACE_TIMEOUT)
# ---------------------------------------------------------------------------------------------
# __in_constraints(): This function checks whether a symbolic variable is part of the
# constraints.
#
# :Arg symv: The symbolic variable to check
# :Arg state: Current state of the symbolic execution
# :Ret: If symv is in constraints, function returns True. Otherwise it returns False.
#
def __in_constraints( self, symv, state=None ):
if not state: # if no state is given, use the current one
state = self.__state
# drop the "uninitialized" thing from everywhere
symvstr = symv.shallow_repr().replace("{UNINITIALIZED}", "")
# We may have this in the constraints:
# <Bool Reverse(mem_801_64[7:0] .. Reverse(mem_801_64)[55:0]) != 0x0>
#
# But symvstr is:
# <BV64 Reverse(mem_801_64[7:0] .. Reverse(mem_801_64)[55:0])>
#
# A quick fix is to drop the type:
symvstr2 = symvstr[symvstr.find(' '):-1]
# print 'symvstr2', symvstr2
# this is the old style check
if symvstr2 in ' '.join([c.shallow_repr().replace("{UNINITIALIZED}", "") \
for c in state.se.constraints]):
return True
# reinforce function with a stronger check
for constraint in state.se.constraints:
# print 'CONTRST', constraint
try:
# treat constraint as an AST and iterate over its leaves
for leaf in constraint.recursive_leaf_asts:
# print '\tLEAF', symv, symvstr, leaf, leaf.shallow_repr().replace("{UNINITIALIZED}", "")
# we can't compare them directly, so we cast them into strings first
# (not a very "clean" way to do that, but it works)
if leaf.shallow_repr().replace("{UNINITIALIZED}", "") == symvstr:
return True # symbolic variable found!
except Exception as err:
# fatal('__in_constraints() unexpected exception: %s' % str(err))
pass
return False # symbolic variable not found
# ---------------------------------------------------------------------------------------------
# __getreg(): Get the symbolic value of a register that has in the current state.
#
# :Arg reg: The name of the register
# :Arg state: Current state of the symbolic execution
# :Ret: The symbolic value for that register.
#
def __getreg( self, reg, state=None ):
if not state: # if no state is given, use the current one
state = self.__state
try:
return {
'rax' : state.regs.rax,
'rbx' : state.regs.rbx,
'rcx' : state.regs.rcx,
'rdx' : state.regs.rdx,
'rsi' : state.regs.rsi,
'rdi' : state.regs.rdi,
'rbp' : state.regs.rbp,
'rsp' : state.regs.rsp,
'r8' : state.regs.r8,
'r08' : state.regs.r8,
'r9' : state.regs.r9,
'r09' : state.regs.r9,
'r10' : state.regs.r10,
'r11' : state.regs.r11,
'r12' : state.regs.r12,
'r13' : state.regs.r13,
'r14' : state.regs.r14,
'r15' : state.regs.r15,
}[ reg ]
except KeyError:
fatal("Unknow register '%s'" % reg)
# ---------------------------------------------------------------------------------------------
# __mread(): This function reads from memory. The problem here is that we have to explicitly
# specify how to interpret memory (.uint8_t, .uint32_t, etc.), according to the number
# of bytes that we want to read. This results in cumbersome code, as we need a different
# case for every possible length, so we provide a simply interface through this function.
#
# :Arg state: Current state of the symbolic execution
# :Arg addr: Address to read from
# :Arg length: Number of bytes to read
# :Ret: The contents of the desired memory "area".
#
def __mread( self, state, addr, length ):
# dbg_prnt(DBG_LVL_3, "Reading %d bytes from 0x%x" % (length, addr))
return state.memory.load(addr, length, endness=archinfo.Endness.LE)
'''
try:
return {
1 : state.mem[ addr ].uint8_t.resolved,
2 : state.mem[ addr ].uint16_t.resolved,
4 : state.mem[ addr ].uint32_t.resolved,
8 : state.mem[ addr ].uint64_t.resolved
}[ length ]
except KeyError:
dbg_prnt(DBG_LVL_3, "Reading %d bytes from 0x%x" % (length, addr))
return state.memory.load(addr, length) # for other sizes, just use load()
'''
# ---------------------------------------------------------------------------------------------
# __mwrite(): Similar to __mread() but this function writes to memory instead.
#
# :Arg state: Current state of the symbolic execution
# :Arg addr: Address to write to
# :Arg length: Number of bytes to write
# :Ret: None.
#
def __mwrite( self, state, addr, length, value ):
state.memory.store(addr, value, length, endness=archinfo.Endness.LE)
'''
if length == 1: state.mem[addr].uint8_t = value
elif length == 2: state.mem[addr].uint16_t = value
elif length == 4: state.mem[addr].uint32_t = value
elif length == 8: state.mem[addr].uint64_t = value
else:
dbg_prnt(DBG_LVL_3, "Writing %d bytes to 0x%x" % (length, addr))
state.memory.store(addr, value, length)
'''
# ---------------------------------------------------------------------------------------------
# __get_permissions(): Get
#
# :Arg state: Current state of the symbolic execution
# :Arg addr: Address to write to
# :Arg length: Number of bytes to write
# :Ret: None.
#
def __get_permissions( self, addr, length=1, state=None ):
if not state: # if no state is given, use the current one
state = self.__state
# TODO: check permissions for addr+1, addr+2, ... addr+length-1
#warn('POOL UPPER BOUND %x' % (POOLVAR_BASE_ADDR + self.__plsz))
# special cases first
if addr < 0x10000:
return ''
elif ALLOCATOR_BASE_ADDR <= addr and addr <= ALLOCATOR_CEIL_ADDR:
return 'RW'
# TOOD:!!! 0x10000
elif POOLVAR_BASE_ADDR <= addr and addr <= POOLVAR_BASE_ADDR + self.__plsz + 0x1000:
return 'RW'
# special case when a stack address is in the next page
# TODO: make it relative from STACK_BASE_ADDR
elif addr & 0x07ffffffffff0000 == 0x07ffffffffff0000:
return 'RW'
try:
for _, sec in self.__proj.loader.main_object.sections_map.iteritems():
if sec.contains_addr(addr):
return ('R' if sec.is_readable else '') + \
('W' if sec.is_writable else '') + \
('X' if sec.is_executable else '')
permissions = state.se.eval(state.memory.permissions(addr))
return ('R' if permissions & 4 else '') + \
('W' if permissions & 2 else '') + \
('X' if permissions & 1 else '')
except angr.errors.SimMemoryError:
return '' # no permissions at all
# ---------------------------------------------------------------------------------------------
# __symv_in(): Check whether a symbolic expression contains a given symbolic variable.
#
# :Arg symexpr: The symblolic expression
# :Arg symv: The symbolic variable to look for
# :Ret: If symexpr contains symv, function returns True. Otherwise it returns False.
#
def __symv_in( self, symexpr, symv ):
if symexpr == None or symv == None: # check special cases
return False
# if symexpr.shallow_repr() == symv.shallow_repr():
# return True
try:
# treat symexpr as an AST and iterate over its leaves
for leaf in symexpr.recursive_leaf_asts:
# we can't compare them directly, so we cast them into strings first
# (not a very "clean" way to do that, but it works)
if leaf.shallow_repr() == symv.shallow_repr():
return True # variable found!
return False # variable not found
except Exception as err:
# This --> BOPC.py -ddd -b eval/nginx/nginx1 -s payloads/ifelse.spl -a load -f gdb -e -1
# fatal('__symv_in() unexpected exception: %s' % str(err))
raise Exception('__symv_in() unexpected exception: %s' % str(err))
# ---------------------------------------------------------------------------------------------
# __alloc_un(): "Allocate" memory for uninitialized symbolic variables (if needed).
#
# :Arg state: Current symbolic state of the execution
# :Arg symv: The symbolic variable
# :Ret: If symv is uninitialized, function returns True; otherwise it returns False.
#
def __alloc_un( self, state, symv ):
if symv == None: # make sure that variable is valid
return False
# This code works fine for single variables but not for expressions:
#
# # nothing to do when variable is not uninitialized (i.e. initialized)
# if "{UNINITIALIZED}" not in symv.shallow_repr():
# return False
#
# # After calling __alloc_un(), a variable will still have the UNINITIALIZED keyword
# # even though, it has a single solution. Avoid initializing a variable twice.
#
# con = state.se.eval_upto(symv, 2) # try to get 2 solutions
# addr = state.se.eval(con[0])
#
# if len(con) > 1 or not (addr >= ALLOCATOR_BASE_ADDR and addr <= ALLOCATOR_CEIL_ADDR):
# # initialize variable
addr = state.se.eval(symv) # try to concretize it
# print '***** ALLOC UN:', hex(addr), symv
# we say < 0x1000, to catch cases with small offsets:
# e.g., *<BV64 Reverse(stack_16660_262144[258239:258176]) + 0x68>
# which gets concretized to 0x68
if addr < 0x1000 or addr > 0xfffffffffffff000:
# if addr == 0: # < ALLOCATOR_BASE_ADDR or addr > ALLOCATOR_CEIL_ADDR
alloca = ALLOCATOR_BASE_ADDR + self.__alloc_size
# add the right contraint, to make variable, point where you want
# address now becomes concrete (it has exactly 1 solution)
# in case that addr > 0, make sure that symv is concretized from 0
# (otherwise, we'll start before self.__alloc_size)
x = state.se.BVS('x', 64)
# print 'x is ', x, alloca + addr, symv
# this indirection ensure that symv concretized to 64 bits
state.add_constraints(x == alloca + addr)
state.add_constraints(symv == x)
#
# print '-->', symv, 'goes to ', hex(alloca + addr)
self.__relative[alloca] = '%s + 0x%03x' % (ALLOCATOR_NAME, self.__alloc_size)
self.__sym[ alloca ] = symv
# shift allocator
self.__alloc_size += ALLOCATOR_GRANULARITY
return True # we had an allocation
return False # no allocation
# ---------------------------------------------------------------------------------------------
# __init_mem(): This function initializes (if needed) a memory cell. When we start execution
# from an arbitrary point, it's likely that the memory cell will be empty/uninitialized.
# Therefore, we need to assign a symbolic variable to it first.
#
# A special case here is global variables from .bss and .data, which have a default value
# of 0. Therefore, these variables are actually uninitialized, but instead of containing
# a symbolic variable, they contain the default value (a bitvector of value 0). However,
# this can cause problems to the symbolic execution, as variables are already concrete.
#
# :Arg state: Current symbolic state of the execution
# :Arg addr: Address of the variable
# :Arg length: Length of the variable
# :Ret: If memory was initialized, function returns True. Otherwise it returns False.
#
def __init_mem( self, state, addr, length=MAX_MEM_UNIT_BYTES ):
if addr in self.__mem: # memory cell is already initialized
return False
self.__mem[addr] = length # simply mark used addresses
# get ELF sections that give default values to their uninitialized variables
bss = self.__proj.loader.main_object.sections_map[".bss"]
data = self.__proj.loader.main_object.sections_map[".data"]
# print 'INIT MEMORY', hex(addr), self.__mread(state, addr, length)
# if the memory cell is empty (None) or if the cell is initialized with a
# default value, then we should give it a symbolic variable. You can also use:
# state.inspect.mem_read_expr == None:
#
if self.__mread(state, addr, length) == None or \
bss.min_addr <= addr and addr <= bss.max_addr or \
data.min_addr <= addr and addr <= data.max_addr or \
ALLOCATOR_BASE_ADDR <= addr and addr <= ALLOCATOR_CEIL_ADDR:
# bss.min_addr <= addr and addr + length <= bss.max_addr or \
# data.min_addr <= addr and addr + length <= data.max_addr:
# Alternative: state.memory.make_symbolic('mem', addr, length << 3) (big endian)
symv = state.se.BVS("mem_%x" % addr, length << 3)
# write symbolic variable to both states (current and original)
self.__mwrite(state, addr, length, symv)
self.__mwrite(self.__origst, addr, length, symv)
# get symbolic variable
self.__sym[ addr ] = self.__mread(state, addr, length)
return True # memory initialized
# if it's uninitialized, simply add it variable to the __sym table
# (but memory is not initialized at all)
if "{UNINITIALIZED}" in self.__mread(state, addr, length).shallow_repr():
self.__sym[ addr ] = self.__mread(state, addr, length)
return False # memory not initialized
# ---------------------------------------------------------------------------------------------
''' ======================================================================================= '''
''' INTERNAL FUNCTIONS - EXECUTION HOOKS '''
''' ======================================================================================= '''
# ---------------------------------------------------------------------------------------------
# __dbg_read_hook(): This callback function is invoked when a memory "area" is being read.
#
# :Arg state: Current state of the symbolic execution
# :Ret: None.
#
def __dbg_read_hook( self, state ):
if self.__disable_hooks: # if hooks are disabled, abort
return
# if you read/write memory inside the hook, this operation will trigger __dbg_read_hook()
# again, thus resulting in a endless recursion. We need "exclusive access" here, so we
# disable hooks inside function's body. This is pretty much like a mutex.
self.__disable_hooks = True
# TODO: the idea of simulation modes is not perfect
# a block can modify the data unintentionally
#
# update simulation mode
# if self.__blk_start <= state.addr and state.addr < self.__blk_end:
# self.__sim_mode = SIM_MODE_FUNCTIONAL
# else:
# self.__sim_mode = SIM_MODE_DISPATCH
print('state.inspect.mem_read_address', state.inspect.mem_read_address)
# if the address is an uninitialized symbolic variable, it can point to any location,
# thus, when it's being evaluated it gets a value of 0. To fix this, we "allocate" some
# memory and we make the address point to it.
self.__alloc_un(state, state.inspect.mem_read_address)
# now you can safely, "evaluate" address and concretize it
addr = state.se.eval(state.inspect.mem_read_address)
# concretize size (newer versions of angr never set state.inspect.mem_read_length to None)
if state.inspect.mem_read_length == None:
size = MAX_MEM_UNIT_BYTES # if size is None, set it to default
else:
size = state.se.eval(state.inspect.mem_read_length)
self.__init_mem(state, addr, size) # initialize memory (if needed)
if state.inspect.instruction:
insn_addr = state.inspect.instruction
else:
insn_addr = state.addr
dbg_prnt(DBG_LVL_3, '\t0x%08x: mem[0x%x] = %s (%x bytes)' %
(insn_addr, addr, self.__mread(state, addr, size), size), pre='[R] ')
# make sure that the address that you read from has +R permissions
# TODO: fs:0x28 (canary hits an error here) 0x2010028
if 'R' not in self.__get_permissions(addr, state) and addr not in _whitelist_:
raise Exception("Attempted to read from an non-readable address '0x%x'" % addr)
self.__disable_hooks = False # release "lock" (i.e., enable hooks again)
# ---------------------------------------------------------------------------------------------
# __dbg_write_hook(): This callback function is invoked when a memory "area" is being written.
#
# :Arg state: Current state of the symbolic execution
# :Ret: None.
#
def __dbg_write_hook( self, state ):
if self.__disable_hooks: # if hooks are disabled, abort
return
# as in __dbg_read_hook(), we need mutual exclusion here as well
self.__disable_hooks = True
# update simulation mode
# if self.__blk_start <= state.addr and state.addr < self.__blk_end:
# self.__sim_mode = SIM_MODE_FUNCTIONAL
# else:
# self.__sim_mode = SIM_MODE_DISPATCH
if state.inspect.instruction:
insn_addr = state.inspect.instruction
else:
insn_addr = state.addr
# as in __dbg_read_hook(), fix uninitialized addresses first
self.__alloc_un(state, state.inspect.mem_write_address)
# now you can safely, "evaluate" address and concretize it
addr = state.se.eval(state.inspect.mem_write_address)
# concretize size (newer versions of angr never set state.inspect.mem_read_length to None)
if state.inspect.mem_write_length == None:
size = MAX_MEM_UNIT_BYTES # if size is None, set it to default
else:
size = state.se.eval(state.inspect.mem_write_length)
dbg_prnt(DBG_LVL_3, '\t0x%08x: mem[0x%x] = %s (%x bytes)' %
(insn_addr, addr, state.inspect.mem_write_expr, size), pre='[W] ')
# print 'BEFORE', self.__mread(state, addr, size), state.inspect.mem_write_expr
# ISPO = state.inspect.mem_write_expr
if 'W' not in self.__get_permissions(addr, state) and addr not in _whitelist_:
raise Exception("Attempted to write to an non-writable address '0x%x'" % addr)
# if we are trying to write to an immutable cell, currect execution path must be discarded
if self.__sim_mode == SIM_MODE_DISPATCH:
if addr in self.__imm:
oldval = state.se.eval(state.memory.load(addr, size))
newval = state.se.eval(state.inspect.mem_write_expr)
# if the new value is the same with the old one, we're good :)
if oldval != newval: # if value really changes
self.__disable_hooks = False
raise Exception("Attempted to write to immutable address '0x%x'" % addr)
if state.inspect.mem_write_expr in self.__ext:
self.__ext[ state.inspect.mem_write_expr ] = addr
# if it's not the 1st time that you see this address
if not self.__init_mem(state, addr, size):
# if address is not concretized already and it's in the symbolic variable set
if not isinstance(self.__mem[addr], tuple) and addr in self.__sym:
symv = self.__sym[ addr ] # get symbolic variable
# check whether symbolic variable persists after write
if not self.__symv_in(state.inspect.mem_write_expr, symv):
# Variable gets vanished. We should concretize it now, because, after
# that point, memory cell is dead; that is it's not part of the constraints
# anymore, as its original value got lost.
#
# To better illustrate the reason, consider the following code:
# a = input();
# if (a > 10 && a < 20) {
# a = 0;
# /* target block */
# }
#
# Here, if we concretize 'a' at the end of the symbolic execution if will
# get a value of 0, which of course is not the desired one. The coorect
# approach, is to concretize, right before it gets overwritten.
# if variable is part of the constraints, add it to the set
if self.__in_constraints(symv, state):
val = state.se.eval(symv) # self.__mread(state, addr, size))
self.__mem[addr] = (val, size)
emph('Address/Value pair found: *0x%x = 0x%x (%d bytes)' %
(addr, val, size), DBG_LVL_2)
# if the contents of that cell get lost, we cannot use AWP to write to it
# anymore
#
# TODO: Not sure if this correct
# UPDATE: Immutables should be fine when we write them with the exact same valut
# for i in range(8):
# self.__imm.add(addr + i)
# print 'AFTER', self.__mread(state, addr, size), state.inspect.mem_write_expr
# self.FOO[ self.__mread(state, addr, size) ] = ISPO
# All external inputs (sockets, file descriptors, etc.) should be first written somewhere
# in memory / registers eventually, so we can concretize them afterwards
self.__disable_hooks = False # release "lock" (i.e., enable hooks again)
# ---------------------------------------------------------------------------------------------
# __dbg_symv_hook(): This callback function is invoked when a new symbolic variable is being
# created.
#
# :Arg state: Current state of the symbolic execution
# :Ret: None.
#
def __dbg_symv_hook( self, state ):
name = state.inspect.symbolic_name # get name of the variable
# we're only interested in symbolic variables that come from external inputs (sockets,
# file descriptors, etc.), as register and memory symbolic variables are already been
# handled.
if not name.startswith('mem_') and not name.startswith('reg_') \
and not name.startswith('x_') and not name.startswith('cond_'):
# x and cond are our variable so they're discarded too
dbg_prnt(DBG_LVL_3, " New symbolic variable '%s'" % name, pre='[S]')
self.__ext[ state.inspect.symbolic_expr ] = EXTERNAL_UNINITIALIZED
# ---------------------------------------------------------------------------------------------
# __dbg_reg_wr_hook(): This callback function is invoked when a register is being modified.
#
# :Arg state: Current state of the symbolic execution
# :Ret: None.
#
def __dbg_reg_wr_hook( self, state ):
if self.__disable_hooks: # if hooks are disabled, abort
return
# as in __dbg_read_hook(), we need mutual exclusion here as well
self.__disable_hooks = True
# update simulation mode
# if self.__blk_start <= state.addr and state.addr < self.__blk_end:
# self.__sim_mode = SIM_MODE_FUNCTIONAL
# else:
# self.__sim_mode = SIM_MODE_DISPATCH
if state.inspect.instruction:
insn_addr = state.inspect.instruction
else:
insn_addr = state.addr
# get register name (no exceptions here)
regnam = state.arch.register_names[ state.inspect.reg_write_offset ]
if regnam in HARDWARE_REGISTERS: # we don't care about all registers (rip, etc.)
dbg_prnt(DBG_LVL_3, '\t0x%08x: %s = %s' %
(insn_addr, regnam, state.inspect.reg_write_expr), pre='[r] ')
# if simulation is in dispatch mode, check whether the modified register is immutable
if self.__sim_mode == SIM_MODE_DISPATCH:
# print 'IMM REGS', self.__imm_regs
if regnam in self.__imm_regs:
# if the new value is the same with the old one, we're good :)
# we can concretize them as SPL registers always have integer values
oldval = state.se.eval(self.__getreg(regnam))
newval = state.se.eval(state.inspect.reg_write_expr)
# if value really changes (and it has changed in the past)
if oldval != newval and \
self.__getreg(regnam).shallow_repr() != self.__inireg[regnam].shallow_repr():
self.__disable_hooks = False
raise Exception("Attempted to write to immutable register '%s'" % regnam)
else:
print("immutable register '%s' overwritten with same value 0x%x" % (regnam, newval))
# check whether symbolic variable persists after write
if not self.__symv_in(state.inspect.reg_write_expr, self.__inireg[regnam]):
if regnam not in self.__reg: # if register is already concretized, skip it
# concretize register (after this point, its value will get lost)
val = state.se.eval( self.__getreg(regnam, state) )
# if register is in the constraints, it should be part of the solution.
# But in any case we need the register to be in __reg, as its value is now
# lost, so we don't want any further register writes to be part of the
# solution.
if self.__in_constraints(self.__inireg[regnam], state):
self.__reg[ regnam ] = val
emph('Register found: %s = %x' % (regnam, val), DBG_LVL_2)
else:
# make it a tuple to distinguish the 2 cases
self.__reg[ regnam ] = (val,)
self.__disable_hooks = False # release "lock" (i.e., enable hooks again)
# ---------------------------------------------------------------------------------------------
# __dbg_call_hook(): This callback function is invoked when a function is invoked.
#
# :Arg state: Current state of the symbolic execution
# :Ret: None.
#
def __dbg_call_hook( self, state ):
if self.__disable_hooks: # if hooks are disabled, abort
return
# as in __dbg_read_hook(), we need mutual exclusion here as well
self.__disable_hooks = True
address = state.se.eval(state.inspect.function_address)
name = self.__proj.kb.functions[address].name
# This function is called to solve a difficult problem: Crashes.
# TODO: elaborate.
dbg_prnt(DBG_LVL_3, "\tCall to '%s' found." % name, pre='[C] ')
# ---------------------------------------------------------------------
# FILE *fopen(const char *path, const char *mode)
# ---------------------------------------------------------------------
if name == 'fopen':
# print 'RDI', state.regs.rdi
# print 'RSI', state.regs.rsi
# if rdi is an expression then we may need to
# we work similarly with __mem_RSVPs, but our task here is simpler
con_addr = state.se.eval(state.regs.rdi)
# print 'ADDR', hex(con_addr)
if 'W' not in self.__get_permissions(con_addr, state):
self.__alloc_un(state, state.regs.rdi)
#raise Exception("Attempted to write to an non-writable address '0x%x'" % addr)
con_addr = state.se.eval(state.regs.rdi)
# print 'ADDR', hex(con_addr)
name = SYMBOLIC_FILENAME
# if this address has already been written in the past, any writes will
# be overwritten, so discard current path
if con_addr in self.__mem or con_addr in self.__imm or (con_addr + 7) in self.__imm:
raise Exception("Address 0x%x has already been written or it's immutable. "
"Discard current path." % con_addr)
# write value byte-by-byte.
for i in range(len(name)):
self.__mwrite(state, con_addr + i, 1, name[i])
self.__imm.add(con_addr + i)
self.__inivar_rel[ con_addr ] = name
self.__mem[ con_addr ] = 0
dbg_prnt(DBG_LVL_2, "Writing call *0x%x = '%s'" % (con_addr, name))
# ---------------------------------------------------------------------
# int _IO_getc(_IO_FILE * __fp)
#
# TODO: Delete this code, or check for uninitialized FILE*
# ---------------------------------------------------------------------
elif name == '_IO_getc':
# print 'RDI', state.regs.rdi
error('Oups!')
pass
# ---------------------------------------------------------------------
# TODO: Do the same for others open(), strcmp() (in wuftpd) and so on
# ---------------------------------------------------------------------
# ---------------------------------------------------------------------
self.__disable_hooks = False # release "lock" (i.e., enable hooks again)
# ---------------------------------------------------------------------------------------------
''' ======================================================================================= '''
''' INTERNAL FUNCTIONS - MEMORY MANAGEMENT '''
''' ======================================================================================= '''
# ---------------------------------------------------------------------------------------------
# __get_var_values(): Get the values of an SPL variable (there can be >1)
#
# :Arg variable: The SPL variable
# :Ret: The values of that variable.
#
def __get_var_values( self, variable ):
# look for the declaration of "variable" (SPL compiler ensures it's uniqueness)
for stmt in self.__IR:
if stmt['type'] == 'varset' and stmt['name'] == variable:
return stmt['val']
# this should never be executed
fatal("Searching for non-existing variable '%s'" % variable)
# ---------------------------------------------------------------------------------------------
# __pool_RSVP(): Reserve some address space in the pool, to store a variable.
#
# :Arg variable: The SPL variable
# :Ret: The values of that variable.
#
def __pool_RSVP( self, variable ):
addr = POOLVAR_BASE_ADDR + self.__plsz # make address pointing to the end of the pool
self.__relative[ addr ] = '%s + 0x%03x' % (POOLVAR_NAME, self.__plsz)
# reserve some space in the pool to hold variable's values (shift down self.__plsz)
# (it's important as recursive calls in __init_variable_rcsv() can overwrite this space)
#
# NOTE: In current implementation, if there are >1 values, each of them has size 8.
# However we keep the code more general (i.e. independent of SPL compiler) so
# we don't use this observation.
self.__plsz += sum(map(lambda v : len(v) if isinstance(v, str) else 8,
self.__get_var_values(variable)))
return addr # return that address
# ---------------------------------------------------------------------------------------------
# __init_variable_rcsv(): Initialize a single SPL variable. This function writes the value(s)
# for that variable in memory. There are 2 types of variables. *Free* and *Register*.
# Free variables have no restrictions and therefore can be stored at any location (due
# to the AWP). Thus we reserve a "memory pool" somewhere in memory and we place all free
# variables there. Register variables are being passed to registers and therefore their
# address must be a valid (+RW) address that is being loaded to a register in a candidate
# block (they are usually on stack / heap).
#
# SPL allows variables to get the address of another variable. That is, initializing a
# variable may require to initialize another variable first, and so on. Hence this
# function is recursive. For example consider the following variables (expressed in IR):
#
# {'type':'varset', 'uid':2, 'val':['aaa'], 'name':'aaa'}
# {'type':'varset', 'uid':4, 'val':['\x01\x00...\x00', ('aaa',)], 'name':'bbb'}
# {'type':'varset', 'uid':6, 'val':['\x02\x00...\x00', ('aaa',), ('bbb',)], 'name':'ccc'}
# {'type':'varset', 'uid':8, 'val':[('ccc',), '\x03\x00...\x00'], 'name':'ddd'}
#
# Here initializing 'ddd', requires to initialize 'ccc' first and to initialize 'ccc' we
# have to initialize 'aaa' and 'bbb', but to initialize 'bbb' we have to also initialize
# 'aaa'. The SPL compiler ensures that there not cycles.
#
# :Arg variable: The variable to initialize
# :Ret: The address that the contents of this are stored.
#
def __init_variable_rcsv( self, variable, depth=0 ):
dbg_prnt(DBG_LVL_3, "Initializing variable '%s' (depth: %d)" % (variable, depth))
# ---------------------------------------------------------------------
# Find the address for that variable
# ---------------------------------------------------------------------
if variable in self.__vartab: # register/used variable?
addr = self.__vartab[ variable ] # variable should be placed at a given location
if addr in self.__inivar: # if variable has already been initialized
dbg_prnt(DBG_LVL_3, "'%s' is already initialized." % variable)
return addr # just return it
# addr can be a number, like 0x7ffffffffff01a0 or a string (dereference)
# like "*<BV64 0x7ffffffffff0020>", or "*<BV64 rsi_713_64 + 0x18>".
#
# If the address gets dereferenced (*X), we store the values into the pool
# and write pool's address into X (indirect) at runtime.
if isinstance(addr, str): # is addr a dereference?
addr = self.__pool_RSVP(variable) # make address pointing to the pool
self.__vartab[ variable ] = addr # and add it to the vartab
else:
# Variable is not in the vartab => Free. That is, variable can be stored
# at any memory location, so we place it on the pool
addr = self.__pool_RSVP(variable)
self.__vartab[ variable ] = addr
# ---------------------------------------------------------------------
# Store the values to that address
# ---------------------------------------------------------------------
orig_addr = addr # get a backup as address is being modified
values = '' # concatenated values
relvals = [] # values in the relative form
for val in self.__get_var_values(variable): # for each value
if isinstance(val, tuple):
# Value is a reference to another variable, Recursively initialize the
# variable or get its address if it's already initialized. Recursion
# always halts, as SPL compiler ensures that variables aren't used before
# they initialized so the following cases can't happen:
# int x = {&x};
# int a = {&b}; int b = 10;
# find the address for that variable and pack it
address = self.__init_variable_rcsv( val[0], depth+1 )
val = struct.pack("<Q", address)
relvals.append( address ) # relative value is an address
else:
relvals.append( val ) # relative value is a string
# at this point, value is a string (SPL compiler 'packs' integers)
values += val
# write value byte-by-byte. Memory address must be immutable;
# any writes to it are not allowed
for i in range(len(values)):
self.__state.memory.store(addr + i, values[i])